diff --git a/sys/arch/amd64/amd64/conf.c b/sys/arch/amd64/amd64/conf.c
index ece073225..ad10a38a1 100644
--- a/sys/arch/amd64/amd64/conf.c
+++ b/sys/arch/amd64/amd64/conf.c
@@ -103,7 +103,7 @@ int nblkdev = nitems(bdevsw);
(dev_type_write((*))) enodev, \
dev_init(c,n,ioctl), \
(dev_type_stop((*))) enodev, 0, seltrue, \
- (dev_type_mmap((*))) enodev, 0, 0, seltrue_kqfilter }
+ dev_init(c,n,mmap) }

#define mmread mmrw
#define mmwrite mmrw
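
The hunk above swaps the stubbed-out mmap entry in the vmm character-device switch for dev_init(c,n,mmap), i.e. the vmmmmap() handler added in vmm.c below. Since that handler currently passes any offset straight through (see its TODO), a bounds-checked variant might look like the following sketch; the range check against the guest MMIO BAR window is an assumption, not part of this diff:

/* Hypothetical sketch only: restrict /dev/vmm mappings to the
 * guest PCI MMIO BAR window defined in vmmvar.h. A d_mmap handler
 * returns the physical page address for the offset, or -1 to
 * reject the mapping. */
paddr_t
vmmmmap_checked(dev_t dev, off_t off, int prot)
{
	if (off < VMM_PCI_MMIO_BAR_BASE || off > VMM_PCI_MMIO_BAR_END)
		return (-1);	/* reject out-of-range offsets */
	return (off);
}
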
diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c
index 84fcb23a5..a71765555 100644
--- a/sys/arch/amd64/amd64/vmm.c
+++ b/sys/arch/amd64/amd64/vmm.c
@@ -41,10 +41,17 @@
#include <dev/isa/isareg.h>
#include <dev/pv/pvreg.h>

+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcidevs.h>
+
/* #define VMM_DEBUG */

void *l1tf_flush_region;

+extern void *_iommu_domain(int, int, int, int, int *);
+extern void _iommu_map(void *, vaddr_t, bus_addr_t, bus_size_t);
+
#ifdef VMM_DEBUG
#define DPRINTF(x...) do { printf(x); } while(0)
#else
@@ -114,6 +121,7 @@ void vmm_attach(struct device *, struct device *, void *);
int vmmopen(dev_t, int, int, struct proc *);
int vmmioctl(dev_t, u_long, caddr_t, int, struct proc *);
int vmmclose(dev_t, int, int, struct proc *);
+paddr_t vmmmmap(dev_t, off_t, int);
int vmm_start(void);
int vmm_stop(void);
size_t vm_create_check_mem_ranges(struct vm_create_params *);
@@ -303,6 +311,252 @@ extern struct gate_descriptor *idt;
#define CR_CLTS 2
#define CR_LMSW 3

+/* Keep track of interrupts for PCI device */
+struct vppt {
+ pci_chipset_tag_t pc;
+ pcitag_t tag;
+ pci_intr_handle_t ih;
+ uint32_t pending;
+ void *cookie;
+ TAILQ_ENTRY(vppt) next;
+};
+TAILQ_HEAD(,vppt) vppts = TAILQ_HEAD_INITIALIZER(vppts);
+
+void
+vmm_mapintr(pci_chipset_tag_t pc, struct pci_attach_args *pa) {
+ int bus, dev, fun;
+ struct vppt *ppt;
+
+ TAILQ_FOREACH(ppt, &vppts, next) {
+ if (ppt->pc == pc && ppt->tag == pa->pa_tag)
+ return;
+ }
+
+ /* Add PCI device to list */
+ ppt = malloc(sizeof(*ppt), M_DEVBUF, M_ZERO | M_WAITOK);
+ if (!ppt)
+ return;
+ TAILQ_INSERT_TAIL(&vppts, ppt, next);
+
+ ppt->pc = pc;
+ ppt->tag = pa->pa_tag;
+ pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
+ printf("Check Interrupt: %d/%d/%d : %d\n", bus, dev, fun, pa->pa_intrpin);
+ if (pci_intr_map_msi(pa, &ppt->ih) || pci_intr_map(pa, &ppt->ih)) {
+ printf("Couldn't map %d/%d/%d\n", bus, dev, fun);
+ return;
+ }
+ printf("Mapped %d/%d/%d intr %d/%d\n", bus, dev, fun, ppt->ih.line, ppt->ih.pin);
+}
+
+/* Issue PCI Read/Write to physical device */
+static int
+vm_pciio(struct vm_pciio *ptd)
+{
+ pci_chipset_tag_t pc = NULL;
+ pcitag_t tag;
+
+ if (ptd->reg & 3)
+ return (EINVAL);
+ tag = pci_make_tag(pc, ptd->bus, ptd->dev, ptd->func);
+ if (ptd->dir == VEI_DIR_OUT) {
+ pci_conf_write(pc, tag, ptd->reg, ptd->val);
+ } else {
+ ptd->val = pci_conf_read(pc, tag, ptd->reg);
+ }
+ return 0;
+}
+
+/* Probably should pre-register bus_space_map/bus_space_read_xx? */
+static int
+vm_pio(struct vm_pio *pio)
+{
+ bus_space_tag_t iot;
+ bus_space_handle_t ioh;
+ int rc;
+
+ iot = (pio->type == 1 ? X86_BUS_SPACE_IO : X86_BUS_SPACE_MEM);
+ rc = bus_space_map(iot, pio->base, pio->size, 0, &ioh);
+ if (rc != 0) {
+ printf("iomap of %x fails %x\n", pio->base, rc);
+ return (EINVAL);
+ }
+ if (pio->dir == VEI_DIR_OUT) {
+ switch (pio->size) {
+ case 1:
+ bus_space_write_1(iot, ioh, 0, pio->data);
+ break;
+ case 2:
+ bus_space_write_2(iot, ioh, 0, pio->data);
+ break;
+ case 4:
+ bus_space_write_4(iot, ioh, 0, pio->data);
+ break;
+ default:
+ printf("pio:no wrsize: %d\n", pio->size);
+ return EINVAL;
+ }
+ } else {
+ switch (pio->size) {
+ case 1:
+ pio->data = bus_space_read_1(iot, ioh, 0);
+ break;
+ case 2:
+ pio->data = bus_space_read_2(iot, ioh, 0);
+ break;
+ case 4:
+ pio->data = bus_space_read_4(iot, ioh, 0);
+ break;
+ default:
+ printf("pio:no rdsize: %d\n", pio->size);
+ return EINVAL;
+ }
+ }
+ bus_space_unmap(iot, ioh, pio->size);
+#if 0
+ if (pio->dir == VEI_DIR_OUT) {
+ switch (pio->size) {
+ case 1:
+ outb(pio->base, pio->data);
+ break;
+ case 2:
+ outw(pio->base, pio->data);
+ break;
+ case 4:
+ outl(pio->base, pio->data);
+ break;
+ default:
+ printf("pio:no wrsize: %d\n", pio->base);
+ return EINVAL;
+ }
+ } else {
+ switch (pio->size) {
+ case 1:
+ pio->data = inb(pio->base);
+ break;
+ case 2:
+ pio->data = inw(pio->base);
+ break;
+ case 4:
+ pio->data = inl(pio->base);
+ break;
+ default:
+ printf("pio:no rdsize: %d\n", pio->base);
+ return EINVAL;
+ }
+ }
+#endif
+#if 0
+ printf("%ld pio; %s(%x,%llx)\n", sizeof(*pio),
+ pio->dir == VEI_DIR_OUT ? "out" : "in", pio->base, pio->data);
+#endif
+ return 0;
+}
+
+/* Device interrupt handler. Increase pending count */
+static int
+vmm_intr(void *arg)
+{
+ struct vppt *ppt = arg;
+
+ ppt->pending++;
+ return 1;
+}
+
+/* Get interrupt pending count for a device */
+static int
+vm_getintr(struct vm_ptdpci *ptd)
+{
+ pci_chipset_tag_t pc = NULL;
+ pcitag_t tag;
+ struct vppt *ppt;
+
+ tag = pci_make_tag(pc, ptd->bus, ptd->dev, ptd->func);
+ TAILQ_FOREACH(ppt, &vppts, next) {
+ if (ppt->tag == tag) {
+ ptd->pending = ppt->pending;
+ }
+ }
+ return (0);
+}
+
+/* Get PCI/Bar info */
+static int
+vm_getbar(struct vm_ptdpci *ptd)
+{
+ pci_chipset_tag_t pc = NULL;
+ pcitag_t tag;
+ bus_addr_t base;
+ bus_size_t size;
+ pcireg_t type = 0;
+ int i, reg, did;
+ void *dom;
+ struct vm *vm;
+ struct vppt *ppt;
+ uint32_t id_reg;
+
+ /* Make sure this is a valid PCI device */
+ tag = pci_make_tag(pc, ptd->bus, ptd->dev, ptd->func);
+ id_reg = pci_conf_read(pc, tag, PCI_ID_REG);
+ printf("getbar: %d.%d.%d %x\n",
+ ptd->bus, ptd->dev, ptd->func, id_reg);
+ if (PCI_VENDOR(id_reg) == PCI_VENDOR_INVALID)
+ return ENODEV;
+ if (PCI_VENDOR(id_reg) == 0)
+ return ENODEV;
+
+ /* Scan all BARs and get type/address/length */
+ memset(&ptd->barinfo, 0, sizeof(ptd->barinfo));
+ for (i = 0, reg = PCI_MAPREG_START; reg < PCI_MAPREG_END; i++, reg += 4) {
+ if (!pci_mapreg_probe(pc, tag, reg, &type))
+ continue;
+ if (pci_mapreg_info(pc, tag, reg, type, &base, &size, NULL))
+ continue;
+ printf(" %d: %x %.8lx %.16lx\n", i, type, size, base);
+ ptd->barinfo[i].type = type;
+ ptd->barinfo[i].size = size;
+ ptd->barinfo[i].addr = base;
+ /* Skip next BAR for 64-bit type */
+ if (type & PCI_MAPREG_MEM_TYPE_64BIT) {
+ reg += 4;
+ i++;
+ }
+ }
+
+ /* don't support if mmio and no domain? */
+ did = 0xdeadcafe;
+ dom = _iommu_domain(0, ptd->bus, ptd->dev, ptd->func, &did);
+ printf("domain is: %p:%x\n", dom, did);
+ if (!dom) {
+ return (ENODEV);
+ }
+ /* Map VMM DMA to iommu */
+ vm = SLIST_FIRST(&vmm_softc->vm_list);
+ if (vm != NULL) {
+ paddr_t pa;
+
+ for (i = 0; i < vm->vm_nmemranges; i++) {
+ pa = vm->vm_memranges[i].vmr_gpa;
+ printf("mapping va:%lx pa:%lx\n", vm->vm_memranges[i].vmr_va, pa);
+ _iommu_map(dom,
+ vm->vm_memranges[i].vmr_va,
+ pa, vm->vm_memranges[i].vmr_size);
+ }
+ }
+ /* Setup interrupt */
+ TAILQ_FOREACH(ppt, &vppts, next) {
+ if (ppt->tag == tag) {
+ if (!ppt->cookie) {
+ ppt->cookie = pci_intr_establish(ppt->pc, ppt->ih, IPL_BIO, vmm_intr,
+ ppt, "ppt");
+ }
+ printf("Establish intr : %p\n", ppt->cookie);
+ ppt->pending = 0;
+ }
+ }
+ return 0;
+}
+
/*
* vmm_enabled
*
@@ -506,7 +760,18 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
case VMM_IOC_WRITEVMPARAMS:
ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 1);
break;
-
+ case VMM_IOC_BARINFO:
+ ret = vm_getbar((struct vm_ptdpci *)data);
+ break;
+ case VMM_IOC_GETINTR:
+ ret = vm_getintr((struct vm_ptdpci *)data);
+ break;
+ case VMM_IOC_PCIIO:
+ ret = vm_pciio((struct vm_pciio *)data);
+ break;
+ case VMM_IOC_PIO:
+ ret = vm_pio((struct vm_pio *)data);
+ break;
default:
DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd);
ret = ENOTTY;
@@ -515,6 +780,13 @@ vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
return (ret);
}

+/* MMAP any address (TODO: fixme) for mapping BAR */
+paddr_t
+vmmmmap(dev_t dev, off_t off, int prot)
+{
+ return off;
+}
+
/*
* pledge_ioctl_vmm
*
@@ -541,6 +813,10 @@ pledge_ioctl_vmm(struct proc *p, long com)
case VMM_IOC_MPROTECT_EPT:
case VMM_IOC_READVMPARAMS:
case VMM_IOC_WRITEVMPARAMS:
+ case VMM_IOC_BARINFO:
+ case VMM_IOC_PCIIO:
+ case VMM_IOC_PIO:
+ case VMM_IOC_GETINTR:
return (0);
}

@@ -558,6 +834,34 @@ vmmclose(dev_t dev, int flag, int mode, struct proc *p)
return 0;
}

+/*
+ * vm_find_vcpu
+ *
+ * Lookup VMM VCPU by ID number
+ *
+ * Parameters:
+ * vm: vm structure
+ * id: index id of vcpu
+ *
+ * Returns pointer to vcpu structure if successful, NULL otherwise
+ */
+static struct vcpu *
+vm_find_vcpu(struct vm *vm, uint32_t id)
+{
+ struct vcpu *vcpu;
+
+ if (vm == NULL)
+ return NULL;
+ rw_enter_read(&vm->vm_vcpu_lock);
+ SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
+ if (vcpu->vc_id == id)
+ break;
+ }
+ rw_exit_read(&vm->vm_vcpu_lock);
+ return vcpu;
+}
+
+
/*
* vm_resetcpu
*
@@ -591,12 +895,7 @@ vm_resetcpu(struct vm_resetcpu_params *vrp)
return (error);
}

- rw_enter_read(&vm->vm_vcpu_lock);
- SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- if (vcpu->vc_id == vrp->vrp_vcpu_id)
- break;
- }
- rw_exit_read(&vm->vm_vcpu_lock);
+ vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);

if (vcpu == NULL) {
DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
@@ -657,12 +956,7 @@ vm_intr_pending(struct vm_intr_params *vip)
return (error);
}

- rw_enter_read(&vm->vm_vcpu_lock);
- SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- if (vcpu->vc_id == vip->vip_vcpu_id)
- break;
- }
- rw_exit_read(&vm->vm_vcpu_lock);
+ vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id);
rw_exit_read(&vmm_softc->vm_lock);

if (vcpu == NULL)
@@ -722,12 +1016,7 @@ vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir) {
return (error);
}

- rw_enter_read(&vm->vm_vcpu_lock);
- SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- if (vcpu->vc_id == vpp->vpp_vcpu_id)
- break;
- }
- rw_exit_read(&vm->vm_vcpu_lock);
+ vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id);
rw_exit_read(&vmm_softc->vm_lock);

if (vcpu == NULL)
@@ -786,12 +1075,7 @@ vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
return (error);
}

- rw_enter_read(&vm->vm_vcpu_lock);
- SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- if (vcpu->vc_id == vrwp->vrwp_vcpu_id)
- break;
- }
- rw_exit_read(&vm->vm_vcpu_lock);
+ vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id);
rw_exit_read(&vmm_softc->vm_lock);

if (vcpu == NULL)
@@ -858,12 +1142,7 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
return (ret);
}

- rw_enter_read(&vm->vm_vcpu_lock);
- SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
- if (vcpu->vc_id == vmep->vmep_vcpu_id)
- break;
- }
- rw_exit_read(&vm->vm_vcpu_lock);
+ vcpu = vm_find_vcpu(vm, vmep->vmep_vcpu_id);

if (vcpu == NULL) {
DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
@@ -1907,6 +2186,7 @@ vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask,
gprs[VCPU_REGS_R14] = vcpu->vc_gueststate.vg_r14;
gprs[VCPU_REGS_R15] = vcpu->vc_gueststate.vg_r15;
gprs[VCPU_REGS_RBP] = vcpu->vc_gueststate.vg_rbp;
+ gprs[VCPU_REGS_RAX] = vmcb->v_rax;
gprs[VCPU_REGS_RIP] = vmcb->v_rip;
gprs[VCPU_REGS_RSP] = vmcb->v_rsp;
gprs[VCPU_REGS_RFLAGS] = vmcb->v_rflags;
@@ -2186,6 +2466,7 @@ vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask,
vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP];
vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP];

+ vmcb->v_rax = gprs[VCPU_REGS_RAX];
vmcb->v_rip = gprs[VCPU_REGS_RIP];
vmcb->v_rsp = gprs[VCPU_REGS_RSP];
vmcb->v_rflags = gprs[VCPU_REGS_RFLAGS];
@@ -5348,7 +5629,6 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
struct vm_mem_range *vmr;

if (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) {
- DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa);
return (VMM_MEM_TYPE_REGULAR);
}

@@ -5457,6 +5737,11 @@ svm_fault_page(struct vcpu *vcpu, paddr_t gpa)

fault_type = svm_get_guest_faulttype(vmcb);

+ vcpu->vc_exit.vee.vee_gpa = gpa;
+ if ((gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) || fault_type == VM_FAULT_PROTECT) {
+ vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
+ return (EAGAIN);
+ }
ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, fault_type,
PROT_READ | PROT_WRITE | PROT_EXEC);
if (ret)
@@ -5517,7 +5802,8 @@ vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
return (EINVAL);
}

- if (fault_type == VM_FAULT_PROTECT) {
+ vcpu->vc_exit.vee.vee_gpa = gpa;
+ if ((gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) || fault_type == VM_FAULT_PROTECT) {
vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
return (EAGAIN);
}
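
Taken together, the vmm.c additions give a userland monitor enough to discover a passthrough device's BARs and map them: guest faults in the PCI MMIO window now exit to userland with vee_gpa filled in, and VMM_IOC_BARINFO plus the new mmap entry point provide the host-side mapping. A minimal, hypothetical sketch of that flow follows; the /dev/vmm path, error handling, and the BAR index i are assumptions, not part of the diff:

/* Hypothetical userland sketch: query BARs via VMM_IOC_BARINFO,
 * then map one MMIO BAR through /dev/vmm via the vmmmmap() handler
 * added above. */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <machine/vmmvar.h>
#include <fcntl.h>
#include <stdio.h>

int
map_ptd_bar(int bus, int dev, int func, int i, void **vap)
{
	struct vm_ptdpci ptd = { .bus = bus, .dev = dev, .func = func };
	int fd;

	if ((fd = open("/dev/vmm", O_RDWR)) == -1)
		return (-1);
	if (ioctl(fd, VMM_IOC_BARINFO, &ptd) == -1)
		return (-1);
	if (ptd.barinfo[i].size == 0)
		return (-1);	/* BAR not implemented */
	*vap = mmap(NULL, ptd.barinfo[i].size, PROT_READ | PROT_WRITE,
	    MAP_SHARED, fd, (off_t)ptd.barinfo[i].addr);
	return (*vap == MAP_FAILED ? -1 : 0);
}
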
diff --git a/sys/arch/amd64/conf/GENERIC b/sys/arch/amd64/conf/GENERIC
index 2c49f91a1..6f4f6b74c 100644
--- a/sys/arch/amd64/conf/GENERIC
+++ b/sys/arch/amd64/conf/GENERIC
@@ -45,6 +45,7 @@ acpibtn* at acpi?
acpicpu* at acpi?
acpicmos* at acpi?
acpidock* at acpi?
+acpidmar0 at acpi?
acpiec* at acpi?
acpipci* at acpi?
acpiprt* at acpi?
@@ -379,7 +380,7 @@ drm0 at radeondrm? primary 1
drm* at radeondrm?
wsdisplay0 at radeondrm? primary 1
wsdisplay* at radeondrm? mux -1
-amdgpu* at pci?
+amdgpu* at pci? disable
drm0 at amdgpu? primary 1
drm* at amdgpu?
wsdisplay0 at amdgpu? primary 1
diff --git a/sys/arch/amd64/conf/RAMDISK_CD b/sys/arch/amd64/conf/RAMDISK_CD
index 91022751e..c87cea352 100644
--- a/sys/arch/amd64/conf/RAMDISK_CD
+++ b/sys/arch/amd64/conf/RAMDISK_CD
@@ -48,6 +48,7 @@ sdhc* at acpi?
acpihve* at acpi?
chvgpio* at acpi?
glkgpio* at acpi?
+acpidmar0 at acpi?

mpbios0 at bios0

diff --git a/sys/arch/amd64/include/pci_machdep.h b/sys/arch/amd64/include/pci_machdep.h
index bc295cc22..c725bdc73 100644
--- a/sys/arch/amd64/include/pci_machdep.h
+++ b/sys/arch/amd64/include/pci_machdep.h
@@ -91,7 +91,8 @@ void *pci_intr_establish_cpu(pci_chipset_tag_t, pci_intr_handle_t,
int, struct cpu_info *,
int (*)(void *), void *, const char *);
void pci_intr_disestablish(pci_chipset_tag_t, void *);
-#define pci_probe_device_hook(c, a) (0)
+int pci_probe_device_hook(pci_chipset_tag_t,
+ struct pci_attach_args *);

void pci_dev_postattach(struct device *, struct pci_attach_args *);

diff --git a/sys/arch/amd64/include/vmmvar.h b/sys/arch/amd64/include/vmmvar.h
index 4990a5c53..ffbc74528 100644
--- a/sys/arch/amd64/include/vmmvar.h
+++ b/sys/arch/amd64/include/vmmvar.h
@@ -32,6 +32,7 @@
#define VMM_MAX_VCPUS_PER_VM 64
#define VMM_MAX_VM_MEM_SIZE 32768
#define VMM_MAX_NICS_PER_VM 4
+#define VMM_MAX_PCI_PTHRU 4

#define VMM_PCI_MMIO_BAR_BASE 0xF0000000ULL
#define VMM_PCI_MMIO_BAR_END 0xFFFFFFFFULL
@@ -359,6 +360,7 @@ struct vm_exit_inout {
*/
struct vm_exit_eptviolation {
uint8_t vee_fault_type;
+ uint64_t vee_gpa;
};

/*
@@ -480,6 +482,9 @@ struct vm_create_params {

/* Output parameter from VMM_IOC_CREATE */
uint32_t vcp_id;
+
+ size_t vcp_npcis;
+ uint32_t vcp_pcis[VMM_MAX_PCI_PTHRU];
};

struct vm_run_params {
@@ -578,6 +583,47 @@ struct vm_mprotect_ept_params {
int vmep_prot;
};

+struct vm_pciio {
+ /* input */
+ uint32_t seg;
+ uint32_t bus;
+ uint32_t dev;
+ uint32_t func;
+
+ uint32_t dir;
+ uint32_t reg;
+
+ /* output */
+ uint32_t val;
+};
+
+#define MAXBAR 6
+struct vm_pio {
+ uint32_t type;
+ uint32_t dir;
+ uint32_t size;
+ uint32_t base;
+ uint64_t data;
+};
+
+/* Passthrough PCI device structure */
+struct vm_ptdpci {
+ uint8_t bus;
+ uint8_t dev;
+ uint8_t func;
+
+ uint8_t id;
+ uint32_t pending;
+ uint32_t flags;
+
+ struct {
+ uint32_t type;
+ uint32_t size;
+ uint64_t addr;
+ void *va;
+ } barinfo[MAXBAR];
+};
+
/* IOCTL definitions */
#define VMM_IOC_CREATE _IOWR('V', 1, struct vm_create_params) /* Create VM */
#define VMM_IOC_RUN _IOWR('V', 2, struct vm_run_params) /* Run VCPU */
@@ -594,6 +640,11 @@ struct vm_mprotect_ept_params {
/* Control the protection of ept pages*/
#define VMM_IOC_MPROTECT_EPT _IOW('V', 11, struct vm_mprotect_ept_params)

+#define VMM_IOC_BARINFO _IOWR('V', 12, struct vm_ptdpci)
+#define VMM_IOC_GETINTR _IOWR('V', 13, struct vm_ptdpci)
+#define VMM_IOC_PCIIO _IOWR('V', 14, struct vm_pciio)
+#define VMM_IOC_PIO _IOWR('V', 15, struct vm_pio)
+
/* CPUID masks */
/*
* clone host capabilities minus:
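
As a usage note for the ioctl numbers above, a hypothetical config-space read through VMM_IOC_PCIIO could look like the sketch below; vmm_fd is assumed to be an open /dev/vmm descriptor, VEI_DIR_IN is vmm's existing "read" direction constant, and PCI_ID_REG comes from pcireg.h (none of this is part of the diff):

/* Hypothetical sketch: read the vendor/device ID register of a
 * passthrough device with the new VMM_IOC_PCIIO ioctl. */
struct vm_pciio pio = {
	.bus = 3, .dev = 0, .func = 0,	/* example BDF */
	.dir = VEI_DIR_IN,		/* read from device */
	.reg = PCI_ID_REG,		/* config offset 0x00 */
};

if (ioctl(vmm_fd, VMM_IOC_PCIIO, &pio) == 0)
	printf("id reg: 0x%08x\n", pio.val);
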
diff --git a/sys/arch/amd64/pci/pci_machdep.c b/sys/arch/amd64/pci/pci_machdep.c
index cf4e835de..e9902a231 100644
--- a/sys/arch/amd64/pci/pci_machdep.c
+++ b/sys/arch/amd64/pci/pci_machdep.c
@@ -89,6 +89,11 @@
#include <machine/mpbiosvar.h>
#endif

+#include "acpi.h"
+#if NACPI > 0
+#include <dev/acpi/acpidmar.h>
+#endif
+
/*
* Memory Mapped Configuration space access.
*
@@ -797,7 +802,19 @@ pci_init_extents(void)
}
}

-#include "acpi.h"
+extern void vmm_mapintr(pci_chipset_tag_t pc, struct pci_attach_args *pa);
+
+int
+pci_probe_device_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
+{
+#if NACPI > 0
+ if (acpidmar_sc)
+ acpidmar_pci_hook(pc, pa);
+#endif
+ vmm_mapintr(pc, pa);
+ return 0;
+}
+
#if NACPI > 0
void acpi_pci_match(struct device *, struct pci_attach_args *);
pcireg_t acpi_pci_min_powerstate(pci_chipset_tag_t, pcitag_t);
diff --git a/sys/dev/acpi/acpi.c b/sys/dev/acpi/acpi.c
index a6239198e..484f03fa1 100644
--- a/sys/dev/acpi/acpi.c
+++ b/sys/dev/acpi/acpi.c
@@ -49,6 +49,7 @@
#include <dev/acpi/amltypes.h>
#include <dev/acpi/acpidev.h>
#include <dev/acpi/dsdt.h>
+#include <dev/acpi/acpidmar.h>
#include <dev/wscons/wsdisplayvar.h>

#include <dev/pci/pcidevs.h>
@@ -2448,6 +2449,9 @@ acpi_sleep_pm(struct acpi_softc *sc, int state)
sc->sc_fadt->pm2_cnt_blk && sc->sc_fadt->pm2_cnt_len)
acpi_write_pmreg(sc, ACPIREG_PM2_CNT, 0, ACPI_PM2_ARB_DIS);

+ if (acpidmar_sc)
+ acpidmar_sw(DVACT_SUSPEND);
+
/* Write SLP_TYPx values */
rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
@@ -2483,6 +2487,9 @@ acpi_resume_pm(struct acpi_softc *sc, int fromstate)
{
uint16_t rega, regb, en;

+ if (acpidmar_sc)
+ acpidmar_sw(DVACT_RESUME);
+
/* Write SLP_TYPx values */
rega = acpi_read_pmreg(sc, ACPIREG_PM1A_CNT, 0);
regb = acpi_read_pmreg(sc, ACPIREG_PM1B_CNT, 0);
diff --git a/sys/dev/acpi/acpidmar.c b/sys/dev/acpi/acpidmar.c
new file mode 100644
index 000000000..c384f4a15
--- /dev/null
+++ b/sys/dev/acpi/acpidmar.c
@@ -0,0 +1,3051 @@
+/*
+ * Copyright (c) 2015 Jordan Hargrave <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/device.h>
+#include <sys/malloc.h>
+#include <sys/queue.h>
+#include <sys/types.h>
+#include <sys/mbuf.h>
+#include <sys/proc.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/apicvar.h>
+#include <machine/biosvar.h>
+#include <machine/cpuvar.h>
+#include <machine/bus.h>
+
+#include <dev/acpi/acpireg.h>
+#include <dev/acpi/acpivar.h>
+#include <dev/acpi/acpidev.h>
+#include <dev/acpi/amltypes.h>
+#include <dev/acpi/dsdt.h>
+
+#include <uvm/uvm_extern.h>
+
+#include <machine/i8259.h>
+#include <machine/i82093reg.h>
+#include <machine/i82093var.h>
+#include <machine/i82489reg.h>
+#include <machine/i82489var.h>
+
+#include <machine/mpbiosvar.h>
+
+#include <dev/pci/pcireg.h>
+#include <dev/pci/pcivar.h>
+#include <dev/pci/pcidevs.h>
+#include <dev/pci/ppbreg.h>
+
+#include "ioapic.h"
+
+#include "acpidmar.h"
+#include "amd_iommu.h"
+
+#define dprintf(x...)
+
+#ifdef DDB
+int acpidmar_ddb = 0;
+#endif
+
+int intel_iommu_gfx_mapped = 0;
+int force_cm = 1;
+
+void showahci(void *);
+
+/* Page Table Entry per domain */
+struct iommu_softc;
+
+static inline int
+mksid(int b, int d, int f)
+{
+ return (b << 8) + (d << 3) + f;
+}
+
+static inline int
+sid_devfn(int sid)
+{
+ return sid & 0xff;
+}
+
+static inline int
+sid_bus(int sid)
+{
+ return (sid >> 8) & 0xff;
+}
+
+static inline int
+sid_dev(int sid)
+{
+ return (sid >> 3) & 0x1f;
+}
+
+static inline int
+sid_fun(int sid)
+{
+ return (sid >> 0) & 0x7;
+}
+
+/* Page Table Entry per domain */
+static struct ivhd_dte hwdte[65536] __aligned(PAGE_SIZE);
+
+/* Alias mapping */
+#define ALIAS_VALID 0x10000
+static int sid_alias[65536];
+
+struct domain_dev {
+ int sid;
+ int sec;
+ int sub;
+ TAILQ_ENTRY(domain_dev) link;
+};
+
+struct domain {
+ struct iommu_softc *iommu;
+ int did;
+ int gaw;
+ struct pte_entry *pte;
+ paddr_t ptep;
+ struct bus_dma_tag dmat;
+ int flag;
+
+ struct mutex exlck;
+ char exname[32];
+ struct extent *iovamap;
+ TAILQ_HEAD(,domain_dev) devices;
+ TAILQ_ENTRY(domain) link;
+};
+
+#define DOM_DEBUG 0x1
+#define DOM_NOMAP 0x2
+
+struct dmar_devlist {
+ int type;
+ int bus;
+ int ndp;
+ struct acpidmar_devpath *dp;
+ TAILQ_ENTRY(dmar_devlist) link;
+};
+
+TAILQ_HEAD(devlist_head, dmar_devlist);
+
+struct ivhd_devlist {
+ int start_id;
+ int end_id;
+ int cfg;
+ TAILQ_ENTRY(ivhd_devlist) link;
+};
+
+struct rmrr_softc {
+ TAILQ_ENTRY(rmrr_softc) link;
+ struct devlist_head devices;
+ int segment;
+ uint64_t start;
+ uint64_t end;
+};
+
+struct atsr_softc {
+ TAILQ_ENTRY(atsr_softc) link;
+ struct devlist_head devices;
+ int segment;
+ int flags;
+};
+
+struct iommu_pic {
+ struct pic pic;
+ struct iommu_softc *iommu;
+};
+
+#define IOMMU_FLAGS_CATCHALL 0x1
+#define IOMMU_FLAGS_BAD 0x2
+#define IOMMU_FLAGS_SUSPEND 0x4
+
+struct iommu_softc {
+ TAILQ_ENTRY(iommu_softc) link;
+ struct devlist_head devices;
+ int id;
+ int flags;
+ int segment;
+
+ struct mutex reg_lock;
+
+ bus_space_tag_t iot;
+ bus_space_handle_t ioh;
+
+ uint64_t cap;
+ uint64_t ecap;
+ uint32_t gcmd;
+
+ int mgaw;
+ int agaw;
+ int ndoms;
+
+ struct root_entry *root;
+ struct context_entry *ctx[256];
+
+ void *intr;
+ struct iommu_pic pic;
+ int fedata;
+ uint64_t feaddr;
+ uint64_t rtaddr;
+
+ // Queued Invalidation
+ int qi_head;
+ int qi_tail;
+ paddr_t qip;
+ struct qi_entry *qi;
+
+ struct domain *unity;
+ TAILQ_HEAD(,domain) domains;
+
+ // AMD iommu
+ struct ivhd_dte *dte;
+ void *cmd_tbl;
+ void *evt_tbl;
+};
+
+static inline int iommu_bad(struct iommu_softc *sc)
+{
+ return (sc->flags & IOMMU_FLAGS_BAD);
+}
+
+static inline int iommu_enabled(struct iommu_softc *sc)
+{
+ if (sc->dte) {
+ return 1;
+ }
+ return (sc->gcmd & GCMD_TE);
+}
+
+struct acpidmar_softc {
+ struct device sc_dev;
+
+ pci_chipset_tag_t sc_pc;
+ bus_space_tag_t sc_memt;
+ int sc_haw;
+ int sc_flags;
+
+ TAILQ_HEAD(,iommu_softc) sc_drhds;
+ TAILQ_HEAD(,rmrr_softc) sc_rmrrs;
+ TAILQ_HEAD(,atsr_softc) sc_atsrs;
+};
+
+int acpidmar_activate(struct device *, int);
+int acpidmar_match(struct device *, void *, void *);
+void acpidmar_attach(struct device *, struct device *, void *);
+struct domain *acpidmar_pci_attach(struct acpidmar_softc *, int, int, int);
+
+struct cfattach acpidmar_ca = {
+ sizeof(struct acpidmar_softc), acpidmar_match, acpidmar_attach,
+};
+
+struct cfdriver acpidmar_cd = {
+ NULL, "acpidmar", DV_DULL
+};
+
+struct acpidmar_softc *acpidmar_sc;
+int acpidmar_intr(void *);
+
+#define DID_UNITY 0x1
+
+void _dumppte(struct pte_entry *, int, vaddr_t);
+
+struct domain *domain_create(struct iommu_softc *, int);
+struct domain *domain_lookup(struct acpidmar_softc *, int, int);
+
+void domain_unload_map(struct domain *, bus_dmamap_t);
+void domain_load_map(struct domain *, bus_dmamap_t, int, int, const char *);
+
+void (*domain_map_page)(struct domain *, vaddr_t, paddr_t, uint64_t);
+void domain_map_page_amd(struct domain *, vaddr_t, paddr_t, uint64_t);
+void domain_map_page_intel(struct domain *, vaddr_t, paddr_t, uint64_t);
+void domain_map_pthru(struct domain *, paddr_t, paddr_t);
+
+void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
+void acpidmar_parse_devscope(union acpidmar_entry *, int, int,
+ struct devlist_head *);
+int acpidmar_match_devscope(struct devlist_head *, pci_chipset_tag_t, int);
+
+void acpidmar_init(struct acpidmar_softc *, struct acpi_dmar *);
+void acpidmar_drhd(struct acpidmar_softc *, union acpidmar_entry *);
+void acpidmar_rmrr(struct acpidmar_softc *, union acpidmar_entry *);
+void acpidmar_atsr(struct acpidmar_softc *, union acpidmar_entry *);
+void acpiivrs_init(struct acpidmar_softc *, struct acpi_ivrs *);
+
+void *acpidmar_intr_establish(void *, int, int (*)(void *), void *,
+ const char *);
+
+void iommu_writel(struct iommu_softc *, int, uint32_t);
+uint32_t iommu_readl(struct iommu_softc *, int);
+void iommu_writeq(struct iommu_softc *, int, uint64_t);
+uint64_t iommu_readq(struct iommu_softc *, int);
+void iommu_showfault(struct iommu_softc *, int,
+ struct fault_entry *);
+void iommu_showcfg(struct iommu_softc *, int);
+
+int iommu_init(struct acpidmar_softc *, struct iommu_softc *,
+ struct acpidmar_drhd *);
+int iommu_enable_translation(struct iommu_softc *, int);
+void iommu_enable_qi(struct iommu_softc *, int);
+void iommu_flush_cache(struct iommu_softc *, void *, size_t);
+void *iommu_alloc_page(struct iommu_softc *, paddr_t *);
+void iommu_flush_write_buffer(struct iommu_softc *);
+void iommu_issue_qi(struct iommu_softc *, struct qi_entry *);
+
+void iommu_flush_ctx(struct iommu_softc *, int, int, int, int);
+void iommu_flush_ctx_qi(struct iommu_softc *, int, int, int, int);
+void iommu_flush_tlb(struct iommu_softc *, int, int);
+void iommu_flush_tlb_qi(struct iommu_softc *, int, int);
+
+void iommu_set_rtaddr(struct iommu_softc *, paddr_t);
+void acpidmar_sw(int);
+
+const char *dmar_bdf(int);
+
+const char *
+dmar_bdf(int sid)
+{
+ static char bdf[32];
+
+ snprintf(bdf, sizeof(bdf), "%.4x:%.2x:%.2x.%x", 0,
+ sid_bus(sid), sid_dev(sid), sid_fun(sid));
+
+ return (bdf);
+}
+
+/* busdma */
+static int dmar_dmamap_create(bus_dma_tag_t, bus_size_t, int, bus_size_t,
+ bus_size_t, int, bus_dmamap_t *);
+static void dmar_dmamap_destroy(bus_dma_tag_t, bus_dmamap_t);
+static int dmar_dmamap_load(bus_dma_tag_t, bus_dmamap_t, void *, bus_size_t,
+ struct proc *, int);
+static int dmar_dmamap_load_mbuf(bus_dma_tag_t, bus_dmamap_t, struct mbuf *,
+ int);
+static int dmar_dmamap_load_uio(bus_dma_tag_t, bus_dmamap_t, struct uio *, int);
+static int dmar_dmamap_load_raw(bus_dma_tag_t, bus_dmamap_t,
+ bus_dma_segment_t *, int, bus_size_t, int);
+static void dmar_dmamap_unload(bus_dma_tag_t, bus_dmamap_t);
+static void dmar_dmamap_sync(bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
+ bus_size_t, int);
+static int dmar_dmamem_alloc(bus_dma_tag_t, bus_size_t, bus_size_t, bus_size_t,
+ bus_dma_segment_t *, int, int *, int);
+static void dmar_dmamem_free(bus_dma_tag_t, bus_dma_segment_t *, int);
+static int dmar_dmamem_map(bus_dma_tag_t, bus_dma_segment_t *, int, size_t,
+ caddr_t *, int);
+static void dmar_dmamem_unmap(bus_dma_tag_t, caddr_t, size_t);
+static paddr_t dmar_dmamem_mmap(bus_dma_tag_t, bus_dma_segment_t *, int, off_t,
+ int, int);
+
+static void dmar_dumpseg(bus_dma_tag_t, int, bus_dma_segment_t *, const char *);
+const char *dom_bdf(struct domain *dom);
+void domain_map_check(struct domain *dom);
+
+struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *npte, vaddr_t va, int shift, uint64_t flags);
+int ivhd_poll_events(struct iommu_softc *iommu);
+void ivhd_showit(struct iommu_softc *);
+void ivhd_showdte(void);
+void ivhd_showcmd(struct iommu_softc *);
+void ivhd_showevt(struct iommu_softc *);
+
+static inline int
+debugme(struct domain *dom)
+{
+ return 0;
+ return (dom->flag & DOM_DEBUG);
+}
+
+void
+domain_map_check(struct domain *dom)
+{
+ struct iommu_softc *iommu;
+ struct domain_dev *dd;
+ struct context_entry *ctx;
+ int v;
+
+ iommu = dom->iommu;
+ TAILQ_FOREACH(dd, &dom->devices, link) {
+ acpidmar_pci_attach(acpidmar_sc, iommu->segment, dd->sid, 1);
+
+ if (iommu->dte)
+ continue;
+
+ /* Check if this is the first time we are mapped */
+ ctx = &iommu->ctx[sid_bus(dd->sid)][sid_devfn(dd->sid)];
+ v = context_user(ctx);
+ if (v != 0xA) {
+ printf(" map: %.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
+ iommu->segment,
+ sid_bus(dd->sid),
+ sid_dev(dd->sid),
+ sid_fun(dd->sid),
+ iommu->id,
+ dom->did);
+ context_set_user(ctx, 0xA);
+ }
+ }
+}
+
+/* Map a single page as passthrough - used for DRM */
+void
+dmar_ptmap(bus_dma_tag_t tag, bus_addr_t addr)
+{
+ struct domain *dom = tag->_cookie;
+
+ if (!acpidmar_sc)
+ return;
+ domain_map_check(dom);
+ domain_map_page(dom, addr, addr, PTE_P | PTE_R | PTE_W);
+}
+
+/* Map a range of pages 1:1 */
+void
+domain_map_pthru(struct domain *dom, paddr_t start, paddr_t end)
+{
+ domain_map_check(dom);
+ while (start < end) {
+ domain_map_page(dom, start, start, PTE_P | PTE_R | PTE_W);
+ start += VTD_PAGE_SIZE;
+ }
+}
+
+/* Map a single paddr to IOMMU paddr */
+void
+domain_map_page_intel(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
+{
+ paddr_t paddr;
+ struct pte_entry *pte, *npte;
+ int lvl, idx;
+ struct iommu_softc *iommu;
+
+ iommu = dom->iommu;
+ /* Insert physical address into virtual address map
+ * XXX: could we use private pmap here?
+ * essentially doing a pmap_enter(map, va, pa, prot);
+ */
+
+ /* Only handle 4k pages for now */
+ npte = dom->pte;
+ for (lvl = iommu->agaw - VTD_STRIDE_SIZE; lvl >= VTD_LEVEL0;
+ lvl -= VTD_STRIDE_SIZE) {
+ idx = (va >> lvl) & VTD_STRIDE_MASK;
+ pte = &npte[idx];
+ if (lvl == VTD_LEVEL0) {
+ /* Level 1: Page Table - add physical address */
+ pte->val = pa | flags;
+ iommu_flush_cache(iommu, pte, sizeof(*pte));
+ break;
+ } else if (!(pte->val & PTE_P)) {
+ /* Level N: Point to lower level table */
+ iommu_alloc_page(iommu, &paddr);
+ pte->val = paddr | PTE_P | PTE_R | PTE_W;
+ iommu_flush_cache(iommu, pte, sizeof(*pte));
+ }
+ npte = (void *)PMAP_DIRECT_MAP((pte->val & VTD_PTE_MASK));
+ }
+}
+
+/* Map a single paddr to IOMMU paddr: AMD
+ * physical address breakdown into levels:
+ * xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx.xxxxxxxx
+ * 5.55555555.44444444.43333333,33222222.22211111.1111----.--------
+ * mode:
+ * 000 = none shift
+ * 001 = 1 [21].12
+ * 010 = 2 [30].21
+ * 011 = 3 [39].30
+ * 100 = 4 [48].39
+ * 101 = 5 [57]
+ * 110 = 6
+ * 111 = reserved
+ */
+struct pte_entry *pte_lvl(struct iommu_softc *iommu, struct pte_entry *pte, vaddr_t va,
+ int shift, uint64_t flags)
+{
+ paddr_t paddr;
+ int idx;
+
+ idx = (va >> shift) & VTD_STRIDE_MASK;
+ if (!(pte[idx].val & PTE_P)) {
+ /* Page Table entry is not present... create a new page entry */
+ iommu_alloc_page(iommu, &paddr);
+ pte[idx].val = paddr | flags;
+ iommu_flush_cache(iommu, &pte[idx], sizeof(pte[idx]));
+ }
+ return (void *)PMAP_DIRECT_MAP((pte[idx].val & PTE_PADDR_MASK));
+}
+
+void
+domain_map_page_amd(struct domain *dom, vaddr_t va, paddr_t pa, uint64_t flags)
+{
+ struct pte_entry *pte;
+ struct iommu_softc *iommu;
+ int idx;
+
+ iommu = dom->iommu;
+ /* Insert physical address into virtual address map
+ * XXX: could we use private pmap here?
+ * essentially doing a pmap_enter(map, va, pa, prot);
+ */
+
+ /* Always assume AMD levels=4 */
+ /* 39 30 21 12 */
+ /* ---------|---------|---------|---------|------------ */
+ pte = dom->pte;
+ //pte = pte_lvl(iommu, pte, va, 39, PTE_NXTLVL(3) | PTE_IR | PTE_IW | PTE_P);
+ pte = pte_lvl(iommu, pte, va, 30, PTE_NXTLVL(2) | PTE_IR | PTE_IW | PTE_P);
+ pte = pte_lvl(iommu, pte, va, 21, PTE_NXTLVL(1) | PTE_IR | PTE_IW | PTE_P);
+ //pte = pte_lvl(iommu, pte, va, 12, PTE_NXTLVL(7) | PTE_IR | PTE_IW | PTE_P);
+
+ if (flags)
+ flags = PTE_P | PTE_R | PTE_W | PTE_IW | PTE_IR | PTE_NXTLVL(0);
+
+ /* Level 1: Page Table - add physical address */
+ idx = (va >> 12) & 0x1FF;
+ pte[idx].val = pa | flags;
+
+ iommu_flush_cache(iommu, pte, sizeof(*pte));
+}
+
+static void
+dmar_dumpseg(bus_dma_tag_t tag, int nseg, bus_dma_segment_t *segs,
+ const char *lbl)
+{
+ struct domain *dom = tag->_cookie;
+ int i;
+
+ return;
+ if (!debugme(dom))
+ return;
+ printf("%s: %s\n", lbl, dom_bdf(dom));
+ for (i = 0; i < nseg; i++) {
+ printf(" %.16llx %.8x\n",
+ (uint64_t)segs[i].ds_addr,
+ (uint32_t)segs[i].ds_len);
+ }
+}
+
+/* Unload mapping */
+void
+domain_unload_map(struct domain *dom, bus_dmamap_t dmam)
+{
+ bus_dma_segment_t *seg;
+ paddr_t base, end, idx;
+ psize_t alen;
+ int i;
+
+ if (iommu_bad(dom->iommu)) {
+ printf("unload map no iommu\n");
+ return;
+ }
+
+ //acpidmar_intr(dom->iommu);
+ for (i = 0; i < dmam->dm_nsegs; i++) {
+ seg = &dmam->dm_segs[i];
+
+ base = trunc_page(seg->ds_addr);
+ end = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
+ alen = end - base;
+
+ if (debugme(dom)) {
+ printf(" va:%.16llx len:%x\n",
+ (uint64_t)base, (uint32_t)alen);
+ }
+
+ /* Clear PTE */
+ for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE)
+ domain_map_page(dom, base + idx, 0, 0);
+
+ if (dom->flag & DOM_NOMAP) {
+ //printf("%s: nomap %.16llx\n", dom_bdf(dom), (uint64_t)base);
+ continue;
+ }
+
+ mtx_enter(&dom->exlck);
+ if (extent_free(dom->iovamap, base, alen, EX_NOWAIT)) {
+ panic("domain_unload_map: extent_free");
+ }
+ mtx_leave(&dom->exlck);
+ }
+}
+
+/* map.segs[x].ds_addr is modified to IOMMU virtual PA */
+void
+domain_load_map(struct domain *dom, bus_dmamap_t map, int flags, int pteflag, const char *fn)
+{
+ bus_dma_segment_t *seg;
+ struct iommu_softc *iommu;
+ paddr_t base, end, idx;
+ psize_t alen;
+ u_long res;
+ int i;
+
+ iommu = dom->iommu;
+ if (!iommu_enabled(iommu)) {
+ /* Lazy enable translation when required */
+ if (iommu_enable_translation(iommu, 1)) {
+ return;
+ }
+ }
+ domain_map_check(dom);
+ //acpidmar_intr(iommu);
+ for (i = 0; i < map->dm_nsegs; i++) {
+ seg = &map->dm_segs[i];
+
+ base = trunc_page(seg->ds_addr);
+ end = roundup(seg->ds_addr + seg->ds_len, VTD_PAGE_SIZE);
+ alen = end - base;
+ res = base;
+
+ if (dom->flag & DOM_NOMAP) {
+ goto nomap;
+ }
+
+ /* Allocate DMA Virtual Address */
+ mtx_enter(&dom->exlck);
+ if (extent_alloc(dom->iovamap, alen, VTD_PAGE_SIZE, 0,
+ map->_dm_boundary, EX_NOWAIT, &res)) {
+ panic("domain_load_map: extent_alloc");
+ }
+ if (res == -1) {
+ panic("got -1 address\n");
+ }
+ mtx_leave(&dom->exlck);
+
+ /* Reassign DMA address */
+ seg->ds_addr = res | (seg->ds_addr & VTD_PAGE_MASK);
+nomap:
+ if (debugme(dom)) {
+ printf(" LOADMAP: %.16llx %x => %.16llx\n",
+ (uint64_t)seg->ds_addr, (uint32_t)seg->ds_len,
+ (uint64_t)res);
+ }
+ for (idx = 0; idx < alen; idx += VTD_PAGE_SIZE) {
+ domain_map_page(dom, res + idx, base + idx,
+ PTE_P | pteflag);
+ }
+ }
+ if ((iommu->cap & CAP_CM) || force_cm) {
+ iommu_flush_tlb(iommu, IOTLB_DOMAIN, dom->did);
+ } else {
+ iommu_flush_write_buffer(iommu);
+ }
+}
+
+const char *
+dom_bdf(struct domain *dom)
+{
+ struct domain_dev *dd;
+ static char mmm[48];
+
+ dd = TAILQ_FIRST(&dom->devices);
+ snprintf(mmm, sizeof(mmm), "%s iommu:%d did:%.4x%s",
+ dmar_bdf(dd->sid), dom->iommu->id, dom->did,
+ dom->did == DID_UNITY ? " [unity]" : "");
+ return (mmm);
+}
+
+/* Bus DMA Map functions */
+static int
+dmar_dmamap_create(bus_dma_tag_t tag, bus_size_t size, int nsegments,
+ bus_size_t maxsegsz, bus_size_t boundary, int flags, bus_dmamap_t *dmamp)
+{
+ int rc;
+
+ rc = _bus_dmamap_create(tag, size, nsegments, maxsegsz, boundary,
+ flags, dmamp);
+ if (!rc) {
+ dmar_dumpseg(tag, (*dmamp)->dm_nsegs, (*dmamp)->dm_segs,
+ __FUNCTION__);
+ }
+ return (rc);
+}
+
+static void
+dmar_dmamap_destroy(bus_dma_tag_t tag, bus_dmamap_t dmam)
+{
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
+ _bus_dmamap_destroy(tag, dmam);
+}
+
+static int
+dmar_dmamap_load(bus_dma_tag_t tag, bus_dmamap_t dmam, void *buf,
+ bus_size_t buflen, struct proc *p, int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load(tag, dmam, buf, buflen, p, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+ __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+ __FUNCTION__);
+ }
+ return (rc);
+}
+
+static int
+dmar_dmamap_load_mbuf(bus_dma_tag_t tag, bus_dmamap_t dmam, struct mbuf *chain,
+ int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load_mbuf(tag, dmam, chain, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+ __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W,__FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+ __FUNCTION__);
+ }
+ return (rc);
+}
+
+static int
+dmar_dmamap_load_uio(bus_dma_tag_t tag, bus_dmamap_t dmam, struct uio *uio,
+ int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load_uio(tag, dmam, uio, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+ __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+ __FUNCTION__);
+ }
+ return (rc);
+}
+
+static int
+dmar_dmamap_load_raw(bus_dma_tag_t tag, bus_dmamap_t dmam,
+ bus_dma_segment_t *segs, int nsegs, bus_size_t size, int flags)
+{
+ struct domain *dom = tag->_cookie;
+ int rc;
+
+ rc = _bus_dmamap_load_raw(tag, dmam, segs, nsegs, size, flags);
+ if (!rc) {
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+ __FUNCTION__);
+ domain_load_map(dom, dmam, flags, PTE_R|PTE_W, __FUNCTION__);
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs,
+ __FUNCTION__);
+ }
+ return (rc);
+}
+
+static void
+dmar_dmamap_unload(bus_dma_tag_t tag, bus_dmamap_t dmam)
+{
+ struct domain *dom = tag->_cookie;
+
+ dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
+ domain_unload_map(dom, dmam);
+ _bus_dmamap_unload(tag, dmam);
+}
+
+static void
+dmar_dmamap_sync(bus_dma_tag_t tag, bus_dmamap_t dmam, bus_addr_t offset,
+ bus_size_t len, int ops)
+{
+#if 0
+ struct domain *dom = tag->_cookie;
+ int flag;
  1478. +
  1479. + flag = PTE_P;
  1480. + //acpidmar_intr(dom->iommu);
  1481. + if (ops == BUS_DMASYNC_PREREAD) {
  1482. + /* make readable */
  1483. + flag |= PTE_R;
  1484. + }
  1485. + else if (ops == BUS_DMASYNC_PREWRITE) {
  1486. + /* make writeable */
  1487. + flag |= PTE_W;
  1488. + }
  1489. + dmar_dumpseg(tag, dmam->dm_nsegs, dmam->dm_segs, __FUNCTION__);
  1490. +#endif
  1491. + _bus_dmamap_sync(tag, dmam, offset, len, ops);
  1492. +}
  1493. +
  1494. +static int
  1495. +dmar_dmamem_alloc(bus_dma_tag_t tag, bus_size_t size, bus_size_t alignment,
  1496. + bus_size_t boundary, bus_dma_segment_t *segs, int nsegs, int *rsegs,
  1497. + int flags)
  1498. +{
  1499. + int rc;
  1500. +
  1501. + rc = _bus_dmamem_alloc(tag, size, alignment, boundary, segs, nsegs,
  1502. + rsegs, flags);
  1503. + if (!rc) {
  1504. + dmar_dumpseg(tag, *rsegs, segs, __FUNCTION__);
  1505. + }
  1506. + return (rc);
  1507. +}
  1508. +
  1509. +static void
  1510. +dmar_dmamem_free(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs)
  1511. +{
  1512. + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
  1513. + _bus_dmamem_free(tag, segs, nsegs);
  1514. +}
  1515. +
  1516. +static int
  1517. +dmar_dmamem_map(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
  1518. + size_t size, caddr_t *kvap, int flags)
  1519. +{
  1520. + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
  1521. + return (_bus_dmamem_map(tag, segs, nsegs, size, kvap, flags));
  1522. +}
  1523. +
  1524. +static void
  1525. +dmar_dmamem_unmap(bus_dma_tag_t tag, caddr_t kva, size_t size)
  1526. +{
  1527. + struct domain *dom = tag->_cookie;
  1528. +
  1529. + if (debugme(dom)) {
  1530. + printf("dmamap_unmap: %s\n", dom_bdf(dom));
  1531. + }
  1532. + _bus_dmamem_unmap(tag, kva, size);
  1533. +}
  1534. +
  1535. +static paddr_t
  1536. +dmar_dmamem_mmap(bus_dma_tag_t tag, bus_dma_segment_t *segs, int nsegs,
  1537. + off_t off, int prot, int flags)
  1538. +{
  1539. + dmar_dumpseg(tag, nsegs, segs, __FUNCTION__);
  1540. + return (_bus_dmamem_mmap(tag, segs, nsegs, off, prot, flags));
  1541. +}
  1542. +
  1543. +/*===================================
  1544. + * IOMMU code
  1545. + *===================================*/
  1546. +
  1547. +/* Intel: Set Context Root Address */
  1548. +void
  1549. +iommu_set_rtaddr(struct iommu_softc *iommu, paddr_t paddr)
  1550. +{
  1551. + int i, sts;
  1552. +
  1553. + mtx_enter(&iommu->reg_lock);
  1554. + iommu_writeq(iommu, DMAR_RTADDR_REG, paddr);
  1555. + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_SRTP);
  1556. + for (i = 0; i < 5; i++) {
  1557. + sts = iommu_readl(iommu, DMAR_GSTS_REG);
  1558. + if (sts & GSTS_RTPS)
  1559. + break;
  1560. + }
  1561. + mtx_leave(&iommu->reg_lock);
  1562. +
  1563. + if (i == 5) {
  1564. + printf("set_rtaddr fails\n");
  1565. + }
  1566. +}
  1567. +
  1568. +/* COMMON: Allocate a new memory page */
  1569. +void *
  1570. +iommu_alloc_page(struct iommu_softc *iommu, paddr_t *paddr)
  1571. +{
  1572. + void *va;
  1573. +
  1574. + *paddr = 0;
  1575. + va = km_alloc(VTD_PAGE_SIZE, &kv_page, &kp_zero, &kd_nowait);
  1576. + if (va == NULL) {
  1577. + panic("can't allocate page\n");
  1578. + }
  1579. + pmap_extract(pmap_kernel(), (vaddr_t)va, paddr);
  1580. + return (va);
  1581. +}
  1582. +
  1583. +
  1584. +/* Intel: Issue command via queued invalidation */
  1585. +void
  1586. +iommu_issue_qi(struct iommu_softc *iommu, struct qi_entry *qi)
  1587. +{
  1588. +#if 0
  1589. + struct qi_entry *pi, *pw;
  1590. +
  1591. + idx = iommu->qi_head;
  1592. + pi = &iommu->qi[idx];
  1593. + pw = &iommu->qi[(idx+1) % MAXQ];
  1594. + iommu->qi_head = (idx+2) % MAXQ;
  1595. +
  1596. + memcpy(pw, &qi, sizeof(qi));
  1597. + issue command;
  1598. + while (pw->xxx)
  1599. + ;
  1600. +#endif
  1601. +}
  1602. +
  1603. +/* Intel: Flush TLB entries, Queued Invalidation mode */
  1604. +void
  1605. +iommu_flush_tlb_qi(struct iommu_softc *iommu, int mode, int did)
  1606. +{
  1607. + struct qi_entry qi;
  1608. +
  1609. + /* Use queued invalidation */
  1610. + qi.hi = 0;
  1611. + switch (mode) {
  1612. + case IOTLB_GLOBAL:
  1613. + qi.lo = QI_IOTLB | QI_IOTLB_IG_GLOBAL;
  1614. + break;
  1615. + case IOTLB_DOMAIN:
  1616. + qi.lo = QI_IOTLB | QI_IOTLB_IG_DOMAIN |
  1617. + QI_IOTLB_DID(did);
  1618. + break;
  1619. + case IOTLB_PAGE:
  1620. + qi.lo = QI_IOTLB | QI_IOTLB_IG_PAGE | QI_IOTLB_DID(did);
  1621. + qi.hi = 0;
  1622. + break;
  1623. + }
  1624. + if (iommu->cap & CAP_DRD)
  1625. + qi.lo |= QI_IOTLB_DR;
  1626. + if (iommu->cap & CAP_DWD)
  1627. + qi.lo |= QI_IOTLB_DW;
  1628. + iommu_issue_qi(iommu, &qi);
  1629. +}
  1630. +
  1631. +/* Intel: Flush Context entries, Queued Invalidation mode */
  1632. +void
  1633. +iommu_flush_ctx_qi(struct iommu_softc *iommu, int mode, int did,
  1634. + int sid, int fm)
  1635. +{
  1636. + struct qi_entry qi;
  1637. +
  1638. + /* Use queued invalidation */
  1639. + qi.hi = 0;
  1640. + switch (mode) {
  1641. + case CTX_GLOBAL:
  1642. + qi.lo = QI_CTX | QI_CTX_IG_GLOBAL;
  1643. + break;
  1644. + case CTX_DOMAIN:
  1645. + qi.lo = QI_CTX | QI_CTX_IG_DOMAIN | QI_CTX_DID(did);
  1646. + break;
  1647. + case CTX_DEVICE:
  1648. + qi.lo = QI_CTX | QI_CTX_IG_DEVICE | QI_CTX_DID(did) |
  1649. + QI_CTX_SID(sid) | QI_CTX_FM(fm);
  1650. + break;
  1651. + }
  1652. + iommu_issue_qi(iommu, &qi);
  1653. +}
  1654. +
  1655. +/* Intel: Flush write buffers */
  1656. +void
  1657. +iommu_flush_write_buffer(struct iommu_softc *iommu)
  1658. +{
  1659. + int i, sts;
  1660. +
  1661. + if (iommu->dte)
  1662. + return;
  1663. + if (!(iommu->cap & CAP_RWBF))
  1664. + return;
  1665. + printf("writebuf\n");
  1666. + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd | GCMD_WBF);
  1667. + for (i = 0; i < 5; i++) {
  1668. + sts = iommu_readl(iommu, DMAR_GSTS_REG);
  1669. + if (sts & GSTS_WBFS)
  1670. + break;
  1671. + delay(10000);
  1672. + }
  1673. + if (i == 5) {
  1674. + printf("write buffer flush fails\n");
  1675. + }
  1676. +}
  1677. +
  1678. +void
  1679. +iommu_flush_cache(struct iommu_softc *iommu, void *addr, size_t size)
  1680. +{
  1681. + if (iommu->dte) {
  1682. + pmap_flush_cache((vaddr_t)addr, size);
  1683. + return;
  1684. + }
  1685. + if (!(iommu->ecap & ECAP_C))
  1686. + pmap_flush_cache((vaddr_t)addr, size);
  1687. +}
  1688. +
  1689. +/*
  1690. + * Intel: Flush IOMMU TLB Entries
  1691. + * Flushing can occur globally, per domain or per page
  1692. + */
  1693. +void
  1694. +iommu_flush_tlb(struct iommu_softc *iommu, int mode, int did)
  1695. +{
  1696. + int n;
  1697. + uint64_t val;
  1698. +
  1699. + /* Call AMD */
  1700. + if (iommu->dte) {
  1701. + ivhd_invalidate_domain(iommu, did);
  1702. + //ivhd_poll_events(iommu);
  1703. + return;
  1704. + }
  1705. + val = IOTLB_IVT;
  1706. + switch (mode) {
  1707. + case IOTLB_GLOBAL:
  1708. + val |= IIG_GLOBAL;
  1709. + break;
  1710. + case IOTLB_DOMAIN:
  1711. + val |= IIG_DOMAIN | IOTLB_DID(did);
  1712. + break;
  1713. + case IOTLB_PAGE:
  1714. + val |= IIG_PAGE | IOTLB_DID(did);
  1715. + break;
  1716. + }
  1717. +
  1718. + /* Check for Read/Write Drain */
  1719. + if (iommu->cap & CAP_DRD)
  1720. + val |= IOTLB_DR;
  1721. + if (iommu->cap & CAP_DWD)
  1722. + val |= IOTLB_DW;
  1723. +
  1724. + mtx_enter(&iommu->reg_lock);
  1725. +
  1726. + iommu_writeq(iommu, DMAR_IOTLB_REG(iommu), val);
  1727. + n = 0;
  1728. + do {
  1729. + val = iommu_readq(iommu, DMAR_IOTLB_REG(iommu));
  1730. + } while (n++ < 5 && val & IOTLB_IVT);
  1731. +
  1732. + mtx_leave(&iommu->reg_lock);
  1733. +
  1734. +#ifdef DEBUG
  1735. + {
  1736. + static int rg;
  1737. + int a, r;
  1738. +
  1739. + if (!rg) {
  1740. + a = (val >> IOTLB_IAIG_SHIFT) & IOTLB_IAIG_MASK;
  1741. + r = (val >> IOTLB_IIRG_SHIFT) & IOTLB_IIRG_MASK;
  1742. + if (a != r) {
  1743. + printf("TLB Requested:%d Actual:%d\n", r, a);
  1744. + rg = 1;
  1745. + }
  1746. + }
  1747. + }
  1748. +#endif
  1749. +}
  1750. +
  1751. +/* Intel: Flush IOMMU settings
  1752. + * Flushes can occur globally, per domain, or per device
  1753. + */
  1754. +void
  1755. +iommu_flush_ctx(struct iommu_softc *iommu, int mode, int did, int sid, int fm)
  1756. +{
  1757. + uint64_t val;
  1758. + int n;
  1759. +
  1760. + if (iommu->dte)
  1761. + return;
  1762. + val = CCMD_ICC;
  1763. + switch (mode) {
  1764. + case CTX_GLOBAL:
  1765. + val |= CIG_GLOBAL;
  1766. + break;
  1767. + case CTX_DOMAIN:
  1768. + val |= CIG_DOMAIN | CCMD_DID(did);
  1769. + break;
  1770. + case CTX_DEVICE:
  1771. + val |= CIG_DEVICE | CCMD_DID(did) |
  1772. + CCMD_SID(sid) | CCMD_FM(fm);
  1773. + break;
  1774. + }
  1775. +
  1776. + mtx_enter(&iommu->reg_lock);
  1777. +
  1778. + n = 0;
  1779. + iommu_writeq(iommu, DMAR_CCMD_REG, val);
  1780. + do {
  1781. + val = iommu_readq(iommu, DMAR_CCMD_REG);
  1782. + } while (n++ < 5 && val & CCMD_ICC);
  1783. +
  1784. + mtx_leave(&iommu->reg_lock);
  1785. +
  1786. +#ifdef DEBUG
  1787. + {
  1788. + static int rg;
  1789. + int a, r;
  1790. +
  1791. + if (!rg) {
  1792. + a = (val >> CCMD_CAIG_SHIFT) & CCMD_CAIG_MASK;
  1793. + r = (val >> CCMD_CIRG_SHIFT) & CCMD_CIRG_MASK;
  1794. + if (a != r) {
  1795. + printf("CTX Requested:%d Actual:%d\n", r, a);
  1796. + rg = 1;
  1797. + }
  1798. + }
  1799. + }
  1800. +#endif
  1801. +}
  1802. +
  1803. +/* Intel: Enable Queued Invalidation */
  1804. +void
  1805. +iommu_enable_qi(struct iommu_softc *iommu, int enable)
  1806. +{
  1807. + int n = 0;
  1808. + int sts;
  1809. +
  1810. + if (!(iommu->ecap & ECAP_QI))
  1811. + return;
  1812. +
  1813. + if (enable) {
  1814. + iommu->gcmd |= GCMD_QIE;
  1815. +
  1816. + mtx_enter(&iommu->reg_lock);
  1817. +
  1818. + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
  1819. + do {
  1820. + sts = iommu_readl(iommu, DMAR_GSTS_REG);
  1821. + } while (n++ < 5 && !(sts & GSTS_QIES));
  1822. +
  1823. + mtx_leave(&iommu->reg_lock);
  1824. +
  1825. + printf("set.qie: %d\n", n);
  1826. + } else {
  1827. + iommu->gcmd &= ~GCMD_QIE;
  1828. +
  1829. + mtx_enter(&iommu->reg_lock);
  1830. +
  1831. + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
  1832. + do {
  1833. + sts = iommu_readl(iommu, DMAR_GSTS_REG);
  1834. + } while (n++ < 5 && sts & GSTS_QIES);
  1835. +
  1836. + mtx_leave(&iommu->reg_lock);
  1837. +
  1838. + printf("clr.qie: %d\n", n);
  1839. + }
  1840. +}
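+
+/*
+ * All GCMD updates in this file follow the same handshake: flip one bit in
+ * the cached iommu->gcmd, write the whole register, then poll GSTS until
+ * the matching status bit reflects the change. A minimal sketch of the
+ * common shape, assuming paired (cmd, stsbit) values such as
+ * GCMD_QIE/GSTS_QIES or GCMD_TE/GSTS_TES:
+ *
+ *	iommu->gcmd |= cmd;
+ *	iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
+ *	do {
+ *		sts = iommu_readl(iommu, DMAR_GSTS_REG);
+ *	} while (n++ < 5 && !(sts & stsbit));
+ */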
  1841. +
  1842. +/* Intel: Enable IOMMU translation */
  1843. +int
  1844. +iommu_enable_translation(struct iommu_softc *iommu, int enable)
  1845. +{
  1846. + uint32_t sts;
  1847. + uint64_t reg;
  1848. + int n = 0;
  1849. +
  1850. + if (iommu->dte)
  1851. + return (0);
  1852. + reg = 0;
  1853. + if (enable) {
  1854. + printf("enable iommu %d\n", iommu->id);
  1855. + iommu_showcfg(iommu, -1);
  1856. +
  1857. + iommu->gcmd |= GCMD_TE;
  1858. +
  1859. + /* Enable translation */
  1860. + printf(" pre tes: ");
  1861. +
  1862. + mtx_enter(&iommu->reg_lock);
  1863. + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
  1864. + printf("xxx");
  1865. + do {
  1866. + printf("yyy");
  1867. + sts = iommu_readl(iommu, DMAR_GSTS_REG);
  1868. + delay(n * 10000);
  1869. + } while (n++ < 5 && !(sts & GSTS_TES));
  1870. + mtx_leave(&iommu->reg_lock);
  1871. +
  1872. + printf(" set.tes: %d\n", n);
  1873. +
  1874. + if (n >= 5) {
  1875. + printf("error.. unable to initialize iommu %d\n",
  1876. + iommu->id);
  1877. + iommu->flags |= IOMMU_FLAGS_BAD;
  1878. +
  1879. + /* Disable IOMMU */
  1880. + iommu->gcmd &= ~GCMD_TE;
  1881. + mtx_enter(&iommu->reg_lock);
  1882. + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
  1883. + mtx_leave(&iommu->reg_lock);
  1884. +
  1885. + return (1);
  1886. + }
  1887. +
  1888. + iommu_flush_ctx(iommu, CTX_GLOBAL, 0, 0, 0);
  1889. + iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
  1890. + } else {
  1891. + iommu->gcmd &= ~GCMD_TE;
  1892. +
  1893. + mtx_enter(&iommu->reg_lock);
  1894. +
  1895. + iommu_writel(iommu, DMAR_GCMD_REG, iommu->gcmd);
  1896. + do {
  1897. + sts = iommu_readl(iommu, DMAR_GSTS_REG);
  1898. + } while (n++ < 5 && sts & GSTS_TES);
  1899. + mtx_leave(&iommu->reg_lock);
  1900. +
  1901. + printf(" clr.tes: %d\n", n);
  1902. + }
  1903. +
  1904. + return (0);
  1905. +}
  1906. +
  1907. +/* Intel: Initialize IOMMU */
  1908. +int
  1909. +iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
  1910. + struct acpidmar_drhd *dh)
  1911. +{
  1912. + static int niommu;
  1913. + int len = VTD_PAGE_SIZE;
  1914. + int i, gaw;
  1915. + uint32_t sts;
  1916. + paddr_t paddr;
  1917. +
  1918. + if (_bus_space_map(sc->sc_memt, dh->address, len, 0, &iommu->ioh) != 0) {
  1919. + return (-1);
  1920. + }
  1921. +
  1922. + TAILQ_INIT(&iommu->domains);
  1923. + iommu->id = ++niommu;
  1924. + iommu->flags = dh->flags;
  1925. + iommu->segment = dh->segment;
  1926. + iommu->iot = sc->sc_memt;
  1927. +
  1928. + iommu->cap = iommu_readq(iommu, DMAR_CAP_REG);
  1929. + iommu->ecap = iommu_readq(iommu, DMAR_ECAP_REG);
  1930. + iommu->ndoms = cap_nd(iommu->cap);
  1931. +
  1932. + printf(" caps: %s%s%s%s%s%s%s%s%s%s%s\n",
  1933. + iommu->cap & CAP_AFL ? "afl " : "", // adv fault
  1934. + iommu->cap & CAP_RWBF ? "rwbf " : "", // write-buffer flush
  1935. + iommu->cap & CAP_PLMR ? "plmr " : "", // protected lo region
  1936. + iommu->cap & CAP_PHMR ? "phmr " : "", // protected hi region
  1937. + iommu->cap & CAP_CM ? "cm " : "", // caching mode
  1938. + iommu->cap & CAP_ZLR ? "zlr " : "", // zero-length read
  1939. + iommu->cap & CAP_PSI ? "psi " : "", // page invalidate
  1940. + iommu->cap & CAP_DWD ? "dwd " : "", // write drain
  1941. + iommu->cap & CAP_DRD ? "drd " : "", // read drain
  1942. + iommu->cap & CAP_FL1GP ? "Gb " : "", // 1Gb pages
  1943. + iommu->cap & CAP_PI ? "pi " : ""); // posted interrupts
  1944. + printf(" ecap: %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
  1945. + iommu->ecap & ECAP_C ? "c " : "", // coherent
  1946. + iommu->ecap & ECAP_QI ? "qi " : "", // queued invalidate
  1947. + iommu->ecap & ECAP_DT ? "dt " : "", // device iotlb
  1948. + iommu->ecap & ECAP_IR ? "ir " : "", // intr remap
  1949. + iommu->ecap & ECAP_EIM ? "eim " : "", // x2apic
  1950. + iommu->ecap & ECAP_PT ? "pt " : "", // passthrough
  1951. + iommu->ecap & ECAP_SC ? "sc " : "", // snoop control
  1952. + iommu->ecap & ECAP_ECS ? "ecs " : "", // extended context
  1953. + iommu->ecap & ECAP_MTS ? "mts " : "", // memory type
  1954. + iommu->ecap & ECAP_NEST ? "nest " : "", // nested translations
  1955. + iommu->ecap & ECAP_DIS ? "dis " : "", // deferred invalidation
  1956. + iommu->ecap & ECAP_PASID ? "pas " : "", // pasid
  1957. + iommu->ecap & ECAP_PRS ? "prs " : "", // page request
  1958. + iommu->ecap & ECAP_ERS ? "ers " : "", // execute request
  1959. + iommu->ecap & ECAP_SRS ? "srs " : "", // supervisor request
  1960. + iommu->ecap & ECAP_NWFS ? "nwfs " : "", // no write flag
  1961. + iommu->ecap & ECAP_EAFS ? "eafs " : ""); // extended accessed flag
  1962. +
  1963. + mtx_init(&iommu->reg_lock, IPL_HIGH);
  1964. +
  1965. + /* Clear Interrupt Masking */
  1966. + iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
  1967. +
  1968. + iommu->intr = acpidmar_intr_establish(iommu, IPL_HIGH,
  1969. + acpidmar_intr, iommu, "dmarintr");
  1970. +
  1971. + /* Enable interrupts */
  1972. + sts = iommu_readl(iommu, DMAR_FECTL_REG);
  1973. + iommu_writel(iommu, DMAR_FECTL_REG, sts & ~FECTL_IM);
  1974. +
  1975. + /* Allocate root pointer */
  1976. + iommu->root = iommu_alloc_page(iommu, &paddr);
  1977. +#ifdef DEBUG
  1978. + printf("Allocated root pointer: pa:%.16llx va:%p\n",
  1979. + (uint64_t)paddr, iommu->root);
  1980. +#endif
  1981. + iommu->rtaddr = paddr;
  1982. + iommu_flush_write_buffer(iommu);
  1983. + iommu_set_rtaddr(iommu, paddr);
  1984. +
  1985. +#if 0
  1986. + if (iommu->ecap & ECAP_QI) {
  1987. + /* Queued Invalidation support */
  1988. + iommu->qi = iommu_alloc_page(iommu, &iommu->qip);
  1989. + iommu_writeq(iommu, DMAR_IQT_REG, 0);
  1990. + iommu_writeq(iommu, DMAR_IQA_REG, iommu->qip | IQA_QS_256);
  1991. + }
  1992. + if (iommu->ecap & ECAP_IR) {
  1993. + /* Interrupt remapping support */
  1994. + iommu_writeq(iommu, DMAR_IRTA_REG, 0);
  1995. + }
  1996. +#endif
  1997. +
  1998. + /* Calculate guest address width and supported guest widths */
  1999. + gaw = -1;
  2000. + iommu->mgaw = cap_mgaw(iommu->cap);
  2001. + printf("gaw: %d { ", iommu->mgaw);
  2002. + for (i = 0; i < 5; i++) {
  2003. + if (cap_sagaw(iommu->cap) & (1L << i)) {
  2004. + gaw = VTD_LEVELTOAW(i);
  2005. + printf("%d ", gaw);
  2006. + iommu->agaw = gaw;
  2007. + }
  2008. + }
  2009. + printf("}\n");
  2010. +
  2011. + /* Cache current status register bits */
  2012. + sts = iommu_readl(iommu, DMAR_GSTS_REG);
  2013. + if (sts & GSTS_TES)
  2014. + iommu->gcmd |= GCMD_TE;
  2015. + if (sts & GSTS_QIES)
  2016. + iommu->gcmd |= GCMD_QIE;
  2017. + if (sts & GSTS_IRES)
  2018. + iommu->gcmd |= GCMD_IRE;
  2019. + if (iommu->gcmd) {
  2020. + printf("gcmd: %x preset\n", iommu->gcmd);
  2021. + }
  2022. + acpidmar_intr(iommu);
  2023. + return (0);
  2024. +}
  2025. +
  2026. +const char *dmar_rn(int reg);
  2027. +
  2028. +const char *
  2029. +dmar_rn(int reg)
  2030. +{
  2031. + switch (reg) {
  2032. + case EVT_HEAD_REG: return "evthead";
  2033. + case EVT_TAIL_REG: return "evttail";
  2034. + case CMD_HEAD_REG: return "cmdhead";
  2035. + case CMD_TAIL_REG: return "cmdtail";
  2036. + case CMD_BASE_REG: return "cmdbase";
  2037. + case EVT_BASE_REG: return "evtbase";
  2038. + case DEV_TAB_BASE_REG: return "devtblbase";
  2039. + case IOMMUCTL_REG: return "iommuctl";
  2040. +#if 0
  2041. + case DMAR_VER_REG: return "ver";
  2042. + case DMAR_CAP_REG: return "cap";
  2043. + case DMAR_ECAP_REG: return "ecap";
  2044. + case DMAR_GSTS_REG: return "gsts";
  2045. + case DMAR_GCMD_REG: return "gcmd";
  2046. + case DMAR_FSTS_REG: return "fsts";
  2047. + case DMAR_FECTL_REG: return "fectl";
  2048. + case DMAR_RTADDR_REG: return "rtaddr";
  2049. + case DMAR_FEDATA_REG: return "fedata";
  2050. + case DMAR_FEADDR_REG: return "feaddr";
  2051. + case DMAR_FEUADDR_REG: return "feuaddr";
  2052. + case DMAR_PMEN_REG: return "pmen";
  2053. + case DMAR_IEDATA_REG: return "iedata";
  2054. + case DMAR_IEADDR_REG: return "ieaddr";
  2055. + case DMAR_IEUADDR_REG: return "ieuaddr";
  2056. + case DMAR_IRTA_REG: return "irta";
  2057. + case DMAR_CCMD_REG: return "ccmd";
  2058. + case DMAR_IQH_REG: return "iqh";
  2059. + case DMAR_IQT_REG: return "iqt";
  2060. + case DMAR_IQA_REG: return "iqa";
  2061. +#endif
  2062. + }
  2063. + return "unknown";
  2064. +}
  2065. +
  2066. +/* Read/Write IOMMU register */
  2067. +uint32_t
  2068. +iommu_readl(struct iommu_softc *iommu, int reg)
  2069. +{
  2070. + uint32_t v;
  2071. +
  2072. + v = bus_space_read_4(iommu->iot, iommu->ioh, reg);
   2073. + if (reg < 0) {	/* debug trace disabled */
  2074. + printf("iommu%d: read %x %.8lx [%s]\n",
  2075. + iommu->id, reg, (unsigned long)v, dmar_rn(reg));
  2076. + }
  2077. +
  2078. + return (v);
  2079. +}
  2080. +
  2081. +
  2082. +#define dbprintf(x...)
  2083. +
  2084. +void
  2085. +iommu_writel(struct iommu_softc *iommu, int reg, uint32_t v)
  2086. +{
  2087. + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
  2088. + iommu->id, reg, (unsigned long)v, dmar_rn(reg));
  2089. + bus_space_write_4(iommu->iot, iommu->ioh, reg, (uint32_t)v);
  2090. +}
  2091. +
  2092. +uint64_t
  2093. +iommu_readq(struct iommu_softc *iommu, int reg)
  2094. +{
  2095. + uint64_t v;
  2096. +
  2097. + v = bus_space_read_8(iommu->iot, iommu->ioh, reg);
   2098. + if (reg < 0) {	/* debug trace disabled */
  2099. + printf("iommu%d: read %x %.8lx [%s]\n",
  2100. + iommu->id, reg, (unsigned long)v, dmar_rn(reg));
  2101. + }
  2102. +
  2103. + return (v);
  2104. +}
  2105. +
  2106. +void
  2107. +iommu_writeq(struct iommu_softc *iommu, int reg, uint64_t v)
  2108. +{
  2109. + dbprintf("iommu%d: write %.8x %.16lx [%s]\n",
  2110. + iommu->id, reg, (unsigned long)v, dmar_rn(reg));
  2111. + bus_space_write_8(iommu->iot, iommu->ioh, reg, v);
  2112. +}
  2113. +
  2114. +/* Check if a device is within a device scope */
  2115. +int
  2116. +acpidmar_match_devscope(struct devlist_head *devlist, pci_chipset_tag_t pc,
  2117. + int sid)
  2118. +{
  2119. + struct dmar_devlist *ds;
  2120. + int sub, sec, i;
  2121. + int bus, dev, fun, sbus;
  2122. + pcireg_t reg;
  2123. + pcitag_t tag;
  2124. +
  2125. + sbus = sid_bus(sid);
  2126. + TAILQ_FOREACH(ds, devlist, link) {
  2127. + bus = ds->bus;
  2128. + dev = ds->dp[0].device;
  2129. + fun = ds->dp[0].function;
  2130. + /* Walk PCI bridges in path */
  2131. + for (i = 1; i < ds->ndp; i++) {
  2132. + tag = pci_make_tag(pc, bus, dev, fun);
  2133. + reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
  2134. + bus = PPB_BUSINFO_SECONDARY(reg);
  2135. + dev = ds->dp[i].device;
  2136. + fun = ds->dp[i].function;
  2137. + }
  2138. +
  2139. + /* Check for device exact match */
  2140. + if (sid == mksid(bus, dev, fun)) {
  2141. + return DMAR_ENDPOINT;
  2142. + }
  2143. +
  2144. + /* Check for device subtree match */
  2145. + if (ds->type == DMAR_BRIDGE) {
  2146. + tag = pci_make_tag(pc, bus, dev, fun);
  2147. + reg = pci_conf_read(pc, tag, PPB_REG_BUSINFO);
  2148. + sec = PPB_BUSINFO_SECONDARY(reg);
  2149. + sub = PPB_BUSINFO_SUBORDINATE(reg);
  2150. + if (sec <= sbus && sbus <= sub) {
  2151. + return DMAR_BRIDGE;
  2152. + }
  2153. + }
  2154. + }
  2155. +
  2156. + return (0);
  2157. +}
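+
+/*
+ * Source ids here are standard PCI requester ids, matching the unpacking
+ * done in ivhd_showdte() below: bus in bits 15:8, device in 7:3, function
+ * in 2:0. So for device 3:0.6:
+ *
+ *	sid = mksid(3, 0, 6);	// (3 << 8) | (0 << 3) | 6 == 0x0306
+ *	sid_bus(sid);		// 3
+ *	sid_devfn(sid);		// 0x06
+ */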
  2158. +
  2159. +struct domain *
  2160. +domain_create(struct iommu_softc *iommu, int did)
  2161. +{
  2162. + struct domain *dom;
  2163. + int gaw;
  2164. +
  2165. + printf("iommu%d: create domain: %.4x\n", iommu->id, did);
  2166. + dom = malloc(sizeof(*dom), M_DEVBUF, M_ZERO | M_WAITOK);
  2167. + dom->did = did;
  2168. + dom->iommu = iommu;
  2169. + dom->pte = iommu_alloc_page(iommu, &dom->ptep);
  2170. + TAILQ_INIT(&dom->devices);
  2171. +
  2172. + /* Setup DMA */
  2173. + dom->dmat._cookie = dom;
  2174. + dom->dmat._dmamap_create = dmar_dmamap_create; // nop
  2175. + dom->dmat._dmamap_destroy = dmar_dmamap_destroy; // nop
  2176. + dom->dmat._dmamap_load = dmar_dmamap_load; // lm
  2177. + dom->dmat._dmamap_load_mbuf = dmar_dmamap_load_mbuf; // lm
  2178. + dom->dmat._dmamap_load_uio = dmar_dmamap_load_uio; // lm
  2179. + dom->dmat._dmamap_load_raw = dmar_dmamap_load_raw; // lm
  2180. + dom->dmat._dmamap_unload = dmar_dmamap_unload; // um
  2181. + dom->dmat._dmamap_sync = dmar_dmamap_sync; // lm
  2182. + dom->dmat._dmamem_alloc = dmar_dmamem_alloc; // nop
  2183. + dom->dmat._dmamem_free = dmar_dmamem_free; // nop
  2184. + dom->dmat._dmamem_map = dmar_dmamem_map; // nop
  2185. + dom->dmat._dmamem_unmap = dmar_dmamem_unmap; // nop
  2186. + dom->dmat._dmamem_mmap = dmar_dmamem_mmap;
  2187. +
  2188. + snprintf(dom->exname, sizeof(dom->exname), "did:%x.%.4x",
  2189. + iommu->id, dom->did);
  2190. +
  2191. + /* Setup IOMMU address map */
  2192. + gaw = min(iommu->agaw, iommu->mgaw);
  2193. + printf("Creating Domain with %d bits\n", gaw);
  2194. + dom->iovamap = extent_create(dom->exname, 1024*1024*16,
  2195. + (1LL << gaw)-1,
  2196. + M_DEVBUF, NULL, 0,
  2197. + EX_WAITOK|EX_NOCOALESCE);
  2198. +
   2199. + /* Reserve the MSI interrupt range 0xfee00000-0xfeefffff so it is never handed out as IOVA */
  2200. + extent_alloc_region(dom->iovamap, 0xFEE00000L, 0x100000,
  2201. + EX_WAITOK);
  2202. + mtx_init(&dom->exlck, IPL_HIGH);
  2203. +
  2204. + TAILQ_INSERT_TAIL(&iommu->domains, dom, link);
  2205. +
  2206. + return dom;
  2207. +}
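+
+/*
+ * The per-domain extent is what the dmar_dmamap_* hooks are expected to
+ * carve device addresses out of. A minimal sketch of allocating one page
+ * of IOVA space and wiring it, under that assumption (iova is a u_long):
+ *
+ *	mtx_enter(&dom->exlck);
+ *	extent_alloc(dom->iovamap, PAGE_SIZE, PAGE_SIZE, 0, 0,
+ *	    EX_NOWAIT, &iova);
+ *	mtx_leave(&dom->exlck);
+ *	domain_map_page(dom, iova, hpa, PTE_P | PTE_R | PTE_W);
+ */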
  2208. +
  2209. +void domain_add_device(struct domain *dom, int sid)
  2210. +{
  2211. + struct domain_dev *ddev;
  2212. +
  2213. + printf("add %s to iommu%d.%.4x\n", dmar_bdf(sid), dom->iommu->id, dom->did);
  2214. + ddev = malloc(sizeof(*ddev), M_DEVBUF, M_ZERO | M_WAITOK);
  2215. + ddev->sid = sid;
  2216. + TAILQ_INSERT_TAIL(&dom->devices, ddev, link);
  2217. +
  2218. + /* Should set context entry here?? */
  2219. +}
  2220. +
  2221. +void domain_remove_device(struct domain *dom, int sid)
  2222. +{
  2223. + struct domain_dev *ddev, *tmp;
  2224. +
  2225. + TAILQ_FOREACH_SAFE(ddev, &dom->devices, link, tmp) {
  2226. + if (ddev->sid == sid) {
  2227. + TAILQ_REMOVE(&dom->devices, ddev, link);
  2228. + free(ddev, sizeof(*ddev), M_DEVBUF);
  2229. + }
  2230. + }
  2231. +}
  2232. +
  2233. +/* Lookup domain by segment & source id (bus.device.function) */
  2234. +struct domain *
  2235. +domain_lookup(struct acpidmar_softc *sc, int segment, int sid)
  2236. +{
  2237. + struct iommu_softc *iommu;
  2238. + struct domain_dev *ddev;
  2239. + struct domain *dom;
  2240. + int rc;
  2241. +
  2242. + if (sc == NULL) {
  2243. + return NULL;
  2244. + }
  2245. +
  2246. + /* Lookup IOMMU for this device */
  2247. + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
  2248. + if (iommu->segment != segment)
  2249. + continue;
  2250. + /* Check for devscope match or catchall iommu */
  2251. + rc = acpidmar_match_devscope(&iommu->devices, sc->sc_pc, sid);
  2252. + if (rc != 0 || iommu->flags) {
  2253. + break;
  2254. + }
  2255. + }
  2256. + if (!iommu) {
  2257. + printf("%s: no iommu found\n", dmar_bdf(sid));
  2258. + return NULL;
  2259. + }
  2260. +
  2261. + //acpidmar_intr(iommu);
  2262. +
  2263. + /* Search domain devices */
  2264. + TAILQ_FOREACH(dom, &iommu->domains, link) {
  2265. + TAILQ_FOREACH(ddev, &dom->devices, link) {
  2266. + /* XXX: match all functions? */
  2267. + if (ddev->sid == sid) {
  2268. + return dom;
  2269. + }
  2270. + }
  2271. + }
  2272. + if (iommu->ndoms <= 2) {
  2273. + /* Running out of domains.. create catchall domain */
  2274. + if (!iommu->unity) {
  2275. + iommu->unity = domain_create(iommu, 1);
  2276. + }
  2277. + dom = iommu->unity;
  2278. + } else {
  2279. + dom = domain_create(iommu, --iommu->ndoms);
  2280. + }
  2281. + if (!dom) {
  2282. + printf("no domain here\n");
  2283. + return NULL;
  2284. + }
  2285. +
  2286. + /* Add device to domain */
  2287. + domain_add_device(dom, sid);
  2288. +
  2289. + return dom;
  2290. +}
  2291. +
  2292. +/* Map Guest Pages into IOMMU */
  2293. +void _iommu_map(void *dom, vaddr_t va, bus_addr_t gpa, bus_size_t len)
  2294. +{
  2295. + bus_size_t i;
  2296. + paddr_t hpa;
  2297. +
  2298. + if (dom == NULL) {
  2299. + return;
  2300. + }
  2301. + printf("Mapping dma: %lx = %lx/%lx\n", va, gpa, len);
  2302. + for (i = 0; i < len; i += PAGE_SIZE) {
  2303. + hpa = 0;
  2304. + pmap_extract(curproc->p_vmspace->vm_map.pmap, va, &hpa);
  2305. + domain_map_page(dom, gpa, hpa, PTE_P | PTE_R | PTE_W);
  2306. + gpa += PAGE_SIZE;
  2307. + va += PAGE_SIZE;
  2308. + }
  2309. +}
  2310. +
  2311. +/* Find IOMMU for a given PCI device */
  2312. +void *_iommu_domain(int segment, int bus, int dev, int func, int *id)
  2313. +{
  2314. + struct domain *dom;
  2315. +
  2316. + dom = domain_lookup(acpidmar_sc, segment, mksid(bus, dev, func));
  2317. + if (dom) {
  2318. + *id = dom->did;
  2319. + }
  2320. + return dom;
  2321. +}
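+
+/*
+ * Intended use of the two exported hooks above when handing guest pages to
+ * a passthrough device; the caller, variable names and sizes here are
+ * illustrative only:
+ *
+ *	int did;
+ *	void *dom = _iommu_domain(0, bus, dev, func, &did);
+ *	if (dom != NULL)
+ *		_iommu_map(dom, buf_va, guest_pa, npages * PAGE_SIZE);
+ */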
  2322. +
  2323. +void domain_map_device(struct domain *dom, int sid);
  2324. +void ivhd_intr_map(struct iommu_softc *);
  2325. +
  2326. +void
  2327. +domain_map_device(struct domain *dom, int sid)
  2328. +{
  2329. + struct iommu_softc *iommu;
  2330. + struct context_entry *ctx;
  2331. + paddr_t paddr;
  2332. + int bus, devfn;
  2333. + int tt, lvl;
  2334. +
  2335. + iommu = dom->iommu;
  2336. +
  2337. + bus = sid_bus(sid);
  2338. + devfn = sid_devfn(sid);
  2339. + /* AMD attach device */
  2340. + if (iommu->dte) {
  2341. + struct ivhd_dte *dte = &iommu->dte[sid];
  2342. + if (!dte->dw0) {
  2343. + /* Setup Device Table Entry: bus.devfn */
  2344. + printf("@@@ PCI Attach: %.4x[%s] %.4x\n", sid, dmar_bdf(sid), dom->did);
  2345. + dte_set_host_page_table_root_ptr(dte, dom->ptep);
  2346. + dte_set_domain(dte, dom->did);
  2347. + dte_set_mode(dte, 3); // Set 4 level PTE
  2348. + dte_set_tv(dte);
  2349. + dte_set_valid(dte);
  2350. + ivhd_flush_devtab(iommu, dom->did);
  2351. + //ivhd_showit(iommu);
  2352. + ivhd_showdte();
  2353. + }
  2354. + //ivhd_poll_events(iommu);
  2355. + return;
  2356. + }
  2357. +
  2358. + /* Create Bus mapping */
  2359. + if (!root_entry_is_valid(&iommu->root[bus])) {
  2360. + iommu->ctx[bus] = iommu_alloc_page(iommu, &paddr);
  2361. + iommu->root[bus].lo = paddr | ROOT_P;
  2362. + iommu_flush_cache(iommu, &iommu->root[bus],
  2363. + sizeof(struct root_entry));
  2364. + dprintf("iommu%d: Allocate context for bus: %.2x pa:%.16llx va:%p\n",
  2365. + iommu->id, bus, (uint64_t)paddr,
  2366. + iommu->ctx[bus]);
  2367. + }
  2368. +
  2369. + /* Create DevFn mapping */
  2370. + ctx = iommu->ctx[bus] + devfn;
  2371. + if (!context_entry_is_valid(ctx)) {
  2372. + tt = CTX_T_MULTI;
  2373. + lvl = VTD_AWTOLEVEL(iommu->agaw);
  2374. +
  2375. + /* Initialize context */
  2376. + context_set_slpte(ctx, dom->ptep);
  2377. + context_set_translation_type(ctx, tt);
  2378. + context_set_domain_id(ctx, dom->did);
  2379. + context_set_address_width(ctx, lvl);
  2380. + context_set_present(ctx);
  2381. +
  2382. + /* Flush it */
  2383. + iommu_flush_cache(iommu, ctx, sizeof(struct context_entry));
  2384. + if ((iommu->cap & CAP_CM) || force_cm) {
  2385. + iommu_flush_ctx(iommu, CTX_DEVICE, dom->did, sid, 0);
  2386. + iommu_flush_tlb(iommu, IOTLB_GLOBAL, 0);
  2387. + } else {
  2388. + iommu_flush_write_buffer(iommu);
  2389. + }
  2390. + dprintf("iommu%d: %s set context ptep:%.16llx lvl:%d did:%.4x tt:%d\n",
  2391. + iommu->id, dmar_bdf(sid), (uint64_t)dom->ptep, lvl,
  2392. + dom->did, tt);
  2393. + }
  2394. +}
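+
+/*
+ * Resulting Intel lookup chain: root[bus] selects a context-entry page,
+ * context[devfn] carries (domain id, page-table root, address width), and
+ * that page-table root is shared by every device in the domain. So: one
+ * context page per bus, one page table per domain.
+ */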
  2395. +
  2396. +struct domain *
  2397. +acpidmar_pci_attach(struct acpidmar_softc *sc, int segment, int sid, int mapctx)
  2398. +{
   2399. + struct domain *dom;
  2400. +
  2401. + dom = domain_lookup(sc, segment, sid);
  2402. + if (!dom) {
  2403. + printf("no domain: %s\n", dmar_bdf(sid));
  2404. + return NULL;
  2405. + }
  2406. +
  2407. + if (mapctx) {
  2408. + domain_map_device(dom, sid);
  2409. + }
  2410. +
  2411. + return dom;
  2412. +}
  2413. +
   2414. +int ismap(int bus, int dev, int fun) {
   2415. + /* Debug filter: currently remap only devices on bus > 0 */
   2416. + return (bus > 0);
   2417. +#if 0 /* alternate filters kept from testing */
   2418. + return (bus == 1);
   2419. + if (bus == 3 && dev == 0 && fun == 6) return 1;
   2420. + if (bus == 0 && dev == 8 && fun == 1) return 1;
   2421. + return 0;
   2422. +#endif
   2423. +}
  2424. +
  2425. +void
  2426. +acpidmar_pci_hook(pci_chipset_tag_t pc, struct pci_attach_args *pa)
  2427. +{
  2428. + int bus, dev, fun;
  2429. + struct domain *dom;
  2430. + pcireg_t reg;
  2431. +
  2432. + if (!acpidmar_sc) {
  2433. + /* No DMAR, ignore */
  2434. + return;
  2435. + }
  2436. +
  2437. + /* Add device to our list */
  2438. + pci_decompose_tag(pc, pa->pa_tag, &bus, &dev, &fun);
  2439. + reg = pci_conf_read(pc, pa->pa_tag, PCI_CLASS_REG);
  2440. + if (!ismap(bus, dev, fun))
  2441. + return;
  2442. +#if 0
  2443. + if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
  2444. + PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
  2445. + printf("dmar: %.4x:%.2x:%.2x.%x is VGA, ignoring\n",
  2446. + pa->pa_domain, bus, dev, fun);
  2447. + return;
  2448. + }
  2449. +#endif
  2450. + /* Add device to domain */
  2451. + dom = acpidmar_pci_attach(acpidmar_sc, pa->pa_domain,
  2452. + mksid(bus, dev, fun), 0);
  2453. + if (dom == NULL)
  2454. + return;
  2455. +
  2456. + if (PCI_CLASS(reg) == PCI_CLASS_DISPLAY &&
  2457. + PCI_SUBCLASS(reg) == PCI_SUBCLASS_DISPLAY_VGA) {
  2458. + dom->flag = DOM_NOMAP;
  2459. + }
  2460. + if (PCI_CLASS(reg) == PCI_CLASS_BRIDGE &&
  2461. + PCI_SUBCLASS(reg) == PCI_SUBCLASS_BRIDGE_ISA) {
  2462. + /* For ISA Bridges, map 0-16Mb as 1:1 */
  2463. + printf("dmar: %.4x:%.2x:%.2x.%x mapping ISA\n",
  2464. + pa->pa_domain, bus, dev, fun);
  2465. + domain_map_pthru(dom, 0x00, 16*1024*1024);
  2466. + }
  2467. + ivhd_intr_map(dom->iommu);
  2468. +
  2469. + /* Change DMA tag */
  2470. + pa->pa_dmat = &dom->dmat;
  2471. +}
  2472. +
  2473. +/* Create list of device scope entries from ACPI table */
  2474. +void
  2475. +acpidmar_parse_devscope(union acpidmar_entry *de, int off, int segment,
  2476. + struct devlist_head *devlist)
  2477. +{
  2478. + struct acpidmar_devscope *ds;
  2479. + struct dmar_devlist *d;
  2480. + int dplen, i;
  2481. +
  2482. + TAILQ_INIT(devlist);
  2483. + while (off < de->length) {
  2484. + ds = (struct acpidmar_devscope *)((unsigned char *)de + off);
  2485. + off += ds->length;
  2486. +
  2487. + /* We only care about bridges and endpoints */
  2488. + if (ds->type != DMAR_ENDPOINT && ds->type != DMAR_BRIDGE)
  2489. + continue;
  2490. +
  2491. + dplen = ds->length - sizeof(*ds);
  2492. + d = malloc(sizeof(*d) + dplen, M_DEVBUF, M_ZERO | M_WAITOK);
  2493. + d->bus = ds->bus;
  2494. + d->type = ds->type;
  2495. + d->ndp = dplen / 2;
  2496. + d->dp = (void *)&d[1];
  2497. + memcpy(d->dp, &ds[1], dplen);
  2498. + TAILQ_INSERT_TAIL(devlist, d, link);
  2499. +
  2500. + printf(" %8s %.4x:%.2x.%.2x.%x {",
  2501. + ds->type == DMAR_BRIDGE ? "bridge" : "endpoint",
  2502. + segment, ds->bus,
  2503. + d->dp[0].device,
  2504. + d->dp[0].function);
  2505. +
  2506. + for (i = 1; i < d->ndp; i++) {
  2507. + printf(" %2x.%x ",
  2508. + d->dp[i].device,
  2509. + d->dp[i].function);
  2510. + }
  2511. + printf("}\n");
  2512. + }
  2513. +}
  2514. +
  2515. +/* DMA Remapping Hardware Unit */
  2516. +void
  2517. +acpidmar_drhd(struct acpidmar_softc *sc, union acpidmar_entry *de)
  2518. +{
  2519. + struct iommu_softc *iommu;
  2520. +
  2521. + printf("DRHD: segment:%.4x base:%.16llx flags:%.2x\n",
  2522. + de->drhd.segment,
  2523. + de->drhd.address,
  2524. + de->drhd.flags);
  2525. + iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO | M_WAITOK);
  2526. + acpidmar_parse_devscope(de, sizeof(de->drhd), de->drhd.segment,
  2527. + &iommu->devices);
  2528. + iommu_init(sc, iommu, &de->drhd);
  2529. +
  2530. + if (de->drhd.flags) {
  2531. + /* Catchall IOMMU goes at end of list */
  2532. + TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
  2533. + } else {
  2534. + TAILQ_INSERT_HEAD(&sc->sc_drhds, iommu, link);
  2535. + }
  2536. +}
  2537. +
  2538. +/* Reserved Memory Region Reporting */
  2539. +void
  2540. +acpidmar_rmrr(struct acpidmar_softc *sc, union acpidmar_entry *de)
  2541. +{
  2542. + struct rmrr_softc *rmrr;
  2543. + bios_memmap_t *im, *jm;
  2544. + uint64_t start, end;
  2545. +
  2546. + printf("RMRR: segment:%.4x range:%.16llx-%.16llx\n",
  2547. + de->rmrr.segment, de->rmrr.base, de->rmrr.limit);
  2548. + if (de->rmrr.limit <= de->rmrr.base) {
  2549. + printf(" buggy BIOS\n");
  2550. + return;
  2551. + }
  2552. +
  2553. + rmrr = malloc(sizeof(*rmrr), M_DEVBUF, M_ZERO | M_WAITOK);
  2554. + rmrr->start = trunc_page(de->rmrr.base);
  2555. + rmrr->end = round_page(de->rmrr.limit);
  2556. + rmrr->segment = de->rmrr.segment;
  2557. + acpidmar_parse_devscope(de, sizeof(de->rmrr), de->rmrr.segment,
  2558. + &rmrr->devices);
  2559. +
  2560. + for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
  2561. + if (im->type != BIOS_MAP_RES)
  2562. + continue;
  2563. + /* Search for adjacent reserved regions */
  2564. + start = im->addr;
  2565. + end = im->addr+im->size;
  2566. + for (jm = im+1; jm->type == BIOS_MAP_RES && end == jm->addr;
  2567. + jm++) {
  2568. + end = jm->addr+jm->size;
  2569. + }
  2570. + printf("e820: %.16llx - %.16llx\n", start, end);
  2571. + if (start <= rmrr->start && rmrr->end <= end) {
   2572. + /* Some buggy BIOSes touch memory outside the reported RMRR; widen to the whole reserved region */
  2573. + printf(" ** inside E820 Reserved %.16llx %.16llx\n",
  2574. + start, end);
  2575. + rmrr->start = trunc_page(start);
  2576. + rmrr->end = round_page(end);
  2577. + break;
  2578. + }
  2579. + }
  2580. + TAILQ_INSERT_TAIL(&sc->sc_rmrrs, rmrr, link);
  2581. +}
  2582. +
  2583. +/* Root Port ATS Reporting */
  2584. +void
  2585. +acpidmar_atsr(struct acpidmar_softc *sc, union acpidmar_entry *de)
  2586. +{
  2587. + struct atsr_softc *atsr;
  2588. +
  2589. + printf("ATSR: segment:%.4x flags:%x\n",
  2590. + de->atsr.segment,
  2591. + de->atsr.flags);
  2592. +
  2593. + atsr = malloc(sizeof(*atsr), M_DEVBUF, M_ZERO | M_WAITOK);
  2594. + atsr->flags = de->atsr.flags;
  2595. + atsr->segment = de->atsr.segment;
  2596. + acpidmar_parse_devscope(de, sizeof(de->atsr), de->atsr.segment,
  2597. + &atsr->devices);
  2598. +
  2599. + TAILQ_INSERT_TAIL(&sc->sc_atsrs, atsr, link);
  2600. +}
  2601. +
  2602. +void
  2603. +acpidmar_init(struct acpidmar_softc *sc, struct acpi_dmar *dmar)
  2604. +{
  2605. + struct rmrr_softc *rmrr;
  2606. + struct iommu_softc *iommu;
  2607. + struct domain *dom;
  2608. + struct dmar_devlist *dl;
  2609. + union acpidmar_entry *de;
  2610. + int off, sid, rc;
  2611. +
  2612. + domain_map_page = domain_map_page_intel;
  2613. + printf(": hardware width: %d, intr_remap:%d x2apic_opt_out:%d\n",
  2614. + dmar->haw+1,
  2615. + !!(dmar->flags & 0x1),
  2616. + !!(dmar->flags & 0x2));
  2617. + sc->sc_haw = dmar->haw+1;
  2618. + sc->sc_flags = dmar->flags;
  2619. +
  2620. + TAILQ_INIT(&sc->sc_drhds);
  2621. + TAILQ_INIT(&sc->sc_rmrrs);
  2622. + TAILQ_INIT(&sc->sc_atsrs);
  2623. +
  2624. + off = sizeof(*dmar);
  2625. + while (off < dmar->hdr.length) {
  2626. + de = (union acpidmar_entry *)((unsigned char *)dmar + off);
  2627. + switch (de->type) {
  2628. + case DMAR_DRHD:
  2629. + acpidmar_drhd(sc, de);
  2630. + break;
  2631. + case DMAR_RMRR:
  2632. + acpidmar_rmrr(sc, de);
  2633. + break;
  2634. + case DMAR_ATSR:
  2635. + acpidmar_atsr(sc, de);
  2636. + break;
  2637. + default:
  2638. + printf("DMAR: unknown %x\n", de->type);
  2639. + break;
  2640. + }
  2641. + off += de->length;
  2642. + }
  2643. +
  2644. + /* Pre-create domains for iommu devices */
  2645. + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
  2646. + TAILQ_FOREACH(dl, &iommu->devices, link) {
  2647. + sid = mksid(dl->bus, dl->dp[0].device,
  2648. + dl->dp[0].function);
  2649. + dom = acpidmar_pci_attach(sc, iommu->segment, sid, 0);
  2650. + if (dom != NULL) {
  2651. + printf("%.4x:%.2x:%.2x.%x iommu:%d did:%.4x\n",
  2652. + iommu->segment, dl->bus, dl->dp[0].device, dl->dp[0].function,
  2653. + iommu->id, dom->did);
  2654. + }
  2655. + }
  2656. + }
  2657. + /* Map passthrough pages for RMRR */
  2658. + TAILQ_FOREACH(rmrr, &sc->sc_rmrrs, link) {
  2659. + TAILQ_FOREACH(dl, &rmrr->devices, link) {
  2660. + sid = mksid(dl->bus, dl->dp[0].device,
  2661. + dl->dp[0].function);
  2662. + dom = acpidmar_pci_attach(sc, rmrr->segment, sid, 0);
  2663. + if (dom != NULL) {
  2664. + printf("%s map ident: %.16llx %.16llx\n",
  2665. + dom_bdf(dom), rmrr->start, rmrr->end);
  2666. + domain_map_pthru(dom, rmrr->start, rmrr->end);
  2667. + rc = extent_alloc_region(dom->iovamap,
  2668. + rmrr->start, rmrr->end, EX_WAITOK);
  2669. + }
  2670. + }
  2671. + }
  2672. +}
  2673. +
  2674. +
  2675. +/*=====================================================
  2676. + * AMD Vi
  2677. + *=====================================================*/
  2678. +void acpiivrs_ivhd(struct acpidmar_softc *, struct acpi_ivhd *);
  2679. +int acpiivrs_iommu_match(struct pci_attach_args *);
  2680. +int ivhd_iommu_init(struct acpidmar_softc *, struct iommu_softc *,
  2681. + struct acpi_ivhd *);
  2682. +void iommu_ivhd_add(struct iommu_softc *, int, int, int);
  2683. +int _ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd);
  2684. +void ivhd_show_event(struct iommu_softc *, struct ivhd_event *evt, int);
  2685. +int ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait);
  2686. +int ivhd_invalidate_domain(struct iommu_softc *iommu, int did);
  2687. +void acpiivrs_mkalias(struct acpi_ivhd *, int, uint16_t, uint16_t, uint16_t);
  2688. +
  2689. +/* Setup interrupt for AMD */
  2690. +void ivhd_intr_map(struct iommu_softc *iommu) {
  2691. + struct pci_attach_args ipa;
  2692. + pci_intr_handle_t ih;
  2693. +
  2694. + if (iommu->intr)
  2695. + return;
  2696. + if (pci_find_device(&ipa, acpiivrs_iommu_match)) {
  2697. + printf("found iommu pci\n");
  2698. + if (pci_intr_map_msi(&ipa, &ih) && pci_intr_map(&ipa, &ih)) {
  2699. + printf("couldn't map interrupt\n");
   2700. + } else {
  2702. + iommu->intr = pci_intr_establish(ipa.pa_pc, ih, IPL_NET | IPL_MPSAFE,
  2703. + acpidmar_intr, iommu, "amd_iommu");
  2704. + if (!iommu->intr) {
  2705. + printf("NOINTR\n");
  2706. + iommu->intr = (void *)0xdeadbeef;
  2707. + }
  2708. + }
  2709. + }
  2710. +}
  2711. +
  2712. +void _dumppte(struct pte_entry *pte, int lvl, vaddr_t va)
  2713. +{
  2714. + char *pfx[] = { " ", " ", " ", " ", "" };
  2715. + uint64_t i, sh;
  2716. + struct pte_entry *npte;
  2717. +
  2718. + for (i = 0; i < 512; i++) {
  2719. + sh = (i << (((lvl-1) * 9) + 12));
  2720. + if (pte[i].val & PTE_P) {
  2721. + if (lvl > 1) {
  2722. + npte = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
  2723. + printf("%slvl%d: %.16llx nxt:%llu\n", pfx[lvl], lvl, pte[i].val, (pte[i].val >> 9) & 7);
  2724. + _dumppte(npte, lvl-1, va | sh);
  2725. + }
  2726. + else {
  2727. + printf("%slvl%d: %.16llx <- %.16llx \n", pfx[lvl], lvl, pte[i].val, va | sh);
  2728. + }
  2729. + }
  2730. + }
  2731. +#if 0
  2732. + uint64_t i;
  2733. + struct pte_entry *np;
  2734. +
  2735. + // lvl 48 : 39-47 -> pte[512]
  2736. + // lvl 39 : 30-38 -> pte[512]
  2737. + // lvl 30 : 21-29 -> pte[512]
  2738. + // lvl 21 : 12-20 -> page
  2739. + for (i = 0; i < 512; i++) {
  2740. + if (pte[i].val & PTE_P) {
  2741. + if (lvl > 1) {
  2742. + printf(" lvl%d: %.3lx:%.3lx:%.3lx:%.3lx %.16llx\n", lvl,
  2743. + (va >> 39) & 0x1ff,
  2744. + (va >> 30) & 0x1ff,
  2745. + (va >> 21) & 0x1ff,
  2746. + (va >> 12) & 0x1ff, pte[i].val);
  2747. + np = (void *)PMAP_DIRECT_MAP((pte[i].val & PTE_PADDR_MASK));
  2748. + _dumppte(np, lvl - 9, va | (i << (lvl-9)));
  2749. + }
  2750. + else {
  2751. + printf(" %.16lx %.16llx\n", va, pte[i].val);
  2752. + }
  2753. + }
  2754. + }
  2755. +#endif
  2756. +}
  2757. +
  2758. +void showpage(int sid, paddr_t paddr)
  2759. +{
  2760. + struct domain *dom;
  2761. + static int show = 0;
  2762. +
  2763. + if (show > 10)
  2764. + return;
  2765. + show++;
  2766. + dom = acpidmar_pci_attach(acpidmar_sc, 0, sid, 0);
  2767. + if (!dom)
  2768. + return;
  2769. + printf("DTE: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
  2770. + hwdte[sid].dw0,
  2771. + hwdte[sid].dw1,
  2772. + hwdte[sid].dw2,
  2773. + hwdte[sid].dw3,
  2774. + hwdte[sid].dw4,
  2775. + hwdte[sid].dw5,
  2776. + hwdte[sid].dw6,
  2777. + hwdte[sid].dw7);
  2778. + _dumppte(dom->pte, 3, 0);
  2779. +}
  2780. +
  2781. +/* Display AMD IOMMU Error */
  2782. +void
  2783. +ivhd_show_event(struct iommu_softc *iommu, struct ivhd_event *evt, int head)
  2784. +{
  2785. + int type, sid, did, flag;
  2786. + uint64_t address;
  2787. +
  2788. + /* Get Device, Domain, Address and Type of event */
  2789. + sid = __EXTRACT(evt->dw0, EVT_SID);
  2790. + type = __EXTRACT(evt->dw1, EVT_TYPE);
  2791. + did = __EXTRACT(evt->dw1, EVT_DID);
  2792. + flag = __EXTRACT(evt->dw1, EVT_FLAG);
  2793. + address = _get64(&evt->dw2);
  2794. +
  2795. + printf("=== IOMMU Error[%.4x]: ", head);
  2796. + switch (type) {
  2797. + case ILLEGAL_DEV_TABLE_ENTRY: // ok
  2798. + printf("illegal dev table entry dev=%s addr=0x%.16llx %s, %s, %s, %s\n",
  2799. + dmar_bdf(sid), address,
  2800. + evt->dw1 & EVT_TR ? "translation" : "transaction",
  2801. + evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
  2802. + evt->dw1 & EVT_RW ? "write" : "read",
  2803. + evt->dw1 & EVT_I ? "interrupt" : "memory");
  2804. + ivhd_showdte();
  2805. + break;
  2806. + case IO_PAGE_FAULT: // ok
  2807. + printf("io page fault dev=%s did=0x%.4x addr=0x%.16llx\n%s, %s, %s, %s, %s, %s\n",
  2808. + dmar_bdf(sid), did, address,
  2809. + evt->dw1 & EVT_TR ? "translation" : "transaction",
  2810. + evt->dw1 & EVT_RZ ? "reserved bit" : "invalid level",
  2811. + evt->dw1 & EVT_PE ? "no perm" : "perm",
  2812. + evt->dw1 & EVT_RW ? "write" : "read",
  2813. + evt->dw1 & EVT_PR ? "present" : "not present",
  2814. + evt->dw1 & EVT_I ? "interrupt" : "memory");
  2815. + ivhd_showdte();
  2816. + showpage(sid, address);
  2817. + break;
  2818. + case DEV_TAB_HARDWARE_ERROR: // ok
  2819. + printf("device table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
  2820. + dmar_bdf(sid), address,
  2821. + evt->dw1 & EVT_TR ? "translation" : "transaction",
  2822. + evt->dw1 & EVT_RW ? "write" : "read",
  2823. + evt->dw1 & EVT_I ? "interrupt" : "memory");
  2824. + ivhd_showdte();
  2825. + break;
  2826. + case PAGE_TAB_HARDWARE_ERROR:
  2827. + printf("page table hardware error dev=%s addr=0x%.16llx %s, %s, %s\n",
  2828. + dmar_bdf(sid), address,
  2829. + evt->dw1 & EVT_TR ? "translation" : "transaction",
  2830. + evt->dw1 & EVT_RW ? "write" : "read",
  2831. + evt->dw1 & EVT_I ? "interrupt" : "memory");
  2832. + ivhd_showdte();
  2833. + break;
  2834. + case ILLEGAL_COMMAND_ERROR: // ok
  2835. + printf("illegal command addr=0x%.16llx\n", address);
  2836. + ivhd_showcmd(iommu);
  2837. + break;
  2838. + case COMMAND_HARDWARE_ERROR:
  2839. + printf("command hardware error addr=0x%.16llx flag=0x%.4x\n",
  2840. + address, flag);
  2841. + ivhd_showcmd(iommu);
  2842. + break;
  2843. + case IOTLB_INV_TIMEOUT:
  2844. + printf("iotlb invalidation timeout dev=%s address=0x%.16llx\n",
  2845. + dmar_bdf(sid), address);
  2846. + break;
  2847. + case INVALID_DEVICE_REQUEST:
  2848. + printf("invalid device request dev=%s addr=0x%.16llx flag=0x%.4x\n",
  2849. + dmar_bdf(sid), address, flag);
  2850. + break;
  2851. + default:
  2852. + printf("unknown type=0x%.2x\n", type);
  2853. + break;
  2854. + }
  2855. + //ivhd_showdte();
  2856. + /* Clear old event */
  2857. + evt->dw0 = 0;
  2858. + evt->dw1 = 0;
  2859. + evt->dw2 = 0;
  2860. + evt->dw3 = 0;
  2861. +}
  2862. +
  2863. +/* AMD: Process IOMMU error from hardware */
  2864. +int
  2865. +ivhd_poll_events(struct iommu_softc *iommu)
  2866. +{
  2867. + uint32_t head, tail;
  2868. + int sz;
  2869. +
  2870. + sz = sizeof(struct ivhd_event);
  2871. + head = iommu_readl(iommu, EVT_HEAD_REG);
  2872. + tail = iommu_readl(iommu, EVT_TAIL_REG);
  2873. + if (head == tail) {
  2874. + /* No pending events */
  2875. + return (0);
  2876. + }
  2877. + ivhd_showevt(iommu);
  2878. + while (head != tail) {
  2879. + ivhd_show_event(iommu, iommu->evt_tbl + head, head);
  2880. + head = (head + sz) % EVT_TBL_SIZE;
  2881. + }
  2882. + iommu_writel(iommu, EVT_HEAD_REG, head);
  2883. + return (0);
  2884. +}
  2885. +
  2886. +/* AMD: Issue command to IOMMU queue */
  2887. +int
  2888. +_ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd)
  2889. +{
  2890. + u_long rf;
  2891. + uint32_t head, tail, next;
  2892. + int sz;
  2893. +
  2894. + head = iommu_readl(iommu, CMD_HEAD_REG);
  2895. + sz = sizeof(*cmd);
  2896. + rf = intr_disable();
  2897. + tail = iommu_readl(iommu, CMD_TAIL_REG);
  2898. + next = (tail + sz) % CMD_TBL_SIZE;
  2899. + if (next == head) {
  2900. + /* Queue is full */
  2901. + intr_restore(rf);
  2902. + return -EBUSY;
  2903. + }
  2904. + memcpy(iommu->cmd_tbl + tail, cmd, sz);
  2905. + iommu_writel(iommu, CMD_TAIL_REG, next);
  2906. + intr_restore(rf);
  2907. + return (tail);
  2908. +}
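+
+/*
+ * Worked example of the full-ring check above, assuming the 4K table of
+ * 16-byte commands (256 slots): with head == 0x000 and tail == 0xff0,
+ * next == (0xff0 + 16) % 4096 == 0 == head, so the queue reports full at
+ * 255 entries; one slot always stays empty to distinguish full from empty.
+ */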
  2909. +
  2910. +int
  2911. +ivhd_issue_command(struct iommu_softc *iommu, const struct ivhd_command *cmd, int wait)
  2912. +{
  2913. + struct ivhd_command wq = { 0 };
   2914. + volatile uint64_t wv __aligned(16) = 0;	/* written by the IOMMU on completion */
  2915. + paddr_t paddr;
  2916. + int rc, i;
  2917. +
  2918. + rc = _ivhd_issue_command(iommu, cmd);
  2919. + if (rc >= 0 && wait) {
  2920. + /* Wait for previous commands to complete.
  2921. + * Store address of completion variable to command */
  2922. + pmap_extract(pmap_kernel(), (vaddr_t)&wv, &paddr);
  2923. + wq.dw0 = (paddr & ~0x7) | 0x1;
  2924. + wq.dw1 = (COMPLETION_WAIT << CMD_SHIFT) | ((paddr >> 32) & 0xFFFFF);
  2925. + wq.dw2 = 0xDEADBEEF;
  2926. + wq.dw3 = 0xFEEDC0DE;
  2927. +
  2928. + rc = _ivhd_issue_command(iommu, &wq);
  2929. + /* wv will change to value in dw2/dw3 when command is complete */
  2930. + for (i = 0; i < 1000 && !wv; i++) {
  2931. + DELAY(1000);
  2932. + }
  2933. + if (i == 1000) {
  2934. + printf("ivhd command timeout: %.8x %.8x %.8x %.8x wv:%llx idx:%x\n",
  2935. + cmd->dw0, cmd->dw1, cmd->dw2, cmd->dw3, wv, rc);
  2936. + ivhd_showcmd(iommu);
  2937. + }
  2938. + }
  2939. + return rc;
  2940. +
  2941. +}
  2942. +
  2943. +/* AMD: Flush changes to Device Table Entry for a specific domain */
  2944. +int ivhd_flush_devtab(struct iommu_softc *iommu, int did)
  2945. +{
  2946. + struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_DEVTAB_ENTRY << CMD_SHIFT };
  2947. + return ivhd_issue_command(iommu, &cmd, 1);
  2948. +}
  2949. +
  2950. +/* AMD: Invalidate all IOMMU device and page tables */
  2951. +int ivhd_invalidate_iommu_all(struct iommu_softc *iommu)
  2952. +{
  2953. + struct ivhd_command cmd = { .dw1 = INVALIDATE_IOMMU_ALL << CMD_SHIFT };
  2954. +#if 0
  2955. + int i;
  2956. +
  2957. + for (i = 0; i < 65536; i++) {
  2958. + if (iommu->dte[i].dw0) {
  2959. + printf("dte%.4x: %.8lx %.8lx %.8lx %.8lx\n",
  2960. + i, (unsigned long)iommu->dte[i].dw0,
  2961. + (unsigned long)iommu->dte[i].dw1,
  2962. + (unsigned long)iommu->dte[i].dw2,
  2963. + (unsigned long)iommu->dte[i].dw3);
  2964. + }
  2965. + }
  2966. +#endif
  2967. + return ivhd_issue_command(iommu, &cmd, 0);
  2968. +}
  2969. +
  2970. +/* AMD: Invalidate interrupt remapping */
  2971. +int ivhd_invalidate_interrupt_table(struct iommu_softc *iommu, int did)
  2972. +{
  2973. + struct ivhd_command cmd = { .dw0 = did, .dw1 = INVALIDATE_INTERRUPT_TABLE << CMD_SHIFT };
  2974. + return ivhd_issue_command(iommu, &cmd, 0);
  2975. +}
  2976. +
  2977. +/* AMD: Invalidate all page tables in a domain */
  2978. +int ivhd_invalidate_domain(struct iommu_softc *iommu, int did)
  2979. +{
  2980. + struct ivhd_command cmd = { .dw1 = did | (INVALIDATE_IOMMU_PAGES << CMD_SHIFT) };
  2981. +
  2982. + cmd.dw2 = 0xFFFFF000 | 0x3;
  2983. + cmd.dw3 = 0x7FFFFFFF;
  2984. + return ivhd_issue_command(iommu, &cmd, 1);
  2985. +}
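+
+/*
+ * The dw2/dw3 constants above encode address 0x7fffffff_fffff000 with the
+ * low S and PDE bits set which, per the AMD command layout assumed here,
+ * means "all pages, all levels": a full shootdown of the domain rather
+ * than a single-page invalidate.
+ */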
  2986. +
  2987. +/* AMD: Display Registers */
  2988. +void ivhd_showit(struct iommu_softc *iommu)
  2989. +{
  2990. + printf("---- dt:%.16llx cmd:%.16llx evt:%.16llx ctl:%.16llx sts:%.16llx\n",
  2991. + iommu_readq(iommu, DEV_TAB_BASE_REG),
  2992. + iommu_readq(iommu, CMD_BASE_REG),
  2993. + iommu_readq(iommu, EVT_BASE_REG),
  2994. + iommu_readq(iommu, IOMMUCTL_REG),
  2995. + iommu_readq(iommu, IOMMUSTS_REG));
  2996. + printf("---- cmd queue:%.16llx %.16llx evt queue:%.16llx %.16llx\n",
  2997. + iommu_readq(iommu, CMD_HEAD_REG),
  2998. + iommu_readq(iommu, CMD_TAIL_REG),
  2999. + iommu_readq(iommu, EVT_HEAD_REG),
  3000. + iommu_readq(iommu, EVT_TAIL_REG));
  3001. +}
  3002. +
  3003. +/* AMD: Generate Errors to test event handler */
  3004. +void ivhd_checkerr(struct iommu_softc *iommu);
  3005. +void ivhd_checkerr(struct iommu_softc *iommu)
  3006. +{
  3007. + struct ivhd_command cmd = { -1, -1, -1, -1 };
  3008. +
  3009. + /* Generate ILLEGAL DEV TAB entry? */
  3010. + iommu->dte[0x2303].dw0 = -1; // invalid
  3011. + iommu->dte[0x2303].dw2 = 0x1234; // domain
  3012. + iommu->dte[0x2303].dw7 = -1; // reserved
  3013. + ivhd_flush_devtab(iommu, 0x1234);
  3014. + ivhd_poll_events(iommu);
  3015. +
  3016. + /* Generate ILLEGAL_COMMAND_ERROR : ok */
  3017. + ivhd_issue_command(iommu, &cmd, 0);
  3018. + ivhd_poll_events(iommu);
  3019. +
  3020. + /* Generate page hardware error */
  3021. +}
  3022. +
  3023. +/* AMD: Show Device Table Entry */
  3024. +void ivhd_showdte(void)
  3025. +{
  3026. + int i;
  3027. +
  3028. + for (i = 0; i < 65536; i++) {
  3029. + if (hwdte[i].dw0) {
  3030. + printf("%.2x:%.2x.%x: %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
  3031. + i >> 8, (i >> 3) & 0x1F, i & 0x7,
  3032. + hwdte[i].dw0, hwdte[i].dw1,
  3033. + hwdte[i].dw2, hwdte[i].dw3,
  3034. + hwdte[i].dw4, hwdte[i].dw5,
  3035. + hwdte[i].dw6, hwdte[i].dw7);
  3036. + }
  3037. + }
  3038. +}
  3039. +
  3040. +/* AMD: Show command entries */
  3041. +void ivhd_showcmd(struct iommu_softc *iommu)
  3042. +{
  3043. + struct ivhd_command *ihd;
  3044. + paddr_t phd;
  3045. + int i;
  3046. +
  3047. + ihd = iommu->cmd_tbl;
  3048. + phd = iommu_readq(iommu, CMD_BASE_REG) & CMD_BASE_MASK;
  3049. + for (i = 0; i < 4096 / 128; i++) {
  3050. + printf("%.2x: %.16llx %.8x %.8x %.8x %.8x\n", i,
  3051. + (uint64_t)phd + i * sizeof(*ihd),
  3052. + ihd[i].dw0,ihd[i].dw1,ihd[i].dw2,ihd[i].dw3);
  3053. + }
  3054. +}
  3055. +
  3056. +void ivhd_showevt(struct iommu_softc *iommu)
  3057. +{
  3058. +}
  3059. +
  3060. +#define _c(x) (int)((iommu->ecap >> x ##_SHIFT) & x ## _MASK)
  3061. +
  3062. +/* AMD: Initialize IOMMU */
  3063. +int
  3064. +ivhd_iommu_init(struct acpidmar_softc *sc, struct iommu_softc *iommu,
  3065. + struct acpi_ivhd *ivhd)
  3066. +{
  3067. + static int niommu;
  3068. + paddr_t paddr;
  3069. + uint64_t ov;
  3070. +
  3071. + if (sc == NULL || iommu == NULL || ivhd == NULL) {
  3072. + printf("Bad pointer to iommu_init!\n");
  3073. + return -1;
  3074. + }
  3075. + if (_bus_space_map(sc->sc_memt, ivhd->address, 0x80000, 0, &iommu->ioh) != 0) {
  3076. + printf("Bus Space Map fails\n");
  3077. + return -1;
  3078. + }
  3079. + TAILQ_INIT(&iommu->domains);
  3080. + TAILQ_INIT(&iommu->devices);
  3081. +
  3082. + /* Setup address width and number of domains */
  3083. + iommu->id = ++niommu;
  3084. + iommu->iot = sc->sc_memt;
  3085. + iommu->mgaw = 48;
  3086. + iommu->agaw = 48;
  3087. + iommu->flags = 1;
  3088. + iommu->segment = 0;
  3089. + iommu->ndoms = 256;
  3090. +
  3091. + iommu->ecap = iommu_readq(iommu, EXTFEAT_REG);
  3092. + printf("ecap = %.16llx\n", iommu->ecap);
  3093. + printf("%s%s%s%s%s%s%s%s\n",
  3094. + iommu->ecap & EFR_PREFSUP ? "pref " : "",
  3095. + iommu->ecap & EFR_PPRSUP ? "ppr " : "",
  3096. + iommu->ecap & EFR_NXSUP ? "nx " : "",
  3097. + iommu->ecap & EFR_GTSUP ? "gt " : "",
  3098. + iommu->ecap & EFR_IASUP ? "ia " : "",
  3099. + iommu->ecap & EFR_GASUP ? "ga " : "",
  3100. + iommu->ecap & EFR_HESUP ? "he " : "",
  3101. + iommu->ecap & EFR_PCSUP ? "pc " : "");
  3102. + printf("hats:%x gats:%x glxsup:%x smif:%x smifrc:%x gam:%x\n",
  3103. + _c(EFR_HATS), _c(EFR_GATS), _c(EFR_GLXSUP), _c(EFR_SMIFSUP),
  3104. + _c(EFR_SMIFRC), _c(EFR_GAMSUP));
  3105. +
  3106. + /* Turn off iommu */
  3107. + ov = iommu_readq(iommu, IOMMUCTL_REG);
  3108. + iommu_writeq(iommu, IOMMUCTL_REG, ov & ~(CTL_IOMMUEN | CTL_COHERENT |
  3109. + CTL_HTTUNEN | CTL_RESPASSPW | CTL_PASSPW | CTL_ISOC));
  3110. +
   3111. + /* Setup command buffer: one 4k page (256 16-byte entries) */
  3112. + iommu->cmd_tbl = iommu_alloc_page(iommu, &paddr);
  3113. + iommu_writeq(iommu, CMD_BASE_REG, (paddr & CMD_BASE_MASK) | CMD_TBL_LEN_4K);
  3114. + iommu_writel(iommu, CMD_HEAD_REG, 0x00);
  3115. + iommu_writel(iommu, CMD_TAIL_REG, 0x00);
  3116. +
   3117. + /* Setup event log: one 4k page (256 16-byte entries) */
  3118. + iommu->evt_tbl = iommu_alloc_page(iommu, &paddr);
  3119. + iommu_writeq(iommu, EVT_BASE_REG, (paddr & EVT_BASE_MASK) | EVT_TBL_LEN_4K);
  3120. + iommu_writel(iommu, EVT_HEAD_REG, 0x00);
  3121. + iommu_writel(iommu, EVT_TAIL_REG, 0x00);
  3122. +
  3123. + /* Setup device table
  3124. + * 1 entry per source ID (bus:device:function - 64k entries)
  3125. + */
  3126. + iommu->dte = hwdte;
  3127. + pmap_extract(pmap_kernel(), (vaddr_t)iommu->dte, &paddr);
  3128. + iommu_writeq(iommu, DEV_TAB_BASE_REG, (paddr & DEV_TAB_MASK) | DEV_TAB_LEN);
  3129. +
  3130. + /* Enable IOMMU */
  3131. + ov |= (CTL_IOMMUEN | CTL_EVENTLOGEN | CTL_CMDBUFEN | CTL_EVENTINTEN);
  3132. + if (ivhd->flags & IVHD_COHERENT)
  3133. + ov |= CTL_COHERENT;
  3134. + if (ivhd->flags & IVHD_HTTUNEN)
  3135. + ov |= CTL_HTTUNEN;
  3136. + if (ivhd->flags & IVHD_RESPASSPW)
  3137. + ov |= CTL_RESPASSPW;
  3138. + if (ivhd->flags & IVHD_PASSPW)
  3139. + ov |= CTL_PASSPW;
  3140. + if (ivhd->flags & IVHD_ISOC)
  3141. + ov |= CTL_ISOC;
  3142. + ov &= ~(CTL_INVTIMEOUT_MASK << CTL_INVTIMEOUT_SHIFT);
  3143. + ov |= (CTL_INVTIMEOUT_1MS << CTL_INVTIMEOUT_SHIFT);
  3144. + iommu_writeq(iommu, IOMMUCTL_REG, ov);
  3145. +
  3146. + ivhd_invalidate_iommu_all(iommu);
  3147. + //ivhd_checkerr(iommu);
  3148. +
  3149. + TAILQ_INSERT_TAIL(&sc->sc_drhds, iommu, link);
  3150. + return 0;
  3151. +}
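+
+/*
+ * Note the device table is the shared global hwdte, not a per-IOMMU
+ * allocation: 64K source ids times the 32-byte entries dumped by
+ * ivhd_showdte() makes a 2MB table, and the single pmap_extract() above
+ * assumes it is physically contiguous.
+ */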
  3152. +
  3153. +void
  3154. +iommu_ivhd_add(struct iommu_softc *iommu, int start, int end, int cfg)
  3155. +{
  3156. + struct ivhd_devlist *idev;
  3157. +
  3158. + idev = malloc(sizeof(*idev), M_DEVBUF, M_ZERO | M_WAITOK);
  3159. + idev->start_id = start;
  3160. + idev->end_id = end;
  3161. + idev->cfg = cfg;
  3162. +}
  3163. +
  3164. +int acpiivrs_iommu_match(struct pci_attach_args *pa)
  3165. +{
  3166. + int b,d,f;
  3167. +
  3168. + pci_decompose_tag(pa->pa_pc, pa->pa_tag, &b, &d, &f);
  3169. + printf(" matchdev: %d.%d.%d\n", b, d, f);
  3170. + if (PCI_CLASS(pa->pa_class) == PCI_CLASS_SYSTEM &&
  3171. + PCI_SUBCLASS(pa->pa_class) == PCI_SUBCLASS_SYSTEM_IOMMU) {
  3172. + printf("iziommu\n");
  3173. + return (1);
  3174. + }
  3175. + return (0);
  3176. +}
  3177. +
  3178. +/* Setup alias mapping, either 1:1 or a->b */
  3179. +void
  3180. +acpiivrs_mkalias(struct acpi_ivhd *ivhd, int off, uint16_t start, uint16_t alias, uint16_t step)
  3181. +{
  3182. + union acpi_ivhd_entry *ie = NULL;
  3183. + int i;
  3184. +
   3185. + if (off+sizeof(ie->eor) > ivhd->length)
  3186. + return;
  3187. + ie = (void *)ivhd + off;
  3188. + if (ie->type != IVHD_EOR)
  3189. + return;
  3190. + printf("Set Alias: %.4x %.4x : %.4x/%x\n", start, ie->eor.devid, alias, step);
  3191. + for (i = start; i < ie->eor.devid; i++) {
  3192. + sid_alias[i] = alias | ALIAS_VALID;
  3193. + alias += step;
  3194. + }
  3195. +}
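+
+/*
+ * Example of the two call sites below, with hypothetical devids: a SOR at
+ * 0x0008 closed by an EOR at 0x000b uses step 1, aliasing each id to
+ * itself (0x0008..0x000a); an ALIAS_SOR over the same range uses step 0,
+ * pinning every id in the range to the single requester id carried in the
+ * alias entry, as a bridge would present it.
+ */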
  3196. +
  3197. +void
  3198. +acpiivrs_ivhd(struct acpidmar_softc *sc, struct acpi_ivhd *ivhd)
  3199. +{
  3200. + struct iommu_softc *iommu;
  3201. + struct acpi_ivhd_ext *ext;
  3202. + union acpi_ivhd_entry *ie;
  3203. + int off, dte, all_dte = 0;
  3204. + int alias, start;
  3205. +
  3206. + if (ivhd->type == IVRS_IVHD_EXT) {
  3207. + ext = (struct acpi_ivhd_ext *)ivhd;
  3208. + printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x %.16llx\n",
  3209. + ext->type, ext->flags, ext->length,
  3210. + ext->segment, dmar_bdf(ext->devid), ext->cap,
  3211. + ext->address, ext->info,
  3212. + ext->attrib, ext->efr);
  3213. + if (ext->flags & IVHD_PPRSUP)
  3214. + printf(" PPRSup");
  3215. + if (ext->flags & IVHD_PREFSUP)
  3216. + printf(" PreFSup");
  3217. + if (ext->flags & IVHD_COHERENT)
  3218. + printf(" Coherent");
  3219. + if (ext->flags & IVHD_IOTLB)
  3220. + printf(" Iotlb");
  3221. + if (ext->flags & IVHD_ISOC)
  3222. + printf(" ISoc");
  3223. + if (ext->flags & IVHD_RESPASSPW)
  3224. + printf(" ResPassPW");
  3225. + if (ext->flags & IVHD_PASSPW)
  3226. + printf(" PassPW");
  3227. + if (ext->flags & IVHD_HTTUNEN)
  3228. + printf( " HtTunEn");
  3229. + if (ext->flags)
  3230. + printf("\n");
  3231. + off = sizeof(*ext);
  3232. + iommu = malloc(sizeof(*iommu), M_DEVBUF, M_ZERO|M_WAITOK);
  3233. + ivhd_iommu_init(sc, iommu, ivhd);
  3234. + } else {
  3235. + printf("ivhd: %.2x %.2x %.4x %.4x:%s %.4x %.16llx %.4x %.8x\n",
  3236. + ivhd->type, ivhd->flags, ivhd->length,
  3237. + ivhd->segment, dmar_bdf(ivhd->devid), ivhd->cap,
  3238. + ivhd->address, ivhd->info,
  3239. + ivhd->feature);
  3240. + if (ivhd->flags & IVHD_PPRSUP)
  3241. + printf(" PPRSup");
  3242. + if (ivhd->flags & IVHD_PREFSUP)
  3243. + printf(" PreFSup");
  3244. + if (ivhd->flags & IVHD_COHERENT)
  3245. + printf(" Coherent");
  3246. + if (ivhd->flags & IVHD_IOTLB)
  3247. + printf(" Iotlb");
  3248. + if (ivhd->flags & IVHD_ISOC)
  3249. + printf(" ISoc");
  3250. + if (ivhd->flags & IVHD_RESPASSPW)
  3251. + printf(" ResPassPW");
  3252. + if (ivhd->flags & IVHD_PASSPW)
  3253. + printf(" PassPW");
  3254. + if (ivhd->flags & IVHD_HTTUNEN)
  3255. + printf( " HtTunEn");
  3256. + if (ivhd->flags)
  3257. + printf("\n");
  3258. + off = sizeof(*ivhd);
  3259. + }
  3260. + while (off < ivhd->length) {
  3261. + ie = (void *)ivhd + off;
  3262. + switch (ie->type) {
  3263. + case IVHD_ALL:
  3264. + all_dte = ie->all.data;
  3265. + printf(" ALL %.4x\n", dte);
  3266. + off += sizeof(ie->all);
  3267. + break;
  3268. + case IVHD_SEL:
  3269. + dte = ie->sel.data;
  3270. + printf(" SELECT: %s %.4x\n", dmar_bdf(ie->sel.devid), dte);
  3271. + off += sizeof(ie->sel);
  3272. + break;
  3273. + case IVHD_SOR:
  3274. + dte = ie->sor.data;
  3275. + start = ie->sor.devid;
  3276. + printf(" SOR: %s %.4x\n", dmar_bdf(start), dte);
  3277. + off += sizeof(ie->sor);
  3278. + /* Setup 1:1 alias mapping */
  3279. + acpiivrs_mkalias(ivhd, off, start, start, 1);
  3280. + break;
  3281. + case IVHD_EOR:
  3282. + printf(" EOR: %s\n", dmar_bdf(ie->eor.devid));
  3283. + off += sizeof(ie->eor);
  3284. + break;
  3285. + case IVHD_ALIAS_SEL:
  3286. + dte = ie->alias.data;
  3287. + printf(" ALIAS: src=%s: ", dmar_bdf(ie->alias.srcid));
  3288. + printf(" %s %.4x\n", dmar_bdf(ie->alias.devid), dte);
  3289. + off += sizeof(ie->alias);
  3290. + break;
  3291. + case IVHD_ALIAS_SOR:
  3292. + dte = ie->alias.data;
  3293. + start = ie->alias.srcid;
  3294. + alias = ie->alias.devid;
  3295. + printf(" ALIAS_SOR: %s %.4x ", dmar_bdf(ie->alias.devid), dte);
  3296. + printf(" src=%s\n", dmar_bdf(ie->alias.srcid));
  3297. + off += sizeof(ie->alias);
  3298. + /* Setup alias mapping */
  3299. + acpiivrs_mkalias(ivhd, off, start, alias, 0);
  3300. + break;
  3301. + case IVHD_EXT_SEL:
  3302. + dte = ie->ext.data;
  3303. + printf(" EXT SEL: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
  3304. + dte, ie->ext.extdata);
  3305. + off += sizeof(ie->ext);
  3306. + break;
  3307. + case IVHD_EXT_SOR:
  3308. + dte = ie->ext.data;
  3309. + printf(" EXT SOR: %s %.4x %.8x\n", dmar_bdf(ie->ext.devid),
  3310. + dte, ie->ext.extdata);
  3311. + off += sizeof(ie->ext);
  3312. + break;
  3313. + case IVHD_SPECIAL:
  3314. + printf(" SPECIAL\n");
  3315. + off += sizeof(ie->special);
  3316. + break;
  3317. + default:
  3318. + printf(" 2:unknown %x\n", ie->type);
  3319. + off = ivhd->length;
  3320. + break;
  3321. + }
  3322. + }
  3323. +}
  3324. +
  3325. +void
  3326. +acpiivrs_init(struct acpidmar_softc *sc, struct acpi_ivrs *ivrs)
  3327. +{
  3328. + union acpi_ivrs_entry *ie;
  3329. + int off;
  3330. +
  3331. + domain_map_page = domain_map_page_amd;
  3332. + printf("IVRS Version: %d\n", ivrs->hdr.revision);
  3333. + printf(" VA Size: %d\n", (ivrs->ivinfo >> IVRS_VASIZE_SHIFT) & IVRS_VASIZE_MASK);
  3334. + printf(" PA Size: %d\n", (ivrs->ivinfo >> IVRS_PASIZE_SHIFT) & IVRS_PASIZE_MASK);
  3335. +
  3336. + TAILQ_INIT(&sc->sc_drhds);
  3337. + TAILQ_INIT(&sc->sc_rmrrs);
  3338. + TAILQ_INIT(&sc->sc_atsrs);
  3339. +
  3340. + printf("======== IVRS\n");
  3341. + off = sizeof(*ivrs);
  3342. + while (off < ivrs->hdr.length) {
  3343. + ie = (void *)ivrs + off;
  3344. + switch (ie->type) {
  3345. + case IVRS_IVHD:
  3346. + case IVRS_IVHD_EXT:
  3347. + acpiivrs_ivhd(sc, &ie->ivhd);
  3348. + break;
  3349. + case IVRS_IVMD_ALL:
  3350. + case IVRS_IVMD_SPECIFIED:
  3351. + case IVRS_IVMD_RANGE:
  3352. + printf("ivmd\n");
  3353. + break;
  3354. + default:
  3355. + printf("1:unknown: %x\n", ie->type);
  3356. + break;
  3357. + }
  3358. + off += ie->length;
  3359. + }
  3360. + printf("======== End IVRS\n");
  3361. +}
  3362. +
  3363. +static int
  3364. +acpiivhd_activate(struct iommu_softc *iommu, int act)
  3365. +{
  3366. + switch (act) {
  3367. + case DVACT_SUSPEND:
  3368. + iommu->flags |= IOMMU_FLAGS_SUSPEND;
  3369. + break;
  3370. + case DVACT_RESUME:
  3371. + break;
  3372. + }
  3373. + return (0);
  3374. +}
  3375. +
  3376. +int
  3377. +acpidmar_activate(struct device *self, int act)
  3378. +{
  3379. + struct acpidmar_softc *sc = (struct acpidmar_softc *)self;
  3380. + struct iommu_softc *iommu;
  3381. +
  3382. + printf("called acpidmar_activate %d %p\n", act, sc);
  3383. +
  3384. + if (sc == NULL) {
  3385. + return (0);
  3386. + }
  3387. +
  3388. + switch (act) {
  3389. + case DVACT_RESUME:
  3390. + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
  3391. + printf("iommu%d resume\n", iommu->id);
  3392. + if (iommu->dte) {
  3393. + acpiivhd_activate(iommu, act);
  3394. + continue;
  3395. + }
  3396. + iommu_flush_write_buffer(iommu);
  3397. + iommu_set_rtaddr(iommu, iommu->rtaddr);
  3398. + iommu_writel(iommu, DMAR_FEDATA_REG, iommu->fedata);
  3399. + iommu_writel(iommu, DMAR_FEADDR_REG, iommu->feaddr);
  3400. + iommu_writel(iommu, DMAR_FEUADDR_REG,
  3401. + iommu->feaddr >> 32);
  3402. + if ((iommu->flags & (IOMMU_FLAGS_BAD|IOMMU_FLAGS_SUSPEND)) ==
  3403. + IOMMU_FLAGS_SUSPEND) {
  3404. + printf("enable wakeup translation\n");
  3405. + iommu_enable_translation(iommu, 1);
  3406. + }
  3407. + iommu_showcfg(iommu, -1);
  3408. + }
  3409. + break;
  3410. + case DVACT_SUSPEND:
  3411. + TAILQ_FOREACH(iommu, &sc->sc_drhds, link) {
  3412. + printf("iommu%d suspend\n", iommu->id);
  3413. + if (iommu->flags & IOMMU_FLAGS_BAD)
  3414. + continue;
  3415. + if (iommu->dte) {
  3416. + acpiivhd_activate(iommu, act);
  3417. + continue;
  3418. + }
  3419. + iommu->flags |= IOMMU_FLAGS_SUSPEND;
  3420. + iommu_enable_translation(iommu, 0);
  3421. + iommu_showcfg(iommu, -1);
  3422. + }
  3423. + break;
  3424. + }
  3425. + return (0);
  3426. +}
  3427. +
  3428. +void
  3429. +acpidmar_sw(int act)
  3430. +{
  3431. + if (acpidmar_sc)
  3432. + acpidmar_activate((void*)acpidmar_sc, act);
  3433. +}
  3434. +
  3435. +int
  3436. +acpidmar_match(struct device *parent, void *match, void *aux)
  3437. +{
  3438. + struct acpi_attach_args *aaa = aux;
  3439. + struct acpi_table_header *hdr;
  3440. +
  3441. + /* If we do not have a table, it is not us */
  3442. + if (aaa->aaa_table == NULL)
  3443. + return (0);
  3444. +
  3445. + /* If it is a DMAR or IVRS table, we can attach */
  3446. + hdr = (struct acpi_table_header *)aaa->aaa_table;
  3447. + if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0)
  3448. + return (1);
  3449. + if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0)
  3450. + return (1);
  3451. +
  3452. + return (0);
  3453. +}
  3454. +
  3455. +void
  3456. +acpidmar_attach(struct device *parent, struct device *self, void *aux)
  3457. +{
  3458. + struct acpidmar_softc *sc = (void *)self;
  3459. + struct acpi_attach_args *aaa = aux;
  3460. + struct acpi_dmar *dmar = (struct acpi_dmar *)aaa->aaa_table;
  3461. + struct acpi_ivrs *ivrs = (struct acpi_ivrs *)aaa->aaa_table;
  3462. + struct acpi_table_header *hdr;
  3463. +
  3464. + hdr = (struct acpi_table_header *)aaa->aaa_table;
  3465. + sc->sc_memt = aaa->aaa_memt;
  3466. + if (memcmp(hdr->signature, DMAR_SIG, sizeof(DMAR_SIG) - 1) == 0) {
  3467. + acpidmar_sc = sc;
  3468. + acpidmar_init(sc, dmar);
  3469. + }
  3470. + if (memcmp(hdr->signature, IVRS_SIG, sizeof(IVRS_SIG) - 1) == 0) {
  3471. + acpidmar_sc = sc;
  3472. + acpiivrs_init(sc, ivrs);
  3473. + }
  3474. +}
  3475. +
  3476. +/* Interrupt handling */
  3477. +void acpidmar_msi_hwmask(struct pic *, int);
  3478. +void acpidmar_msi_hwunmask(struct pic *, int);
  3479. +void acpidmar_msi_addroute(struct pic *, struct cpu_info *, int, int, int);
  3480. +void acpidmar_msi_delroute(struct pic *, struct cpu_info *, int, int, int);
  3481. +
  3482. +void
  3483. +acpidmar_msi_hwmask(struct pic *pic, int pin)
  3484. +{
  3485. + struct iommu_pic *ip = (void *)pic;
  3486. + struct iommu_softc *iommu = ip->iommu;
  3487. +
  3488. + printf("msi_hwmask\n");
  3489. +
  3490. + mtx_enter(&iommu->reg_lock);
  3491. +
  3492. + iommu_writel(iommu, DMAR_FECTL_REG, FECTL_IM);
  3493. + iommu_readl(iommu, DMAR_FECTL_REG);
  3494. +
  3495. + mtx_leave(&iommu->reg_lock);
  3496. +}
  3497. +
  3498. +void
  3499. +acpidmar_msi_hwunmask(struct pic *pic, int pin)
  3500. +{
  3501. + struct iommu_pic *ip = (void *)pic;
  3502. + struct iommu_softc *iommu = ip->iommu;
  3503. +
  3504. + printf("msi_hwunmask\n");
  3505. +
  3506. + mtx_enter(&iommu->reg_lock);
  3507. +
  3508. + iommu_writel(iommu, DMAR_FECTL_REG, 0);
  3509. + iommu_readl(iommu, DMAR_FECTL_REG);
  3510. +
  3511. + mtx_leave(&iommu->reg_lock);
  3512. +}
  3513. +
  3514. +void
  3515. +acpidmar_msi_addroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
  3516. + int type)
  3517. +{
  3518. + struct iommu_pic *ip = (void *)pic;
  3519. + struct iommu_softc *iommu = ip->iommu;
  3520. +
  3521. + mtx_enter(&iommu->reg_lock);
  3522. +
  3523. + iommu->fedata = vec;
  3524. + iommu->feaddr = 0xfee00000L | (ci->ci_apicid << 12);
  3525. + iommu_writel(iommu, DMAR_FEDATA_REG, vec);
  3526. + iommu_writel(iommu, DMAR_FEADDR_REG, iommu->feaddr);
  3527. + iommu_writel(iommu, DMAR_FEUADDR_REG, iommu->feaddr >> 32);
  3528. +
  3529. + mtx_leave(&iommu->reg_lock);
  3530. +}
  3531. +
  3532. +void
  3533. +acpidmar_msi_delroute(struct pic *pic, struct cpu_info *ci, int pin, int vec,
  3534. + int type)
  3535. +{
  3536. + printf("msi_delroute\n");
  3537. +}
  3538. +
  3539. +void *
  3540. +acpidmar_intr_establish(void *ctx, int level, int (*func)(void *),
  3541. + void *arg, const char *what)
  3542. +{
  3543. + struct iommu_softc *iommu = ctx;
  3544. + struct pic *pic;
  3545. +
  3546. + pic = &iommu->pic.pic;
  3547. + iommu->pic.iommu = iommu;
  3548. +
  3549. + strlcpy(pic->pic_dev.dv_xname, "dmarpic",
  3550. + sizeof(pic->pic_dev.dv_xname));
  3551. + pic->pic_type = PIC_MSI;
  3552. + pic->pic_hwmask = acpidmar_msi_hwmask;
  3553. + pic->pic_hwunmask = acpidmar_msi_hwunmask;
  3554. + pic->pic_addroute = acpidmar_msi_addroute;
  3555. + pic->pic_delroute = acpidmar_msi_delroute;
  3556. + pic->pic_edge_stubs = ioapic_edge_stubs;
  3557. +#ifdef MULTIPROCESSOR
  3558. + mtx_init(&pic->pic_mutex, level);
  3559. +#endif
  3560. +
  3561. + return intr_establish(-1, pic, 0, IST_PULSE, level, NULL, func, arg, what);
  3562. +}
  3563. +
  3564. +int
  3565. +acpidmar_intr(void *ctx)
  3566. +{
  3567. + struct iommu_softc *iommu = ctx;
  3568. + struct fault_entry fe;
  3569. + static struct fault_entry ofe;
  3570. + int fro, nfr, fri, i;
  3571. + uint32_t sts;
  3572. +
  3573. + if (iommu->dte) {
  3574. + ivhd_poll_events(iommu);
  3575. + return 1;
  3576. + }
  3577. +
  3578. + /* splassert(IPL_HIGH); */
  3579. +
  3580. + if (!(iommu->gcmd & GCMD_TE)) {
  3581. + return (1);
  3582. + }
  3583. + mtx_enter(&iommu->reg_lock);
  3584. + sts = iommu_readl(iommu, DMAR_FECTL_REG);
  3585. + sts = iommu_readl(iommu, DMAR_FSTS_REG);
  3586. +
  3587. + if (!(sts & FSTS_PPF)) {
  3588. + mtx_leave(&iommu->reg_lock);
  3589. + return (1);
  3590. + }
  3591. +
  3592. + nfr = cap_nfr(iommu->cap);
  3593. + fro = cap_fro(iommu->cap);
  3594. + fri = (sts >> FSTS_FRI_SHIFT) & FSTS_FRI_MASK;
  3595. + for (i = 0; i < nfr; i++) {
  3596. + fe.hi = iommu_readq(iommu, fro + (fri*16) + 8);
  3597. + if (!(fe.hi & FRCD_HI_F))
  3598. + break;
  3599. +
  3600. + fe.lo = iommu_readq(iommu, fro + (fri*16));
  3601. + if (ofe.hi != fe.hi || ofe.lo != fe.lo) {
  3602. + iommu_showfault(iommu, fri, &fe);
  3603. + ofe.hi = fe.hi;
  3604. + ofe.lo = fe.lo;
  3605. + }
  3606. + fri = (fri + 1) % nfr;
  3607. + }
  3608. +
  3609. + iommu_writel(iommu, DMAR_FSTS_REG, FSTS_PFO | FSTS_PPF);
  3610. +
  3611. + mtx_leave(&iommu->reg_lock);
  3612. +
  3613. + return (1);
  3614. +}
  3615. +
  3616. +const char *vtd_faults[] = {
  3617. + "Software",
  3618. + "Root Entry Not Present", /* ok (rtaddr + 4096) */
  3619. + "Context Entry Not Present", /* ok (no CTX_P) */
  3620. + "Context Entry Invalid", /* ok (tt = 3) */
  3621. + "Address Beyond MGAW",
  3622. + "Write", /* ok */
  3623. + "Read", /* ok */
  3624. + "Paging Entry Invalid", /* ok */
  3625. + "Root Table Invalid",
  3626. + "Context Table Invalid",
  3627. + "Root Entry Reserved", /* ok (root.lo |= 0x4) */
  3628. + "Context Entry Reserved",
  3629. + "Paging Entry Reserved",
  3630. + "Context Entry TT",
  3631. + "Reserved",
  3632. +};
  3633. +
  3634. +void iommu_showpte(uint64_t, int, uint64_t);
  3635. +
  3636. +void
  3637. +iommu_showpte(uint64_t ptep, int lvl, uint64_t base)
  3638. +{
  3639. + uint64_t nb, pb, i;
  3640. + struct pte_entry *pte;
  3641. +
  3642. + pte = (void *)PMAP_DIRECT_MAP(ptep);
  3643. + for (i = 0; i < 512; i++) {
  3644. + if (!(pte[i].val & PTE_P))
  3645. + continue;
  3646. + nb = base + (i << lvl);
  3647. + pb = pte[i].val & ~VTD_PAGE_MASK;
  3648. + if (lvl == VTD_LEVEL0) {
  3649. + printf(" %3llx %.16llx = %.16llx %c%c %s\n",
  3650. + i, nb, pb,
  3651. + pte[i].val == PTE_R ? 'r' : ' ',
  3652. + pte[i].val & PTE_W ? 'w' : ' ',
  3653. + (nb == pb) ? " ident" : "");
  3654. + if (nb == pb)
  3655. + return;
  3656. + } else {
  3657. + iommu_showpte(pb, lvl - VTD_STRIDE_SIZE, nb);
  3658. + }
  3659. + }
  3660. +}
  3661. +
  3662. +void
  3663. +iommu_showcfg(struct iommu_softc *iommu, int sid)
  3664. +{
  3665. + int i, j, sts, cmd;
  3666. + struct context_entry *ctx;
  3667. + pcitag_t tag;
  3668. + pcireg_t clc;
  3669. +
  3670. + cmd = iommu_readl(iommu, DMAR_GCMD_REG);
  3671. + sts = iommu_readl(iommu, DMAR_GSTS_REG);
  3672. + printf("iommu%d: flags:%d root pa:%.16llx %s %s %s %.8x %.8x\n",
  3673. + iommu->id, iommu->flags, iommu_readq(iommu, DMAR_RTADDR_REG),
  3674. + sts & GSTS_TES ? "enabled" : "disabled",
  3675. + sts & GSTS_QIES ? "qi" : "ccmd",
  3676. + sts & GSTS_IRES ? "ir" : "",
  3677. + cmd, sts);
  3678. + for (i = 0; i < 256; i++) {
  3679. + if (!root_entry_is_valid(&iommu->root[i])) {
  3680. + continue;
  3681. + }
  3682. + for (j = 0; j < 256; j++) {
  3683. + ctx = iommu->ctx[i] + j;
  3684. + if (!context_entry_is_valid(ctx)) {
  3685. + continue;
  3686. + }
  3687. + tag = pci_make_tag(NULL, i, (j >> 3), j & 0x7);
  3688. + clc = pci_conf_read(NULL, tag, 0x08) >> 8;
  3689. + printf(" %.2x:%.2x.%x lvl:%d did:%.4x tt:%d ptep:%.16llx flag:%x cc:%.6x\n",
  3690. + i, (j >> 3), j & 7,
  3691. + context_address_width(ctx),
  3692. + context_domain_id(ctx),
  3693. + context_translation_type(ctx),
  3694. + context_pte(ctx),
  3695. + context_user(ctx),
  3696. + clc);
  3697. +#if 0
  3698. + /* dump pagetables */
  3699. + iommu_showpte(ctx->lo & ~VTD_PAGE_MASK, iommu->agaw -
  3700. + VTD_STRIDE_SIZE, 0);
  3701. +#endif
  3702. + }
  3703. + }
  3704. +}
  3705. +
  3706. +void
  3707. +iommu_showfault(struct iommu_softc *iommu, int fri, struct fault_entry *fe)
  3708. +{
  3709. + int bus, dev, fun, type, fr, df;
  3710. + bios_memmap_t *im;
  3711. + const char *mapped;
  3712. +
  3713. + if (!(fe->hi & FRCD_HI_F))
  3714. + return;
  3715. + type = (fe->hi & FRCD_HI_T) ? 'r' : 'w';
  3716. + fr = (fe->hi >> FRCD_HI_FR_SHIFT) & FRCD_HI_FR_MASK;
  3717. + bus = (fe->hi >> FRCD_HI_BUS_SHIFT) & FRCD_HI_BUS_MASK;
  3718. + dev = (fe->hi >> FRCD_HI_DEV_SHIFT) & FRCD_HI_DEV_MASK;
  3719. + fun = (fe->hi >> FRCD_HI_FUN_SHIFT) & FRCD_HI_FUN_MASK;
  3720. + df = (fe->hi >> FRCD_HI_FUN_SHIFT) & 0xFF;
  3721. + iommu_showcfg(iommu, mksid(bus,dev,fun));
  3722. + if (!iommu->ctx[bus]) {
  3723. + /* Bus is not initialized */
  3724. + mapped = "nobus";
  3725. + } else if (!context_entry_is_valid(&iommu->ctx[bus][df])) {
  3726. + /* DevFn not initialized */
  3727. + mapped = "nodevfn";
  3728. + } else if (context_user(&iommu->ctx[bus][df]) != 0xA) {
  3729. + /* no bus_space_map */
  3730. + mapped = "nomap";
  3731. + } else {
  3732. + /* bus_space_map */
  3733. + mapped = "mapped";
  3734. + }
  3735. + printf("fri%d: dmar: %.2x:%.2x.%x %s error at %llx fr:%d [%s] iommu:%d [%s]\n",
  3736. + fri, bus, dev, fun,
  3737. + type == 'r' ? "read" : "write",
  3738. + fe->lo,
  3739. + fr, fr <= 13 ? vtd_faults[fr] : "unknown",
  3740. + iommu->id,
  3741. + mapped);
  3742. + for (im = bios_memmap; im->type != BIOS_MAP_END; im++) {
  3743. + if ((im->type == BIOS_MAP_RES) &&
  3744. + (im->addr <= fe->lo) &&
  3745. + (fe->lo <= im->addr+im->size)) {
  3746. + printf("mem in e820.reserved\n");
  3747. + }
  3748. + }
  3749. +#ifdef DDB
  3750. + if (acpidmar_ddb)
  3751. + db_enter();
  3752. +#endif
  3753. +}
  3754. +
  3755. +
  3756. diff --git a/sys/dev/acpi/acpidmar.h b/sys/dev/acpi/acpidmar.h
  3757. new file mode 100644
  3758. index 000000000..2a0b74b10
  3759. --- /dev/null
  3760. +++ b/sys/dev/acpi/acpidmar.h
  3761. @@ -0,0 +1,536 @@
  3762. +/*
  3763. + * Copyright (c) 2015 Jordan Hargrave <[email protected]>
  3764. + *
  3765. + * Permission to use, copy, modify, and distribute this software for any
  3766. + * purpose with or without fee is hereby granted, provided that the above
  3767. + * copyright notice and this permission notice appear in all copies.
  3768. + *
  3769. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  3770. + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  3771. + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  3772. + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  3773. + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  3774. + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  3775. + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  3776. + */
  3777. +
  3778. +#ifndef _DEV_ACPI_DMARREG_H_
  3779. +#define _DEV_ACPI_DMARREG_H_
  3780. +
  3781. +#define VTD_STRIDE_MASK 0x1FF
  3782. +#define VTD_STRIDE_SIZE 9
  3783. +#define VTD_PAGE_SIZE 4096
  3784. +#define VTD_PAGE_MASK 0xFFF
  3785. +#define VTD_PTE_MASK 0x0000FFFFFFFFF000LL
  3786. +
  3787. +#define VTD_LEVEL0 12
  3788. +#define VTD_LEVEL1 21
  3789. +#define VTD_LEVEL2 30 /* Minimum level supported */
  3790. +#define VTD_LEVEL3 39 /* Also supported */
  3791. +#define VTD_LEVEL4 48
  3792. +#define VTD_LEVEL5 57
  3793. +
  3794. +#define _xbit(x,y) (((x)>> (y)) & 1)
  3795. +#define _xfld(x,y) (uint32_t)(((x)>> y##_SHIFT) & y##_MASK)
  3796. +
  3797. +#define VTD_AWTOLEVEL(x) (((x) - 30) / VTD_STRIDE_SIZE)
  3798. +#define VTD_LEVELTOAW(x) (((x) * VTD_STRIDE_SIZE) + 30)
  3799. +
  3800. +#define DMAR_VER_REG 0x00 /* 32:Arch version supported by this IOMMU */
  3801. +#define DMAR_RTADDR_REG 0x20 /* 64:Root entry table */
  3802. +#define DMAR_FEDATA_REG 0x3c /* 32:Fault event interrupt data register */
  3803. +#define DMAR_FEADDR_REG 0x40 /* 32:Fault event interrupt addr register */
  3804. +#define DMAR_FEUADDR_REG 0x44 /* 32:Upper address register */
  3805. +#define DMAR_AFLOG_REG 0x58 /* 64:Advanced Fault control */
  3806. +#define DMAR_PMEN_REG 0x64 /* 32:Enable Protected Memory Region */
  3807. +#define DMAR_PLMBASE_REG 0x68 /* 32:PMRR Low addr */
  3808. +#define DMAR_PLMLIMIT_REG 0x6c /* 32:PMRR low limit */
  3809. +#define DMAR_PHMBASE_REG 0x70 /* 64:pmrr high base addr */
  3810. +#define DMAR_PHMLIMIT_REG 0x78 /* 64:pmrr high limit */
  3811. +#define DMAR_ICS_REG 0x9C /* 32:Invalidation complete status register */
  3812. +#define DMAR_IECTL_REG 0xa0 /* 32:Invalidation event control register */
  3813. +#define DMAR_IEDATA_REG 0xa4 /* 32:Invalidation event data register */
  3814. +#define DMAR_IEADDR_REG 0xa8 /* 32:Invalidation event address register */
  3815. +#define DMAR_IEUADDR_REG 0xac /* 32:Invalidation event upper address register */
  3816. +#define DMAR_IRTA_REG 0xb8 /* 64:Interrupt remapping table addr register */
  3817. +#define DMAR_CAP_REG 0x08 /* 64:Hardware supported capabilities */
  3818. +#define CAP_PI (1LL << 59)
  3819. +#define CAP_FL1GP (1LL << 56)
  3820. +#define CAP_DRD (1LL << 55)
  3821. +#define CAP_DWD (1LL << 54)
  3822. +#define CAP_MAMV_MASK 0x3F
  3823. +#define CAP_MAMV_SHIFT 48LL
  3824. +#define cap_mamv(x) _xfld(x,CAP_MAMV)
  3825. +#define CAP_NFR_MASK 0xFF
  3826. +#define CAP_NFR_SHIFT 40LL
  3827. +#define cap_nfr(x) (_xfld(x,CAP_NFR) + 1)
  3828. +#define CAP_PSI (1LL << 39)
  3829. +#define CAP_SLLPS_MASK 0xF
  3830. +#define CAP_SLLPS_SHIFT 34LL
  3831. +#define cap_sllps(x) _xfld(x,CAP_SLLPS)
  3832. +#define CAP_FRO_MASK 0x3FF
  3833. +#define CAP_FRO_SHIFT 24LL
  3834. +#define cap_fro(x) (_xfld(x,CAP_FRO) * 16)
  3835. +#define CAP_ZLR (1LL << 22)
  3836. +#define CAP_MGAW_MASK 0x3F
  3837. +#define CAP_MGAW_SHIFT 16LL
  3838. +#define cap_mgaw(x) (_xfld(x,CAP_MGAW) + 1)
  3839. +#define CAP_SAGAW_MASK 0x1F
  3840. +#define CAP_SAGAW_SHIFT 8LL
  3841. +#define cap_sagaw(x) _xfld(x,CAP_SAGAW)
  3842. +#define CAP_CM (1LL << 7)
  3843. +#define CAP_PHMR (1LL << 6)
  3844. +#define CAP_PLMR (1LL << 5)
  3845. +#define CAP_RWBF (1LL << 4)
  3846. +#define CAP_AFL (1LL << 3)
  3847. +#define CAP_ND_MASK 0x7
  3848. +#define CAP_ND_SHIFT 0x00
  3849. +#define cap_nd(x) (16 << (((x) & CAP_ND_MASK) << 1))
  3850. +
  3851. +#define DMAR_ECAP_REG 0x10 /* 64:Extended capabilities supported */
  3852. +#define ECAP_PSS_MASK 0x1F
  3853. +#define ECAP_PSS_SHIFT 35
  3854. +#define ECAP_EAFS (1LL << 34)
  3855. +#define ECAP_NWFS (1LL << 33)
  3856. +#define ECAP_SRS (1LL << 31)
  3857. +#define ECAP_ERS (1LL << 30)
  3858. +#define ECAP_PRS (1LL << 29)
  3859. +#define ECAP_PASID (1LL << 28)
  3860. +#define ECAP_DIS (1LL << 27)
  3861. +#define ECAP_NEST (1LL << 26)
  3862. +#define ECAP_MTS (1LL << 25)
  3863. +#define ECAP_ECS (1LL << 24)
  3864. +#define ECAP_MHMV_MASK 0xF
  3865. +#define ECAP_MHMV_SHIFT 0x20
  3866. +#define ecap_mhmv(x) _xfld(x,ECAP_MHMV)
  3867. +#define ECAP_IRO_MASK 0x3FF /* IOTLB Register */
  3868. +#define ECAP_IRO_SHIFT 0x8
  3869. +#define ecap_iro(x) (_xfld(x,ECAP_IRO) * 16)
  3870. +#define ECAP_SC (1LL << 7) /* Snoop Control */
  3871. +#define ECAP_PT (1LL << 6) /* HW Passthru */
  3872. +#define ECAP_EIM (1LL << 4)
  3873. +#define ECAP_IR (1LL << 3) /* Interrupt remap */
  3874. +#define ECAP_DT (1LL << 2) /* Device IOTLB */
  3875. +#define ECAP_QI (1LL << 1) /* Queued Invalidation */
  3876. +#define ECAP_C (1LL << 0) /* Coherent cache */
  3877. +
  3878. +#define DMAR_GCMD_REG 0x18 /* 32:Global command register */
  3879. +#define GCMD_TE (1LL << 31)
  3880. +#define GCMD_SRTP (1LL << 30)
  3881. +#define GCMD_SFL (1LL << 29)
  3882. +#define GCMD_EAFL (1LL << 28)
  3883. +#define GCMD_WBF (1LL << 27)
  3884. +#define GCMD_QIE (1LL << 26)
  3885. +#define GCMD_IRE (1LL << 25)
  3886. +#define GCMD_SIRTP (1LL << 24)
  3887. +#define GCMD_CFI (1LL << 23)
  3888. +
  3889. +#define DMAR_GSTS_REG 0x1c /* 32:Global status register */
  3890. +#define GSTS_TES (1LL << 31)
  3891. +#define GSTS_RTPS (1LL << 30)
  3892. +#define GSTS_FLS (1LL << 29)
  3893. +#define GSTS_AFLS (1LL << 28)
  3894. +#define GSTS_WBFS (1LL << 27)
  3895. +#define GSTS_QIES (1LL << 26)
  3896. +#define GSTS_IRES (1LL << 25)
  3897. +#define GSTS_IRTPS (1LL << 24)
  3898. +#define GSTS_CFIS (1LL << 23)
  3899. +
  3900. +#define DMAR_CCMD_REG 0x28 /* 64:Context command reg */
  3901. +#define CCMD_ICC (1LL << 63)
  3902. +#define CCMD_CIRG_MASK 0x3
  3903. +#define CCMD_CIRG_SHIFT 61
  3904. +#define CCMD_CIRG(x) ((uint64_t)(x) << CCMD_CIRG_SHIFT)
  3905. +#define CCMD_CAIG_MASK 0x3
  3906. +#define CCMD_CAIG_SHIFT 59
  3907. +#define CCMD_FM_MASK 0x3
  3908. +#define CCMD_FM_SHIFT 32
  3909. +#define CCMD_FM(x) (((uint64_t)(x) << CCMD_FM_SHIFT))
  3910. +#define CCMD_SID_MASK 0xFFFF
  3911. +#define CCMD_SID_SHIFT 8
  3912. +#define CCMD_SID(x) (((x) << CCMD_SID_SHIFT))
  3913. +#define CCMD_DID_MASK 0xFFFF
  3914. +#define CCMD_DID_SHIFT 0
  3915. +#define CCMD_DID(x) (((x) << CCMD_DID_SHIFT))
  3916. +
  3917. +#define CIG_GLOBAL CCMD_CIRG(CTX_GLOBAL)
  3918. +#define CIG_DOMAIN CCMD_CIRG(CTX_DOMAIN)
  3919. +#define CIG_DEVICE CCMD_CIRG(CTX_DEVICE)
  3920. +
  3921. +
  3922. +#define DMAR_FSTS_REG 0x34 /* 32:Fault Status register */
  3923. +#define FSTS_FRI_MASK 0xFF
  3924. +#define FSTS_FRI_SHIFT 8
  3925. +#define FSTS_PRO (1LL << 7)
  3926. +#define FSTS_ITE (1LL << 6)
  3927. +#define FSTS_ICE (1LL << 5)
  3928. +#define FSTS_IQE (1LL << 4)
  3929. +#define FSTS_APF (1LL << 3)
  3930. +#define FSTS_APO (1LL << 2)
  3931. +#define FSTS_PPF (1LL << 1)
  3932. +#define FSTS_PFO (1LL << 0)
  3933. +
  3934. +#define DMAR_FECTL_REG 0x38 /* 32:Fault control register */
  3935. +#define FECTL_IM (1LL << 31)
  3936. +#define FECTL_IP (1LL << 30)
  3937. +
  3938. +#define FRCD_HI_F (1LL << (127-64))
  3939. +#define FRCD_HI_T (1LL << (126-64))
  3940. +#define FRCD_HI_AT_MASK 0x3
  3941. +#define FRCD_HI_AT_SHIFT (124-64)
  3942. +#define FRCD_HI_PV_MASK 0xFFFFF
  3943. +#define FRCD_HI_PV_SHIFT (104-64)
  3944. +#define FRCD_HI_FR_MASK 0xFF
  3945. +#define FRCD_HI_FR_SHIFT (96-64)
  3946. +#define FRCD_HI_PP (1LL << (95-64))
  3947. +
  3948. +#define FRCD_HI_SID_MASK 0xFF
  3949. +#define FRCD_HI_SID_SHIFT 0
  3950. +#define FRCD_HI_BUS_SHIFT 8
  3951. +#define FRCD_HI_BUS_MASK 0xFF
  3952. +#define FRCD_HI_DEV_SHIFT 3
  3953. +#define FRCD_HI_DEV_MASK 0x1F
  3954. +#define FRCD_HI_FUN_SHIFT 0
  3955. +#define FRCD_HI_FUN_MASK 0x7
  3956. +
  3957. +#define DMAR_IOTLB_REG(x) (ecap_iro((x)->ecap) + 8)
  3958. +#define DMAR_IVA_REG(x) (ecap_iro((x)->ecap) + 0)
  3959. +
  3960. +#define DMAR_FRIH_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 8)
  3961. +#define DMAR_FRIL_REG(x,i) (cap_fro((x)->cap) + 16*(i) + 0)
  3962. +
  3963. +#define IOTLB_IVT (1LL << 63)
  3964. +#define IOTLB_IIRG_MASK 0x3
  3965. +#define IOTLB_IIRG_SHIFT 60
  3966. +#define IOTLB_IIRG(x) ((uint64_t)(x) << IOTLB_IIRG_SHIFT)
  3967. +#define IOTLB_IAIG_MASK 0x3
  3968. +#define IOTLB_IAIG_SHIFT 57
  3969. +#define IOTLB_DR (1LL << 49)
  3970. +#define IOTLB_DW (1LL << 48)
  3971. +#define IOTLB_DID_MASK 0xFFFF
  3972. +#define IOTLB_DID_SHIFT 32
  3973. +#define IOTLB_DID(x) ((uint64_t)(x) << IOTLB_DID_SHIFT)
  3974. +
  3975. +#define IIG_GLOBAL IOTLB_IIRG(IOTLB_GLOBAL)
  3976. +#define IIG_DOMAIN IOTLB_IIRG(IOTLB_DOMAIN)
  3977. +#define IIG_PAGE IOTLB_IIRG(IOTLB_PAGE)
  3978. +
  3979. +#define DMAR_IQH_REG 0x80 /* 64:Invalidation queue head register */
  3980. +#define DMAR_IQT_REG 0x88 /* 64:Invalidation queue tail register */
  3981. +#define DMAR_IQA_REG 0x90 /* 64:Invalidation queue addr register */
  3982. +#define IQA_QS_256 0 /* 256 entries */
  3983. +#define IQA_QS_512 1 /* 512 */
  3984. +#define IQA_QS_1K 2 /* 1024 */
  3985. +#define IQA_QS_2K 3 /* 2048 */
  3986. +#define IQA_QS_4K 4 /* 4096 */
  3987. +#define IQA_QS_8K 5 /* 8192 */
  3988. +#define IQA_QS_16K 6 /* 16384 */
  3989. +#define IQA_QS_32K 7 /* 32768 */
  3990. +
  3991. +/* Read-Modify-Write helpers */
  3992. +static inline void iommu_rmw32(void *ov, uint32_t mask, uint32_t shift, uint32_t nv)
  3993. +{
  3994. + *(uint32_t *)ov &= ~(mask << shift);
  3995. + *(uint32_t *)ov |= (nv & mask) << shift;
  3996. +}
  3997. +static inline void iommu_rmw64(void *ov, uint32_t mask, uint32_t shift, uint64_t nv)
  3998. +{
  3999. + *(uint64_t *)ov &= ~((uint64_t)mask << shift);
  4000. + *(uint64_t *)ov |= (nv & mask) << shift;
  4001. +}
  4002. +
  4003. +/*
  4004. + * Root Entry: one per bus (256 x 128 bit = 4k)
  4005. + * 0 = Present
  4006. + * 1:11 = Reserved
  4007. + * 12:HAW-1 = Context Table Pointer
  4008. + * HAW:63 = Reserved
  4009. + * 64:127 = Reserved
  4010. + */
  4011. +#define ROOT_P (1L << 0)
  4012. +struct root_entry {
  4013. + uint64_t lo;
  4014. + uint64_t hi;
  4015. +};
  4016. +
  4017. +/* Check if root entry is valid */
  4018. +static inline bool
  4019. +root_entry_is_valid(struct root_entry *re)
  4020. +{
  4021. + return (re->lo & ROOT_P);
  4022. +}
  4023. +
  4024. +/*
  4025. + * Context Entry: one per devfn (256 x 128 bit = 4k)
  4026. + * 0 = Present
  4027. + * 1 = Fault Processing Disable
  4028. + * 2:3 = Translation Type
  4029. + * 4:11 = Reserved
  4030. + * 12:63 = Second Level Page Translation
  4031. + * 64:66 = Address Width (# PTE levels)
  4032. + * 67:70 = Ignore
  4033. + * 71 = Reserved
  4034. + * 72:87 = Domain ID
  4035. + * 88:127 = Reserved
  4036. + */
  4037. +#define CTX_P (1L << 0)
  4038. +#define CTX_FPD (1L << 1)
  4039. +#define CTX_T_MASK 0x3
  4040. +#define CTX_T_SHIFT 2
  4041. +enum {
  4042. + CTX_T_MULTI,
  4043. + CTX_T_IOTLB,
  4044. + CTX_T_PASSTHRU
  4045. +};
  4046. +
  4047. +#define CTX_H_AW_MASK 0x7
  4048. +#define CTX_H_AW_SHIFT 0
  4049. +#define CTX_H_USER_MASK 0xF
  4050. +#define CTX_H_USER_SHIFT 3
  4051. +#define CTX_H_DID_MASK 0xFFFF
  4052. +#define CTX_H_DID_SHIFT 8
  4053. +
  4054. +struct context_entry {
  4055. + uint64_t lo;
  4056. + uint64_t hi;
  4057. +};
  4058. +
  4059. +/* Set fault processing enable/disable */
  4060. +static inline void
  4061. +context_set_fpd(struct context_entry *ce, int enable)
  4062. +{
  4063. + ce->lo &= ~CTX_FPD;
  4064. + if (enable)
  4065. + ce->lo |= CTX_FPD;
  4066. +}
  4067. +
  4068. +/* Set context entry present */
  4069. +static inline void
  4070. +context_set_present(struct context_entry *ce)
  4071. +{
  4072. + ce->lo |= CTX_P;
  4073. +}
  4074. +
  4075. +/* Set Second Level Page Table Entry PA */
  4076. +static inline void
  4077. +context_set_slpte(struct context_entry *ce, paddr_t slpte)
  4078. +{
  4079. + ce->lo &= VTD_PAGE_MASK;
  4080. + ce->lo |= (slpte & ~VTD_PAGE_MASK);
  4081. +}
  4082. +
  4083. +/* Set translation type */
  4084. +static inline void
  4085. +context_set_translation_type(struct context_entry *ce, int tt)
  4086. +{
  4087. + ce->lo &= ~(CTX_T_MASK << CTX_T_SHIFT);
  4088. + ce->lo |= ((tt & CTX_T_MASK) << CTX_T_SHIFT);
  4089. +}
  4090. +
  4091. +/* Set Address Width (# of Page Table levels) */
  4092. +static inline void
  4093. +context_set_address_width(struct context_entry *ce, int lvl)
  4094. +{
  4095. + ce->hi &= ~(CTX_H_AW_MASK << CTX_H_AW_SHIFT);
  4096. + ce->hi |= ((lvl & CTX_H_AW_MASK) << CTX_H_AW_SHIFT);
  4097. +}
  4098. +
  4099. +/* Set domain ID */
  4100. +static inline void
  4101. +context_set_domain_id(struct context_entry *ce, int did)
  4102. +{
  4103. + ce->hi &= ~(CTX_H_DID_MASK << CTX_H_DID_SHIFT);
  4104. + ce->hi |= ((did & CTX_H_DID_MASK) << CTX_H_DID_SHIFT);
  4105. +}
  4106. +
  4107. +/* Get Second Level Page Table PA */
  4108. +static inline uint64_t
  4109. +context_pte(struct context_entry *ce)
  4110. +{
  4111. + return (ce->lo & ~VTD_PAGE_MASK);
  4112. +}
  4113. +
  4114. +/* Get translation type */
  4115. +static inline int
  4116. +context_translation_type(struct context_entry *ce)
  4117. +{
  4118. + return (ce->lo >> CTX_T_SHIFT) & CTX_T_MASK;
  4119. +}
  4120. +
  4121. +/* Get domain ID */
  4122. +static inline int
  4123. +context_domain_id(struct context_entry *ce)
  4124. +{
  4125. + return (ce->hi >> CTX_H_DID_SHIFT) & CTX_H_DID_MASK;
  4126. +}
  4127. +
  4128. +/* Get Address Width */
  4129. +static inline int
  4130. +context_address_width(struct context_entry *ce)
  4131. +{
  4132. + return VTD_LEVELTOAW((ce->hi >> CTX_H_AW_SHIFT) & CTX_H_AW_MASK);
  4133. +}
  4134. +
  4135. +/* Check if context entry is valid */
  4136. +static inline bool
  4137. +context_entry_is_valid(struct context_entry *ce)
  4138. +{
  4139. + return (ce->lo & CTX_P);
  4140. +}
  4141. +
  4142. +/* User-available bits in context entry */
  4143. +static inline int
  4144. +context_user(struct context_entry *ce)
  4145. +{
  4146. + return (ce->hi >> CTX_H_USER_SHIFT) & CTX_H_USER_MASK;
  4147. +}
  4148. +
  4149. +static inline void
  4150. +context_set_user(struct context_entry *ce, int v)
  4151. +{
  4152. + ce->hi &= ~(CTX_H_USER_MASK << CTX_H_USER_SHIFT);
  4153. + ce->hi |= ((v & CTX_H_USER_MASK) << CTX_H_USER_SHIFT);
  4154. +}
  4155. +
  4156. +/*
  4157. + * Fault entry
  4158. + * 0..HAW-1 = Fault address
  4159. + * HAW:63 = Reserved
  4160. + * 64:71 = Source ID
  4161. + * 96:103 = Fault Reason
  4162. + * 104:123 = PV
  4163. + * 124:125 = Address Translation type
  4164. + * 126 = Type (0 = Read, 1 = Write)
  4165. + * 127 = Fault bit
  4166. + */
  4167. +struct fault_entry
  4168. +{
  4169. + uint64_t lo;
  4170. + uint64_t hi;
  4171. +};
  4172. +
  4173. +/* PTE Entry: 512 x 64-bit = 4k */
  4174. +#define PTE_P (1L << 0)
  4175. +#define PTE_R 0x00
  4176. +#define PTE_W (1L << 1)
  4177. +#define PTE_US (1L << 2)
  4178. +#define PTE_PWT (1L << 3)
  4179. +#define PTE_PCD (1L << 4)
  4180. +#define PTE_A (1L << 5)
  4181. +#define PTE_D (1L << 6)
  4182. +#define PTE_PAT (1L << 7)
  4183. +#define PTE_G (1L << 8)
  4184. +#define PTE_EA (1L << 10)
  4185. +#define PTE_XD (1LL << 63)
  4186. +
  4187. +/* PDE Level entry */
  4188. +#define PTE_PS (1L << 7)
  4189. +
  4190. +/* PDPE Level entry */
  4191. +
  4192. +/* ----------------------------------------------------------------
  4193. + * 5555555444444444333333333222222222111111111000000000------------
  4194. + * [PML4 ->] PDPE.1GB
  4195. + * [PML4 ->] PDPE.PDE -> PDE.2MB
  4196. + * [PML4 ->] PDPE.PDE -> PDE -> PTE
  4197. + * GAW0 = (12.20) (PTE)
  4198. + * GAW1 = (21.29) (PDE)
  4199. + * GAW2 = (30.38) (PDPE)
  4200. + * GAW3 = (39.47) (PML4)
  4201. + * GAW4 = (48.57) (n/a)
  4202. + * GAW5 = (58.63) (n/a)
  4203. + */
  4204. +struct pte_entry {
  4205. + uint64_t val;
  4206. +};
  4207. +
  4208. +/*
  4209. + * Queued Invalidation entry
  4210. + * 0:3 = 01h
  4211. + * 4:5 = Granularity
  4212. + * 6:15 = Reserved
  4213. + * 16:31 = Domain ID
  4214. + * 32:47 = Source ID
  4215. + * 48:49 = FM
  4216. + */
  4217. +
  4218. +/* Invalidate Context Entry */
  4219. +#define QI_CTX_DID_MASK 0xFFFF
  4220. +#define QI_CTX_DID_SHIFT 16
  4221. +#define QI_CTX_SID_MASK 0xFFFF
  4222. +#define QI_CTX_SID_SHIFT 32
  4223. +#define QI_CTX_FM_MASK 0x3
  4224. +#define QI_CTX_FM_SHIFT 48
  4225. +#define QI_CTX_IG_MASK 0x3
  4226. +#define QI_CTX_IG_SHIFT 4
  4227. +#define QI_CTX_DID(x) (((uint64_t)(x) << QI_CTX_DID_SHIFT))
  4228. +#define QI_CTX_SID(x) (((uint64_t)(x) << QI_CTX_SID_SHIFT))
  4229. +#define QI_CTX_FM(x) (((uint64_t)(x) << QI_CTX_FM_SHIFT))
  4230. +
  4231. +#define QI_CTX_IG_GLOBAL (CTX_GLOBAL << QI_CTX_IG_SHIFT)
  4232. +#define QI_CTX_IG_DOMAIN (CTX_DOMAIN << QI_CTX_IG_SHIFT)
  4233. +#define QI_CTX_IG_DEVICE (CTX_DEVICE << QI_CTX_IG_SHIFT)
  4234. +
  4235. +/* Invalidate IOTLB Entry */
  4236. +#define QI_IOTLB_DID_MASK 0xFFFF
  4237. +#define QI_IOTLB_DID_SHIFT 16
  4238. +#define QI_IOTLB_IG_MASK 0x3
  4239. +#define QI_IOTLB_IG_SHIFT 4
  4240. +#define QI_IOTLB_DR (1LL << 6)
  4241. +#define QI_IOTLB_DW (1LL << 5)
  4242. +#define QI_IOTLB_DID(x) (((uint64_t)(x) << QI_IOTLB_DID_SHIFT))
  4243. +
  4244. +#define QI_IOTLB_IG_GLOBAL (1 << QI_IOTLB_IG_SHIFT)
  4245. +#define QI_IOTLB_IG_DOMAIN (2 << QI_IOTLB_IG_SHIFT)
  4246. +#define QI_IOTLB_IG_PAGE (3 << QI_IOTLB_IG_SHIFT)
  4247. +
  4248. +/* QI Commands */
  4249. +#define QI_CTX 0x1
  4250. +#define QI_IOTLB 0x2
  4251. +#define QI_DEVTLB 0x3
  4252. +#define QI_INTR 0x4
  4253. +#define QI_WAIT 0x5
  4254. +#define QI_EXTTLB 0x6
  4255. +#define QI_PAS 0x7
  4256. +#define QI_EXTDEV 0x8
  4257. +
  4258. +struct qi_entry {
  4259. + uint64_t lo;
  4260. + uint64_t hi;
  4261. +};
  4262. +
  4263. +enum {
  4264. + CTX_GLOBAL = 1,
  4265. + CTX_DOMAIN,
  4266. + CTX_DEVICE,
  4267. +
  4268. + IOTLB_GLOBAL = 1,
  4269. + IOTLB_DOMAIN,
  4270. + IOTLB_PAGE,
  4271. +};
  4272. +
  4273. +enum {
  4274. + VTD_FAULT_ROOT_P = 0x1, /* P field in root entry is 0 */
  4275. + VTD_FAULT_CTX_P = 0x2, /* P field in context entry is 0 */
  4276. + VTD_FAULT_CTX_INVAL = 0x3, /* context AW/TT/SLPPTR invalid */
  4277. + VTD_FAULT_LIMIT = 0x4, /* Address is outside of MGAW */
  4278. + VTD_FAULT_WRITE = 0x5, /* Address-translation fault, non-writable */
  4279. + VTD_FAULT_READ = 0x6, /* Address-translation fault, non-readable */
  4280. + VTD_FAULT_PTE_INVAL = 0x7, /* page table hw access error */
  4281. + VTD_FAULT_ROOT_INVAL = 0x8, /* root table hw access error */
  4282. + VTD_FAULT_CTX_TBL_INVAL = 0x9, /* context entry hw access error */
  4283. + VTD_FAULT_ROOT_RESERVED = 0xa, /* non-zero reserved field in root entry */
  4284. + VTD_FAULT_CTX_RESERVED = 0xb, /* non-zero reserved field in context entry */
  4285. + VTD_FAULT_PTE_RESERVED = 0xc, /* non-zero reserved field in paging entry */
  4286. + VTD_FAULT_CTX_TT = 0xd, /* invalid translation type */
  4287. +};
  4288. +
  4289. +#endif
  4290. +
  4291. +void acpidmar_pci_hook(pci_chipset_tag_t, struct pci_attach_args *);
  4292. +void dmar_ptmap(bus_dma_tag_t, bus_addr_t);
  4293. +void acpidmar_sw(int);
  4294. +
  4295. +extern struct acpidmar_softc *acpidmar_sc;
  4296. +
  4297. +#define __EXTRACT(v,m) (((v) >> m##_SHIFT) & m##_MASK)
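A quick way to sanity-check the CAP_* accessors above is to decode a raw
capability value in userland. The following is a minimal standalone sketch
(not part of the diff): it restates three of the field accessors and feeds
them a made-up DMAR_CAP_REG value.

    #include <stdio.h>
    #include <stdint.h>

    /* Restated from acpidmar.h above, illustration only. */
    #define XFLD(x, s, m)   ((uint32_t)(((x) >> (s)) & (m)))
    #define CAP_NFR(x)      (XFLD((x), 40, 0xFF) + 1)    /* # fault recording regs */
    #define CAP_FRO(x)      (XFLD((x), 24, 0x3FF) * 16)  /* fault reg byte offset */
    #define CAP_MGAW(x)     (XFLD((x), 16, 0x3F) + 1)    /* max guest addr width */

    int
    main(void)
    {
            /* Hypothetical register value, made up for the example. */
            uint64_t cap = 0x00c0000c40660462ULL;

            printf("nfr=%u fro=0x%x mgaw=%u\n",
                CAP_NFR(cap), CAP_FRO(cap), CAP_MGAW(cap));
            return 0;
    }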
  4298. diff --git a/sys/dev/acpi/acpireg.h b/sys/dev/acpi/acpireg.h
  4299. index bfbb73ce2..8ba55c8ee 100644
  4300. --- a/sys/dev/acpi/acpireg.h
  4301. +++ b/sys/dev/acpi/acpireg.h
  4302. @@ -623,6 +623,9 @@ struct acpi_ivmd {
  4303. struct acpi_ivhd {
  4304. uint8_t type;
  4305. uint8_t flags;
  4306. +#define IVHD_PPRSUP (1L << 7)
  4307. +#define IVHD_PREFSUP (1L << 6)
  4308. +#define IVHD_COHERENT (1L << 5)
  4309. #define IVHD_IOTLB (1L << 4)
  4310. #define IVHD_ISOC (1L << 3)
  4311. #define IVHD_RESPASSPW (1L << 2)
  4312. @@ -638,13 +641,28 @@ struct acpi_ivhd {
  4313. #define IVHD_UNITID_MASK 0x1F
  4314. #define IVHD_MSINUM_SHIFT 0
  4315. #define IVHD_MSINUM_MASK 0x1F
  4316. - uint32_t reserved;
  4317. + uint32_t feature;
  4318. } __packed;
  4319.  
  4320. +struct acpi_ivhd_ext {
  4321. + uint8_t type;
  4322. + uint8_t flags;
  4323. + uint16_t length;
  4324. + uint16_t devid;
  4325. + uint16_t cap;
  4326. + uint64_t address;
  4327. + uint16_t segment;
  4328. + uint16_t info;
  4329. + uint32_t attrib;
  4330. + uint64_t efr;
  4331. + uint8_t reserved[8];
  4332. +} __packed;
  4333. +
  4334. union acpi_ivrs_entry {
  4335. struct {
  4336. uint8_t type;
  4337. #define IVRS_IVHD 0x10
  4338. +#define IVRS_IVHD_EXT 0x11
  4339. #define IVRS_IVMD_ALL 0x20
  4340. #define IVRS_IVMD_SPECIFIED 0x21
  4341. #define IVRS_IVMD_RANGE 0x22
  4342. @@ -652,6 +670,7 @@ union acpi_ivrs_entry {
  4343. uint16_t length;
  4344. } __packed;
  4345. struct acpi_ivhd ivhd;
  4346. + struct acpi_ivhd_ext ivhd_ext;
  4347. struct acpi_ivmd ivmd;
  4348. } __packed;
  4349.  
  4350. diff --git a/sys/dev/acpi/amd_iommu.h b/sys/dev/acpi/amd_iommu.h
  4351. new file mode 100644
  4352. index 000000000..db6d371aa
  4353. --- /dev/null
  4354. +++ b/sys/dev/acpi/amd_iommu.h
  4355. @@ -0,0 +1,358 @@
  4356. +/*
  4357. + * Copyright (c) 2019 Jordan Hargrave <[email protected]>
  4358. + *
  4359. + * Permission to use, copy, modify, and distribute this software for any
  4360. + * purpose with or without fee is hereby granted, provided that the above
  4361. + * copyright notice and this permission notice appear in all copies.
  4362. + *
  4363. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  4364. + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  4365. + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  4366. + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  4367. + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  4368. + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  4369. + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  4370. + */
  4371. +#ifndef __amd_iommu_h__
  4372. +#define __amd_iommu_h__
  4373. +
  4374. +#define DEV_TAB_BASE_REG 0x0000
  4375. +#define CMD_BASE_REG 0x0008
  4376. +#define EVT_BASE_REG 0x0010
  4377. +
  4378. +#define EXCL_BASE_REG 0x0020
  4379. +#define EXCL_LIMIT_REG 0x0028
  4380. +
  4381. +/* Extended Feature Register */
  4382. +#define EXTFEAT_REG 0x0030
  4383. +#define EFR_PREFSUP (1L << 0)
  4384. +#define EFR_PPRSUP (1L << 1)
  4385. +#define EFR_NXSUP (1L << 3)
  4386. +#define EFR_GTSUP (1L << 4)
  4387. +#define EFR_IASUP (1L << 6)
  4388. +#define EFR_GASUP (1L << 7)
  4389. +#define EFR_HESUP (1L << 8)
  4390. +#define EFR_PCSUP (1L << 9)
  4391. +#define EFR_HATS_SHIFT 10
  4392. +#define EFR_HATS_MASK 0x3
  4393. +#define EFR_GATS_SHIFT 12
  4394. +#define EFR_GATS_MASK 0x3
  4395. +#define EFR_GLXSUP_SHIFT 14
  4396. +#define EFR_GLXSUP_MASK 0x3
  4397. +#define EFR_SMIFSUP_SHIFT 16
  4398. +#define EFR_SMIFSUP_MASK 0x3
  4399. +#define EFR_SMIFRC_SHIFT 18
  4400. +#define EFR_SMIFRC_MASK 0x7
  4401. +#define EFR_GAMSUP_SHIFT 21
  4402. +#define EFR_GAMSUP_MASK 0x7
  4403. +
  4404. +#define CMD_HEAD_REG 0x2000
  4405. +#define CMD_TAIL_REG 0x2008
  4406. +#define EVT_HEAD_REG 0x2010
  4407. +#define EVT_TAIL_REG 0x2018
  4408. +
  4409. +#define IOMMUSTS_REG 0x2020
  4410. +
  4411. +#define DEV_TAB_MASK 0x000FFFFFFFFFF000LL
  4412. +#define DEV_TAB_LEN 0x1FF
  4413. +
  4414. +/* IOMMU Control */
  4415. +#define IOMMUCTL_REG 0x0018
  4416. +#define CTL_IOMMUEN (1L << 0)
  4417. +#define CTL_HTTUNEN (1L << 1)
  4418. +#define CTL_EVENTLOGEN (1L << 2)
  4419. +#define CTL_EVENTINTEN (1L << 3)
  4420. +#define CTL_COMWAITINTEN (1L << 4)
  4421. +#define CTL_INVTIMEOUT_SHIFT 5
  4422. +#define CTL_INVTIMEOUT_MASK 0x7
  4423. +#define CTL_INVTIMEOUT_NONE 0
  4424. +#define CTL_INVTIMEOUT_1MS 1
  4425. +#define CTL_INVTIMEOUT_10MS 2
  4426. +#define CTL_INVTIMEOUT_100MS 3
  4427. +#define CTL_INVTIMEOUT_1S 4
  4428. +#define CTL_INVTIMEOUT_10S 5
  4429. +#define CTL_INVTIMEOUT_100S 6
  4430. +#define CTL_PASSPW (1L << 8)
  4431. +#define CTL_RESPASSPW (1L << 9)
  4432. +#define CTL_COHERENT (1L << 10)
  4433. +#define CTL_ISOC (1L << 11)
  4434. +#define CTL_CMDBUFEN (1L << 12)
  4435. +#define CTL_PPRLOGEN (1L << 13)
  4436. +#define CTL_PPRINTEN (1L << 14)
  4437. +#define CTL_PPREN (1L << 15)
  4438. +#define CTL_GTEN (1L << 16)
  4439. +#define CTL_GAEN (1L << 17)
  4440. +#define CTL_CRW_SHIFT 18
  4441. +#define CTL_CRW_MASK 0xF
  4442. +#define CTL_SMIFEN (1L << 22)
  4443. +#define CTL_SLFWBDIS (1L << 23)
  4444. +#define CTL_SMIFLOGEN (1L << 24)
  4445. +#define CTL_GAMEN_SHIFT 25
  4446. +#define CTL_GAMEN_MASK 0x7
  4447. +#define CTL_GALOGEN (1L << 28)
  4448. +#define CTL_GAINTEN (1L << 29)
  4449. +#define CTL_DUALPPRLOGEN_SHIFT 30
  4450. +#define CTL_DUALPPRLOGEN_MASK 0x3
  4451. +#define CTL_DUALEVTLOGEN_SHIFT 32
  4452. +#define CTL_DUALEVTLOGEN_MASK 0x3
  4453. +#define CTL_DEVTBLSEGEN_SHIFT 34
  4454. +#define CTL_DEVTBLSEGEN_MASK 0x7
  4455. +#define CTL_PRIVABRTEN_SHIFT 37
  4456. +#define CTL_PRIVABRTEN_MASK 0x3
  4457. +#define CTL_PPRAUTORSPEN (1LL << 39)
  4458. +#define CTL_MARCEN (1LL << 40)
  4459. +#define CTL_BLKSTOPMRKEN (1LL << 41)
  4460. +#define CTL_PPRAUTOSPAON (1LL << 42)
  4461. +#define CTL_DOMAINIDPNE (1LL << 43)
  4462. +
  4463. +#define CMD_BASE_MASK 0x000FFFFFFFFFF000LL
  4464. +#define CMD_TBL_SIZE 4096
  4465. +#define CMD_TBL_LEN_4K (8LL << 56)
  4466. +#define CMD_TBL_LEN_8K (9LL << 56)
  4467. +
  4468. +#define EVT_BASE_MASK 0x000FFFFFFFFFF000LL
  4469. +#define EVT_TBL_SIZE 4096
  4470. +#define EVT_TBL_LEN_4K (8LL << 56)
  4471. +#define EVT_TBL_LEN_8K (9LL << 56)
  4472. +
  4473. +/*========================
  4474. + * DEVICE TABLE ENTRY
  4475. + * Contains mapping of bus-device-function
  4476. + *
  4477. + * 0 Valid (V)
  4478. + * 1 Translation Valid (TV)
  4479. + * 7:8 Host Address Dirty (HAD)
  4480. + * 9:11 Page Table Depth (usually 4)
  4481. + * 12:51 Page Table Physical Address
  4482. + * 52 PPR Enable
  4483. + * 53 GPRP
  4484. + * 54 Guest I/O Protection Valid (GIoV)
  4485. + * 55 Guest Translation Valid (GV)
  4486. + * 56:57 Guest Levels translated (GLX)
  4487. + * 58:60 Guest CR3 bits 12:14 (GCR3TRP)
  4488. + * 61 I/O Read Permission (IR)
  4489. + * 62 I/O Write Permission (IW)
  4490. + * 64:79 Domain ID
  4491. + * 80:95 Guest CR3 bits 15:30 (GCR3TRP)
  4492. + * 96 IOTLB Enable (I)
  4493. + * 97 Suppress multiple I/O page faults (SE)
  4494. + * 98 Suppress all I/O page faults (SA)
  4495. + * 99:100 Port I/O Control (IoCTL)
  4496. + * 101 Cache IOTLB Hint
  4497. + * 102 Snoop Disable (SD)
  4498. + * 103 Allow Exclusion (EX)
  4499. + * 104:105 System Management Message (SysMgt)
  4500. + * 107:127 Guest CR3 bits 31:51 (GCR3TRP)
  4501. + * 128 Interrupt Map Valid (IV)
  4502. + * 129:132 Interrupt Table Length (IntTabLen)
  4503. + *========================*/
  4504. +struct ivhd_dte {
  4505. + uint32_t dw0;
  4506. + uint32_t dw1;
  4507. + uint32_t dw2;
  4508. + uint32_t dw3;
  4509. + uint32_t dw4;
  4510. + uint32_t dw5;
  4511. + uint32_t dw6;
  4512. + uint32_t dw7;
  4513. +} __packed;
  4514. +
  4515. +#define DTE_V (1L << 0) // dw0
  4516. +#define DTE_TV (1L << 1) // dw0
  4517. +#define DTE_LEVEL_SHIFT 9 // dw0
  4518. +#define DTE_LEVEL_MASK 0x7 // dw0
  4519. +#define DTE_HPTRP_MASK 0x000FFFFFFFFFF000LL // dw0,1
  4520. +
  4521. +#define DTE_PPR (1L << 20) // dw1
  4522. +#define DTE_GPRP (1L << 21) // dw1
  4523. +#define DTE_GIOV (1L << 22) // dw1
  4524. +#define DTE_GV (1L << 23) // dw1
  4525. +#define DTE_IR (1L << 29) // dw1
  4526. +#define DTE_IW (1L << 30) // dw1
  4527. +
  4528. +#define DTE_DID_MASK 0xFFFF // dw2
  4529. +
  4530. +#define DTE_IV (1L << 0) // dw4
  4531. +#define DTE_SE (1L << 1)
  4532. +#define DTE_SA (1L << 2)
  4533. +#define DTE_INTTABLEN_SHIFT 1
  4534. +#define DTE_INTTABLEN_MASK 0xF
  4535. +#define DTE_IRTP_MASK 0x000FFFFFFFFFFFC0LL
  4536. +
  4537. +#define PTE_LVL5 48
  4538. +#define PTE_LVL4 39
  4539. +#define PTE_LVL3 30
  4540. +#define PTE_LVL2 21
  4541. +#define PTE_LVL1 12
  4542. +
  4543. +#define PTE_NXTLVL(x) (((x) & 0x7) << 9)
  4544. +#define PTE_PADDR_MASK 0x000FFFFFFFFFF000LL
  4545. +#define PTE_IR (1LL << 61)
  4546. +#define PTE_IW (1LL << 62)
  4547. +
  4548. +#define DTE_GCR312_MASK 0x3
  4549. +#define DTE_GCR312_SHIFT 24
  4550. +
  4551. +#define DTE_GCR315_MASK 0xFFFF
  4552. +#define DTE_GCR315_SHIFT 16
  4553. +
  4554. +#define DTE_GCR331_MASK 0xFFFFF
  4555. +#define DTE_GCR331_SHIFT 12
  4556. +
  4557. +#define _get64(x) *(uint64_t *)(x)
  4558. +#define _put64(x,v) *(uint64_t *)(x) = (v)
  4559. +
  4560. +/* Set Guest CR3 address */
  4561. +static inline void
  4562. +dte_set_guest_cr3(struct ivhd_dte *dte, paddr_t paddr)
  4563. +{
  4564. + iommu_rmw32(&dte->dw1, DTE_GCR312_MASK, DTE_GCR312_SHIFT, paddr >> 12);
  4565. + iommu_rmw32(&dte->dw2, DTE_GCR315_MASK, DTE_GCR315_SHIFT, paddr >> 15);
  4566. + iommu_rmw32(&dte->dw3, DTE_GCR331_MASK, DTE_GCR331_SHIFT, paddr >> 31);
  4567. +}
  4568. +
  4569. +/* Set Interrupt Remapping Root Pointer */
  4570. +static inline void
  4571. +dte_set_interrupt_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
  4572. +{
  4573. + uint64_t ov = _get64(&dte->dw4);
  4574. + _put64(&dte->dw4, (ov & ~DTE_IRTP_MASK) | (paddr & DTE_IRTP_MASK));
  4575. +}
  4576. +
  4577. +/* Set Interrupt Remapping Table length */
  4578. +static inline void
  4579. +dte_set_interrupt_table_length(struct ivhd_dte *dte, int nEnt)
  4580. +{
  4581. + iommu_rmw32(&dte->dw4, DTE_INTTABLEN_MASK, DTE_INTTABLEN_SHIFT, nEnt);
  4582. +}
  4583. +
  4584. +/* Set Interrupt Remapping Valid */
  4585. +static inline void
  4586. +dte_set_interrupt_valid(struct ivhd_dte *dte)
  4587. +{
  4588. + dte->dw4 |= DTE_IV;
  4589. +}
  4590. +
  4591. +/* Set Domain ID in Device Table Entry */
  4592. +static inline void
  4593. +dte_set_domain(struct ivhd_dte *dte, uint16_t did)
  4594. +{
  4595. + dte->dw2 = (dte->dw2 & ~DTE_DID_MASK) | (did & DTE_DID_MASK);
  4596. +}
  4597. +
  4598. +/* Set Page Table Pointer for device */
  4599. +static inline void
  4600. +dte_set_host_page_table_root_ptr(struct ivhd_dte *dte, paddr_t paddr)
  4601. +{
  4602. + uint64_t ov;
  4603. +
  4604. + ov = _get64(&dte->dw0) & ~DTE_HPTRP_MASK;
  4605. + ov |= (paddr & DTE_HPTRP_MASK) | PTE_IW | PTE_IR;
  4606. +
  4607. + _put64(&dte->dw0, ov);
  4608. +}
  4609. +
  4610. +/* Set Page Table Levels Mask */
  4611. +static inline void
  4612. +dte_set_mode(struct ivhd_dte *dte, int mode)
  4613. +{
  4614. + iommu_rmw32(&dte->dw0, DTE_LEVEL_MASK, DTE_LEVEL_SHIFT, mode);
  4615. +}
  4616. +
  4617. +static inline void
  4618. +dte_set_tv(struct ivhd_dte *dte)
  4619. +{
  4620. + dte->dw0 |= DTE_TV;
  4621. +}
  4622. +
  4623. +/* Set Device Table Entry valid.
  4624. + * Domain/Level/Mode/PageTable should already be set
  4625. + */
  4626. +static inline void
  4627. +dte_set_valid(struct ivhd_dte *dte)
  4628. +{
  4629. + dte->dw0 |= DTE_V;
  4630. +}
  4631. +
  4632. +/* Check if Device Table Entry is valid */
  4633. +static inline int
  4634. +dte_is_valid(struct ivhd_dte *dte)
  4635. +{
  4636. + return (dte->dw0 & DTE_V);
  4637. +}
  4638. +
  4639. +/*=========================================
  4640. + * COMMAND
  4641. + *=========================================*/
  4642. +struct ivhd_command {
  4643. + uint32_t dw0;
  4644. + uint32_t dw1;
  4645. + uint32_t dw2;
  4646. + uint32_t dw3;
  4647. +} __packed;
  4648. +
  4649. +#define CMD_SHIFT 28
  4650. +
  4651. +enum {
  4652. + COMPLETION_WAIT = 0x01,
  4653. + INVALIDATE_DEVTAB_ENTRY = 0x02,
  4654. + INVALIDATE_IOMMU_PAGES = 0x03,
  4655. + INVALIDATE_IOTLB_PAGES = 0x04,
  4656. + INVALIDATE_INTERRUPT_TABLE = 0x05,
  4657. + PREFETCH_IOMMU_PAGES = 0x06,
  4658. + COMPLETE_PPR_REQUEST = 0x07,
  4659. + INVALIDATE_IOMMU_ALL = 0x08,
  4660. +};
  4661. +
  4662. +/*=========================================
  4663. + * EVENT
  4664. + *=========================================*/
  4665. +struct ivhd_event {
  4666. + uint32_t dw0;
  4667. + uint32_t dw1;
  4668. + uint32_t dw2; // address.lo
  4669. + uint32_t dw3; // address.hi
  4670. +} __packed;
  4671. +
  4672. +#define EVT_TYPE_SHIFT 28 // dw1.0xF0000000
  4673. +#define EVT_TYPE_MASK 0xF
  4674. +#define EVT_SID_SHIFT 0 // dw0.0x0000FFFF
  4675. +#define EVT_SID_MASK 0xFFFF
  4676. +#define EVT_DID_SHIFT 0
  4677. +#define EVT_DID_MASK 0xFFFF // dw1.0x0000FFFF
  4678. +#define EVT_FLAG_SHIFT 16
  4679. +#define EVT_FLAG_MASK 0xFFF // dw1.0x0FFF0000
  4680. +
  4681. +/* IOMMU Fault reasons */
  4682. +enum {
  4683. + ILLEGAL_DEV_TABLE_ENTRY = 0x1,
  4684. + IO_PAGE_FAULT = 0x2,
  4685. + DEV_TAB_HARDWARE_ERROR = 0x3,
  4686. + PAGE_TAB_HARDWARE_ERROR = 0x4,
  4687. + ILLEGAL_COMMAND_ERROR = 0x5,
  4688. + COMMAND_HARDWARE_ERROR = 0x6,
  4689. + IOTLB_INV_TIMEOUT = 0x7,
  4690. + INVALID_DEVICE_REQUEST = 0x8,
  4691. +};
  4692. +
  4693. +#define EVT_GN (1L << 16)
  4694. +#define EVT_NX (1L << 17)
  4695. +#define EVT_US (1L << 18)
  4696. +#define EVT_I (1L << 19)
  4697. +#define EVT_PR (1L << 20)
  4698. +#define EVT_RW (1L << 21)
  4699. +#define EVT_PE (1L << 22)
  4700. +#define EVT_RZ (1L << 23)
  4701. +#define EVT_TR (1L << 24)
  4702. +
  4703. +struct iommu_softc;
  4704. +
  4705. +int ivhd_flush_devtab(struct iommu_softc *, int);
  4706. +int ivhd_invalidate_iommu_all(struct iommu_softc *);
  4707. +int ivhd_invalidate_interrupt_table(struct iommu_softc *, int);
  4708. +int ivhd_issue_command(struct iommu_softc *, const struct ivhd_command *, int);
  4709. +int ivhd_invalidate_domain(struct iommu_softc *, int);
  4710. +
  4711. +void _dumppte(struct pte_entry *, int, vaddr_t);
  4712. +
  4713. +#endif
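The dte_set_* helpers above compose a full device-table entry. Below is a
hedged sketch of the expected call order (per the dte_set_valid() comment,
the V bit goes last); it assumes amd_iommu.h is in scope plus <string.h>
for memset(), and did/ptroot are placeholder values.

    static void
    example_dte_setup(struct ivhd_dte *dte, uint16_t did, paddr_t ptroot)
    {
            memset(dte, 0, sizeof(*dte));
            dte_set_domain(dte, did);
            dte_set_mode(dte, 4);           /* 4-level host page table */
            dte_set_host_page_table_root_ptr(dte, ptroot);
            dte_set_tv(dte);                /* translation info valid */
            dte_set_valid(dte);             /* entry valid, set last */
    }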
  4714. diff --git a/sys/dev/acpi/files.acpi b/sys/dev/acpi/files.acpi
  4715. index e57c39938..1cf6f2fbb 100644
  4716. --- a/sys/dev/acpi/files.acpi
  4717. +++ b/sys/dev/acpi/files.acpi
  4718. @@ -70,6 +70,11 @@ device acpiprt
  4719. attach acpiprt at acpi
  4720. file dev/acpi/acpiprt.c acpiprt needs-flag
  4721.  
  4722. +# DMAR device
  4723. +device acpidmar
  4724. +attach acpidmar at acpi
  4725. +file dev/acpi/acpidmar.c acpidmar
  4726. +
  4727. # Docking station
  4728. device acpidock
  4729. attach acpidock at acpi
  4730. diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
  4731. index bf75f875e..546927971 100644
  4732. --- a/sys/dev/pci/pci.c
  4733. +++ b/sys/dev/pci/pci.c
  4734. @@ -1211,6 +1211,8 @@ pciioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
  4735. if (pci_vga_pci == NULL)
  4736. return EINVAL;
  4737. break;
  4738. + case PCIOCUNBIND:
  4739. + break;
  4740. default:
  4741. return ENOTTY;
  4742. }
  4743. @@ -1234,6 +1236,25 @@ pciioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
  4744. tag = pci_make_tag(pc, sel->pc_bus, sel->pc_dev, sel->pc_func);
  4745.  
  4746. switch (cmd) {
  4747. + case PCIOCUNBIND:
  4748. + {
  4749. + struct pci_dev *pd, *pdt;
  4750. + uint32_t val;
  4751. + int i;
  4752. +
  4753. + LIST_FOREACH_SAFE(pd, &pci->sc_devs, pd_next, pdt) {
  4754. + if (tag == pd->pd_tag) {
  4755. + for (i = PCI_MAPREG_START; i < PCI_MAPREG_END; i += 4) {
  4756. + int n = (i - PCI_MAPREG_START) / 4;
  4757. + val = pci_conf_read(pc, tag, i);
  4758. + printf(" bar%d: %x %x\n", n, val, pd->pd_mask[n]);
  4759. + }
  4760. + config_detach(pd->pd_dev, 0);
  4761. + LIST_REMOVE(pd, pd_next);
  4762. + }
  4763. + }
  4764. + }
  4765. + break;
  4766. case PCIOCREAD:
  4767. io = (struct pci_io *)data;
  4768. switch (io->pi_width) {
  4769. diff --git a/sys/sys/pciio.h b/sys/sys/pciio.h
  4770. index 394dd083d..2237f6784 100644
  4771. --- a/sys/sys/pciio.h
  4772. +++ b/sys/sys/pciio.h
  4773. @@ -83,4 +83,6 @@ struct pci_vga {
  4774. #define PCIOCREADMASK _IOWR('p', 8, struct pci_io)
  4775. #define PCIOCGETVPD _IOWR('p', 9, struct pci_vpd_req)
  4776.  
  4777. +#define PCIOCUNBIND _IOWR('p', 10, struct pcisel)
  4778. +
  4779. #endif /* !_SYS_PCIIO_H_ */
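A minimal userland sketch (not part of the diff) of how the new PCIOCUNBIND
ioctl might be driven; the device path and PCI selector values are
assumptions for illustration, and error handling is kept to err(3).

    #include <sys/types.h>
    #include <sys/ioctl.h>
    #include <sys/pciio.h>
    #include <err.h>
    #include <fcntl.h>

    int
    main(void)
    {
            struct pcisel sel = { .pc_bus = 3, .pc_dev = 0, .pc_func = 0 };
            int fd;

            if ((fd = open("/dev/pci", O_RDWR)) == -1)
                    err(1, "open /dev/pci");
            if (ioctl(fd, PCIOCUNBIND, &sel) == -1)
                    err(1, "PCIOCUNBIND");
            return 0;
    }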
  4780. diff --git a/usr.sbin/vmctl/main.c b/usr.sbin/vmctl/main.c
  4781. index 249eaa3de..d2ca5a64e 100644
  4782. --- a/usr.sbin/vmctl/main.c
  4783. +++ b/usr.sbin/vmctl/main.c
  4784. @@ -83,7 +83,7 @@ struct ctl_command ctl_commands[] = {
  4785. { "show", CMD_STATUS, ctl_status, "[id]" },
  4786. { "start", CMD_START, ctl_start,
  4787. "[-cL] [-B device] [-b path] [-d disk] [-i count]\n"
  4788. - "\t\t[-m size] [-n switch] [-r path] [-t name] id | name" },
  4789. + "\t\t[-m size] [-n switch] [-r path] [-t name] [-p bus:dev:func] id | name" },
  4790. { "status", CMD_STATUS, ctl_status, "[id]" },
  4791. { "stop", CMD_STOP, ctl_stop, "[-fw] [id | -a]" },
  4792. { "unpause", CMD_UNPAUSE, ctl_unpause, "id" },
  4793. @@ -224,7 +224,8 @@ vmmaction(struct parse_result *res)
  4794. case CMD_START:
  4795. ret = vm_start(res->id, res->name, res->size, res->nifs,
  4796. res->nets, res->ndisks, res->disks, res->disktypes,
  4797. - res->path, res->isopath, res->instance, res->bootdevice);
  4798. + res->path, res->isopath, res->instance, res->bootdevice,
  4799. + res->npcis, res->pcis);
  4800. if (ret) {
  4801. errno = ret;
  4802. err(1, "start VM operation failed");
  4803. @@ -480,6 +481,32 @@ parse_disktype(const char *s, const char **ret)
  4804. return (VMDF_RAW);
  4805. }
  4806.  
  4807. +int
  4808. +parse_pcis(struct parse_result *res, char *pcipath)
  4809. +{
  4810. + uint32_t *pcis;
  4811. + uint32_t bus, dev, func;
  4812. +
  4813. + if (res->npcis >= VMM_MAX_PCI_PTHRU) {
  4814. + warn("too many pci devices");
  4815. + return -1;
  4816. + }
  4817. + if (sscanf(pcipath, "%u:%u:%u", &bus, &dev, &func) != 3) {
  4818. + warnx("PCI address must be bus:dev:func");
  4819. + return -1;
  4820. + }
  4821. + if ((pcis = reallocarray(res->pcis, res->npcis + 1,
  4822. + sizeof(uint32_t))) == NULL) {
  4823. + warn("reallocarray");
  4824. + return -1;
  4825. + }
  4826. + pcis[res->npcis] = (bus << 8) | ((dev & 0x1f) << 3) | (func & 0x7);
  4827. + res->pcis = pcis;
  4828. + res->npcis++;
  4829. +
  4830. + return (0);
  4831. +}
  4832. +
  4833. int
  4834. parse_disk(struct parse_result *res, char *word, int type)
  4835. {
  4836. @@ -835,7 +862,7 @@ ctl_start(struct parse_result *res, int argc, char *argv[])
  4837. char path[PATH_MAX];
  4838. const char *s;
  4839.  
  4840. - while ((ch = getopt(argc, argv, "b:B:cd:i:Lm:n:r:t:")) != -1) {
  4841. + while ((ch = getopt(argc, argv, "b:B:cd:i:Lm:n:r:t:p:")) != -1) {
  4842. switch (ch) {
  4843. case 'b':
  4844. if (res->path)
  4845. @@ -899,6 +926,10 @@ ctl_start(struct parse_result *res, int argc, char *argv[])
  4846. if (parse_instance(res, optarg) == -1)
  4847. errx(1, "invalid name: %s", optarg);
  4848. break;
  4849. + case 'p':
  4850. + if (parse_pcis(res, optarg) == -1)
  4851. + errx(1, "invalid pci entry: %s", optarg);
  4852. + break;
  4853. default:
  4854. ctl_usage(res->ctl);
  4855. /* NOTREACHED */
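With the option parsing above in place, a hypothetical start line would be
"vmctl start -m 1G -d disk.img -p 3:0:0 myvm"; each -p adds one bus:dev:func
triple, up to VMM_MAX_PCI_PTHRU entries.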
  4856. diff --git a/usr.sbin/vmctl/vmctl.8 b/usr.sbin/vmctl/vmctl.8
  4857. index 6a583d5b2..c77a866ff 100644
  4858. --- a/usr.sbin/vmctl/vmctl.8
  4859. +++ b/usr.sbin/vmctl/vmctl.8
  4860. @@ -155,6 +155,7 @@ command.
  4861. .Op Fl n Ar switch
  4862. .Op Fl r Ar path
  4863. .Op Fl t Ar name
  4864. +.Op Fl p Ar bus:dev:func
  4865. .Ar id | name
  4866. .Ek
  4867. .Xc
  4868. @@ -237,6 +238,10 @@ as a template to create a new VM instance.
  4869. The instance will inherit settings from the parent VM,
  4870. except for exclusive options such as disk, interface lladdr, and
  4871. interface names.
  4872. +.It Fl p Ar bus:dev:func
  4873. +Add a passthrough PCI device at host PCI address
  4874. +.Ar bus:dev:func
  4875. +(decimal) to the guest. Up to 4 devices are allowed.
  4876. .El
  4877. .It Cm status Op Ar id
  4878. List VMs running on the host, optionally listing just the selected VM
  4879. diff --git a/usr.sbin/vmctl/vmctl.c b/usr.sbin/vmctl/vmctl.c
  4880. index dcded0760..4de6dd6f9 100644
  4881. --- a/usr.sbin/vmctl/vmctl.c
  4882. +++ b/usr.sbin/vmctl/vmctl.c
  4883. @@ -73,7 +73,8 @@ unsigned int info_flags;
  4884. int
  4885. vm_start(uint32_t start_id, const char *name, int memsize, int nnics,
  4886. char **nics, int ndisks, char **disks, int *disktypes, char *kernel,
  4887. - char *iso, char *instance, unsigned int bootdevice)
  4888. + char *iso, char *instance, unsigned int bootdevice,
  4889. + int npcis, uint32_t *pcis)
  4890. {
  4891. struct vmop_create_params *vmc;
  4892. struct vm_create_params *vcp;
  4893. @@ -128,6 +129,7 @@ vm_start(uint32_t start_id, const char *name, int memsize, int nnics,
  4894. vcp->vcp_ncpus = 1;
  4895. vcp->vcp_ndisks = ndisks;
  4896. vcp->vcp_nnics = nnics;
  4897. + vcp->vcp_npcis = npcis;
  4898. vcp->vcp_id = start_id;
  4899.  
  4900. for (i = 0 ; i < ndisks; i++) {
  4901. @@ -153,6 +155,9 @@ vm_start(uint32_t start_id, const char *name, int memsize, int nnics,
  4902. errx(1, "interface name too long");
  4903. }
  4904. }
  4905. + for (i = 0; i < npcis; i++)
  4906. + vcp->vcp_pcis[i] = pcis[i];
  4907. +
  4908. if (name != NULL) {
  4909. /*
  4910. * Allow VMs names with alphanumeric characters, dot, hyphen
  4911. diff --git a/usr.sbin/vmctl/vmctl.h b/usr.sbin/vmctl/vmctl.h
  4912. index beb65eae6..aa9cbcba7 100644
  4913. --- a/usr.sbin/vmctl/vmctl.h
  4914. +++ b/usr.sbin/vmctl/vmctl.h
  4915. @@ -55,6 +55,8 @@ struct parse_result {
  4916. size_t ndisks;
  4917. char **disks;
  4918. int *disktypes;
  4919. + int npcis;
  4920. + uint32_t *pcis;
  4921. int verbose;
  4922. char *instance;
  4923. unsigned int flags;
  4924. @@ -80,6 +82,7 @@ int parse_network(struct parse_result *, char *);
  4925. int parse_size(struct parse_result *, char *);
  4926. int parse_disktype(const char *, const char **);
  4927. int parse_disk(struct parse_result *, char *, int);
  4928. +int parse_pcis(struct parse_result *, char *);
  4929. int parse_vmid(struct parse_result *, char *, int);
  4930. int parse_instance(struct parse_result *, char *);
  4931. void parse_free(struct parse_result *);
  4932. @@ -94,7 +97,8 @@ int create_imagefile(int, const char *, const char *, long, const char **);
  4933. int create_raw_imagefile(const char *, long);
  4934. int create_qc2_imagefile(const char *, const char *, long);
  4935. int vm_start(uint32_t, const char *, int, int, char **, int,
  4936. - char **, int *, char *, char *, char *, unsigned int);
  4937. + char **, int *, char *, char *, char *, unsigned int,
  4938. + int, uint32_t *);
  4939. int vm_start_complete(struct imsg *, int *, int);
  4940. void terminate_vm(uint32_t, const char *, unsigned int);
  4941. int terminate_vm_complete(struct imsg *, int *, unsigned int);
  4942. diff --git a/usr.sbin/vmd/Makefile b/usr.sbin/vmd/Makefile
  4943. index 8645df7ae..c819599d2 100644
  4944. --- a/usr.sbin/vmd/Makefile
  4945. +++ b/usr.sbin/vmd/Makefile
  4946. @@ -4,7 +4,7 @@
  4947.  
  4948. PROG= vmd
  4949. SRCS= vmd.c control.c log.c priv.c proc.c config.c vmm.c
  4950. -SRCS+= vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
  4951. +SRCS+= vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c x86emu.c
  4952. SRCS+= ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c packet.c
  4953. SRCS+= parse.y atomicio.c vioscsi.c vioraw.c vioqcow2.c fw_cfg.c
  4954.  
  4955. diff --git a/usr.sbin/vmd/pci.c b/usr.sbin/vmd/pci.c
  4956. index 954235eb6..c930ea5a1 100644
  4957. --- a/usr.sbin/vmd/pci.c
  4958. +++ b/usr.sbin/vmd/pci.c
  4959. @@ -25,42 +25,305 @@
  4960.  
  4961. #include <string.h>
  4962. #include <unistd.h>
  4963. +#include <stdlib.h>
  4964. #include "vmd.h"
  4965. #include "pci.h"
  4966. #include "vmm.h"
  4967. #include "i8259.h"
  4968. #include "atomicio.h"
  4969. +#include <sys/mman.h>
  4970. +#include <sys/ioctl.h>
  4971.  
  4972. struct pci pci;
  4973.  
  4974. +extern struct vmd *env;
  4975. +
  4976. extern char *__progname;
  4977.  
  4978. /* PIC IRQs, assigned to devices in order */
  4979. const uint8_t pci_pic_irqs[PCI_MAX_PIC_IRQS] = {3, 5, 6, 7, 9, 10, 11, 12,
  4980. 14, 15};
  4981.  
  4982. +#define PTD_DEVID(d,b) (void *)(uintptr_t)(((d) << 8) | (b))
  4983. +#define PTD_BAR(x) ((uintptr_t)(x) & 0xFF)
  4984. +#define PTD_DEV(x) ((uintptr_t)(x) >> 8)
  4985. +
  4986. +void io_copy(void *, const void *, int);
  4987. +int mem_chkint(void);
  4988. +
  4989. +int pci_memh2(int, uint64_t, uint32_t, void *, void *);
  4990. +
  4991. +#define PAGE_MASK 0xFFF
  4992. +
  4993. +TAILQ_HEAD(,iohandler) memh = TAILQ_HEAD_INITIALIZER(memh);
  4994. +
  4995. +void
  4996. +register_mem(uint64_t base, uint32_t len, iocb_t handler, void *cookie)
  4997. +{
  4998. + struct iohandler *mem;
  4999. +
  5000. + if (!base)
  5001. + return;
  5002. + fprintf(stderr, "@@@ Registering mem region: %llx - %llx\n", base, base+len-1);
  5003. + TAILQ_FOREACH(mem, &memh, next) {
+ if (base >= mem->start && base + len - 1 <= mem->end) {
  5005. + fprintf(stderr,"already registered\n");
  5006. + return;
  5007. + }
  5008. + }
+ if ((mem = calloc(1, sizeof(*mem))) == NULL)
+ return;
  5010. + mem->start = base;
  5011. + mem->end = base+len-1;
  5012. + mem->handler = handler;
  5013. + mem->cookie = cookie;
  5014. + TAILQ_INSERT_TAIL(&memh, mem, next);
  5015. +}
  5016. +
  5017. +void
  5018. +unregister_mem(uint64_t base)
  5019. +{
  5020. + struct iohandler *mem, *tmp;
  5021. +
  5022. + if (!base)
  5023. + return;
  5024. + fprintf(stderr,"@@@ Unregistering base: %llx\n", base);
  5025. + TAILQ_FOREACH_SAFE(mem, &memh, next, tmp) {
  5026. + if (mem->start == base) {
  5027. + fprintf(stderr, " removed:%llx-%llx\n", mem->start, mem->end);
  5028. + TAILQ_REMOVE(&memh, mem, next);
  5029. + free(mem);
  5030. + }
  5031. + }
  5032. +}
  5033. +
  5034. +int
  5035. +mem_handler(int dir, uint64_t addr, uint32_t size, void *data)
  5036. +{
  5037. + struct iohandler *mem;
  5038. + int rc;
  5039. +
  5040. + TAILQ_FOREACH(mem, &memh, next) {
+ if (addr >= mem->start && addr + size - 1 <= mem->end) {
  5042. + rc = mem->handler(dir, addr, size, data, mem->cookie);
  5043. + if (rc != 0) {
  5044. + fprintf(stderr, "Error mem handler: %llx\n", addr);
  5045. + }
  5046. + return rc;
  5047. + }
  5048. + }
  5049. + return -1;
  5050. +}
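
register_mem()/unregister_mem()/mem_handler() form a small dispatch table keyed by guest-physical range. A hypothetical registration, only to illustrate the iocb_t calling convention (the handler name and address are made up):

    static int
    rom_handler(int dir, uint64_t addr, uint32_t size, void *data, void *cookie)
    {
    	/* reads return all-ones, writes are dropped */
    	if (dir == VEI_DIR_IN)
    		memset(data, 0xff, size);
    	return 0;
    }

    	register_mem(0xf1000000, 0x1000, rom_handler, NULL);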
  5051. +
  5052. +/* Lookup PTD device */
  5053. +static struct vm_ptdpci *
  5054. +ptd_lookup(int devid)
  5055. +{
  5056. + if (devid >= pci.pci_dev_ct)
  5057. + return NULL;
  5058. + return pci.pci_devices[devid].pd_cookie;
  5059. +}
  5060. +
  5061. +/* Map a MMIO Bar Physical address */
  5062. +static void *
  5063. +ptd_mapbar(int bar, uint64_t base, uint64_t size) {
  5064. + uint8_t *va;
  5065. +
  5066. + /* Don't map empty regions */
  5067. + if (!base || !size)
  5068. + return NULL;
  5069. + size = (size + PAGE_MASK) & ~PAGE_MASK;
  5070. + va = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, env->vmd_fd, base & ~PAGE_MASK);
+ if (va == MAP_FAILED) {
  5072. + fprintf(stderr, "Unable to mmap bar: %.16llx/%.8llx\n",
  5073. + base, size);
  5074. + return NULL;
  5075. + }
  5076. + fprintf(stderr, "0x%.2x: Mapped bar: %.16llx/%.8llx to %p\n",
  5077. + (bar * 4) + 0x10, base, size, va);
  5078. + return va + (base & PAGE_MASK);
  5079. +}
  5080. +
  5081. +/* Unmap MMIO Bar */
  5082. +static void
  5083. +ptd_unmapbar(void *va, uint64_t size) {
  5084. + if (va == NULL)
  5085. + return;
  5086. + size = (size + PAGE_MASK) & ~PAGE_MASK;
+ /* ptd_mapbar() returned va plus the sub-page offset; munmap needs the page base */
+ munmap((void *)((uintptr_t)va & ~(uintptr_t)PAGE_MASK), size);
  5088. + fprintf(stderr, "unmapping bar: %p/%.8llx\n", va, size);
  5089. +}
  5090. +
  5091. +/* Do Passthrough I/O port read/write */
  5092. +static void
  5093. +ptd_pio(int type, int dir, int port, int size, uint32_t *data)
  5094. +{
  5095. + struct vm_pio pio;
  5096. + uint64_t mask;
  5097. + int rc;
  5098. +
+ if (size == 1)
+ mask = 0xff;
+ else if (size == 2)
+ mask = 0xffff;
+ else /* default so mask is never used uninitialized */
+ mask = 0xffffffff;
  5105. +
  5106. + pio.dir = dir;
  5107. + pio.size = size;
  5108. + pio.base = port;
  5109. + if (dir == VEI_DIR_OUT) {
  5110. + pio.data = *data & mask;
  5111. + rc = ioctl(env->vmd_fd, VMM_IOC_PIO, &pio);
  5112. + }
  5113. + else {
  5114. + rc = ioctl(env->vmd_fd, VMM_IOC_PIO, &pio);
  5115. + *data = (*data & ~mask) | (pio.data & mask);
  5116. + }
  5117. + fprintf(stderr, "pio: rc=%d, %d/%.4x %.8x\n", rc, dir, port, *data);
  5118. +}
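
The mask keeps sub-word port reads from clobbering the rest of the guest register. A worked example of the read path (the byte returned by the kernel is made up):

    	uint32_t data = 0x11223344;

    	/* 1-byte IN: only the low byte of 'data' is replaced */
    	ptd_pio(1, VEI_DIR_IN, 0x3f8, 1, &data);
    	/* if the kernel set pio.data = 0x5a, data is now 0x1122335a */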
  5119. +
  5120. +/* Passthrough PCI config read */
  5121. +static uint32_t
  5122. +ptd_conf_read(uint8_t bus, uint8_t dev, uint8_t func, uint32_t reg)
  5123. +{
  5124. + struct vm_pciio pio;
  5125. +
  5126. + memset(&pio, 0, sizeof(pio));
  5127. + pio.bus = bus;
  5128. + pio.dev = dev;
  5129. + pio.func = func;
  5130. + pio.dir = VEI_DIR_IN;
  5131. + pio.reg = reg & ~0x3;
  5132. + ioctl(env->vmd_fd, VMM_IOC_PCIIO, &pio);
  5133. + return pio.val;
  5134. +}
  5135. +
  5136. +/* Passthrough PCI config write */
  5137. +static void
  5138. +ptd_conf_write(uint8_t bus, uint8_t dev, uint8_t func, uint32_t reg, uint32_t val)
  5139. +{
  5140. + struct vm_pciio pio;
  5141. +
  5142. + memset(&pio, 0, sizeof(pio));
  5143. + pio.bus = bus;
  5144. + pio.dev = dev;
  5145. + pio.func = func;
  5146. + pio.dir = VEI_DIR_OUT;
  5147. + pio.reg = reg & ~0x3;
  5148. + pio.val = val;
  5149. + ioctl(env->vmd_fd, VMM_IOC_PCIIO, &pio);
  5150. +}
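
These helpers give vmd dword-granular access to the physical device's config space through VMM_IOC_PCIIO. For instance, probing whether anything answers at a given BDF mirrors what pci_add_pthru() does below:

    	uint32_t id_reg = ptd_conf_read(3, 0, 0, PCI_ID_REG);

    	if (PCI_VENDOR(id_reg) != PCI_VENDOR_INVALID)
    		fprintf(stderr, "found %04x:%04x\n",
    		    PCI_VENDOR(id_reg), PCI_PRODUCT(id_reg));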
  5151. +
  5152. +int
  5153. +mem_chkint(void)
  5154. +{
  5155. + uint32_t pending;
  5156. + struct vm_ptdpci *pd;
  5157. + uint8_t intr = 0xff;
  5158. + int rc, i;
  5159. +
  5160. + /* Loop through all PCI devices, check for interrupt */
  5161. + for (i = 0; i < pci.pci_dev_ct; i++) {
  5162. + pd = ptd_lookup(i);
  5163. + if (pd == NULL)
  5164. + continue;
  5165. +
  5166. + /* Check if pending interrupt count has changed */
  5167. + pending = pd->pending;
  5168. + rc = ioctl(env->vmd_fd, VMM_IOC_GETINTR, pd);
  5169. + if (pd->pending != pending) {
  5170. + fprintf(stderr, "pend:%d %d %d\n", pending, pd->pending, rc);
  5171. + return pci_get_dev_irq(pd->id);
  5172. + }
  5173. + }
  5174. + return intr;
  5175. +}
  5176. +
+/*
+ * Copy using a single access of the matching width; device MMIO
+ * registers generally cannot be accessed byte-by-byte.
+ */
+void
+io_copy(void *dest, const void *src, int size) {
+ if (size == 1)
+ *(uint8_t *)dest = *(const uint8_t *)src;
+ else if (size == 2)
+ *(uint16_t *)dest = *(const uint16_t *)src;
+ else if (size == 4)
+ *(uint32_t *)dest = *(const uint32_t *)src;
+ else if (size == 8)
+ *(uint64_t *)dest = *(const uint64_t *)src;
+ else
+ memcpy(dest, src, size);
+}
  5190. +
+/*
+ * PCI Passthrough MMIO handler
+ * Uses the memory-mapped address of the physical BAR
+ */
  5195. +int
  5196. +pci_memh2(int dir, uint64_t base, uint32_t size, void *data, void *cookie)
  5197. +{
  5198. + uint8_t devid = PTD_DEV(cookie);
  5199. + uint8_t barid = PTD_BAR(cookie);
  5200. + uint64_t off;
  5201. + uint8_t *va;
  5202. + struct vm_ptdpci *pd;
  5203. +
  5204. + pd = ptd_lookup(devid);
  5205. + if (pd == NULL)
  5206. + return -1;
  5207. + off = base & (pd->barinfo[barid].size - 1);
  5208. + va = pd->barinfo[barid].va;
  5209. + if (va == NULL) {
  5210. + return -1;
  5211. + }
  5212. + if (dir == VEI_DIR_IN) {
  5213. + io_copy(data, va + off, size);
  5214. + }
  5215. + else {
  5216. + io_copy(va + off, data, size);
  5217. + }
  5218. + return 0;
  5219. +}
  5220. +
+/*
+ * pci_mkbar
+ *
+ * Allocates a size-aligned BAR address from the given region.
+ * Returns the allocated address and advances *base past it.
+ * Returns zero if the address would fall outside the region.
+ */
+static uint64_t
+pci_mkbar(uint64_t *base, uint32_t size, uint64_t maxbase)
+{
+ uint64_t mask = size - 1;
+ uint64_t cbase;
+
+ /* BAR addresses must be aligned to the BAR size */
+ cbase = (*base + mask) & ~mask;
+ if (cbase + size >= maxbase)
+ return (0);
+ *base = cbase + size;
+ return cbase;
+}
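
With the size-alignment above, successive allocations stay naturally aligned even when BAR sizes differ, e.g.:

    	uint64_t next = 0xf0000000;

    	pci_mkbar(&next, 0x1000, VMM_PCI_MMIO_BAR_END); /* 0xf0000000, next = 0xf0001000 */
    	pci_mkbar(&next, 0x4000, VMM_PCI_MMIO_BAR_END); /* 0xf0004000, next = 0xf0008000 */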
  5240. +
  5241. /*
  5242. * pci_add_bar
  5243. *
  5244. * Adds a BAR for the PCI device 'id'. On access, 'barfn' will be
  5245. * called, and passed 'cookie' as an identifier.
  5246. *
  5247. - * BARs are fixed size, meaning all I/O BARs requested have the
  5248. - * same size and all MMIO BARs have the same size.
  5249. - *
  5250. * Parameters:
  5251. * id: PCI device to add the BAR to (local count, eg if id == 4,
  5252. * this BAR is to be added to the VM's 5th PCI device)
  5253. * type: type of the BAR to add (PCI_MAPREG_TYPE_xxx)
  5254. + * size: Size of BAR area
  5255. * barfn: callback function invoked on BAR access
  5256. * cookie: cookie passed to barfn on access
  5257. *
  5258. * Returns 0 if the BAR was added successfully, 1 otherwise.
  5259. */
  5260. int
  5261. -pci_add_bar(uint8_t id, uint32_t type, void *barfn, void *cookie)
  5262. +pci_add_bar(uint8_t id, uint32_t type, uint32_t size, void *barfn, void *cookie)
  5263. {
  5264. uint8_t bar_reg_idx, bar_ct;
  5265. + uint64_t base = 0;
  5266.  
  5267. /* Check id */
  5268. if (id >= pci.pci_dev_ct)
  5269. @@ -73,35 +336,50 @@ pci_add_bar(uint8_t id, uint32_t type, void *barfn, void *cookie)
  5270.  
  5271. /* Compute BAR address and add */
  5272. bar_reg_idx = (PCI_MAPREG_START + (bar_ct * 4)) / 4;
  5273. - if (type == PCI_MAPREG_TYPE_MEM) {
  5274. - if (pci.pci_next_mmio_bar >= VMM_PCI_MMIO_BAR_END)
+ if (PCI_MAPREG_TYPE(type) == PCI_MAPREG_TYPE_MEM &&
+ PCI_MAPREG_MEM_TYPE(type) == PCI_MAPREG_MEM_TYPE_64BIT) {
+ /* A 64-bit BAR consumes two BAR slots */
+ if (bar_ct + 1 >= PCI_MAX_BARS)
+ return (1);
+ base = pci_mkbar(&pci.pci_next_mmio_bar, size, VMM_PCI_MMIO_BAR_END);
+ if (base == 0)
+ return (1);
+
+ pci.pci_devices[id].pd_cfg_space[bar_reg_idx] =
+ PCI_MAPREG_MEM_ADDR(base) | PCI_MAPREG_MEM_TYPE_64BIT;
+ pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
+ pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
+ pci.pci_devices[id].pd_bartype[bar_ct] = PCI_BAR_TYPE_MMIO;
+ pci.pci_devices[id].pd_barsize[bar_ct] = size;
+ pci.pci_devices[id].pd_bartype[bar_ct+1] = PCI_BAR_TYPE_MMIO;
+ pci.pci_devices[id].pd_barsize[bar_ct+1] = 0;
+ pci.pci_devices[id].pd_bar_ct += 2;
+ } else if (PCI_MAPREG_TYPE(type) == PCI_MAPREG_TYPE_MEM) {
  5290. + base = pci_mkbar(&pci.pci_next_mmio_bar, size, VMM_PCI_MMIO_BAR_END);
  5291. + if (base == 0)
  5292. return (1);
  5293.  
  5294. pci.pci_devices[id].pd_cfg_space[bar_reg_idx] =
  5295. - PCI_MAPREG_MEM_ADDR(pci.pci_next_mmio_bar);
  5296. - pci.pci_next_mmio_bar += VMM_PCI_MMIO_BAR_SIZE;
  5297. + PCI_MAPREG_MEM_ADDR(base);
  5298. pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
  5299. pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
  5300. pci.pci_devices[id].pd_bartype[bar_ct] = PCI_BAR_TYPE_MMIO;
  5301. - pci.pci_devices[id].pd_barsize[bar_ct] = VMM_PCI_MMIO_BAR_SIZE;
  5302. + pci.pci_devices[id].pd_barsize[bar_ct] = size;
  5303. pci.pci_devices[id].pd_bar_ct++;
  5304. } else if (type == PCI_MAPREG_TYPE_IO) {
  5305. - if (pci.pci_next_io_bar >= VMM_PCI_IO_BAR_END)
  5306. + base = pci_mkbar(&pci.pci_next_io_bar, size, VMM_PCI_IO_BAR_END);
  5307. + if (base == 0)
  5308. return (1);
  5309.  
  5310. pci.pci_devices[id].pd_cfg_space[bar_reg_idx] =
  5311. - PCI_MAPREG_IO_ADDR(pci.pci_next_io_bar) |
  5312. + PCI_MAPREG_IO_ADDR(base) |
  5313. PCI_MAPREG_TYPE_IO;
  5314. - pci.pci_next_io_bar += VMM_PCI_IO_BAR_SIZE;
  5315. pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
  5316. pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
  5317. - DPRINTF("%s: adding pci bar cookie for dev %d bar %d = %p",
  5318. - __progname, id, bar_ct, cookie);
  5319. pci.pci_devices[id].pd_bartype[bar_ct] = PCI_BAR_TYPE_IO;
  5320. - pci.pci_devices[id].pd_barsize[bar_ct] = VMM_PCI_IO_BAR_SIZE;
  5321. + pci.pci_devices[id].pd_barsize[bar_ct] = size;
  5322. pci.pci_devices[id].pd_bar_ct++;
  5323. }
  5324.  
  5325. + log_info("%s: PCI_ADDBAR(%d, %d, %x, %x)", __progname,
  5326. + bar_ct, type, pci.pci_devices[id].pd_cfg_space[bar_reg_idx], size);
  5327. +
  5328. return (0);
  5329. }
  5330.  
  5331. @@ -165,8 +443,10 @@ pci_get_dev_irq(uint8_t id)
  5332. int
  5333. pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid, uint8_t class,
  5334. uint8_t subclass, uint16_t subsys_vid, uint16_t subsys_id,
  5335. - uint8_t irq_needed, pci_cs_fn_t csfunc)
  5336. + uint8_t irq_needed, pci_cs_fn_t csfunc, void *cookie)
  5337. {
  5338. + log_info("%s: add_pci: %x.%x.%x", __progname, vid, pid, class);
  5339. +
  5340. /* Exceeded max devices? */
  5341. if (pci.pci_dev_ct >= PCI_CONFIG_MAX_DEV)
  5342. return (1);
  5343. @@ -186,6 +466,7 @@ pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid, uint8_t class,
  5344. pci.pci_devices[*id].pd_subsys_id = subsys_id;
  5345.  
  5346. pci.pci_devices[*id].pd_csfunc = csfunc;
  5347. + pci.pci_devices[*id].pd_cookie = cookie;
  5348.  
  5349. if (irq_needed) {
  5350. pci.pci_devices[*id].pd_irq =
  5351. @@ -202,6 +483,111 @@ pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid, uint8_t class,
  5352. return (0);
  5353. }
  5354.  
  5355. +#define PCIOCUNBIND _IOWR('p', 9, struct pcisel)
  5356. +
  5357. +/* Callback for I/O ports. Map to new I/O port and do it */
  5358. +static int
  5359. +ptd_iobar(int dir, uint16_t reg, uint32_t *data, uint8_t *intr, void *cookie, uint8_t size)
  5360. +{
  5361. + struct vm_ptdpci *pd;
  5362. + uint8_t devid = PTD_DEV(cookie);
  5363. + uint8_t barid = PTD_BAR(cookie);
  5364. + int hport;
  5365. +
  5366. + *intr = 0xFF;
  5367. +
  5368. + /* Remap guest port to host port */
  5369. + pd = ptd_lookup(devid);
  5370. + if (pd == NULL)
  5371. + return -1;
  5372. + hport = pd->barinfo[barid].addr + reg;
  5373. + ptd_pio(1, dir, hport, size, data);
  5374. + return 0;
  5375. +}
  5376. +
  5377. +static int
  5378. +ptd_mmiobar(int dir, uint32_t ofs, uint32_t *data)
  5379. +{
  5380. + fprintf(stderr,"mmiobar: %d.%x\n", dir, ofs);
  5381. + return 0;
  5382. +}
  5383. +
  5384. +/*
  5385. + * Add Passthrough PCI device to VMM PCI table
  5386. + */
  5387. +void
  5388. +pci_add_pthru(int bus, int dev, int fun)
  5389. +{
  5390. + struct vm_ptdpci *pd;
  5391. + uint32_t id_reg, subid_reg, class_reg, cmd_reg, intr_reg;
  5392. + int i, rc;
  5393. +
  5394. +#if 0
  5395. + /* Unregister previous VMM */
  5396. + for (i = 0; i < MAXBAR; i++) {
  5397. + if (pd->barinfo[i].va) {
  5398. + ptd_unmapbar(pd->barinfo[i].va, pd->barinfo[i].size);
  5399. + }
  5400. + }
  5401. +#endif
  5402. +
+ /* Allocate passthrough device (zeroed: 'pending' must start at 0) */
+ pd = calloc(1, sizeof(*pd));
+ if (pd == NULL)
+ return;
  5407. + pd->bus = bus;
  5408. + pd->dev = dev;
  5409. + pd->func = fun;
  5410. +
  5411. + /* Read physical PCI config space */
  5412. + id_reg = ptd_conf_read(bus, dev, fun, PCI_ID_REG);
+ if (PCI_VENDOR(id_reg) == PCI_VENDOR_INVALID || PCI_VENDOR(id_reg) == 0x0000) {
+ fprintf(stderr, "Error: No PCI device @ %d:%d:%d\n", bus, dev, fun);
+ free(pd);
+ return;
+ }
  5417. + subid_reg = ptd_conf_read(bus, dev, fun, PCI_SUBSYS_ID_REG);
  5418. + class_reg = ptd_conf_read(bus, dev, fun, PCI_CLASS_REG);
  5419. + cmd_reg = ptd_conf_read(bus, dev, fun, PCI_COMMAND_STATUS_REG);
  5420. + intr_reg = ptd_conf_read(bus, dev, fun, PCI_INTERRUPT_REG);
  5421. +
  5422. + /* Add device to guest */
  5423. + pci_add_device(&pd->id, PCI_VENDOR(id_reg), PCI_PRODUCT(id_reg),
  5424. + PCI_CLASS(class_reg), PCI_SUBCLASS(class_reg),
  5425. + PCI_VENDOR(subid_reg), PCI_PRODUCT(subid_reg),
  5426. + 1, NULL, pd);
  5427. +
  5428. + /* Get BARs of native device */
  5429. + rc = ioctl(env->vmd_fd, VMM_IOC_BARINFO, pd);
  5430. + if (rc != 0) {
  5431. + fprintf(stderr, "%d:%d:%d not valid pci device\n", bus, dev, fun);
  5432. + return;
  5433. + }
  5434. + for (i = 0; i < MAXBAR; i++) {
  5435. + int type;
  5436. +
  5437. + type = pd->barinfo[i].type;
  5438. + fprintf(stderr," Bar%d: type:%x base:%llx size:%x\n",
  5439. + i, pd->barinfo[i].type, pd->barinfo[i].addr, pd->barinfo[i].size);
  5440. + if (!pd->barinfo[i].size) {
+ /* Empty BAR: consume the slot so BAR indices stay in sync */
  5442. + pci.pci_devices[pd->id].pd_bar_ct++;
  5443. + }
  5444. + else if (PCI_MAPREG_TYPE(type) == PCI_MAPREG_TYPE_MEM) {
  5445. + pci_add_bar(pd->id, type, pd->barinfo[i].size,
  5446. + ptd_mmiobar, PTD_DEVID(pd->id, i));
  5447. + pd->barinfo[i].va = ptd_mapbar(i, pd->barinfo[i].addr, pd->barinfo[i].size);
+ /* A 64-bit BAR occupies two slots; skip its upper half */
+ if (PCI_MAPREG_MEM_TYPE(type) == PCI_MAPREG_MEM_TYPE_64BIT)
  5450. + i++;
  5451. + }
  5452. + else if (PCI_MAPREG_TYPE(type) == PCI_MAPREG_TYPE_IO) {
  5453. + /* This will get callback via pci_handle_io */
  5454. + pci_add_bar(pd->id, PCI_MAPREG_TYPE_IO, pd->barinfo[i].size,
  5455. + ptd_iobar, PTD_DEVID(pd->id, i));
  5456. + }
  5457. + }
  5458. +}
  5459. +
  5460. /*
  5461. * pci_init
  5462. *
  5463. @@ -219,7 +605,7 @@ pci_init(void)
  5464.  
  5465. if (pci_add_device(&id, PCI_VENDOR_OPENBSD, PCI_PRODUCT_OPENBSD_PCHB,
  5466. PCI_CLASS_BRIDGE, PCI_SUBCLASS_BRIDGE_HOST,
  5467. - PCI_VENDOR_OPENBSD, 0, 0, NULL)) {
  5468. + PCI_VENDOR_OPENBSD, 0, 0, NULL, NULL)) {
  5469. log_warnx("%s: can't add PCI host bridge", __progname);
  5470. return;
  5471. }
  5472. @@ -264,6 +650,8 @@ pci_handle_io(struct vm_run_params *vrp)
  5473.  
  5474. for (i = 0 ; i < pci.pci_dev_ct ; i++) {
  5475. for (j = 0 ; j < pci.pci_devices[i].pd_bar_ct; j++) {
  5476. + if (pci.pci_devices[i].pd_bartype[j] != PCI_BAR_TYPE_IO)
  5477. + continue;
  5478. b_lo = PCI_MAPREG_IO_ADDR(pci.pci_devices[i].pd_bar[j]);
  5479. b_hi = b_lo + VMM_PCI_IO_BAR_SIZE;
  5480. if (reg >= b_lo && reg < b_hi) {
  5481. @@ -286,7 +674,7 @@ pci_handle_io(struct vm_run_params *vrp)
  5482. __progname);
  5483. }
  5484. } else {
  5485. - DPRINTF("%s: no pci i/o function for reg 0x%llx (dir=%d "
  5486. + fprintf(stderr,"%s: no pci i/o function for reg 0x%llx (dir=%d "
  5487. "guest %%rip=0x%llx", __progname, (uint64_t)reg, dir,
  5488. vei->vrs.vrs_gprs[VCPU_REGS_RIP]);
  5489. /* Reads from undefined ports return 0xFF */
  5490. @@ -306,8 +694,10 @@ pci_handle_data_reg(struct vm_run_params *vrp)
  5491. {
  5492. struct vm_exit *vei = vrp->vrp_exit;
  5493. uint8_t b, d, f, o, baridx, ofs, sz;
  5494. + uint32_t barval, barsize, bartype;
  5495. int ret;
  5496. pci_cs_fn_t csfunc;
  5497. + struct vm_ptdpci *pd;
  5498.  
  5499. /* abort if the address register is wack */
  5500. if (!(pci.pci_addr_reg & PCI_MODE1_ENABLE)) {
  5501. @@ -328,9 +718,22 @@ pci_handle_data_reg(struct vm_run_params *vrp)
  5502. f = (pci.pci_addr_reg >> 8) & 0x7;
  5503. o = (pci.pci_addr_reg & 0xfc);
  5504.  
  5505. + /* Do passthrough PCI config space read/write */
  5506. + pd = ptd_lookup(d);
  5507. + if ((o == PCI_COMMAND_STATUS_REG || o == PCI_CLASS_REG ||
  5508. + o == PCI_CAPLISTPTR_REG || o >= 0x40) &&
  5509. + (pd != NULL)) {
  5510. + if (vei->vei.vei_dir == VEI_DIR_IN) {
  5511. + vei->vei.vei_data = ptd_conf_read(pd->bus, pd->dev, pd->func, o);
  5512. + }
  5513. + else {
  5514. + ptd_conf_write(pd->bus, pd->dev, pd->func, o, vei->vei.vei_data);
  5515. + }
  5516. + }
  5517. +
  5518. csfunc = pci.pci_devices[d].pd_csfunc;
  5519. if (csfunc != NULL) {
  5520. - ret = csfunc(vei->vei.vei_dir, (o / 4), &vei->vei.vei_data);
  5521. + ret = csfunc(vei->vei.vei_dir, o, sz, &vei->vei.vei_data, pci.pci_devices[d].pd_cookie);
  5522. if (ret)
  5523. log_warnx("cfg space access function failed for "
  5524. "pci device %d", d);
  5525. @@ -348,31 +751,27 @@ pci_handle_data_reg(struct vm_run_params *vrp)
  5526. * value in the address register.
  5527. */
  5528. if (vei->vei.vei_dir == VEI_DIR_OUT) {
  5529. - if ((o >= 0x10 && o <= 0x24) &&
  5530. - vei->vei.vei_data == 0xffffffff) {
  5531. - /*
  5532. - * Compute BAR index:
  5533. - * o = 0x10 -> baridx = 0
  5534. - * o = 0x14 -> baridx = 1
  5535. - * o = 0x18 -> baridx = 2
  5536. - * o = 0x1c -> baridx = 3
  5537. - * o = 0x20 -> baridx = 4
  5538. - * o = 0x24 -> baridx = 5
  5539. - */
  5540. - baridx = (o / 4) - 4;
  5541. - if (baridx < pci.pci_devices[d].pd_bar_ct)
  5542. - vei->vei.vei_data = 0xfffff000;
  5543. - else
  5544. - vei->vei.vei_data = 0;
  5545. - }
  5546. -
  5547. - /* IOBAR registers must have bit 0 set */
  5548. if (o >= 0x10 && o <= 0x24) {
  5549. - baridx = (o / 4) - 4;
  5550. - if (baridx < pci.pci_devices[d].pd_bar_ct &&
  5551. - pci.pci_devices[d].pd_bartype[baridx] ==
  5552. - PCI_BAR_TYPE_IO)
  5553. - vei->vei.vei_data |= 1;
+ /* When a BAR is rewritten we must recompute its read-only bits */
  5555. + baridx = (o - 0x10) / 4;
  5556. + barval = pci.pci_devices[d].pd_cfg_space[o/4];
  5557. + barsize = pci.pci_devices[d].pd_barsize[baridx];
  5558. + bartype = pci.pci_devices[d].pd_bartype[baridx];
  5559. +
  5560. + /* Mask off size */
  5561. + vei->vei.vei_data &= ~(barsize - 1);
  5562. +
  5563. + /* Keep lower bits of current config space value */
  5564. + if (bartype == PCI_BAR_TYPE_IO)
  5565. + vei->vei.vei_data |= (barval & ~PCI_MAPREG_IO_ADDR_MASK);
  5566. + else {
  5567. + vei->vei.vei_data |= (barval & ~PCI_MAPREG_MEM_ADDR_MASK);
  5568. +
  5569. + /* Remove old BAR value from page fault callback, insert new value */
  5570. + unregister_mem(barval & PCI_MAPREG_MEM_ADDR_MASK);
  5571. + register_mem(vei->vei.vei_data & PCI_MAPREG_MEM_ADDR_MASK,
  5572. + barsize, pci_memh2, PTD_DEVID(d, baridx));
  5573. + }
  5574. }
  5575.  
  5576. /*
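
The masking above implements the standard BAR sizing handshake without special-casing 0xffffffff. For a 4 KB MMIO BAR currently programmed to 0xf0001000: the guest writes 0xffffffff, the &= ~(barsize - 1) step leaves 0xfffff000, and OR-ing back the low bits of the old value restores the type flags, exactly what a physical device would return; the guest then infers size = ~0xfffff000 + 1 = 0x1000. Ordinary base-address writes travel the same path, so the page-fault handler is re-registered at whatever address the guest chooses.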
  5577. diff --git a/usr.sbin/vmd/pci.h b/usr.sbin/vmd/pci.h
  5578. index 01902d77d..b47a7959f 100644
  5579. --- a/usr.sbin/vmd/pci.h
  5580. +++ b/usr.sbin/vmd/pci.h
  5581. @@ -27,48 +27,65 @@
  5582.  
  5583. #define PCI_MAX_PIC_IRQS 10
  5584.  
  5585. -typedef int (*pci_cs_fn_t)(int dir, uint8_t reg, uint32_t *data);
  5586. +typedef int (*pci_cs_fn_t)(int dir, uint8_t reg, uint8_t sz, uint32_t *data, void *cookie);
  5587. typedef int (*pci_iobar_fn_t)(int dir, uint16_t reg, uint32_t *data, uint8_t *,
  5588. void *, uint8_t);
  5589. typedef int (*pci_mmiobar_fn_t)(int dir, uint32_t ofs, uint32_t *data);
  5590.  
  5591. -union pci_dev {
  5592. - uint32_t pd_cfg_space[PCI_CONFIG_SPACE_SIZE / 4];
  5593. +#define PTD_VALID 0x01
  5594.  
  5595. - struct {
  5596. - uint16_t pd_vid;
  5597. - uint16_t pd_did;
  5598. - uint16_t pd_cmd;
  5599. - uint16_t pd_status;
  5600. - uint8_t pd_rev;
  5601. - uint8_t pd_prog_if;
  5602. - uint8_t pd_subclass;
  5603. - uint8_t pd_class;
  5604. - uint8_t pd_cache_size;
  5605. - uint8_t pd_lat_timer;
  5606. - uint8_t pd_header_type;
  5607. - uint8_t pd_bist;
  5608. - uint32_t pd_bar[PCI_MAX_BARS];
  5609. - uint32_t pd_cardbus_cis;
  5610. - uint16_t pd_subsys_vid;
  5611. - uint16_t pd_subsys_id;
  5612. - uint32_t pd_exp_rom_addr;
  5613. - uint8_t pd_cap;
  5614. - uint32_t pd_reserved0 : 24;
  5615. - uint32_t pd_reserved1;
  5616. - uint8_t pd_irq;
  5617. - uint8_t pd_int;
  5618. - uint8_t pd_min_grant;
  5619. - uint8_t pd_max_grant;
  5620. +typedef int (*iocb_t)(int, uint64_t, uint32_t, void *, void *);
  5621.  
  5622. - uint8_t pd_bar_ct;
  5623. - pci_cs_fn_t pd_csfunc;
  5624. +struct iohandler {
  5625. + uint64_t start;
  5626. + uint64_t end;
  5627. + iocb_t handler;
  5628. + void *cookie;
  5629. + TAILQ_ENTRY(iohandler) next;
  5630. +};
  5631. +
  5632. +void register_mem(uint64_t base, uint32_t len, iocb_t handler, void *cookie);
  5633. +void unregister_mem(uint64_t base);
  5634. +int mem_handler(int dir, uint64_t addr, uint32_t size, void *data);
  5635. +
  5636. +struct pci_dev {
  5637. + union {
  5638. + uint32_t pd_cfg_space[PCI_CONFIG_SPACE_SIZE / 4];
  5639. + struct {
  5640. + uint16_t pd_vid;
  5641. + uint16_t pd_did;
  5642. + uint16_t pd_cmd;
  5643. + uint16_t pd_status;
  5644. + uint8_t pd_rev;
  5645. + uint8_t pd_prog_if;
  5646. + uint8_t pd_subclass;
  5647. + uint8_t pd_class;
  5648. + uint8_t pd_cache_size;
  5649. + uint8_t pd_lat_timer;
  5650. + uint8_t pd_header_type;
  5651. + uint8_t pd_bist;
  5652. + uint32_t pd_bar[PCI_MAX_BARS];
  5653. + uint32_t pd_cardbus_cis;
  5654. + uint16_t pd_subsys_vid;
  5655. + uint16_t pd_subsys_id;
  5656. + uint32_t pd_exp_rom_addr;
  5657. + uint8_t pd_cap;
  5658. + uint32_t pd_reserved0 : 24;
  5659. + uint32_t pd_reserved1;
  5660. + uint8_t pd_irq;
  5661. + uint8_t pd_int;
  5662. + uint8_t pd_min_grant;
  5663. + uint8_t pd_max_grant;
  5664. + } __packed;
  5665. + };
  5666. + uint8_t pd_bar_ct;
  5667. + pci_cs_fn_t pd_csfunc;
  5668.  
  5669. - uint8_t pd_bartype[PCI_MAX_BARS];
  5670. - uint32_t pd_barsize[PCI_MAX_BARS];
  5671. - void *pd_barfunc[PCI_MAX_BARS];
  5672. - void *pd_bar_cookie[PCI_MAX_BARS];
  5673. - } __packed;
  5674. + uint8_t pd_bartype[PCI_MAX_BARS];
  5675. + uint32_t pd_barsize[PCI_MAX_BARS];
  5676. + void *pd_barfunc[PCI_MAX_BARS];
  5677. + void *pd_bar_cookie[PCI_MAX_BARS];
  5678. + void *pd_cookie;
  5679. };
  5680.  
  5681. struct pci {
  5682. @@ -79,7 +96,7 @@ struct pci {
  5683. uint32_t pci_addr_reg;
  5684. uint32_t pci_data_reg;
  5685.  
  5686. - union pci_dev pci_devices[PCI_CONFIG_MAX_DEV];
  5687. + struct pci_dev pci_devices[PCI_CONFIG_MAX_DEV];
  5688. };
  5689.  
  5690. void pci_handle_address_reg(struct vm_run_params *);
  5691. @@ -87,9 +104,10 @@ void pci_handle_data_reg(struct vm_run_params *);
  5692. uint8_t pci_handle_io(struct vm_run_params *);
  5693. void pci_init(void);
  5694. int pci_add_device(uint8_t *, uint16_t, uint16_t, uint8_t, uint8_t, uint16_t,
  5695. - uint16_t, uint8_t, pci_cs_fn_t);
  5696. -int pci_add_bar(uint8_t, uint32_t, void *, void *);
  5697. + uint16_t, uint8_t, pci_cs_fn_t, void *);
  5698. +int pci_add_bar(uint8_t, uint32_t, uint32_t, void *, void *);
  5699. int pci_set_bar_fn(uint8_t, uint8_t, void *, void *);
  5700. uint8_t pci_get_dev_irq(uint8_t);
  5701. int pci_dump(int);
  5702. int pci_restore(int);
  5703. +void pci_add_pthru(int, int, int);
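
The widened pci_cs_fn_t now receives the raw config-space offset, the access size, and the per-device cookie. A hypothetical hook, purely to show the calling convention used by pci_handle_data_reg():

    static int
    example_csfunc(int dir, uint8_t reg, uint8_t sz, uint32_t *data, void *cookie)
    {
    	if (dir == VEI_DIR_IN && reg == PCI_ID_REG)
    		*data = 0xbeef1234;	/* made-up vendor/device id */
    	return (0);			/* nonzero makes the caller log a warning */
    }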
  5704. diff --git a/usr.sbin/vmd/virtio.c b/usr.sbin/vmd/virtio.c
  5705. index 8800594fc..430f41995 100644
  5706. --- a/usr.sbin/vmd/virtio.c
  5707. +++ b/usr.sbin/vmd/virtio.c
  5708. @@ -1797,13 +1797,13 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
  5709. PCI_PRODUCT_QUMRANET_VIO_RNG, PCI_CLASS_SYSTEM,
  5710. PCI_SUBCLASS_SYSTEM_MISC,
  5711. PCI_VENDOR_OPENBSD,
  5712. - PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL)) {
  5713. + PCI_PRODUCT_VIRTIO_ENTROPY, 1, NULL, NULL)) {
  5714. log_warnx("%s: can't add PCI virtio rng device",
  5715. __progname);
  5716. return;
  5717. }
  5718.  
  5719. - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_rnd_io, NULL)) {
  5720. + if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, VMM_PCI_IO_BAR_SIZE, virtio_rnd_io, NULL)) {
  5721. log_warnx("%s: can't add bar for virtio rng device",
  5722. __progname);
  5723. return;
  5724. @@ -1835,14 +1835,14 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
  5725. PCI_PRODUCT_QUMRANET_VIO_NET, PCI_CLASS_SYSTEM,
  5726. PCI_SUBCLASS_SYSTEM_MISC,
  5727. PCI_VENDOR_OPENBSD,
  5728. - PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL)) {
  5729. + PCI_PRODUCT_VIRTIO_NETWORK, 1, NULL, NULL)) {
  5730. log_warnx("%s: can't add PCI virtio net device",
  5731. __progname);
  5732. return;
  5733. }
  5734.  
  5735. - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_net_io,
  5736. - &vionet[i])) {
  5737. + if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, VMM_PCI_IO_BAR_SIZE,
  5738. + virtio_net_io, &vionet[i])) {
  5739. log_warnx("%s: can't add bar for virtio net "
  5740. "device", __progname);
  5741. return;
  5742. @@ -1923,13 +1923,13 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
  5743. PCI_CLASS_MASS_STORAGE,
  5744. PCI_SUBCLASS_MASS_STORAGE_SCSI,
  5745. PCI_VENDOR_OPENBSD,
  5746. - PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL)) {
  5747. + PCI_PRODUCT_VIRTIO_BLOCK, 1, NULL, NULL)) {
  5748. log_warnx("%s: can't add PCI virtio block "
  5749. "device", __progname);
  5750. return;
  5751. }
  5752. - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, virtio_blk_io,
  5753. - &vioblk[i])) {
  5754. + if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, VMM_PCI_IO_BAR_SIZE,
  5755. + virtio_blk_io, &vioblk[i])) {
  5756. log_warnx("%s: can't add bar for virtio block "
  5757. "device", __progname);
  5758. return;
  5759. @@ -1971,13 +1971,14 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
  5760. PCI_CLASS_MASS_STORAGE,
  5761. PCI_SUBCLASS_MASS_STORAGE_SCSI,
  5762. PCI_VENDOR_OPENBSD,
  5763. - PCI_PRODUCT_VIRTIO_SCSI, 1, NULL)) {
  5764. + PCI_PRODUCT_VIRTIO_SCSI, 1, NULL, NULL)) {
  5765. log_warnx("%s: can't add PCI vioscsi device",
  5766. __progname);
  5767. return;
  5768. }
  5769.  
  5770. - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vioscsi_io, vioscsi)) {
  5771. + if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, VMM_PCI_IO_BAR_SIZE,
  5772. + vioscsi_io, vioscsi)) {
  5773. log_warnx("%s: can't add bar for vioscsi device",
  5774. __progname);
  5775. return;
  5776. @@ -2013,13 +2014,13 @@ virtio_init(struct vmd_vm *vm, int child_cdrom,
  5777. PCI_CLASS_COMMUNICATIONS,
  5778. PCI_SUBCLASS_COMMUNICATIONS_MISC,
  5779. PCI_VENDOR_OPENBSD,
  5780. - PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL)) {
  5781. + PCI_PRODUCT_VIRTIO_VMMCI, 1, NULL, NULL)) {
  5782. log_warnx("%s: can't add PCI vmm control device",
  5783. __progname);
  5784. return;
  5785. }
  5786.  
  5787. - if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, vmmci_io, NULL)) {
  5788. + if (pci_add_bar(id, PCI_MAPREG_TYPE_IO, VMM_PCI_IO_BAR_SIZE, vmmci_io, NULL)) {
  5789. log_warnx("%s: can't add bar for vmm control device",
  5790. __progname);
  5791. return;
  5792. diff --git a/usr.sbin/vmd/vm.c b/usr.sbin/vmd/vm.c
  5793. index a9fcce4fa..566c9964c 100644
  5794. --- a/usr.sbin/vmd/vm.c
  5795. +++ b/usr.sbin/vmd/vm.c
  5796. @@ -63,6 +63,7 @@
  5797. #include "mc146818.h"
  5798. #include "fw_cfg.h"
  5799. #include "atomicio.h"
  5800. +#include "x86emu.h"
  5801.  
  5802. io_fn_t ioports_map[MAX_PORTS];
  5803.  
  5804. @@ -947,6 +948,7 @@ alloc_guest_mem(struct vm_create_params *vcp)
  5805. return (ret);
  5806. }
  5807.  
  5808. + memset(p, 0, vmr->vmr_size);
  5809. vmr->vmr_va = (vaddr_t)p;
  5810. }
  5811.  
  5812. @@ -1062,6 +1064,14 @@ init_emulated_hw(struct vmop_create_params *vmc, int child_cdrom,
  5813.  
  5814. /* Initialize virtio devices */
  5815. virtio_init(current_vm, child_cdrom, child_disks, child_taps);
  5816. +
  5817. + /* Add Passthrough Devices */
  5818. + for (i = 0; i < (int)vcp->vcp_npcis; i++) {
  5819. + int bus = (vcp->vcp_pcis[i] >> 8);
  5820. + int dev = (vcp->vcp_pcis[i] >> 3) & 0x1F;
  5821. + int fun = (vcp->vcp_pcis[i] >> 0) & 0x7;
  5822. + pci_add_pthru(bus, dev, fun);
  5823. + }
  5824. }
  5825. /*
  5826. * restore_emulated_hw
  5827. @@ -1585,12 +1595,12 @@ vcpu_exit_inout(struct vm_run_params *vrp)
  5828. if (ioports_map[vei->vei.vei_port] != NULL)
  5829. intr = ioports_map[vei->vei.vei_port](vrp);
  5830. else if (vei->vei.vei_dir == VEI_DIR_IN)
  5831. - set_return_data(vei, 0xFFFFFFFF);
  5832. -
  5833. + set_return_data(vei, 0xFFFFFFFF);
  5834. if (intr != 0xFF)
  5835. vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
  5836. }
  5837.  
  5838. +
  5839. /*
  5840. * vcpu_exit_eptviolation
  5841. *
  5842. @@ -1604,10 +1614,70 @@ vcpu_exit_inout(struct vm_run_params *vrp)
  5843. * 0: no action required
  5844. * EAGAIN: a protection fault occured, kill the vm.
  5845. */
  5846. +
  5847. +extern int mem_chkint(void);
  5848. +
  5849. int
  5850. vcpu_exit_eptviolation(struct vm_run_params *vrp)
  5851. {
  5852. struct vm_exit *ve = vrp->vrp_exit;
  5853. + uint64_t gip, gpa;
  5854. + uint8_t instr[16] = { 0 };
  5855. + struct vm_rwregs_params vrwp = { 0 };
  5856. + uint64_t *regrw;
  5857. + struct insn ix;
  5858. +
  5859. + /* Read instruction bytes that caused page fault */
  5860. + translate_gva(ve, ve->vrs.vrs_gprs[VCPU_REGS_RIP], &gip, PROT_READ);
  5861. + read_mem(gip, instr, sizeof(instr));
  5862. + fprintf(stderr, "===============\nept violation: %llx rip:0x%llx %.2x %.2x %.2x %.2x %.2x\n",
  5863. + ve->vee.vee_gpa, ve->vrs.vrs_gprs[VCPU_REGS_RIP], instr[0], instr[1], instr[2],
  5864. + instr[3], instr[4]);
  5865. +#if 0
  5866. + fprintf(stderr, " rax:0x%.16llx rbx:0x%.16llx rcx:0x%.16llx rdx:0x%.16llx\n",
  5867. + ve->vrs.vrs_gprs[VCPU_REGS_RAX],
  5868. + ve->vrs.vrs_gprs[VCPU_REGS_RBX],
  5869. + ve->vrs.vrs_gprs[VCPU_REGS_RCX],
  5870. + ve->vrs.vrs_gprs[VCPU_REGS_RDX]);
  5871. + fprintf(stderr, " rsi:0x%.16llx rdi:0x%.16llx rbp:0x%.16llx rsp:0x%.16llx\n",
  5872. + ve->vrs.vrs_gprs[VCPU_REGS_RSI],
  5873. + ve->vrs.vrs_gprs[VCPU_REGS_RDI],
  5874. + ve->vrs.vrs_gprs[VCPU_REGS_RBP],
  5875. + ve->vrs.vrs_gprs[VCPU_REGS_RSP]);
  5876. + fprintf(stderr, " r8: 0x%.16llx r9: 0x%.16llx r10:0x%.16llx r11:0x%.16llx\n",
  5877. + ve->vrs.vrs_gprs[VCPU_REGS_R8],
  5878. + ve->vrs.vrs_gprs[VCPU_REGS_R9],
  5879. + ve->vrs.vrs_gprs[VCPU_REGS_R10],
  5880. + ve->vrs.vrs_gprs[VCPU_REGS_R11]);
  5881. + fprintf(stderr, " r12:0x%.16llx r13:0x%.16llx r14:0x%.16llx r15:0x%.16llx\n",
  5882. + ve->vrs.vrs_gprs[VCPU_REGS_R12],
  5883. + ve->vrs.vrs_gprs[VCPU_REGS_R13],
  5884. + ve->vrs.vrs_gprs[VCPU_REGS_R14],
  5885. + ve->vrs.vrs_gprs[VCPU_REGS_R15]);
  5886. +#endif
  5887. +
  5888. + vrwp.vrwp_mask = VM_RWREGS_GPRS;
  5889. + vrwp.vrwp_vm_id = vrp->vrp_vm_id;
  5890. + vrwp.vrwp_vcpu_id = vrp->vrp_vcpu_id;
  5891. + vrwp.vrwp_regs = ve->vrs;
  5892. + gpa = ve->vee.vee_gpa;
  5893. +
  5894. + /* Decode instruction and get # of bytes, size register for read/write */
  5895. + memset(&ix, 0, sizeof(ix));
  5896. + dodis(instr, &ix, ve->vrs.vrs_sregs[VCPU_REGS_CS].vsi_ar & 0x2000 ?
  5897. + SIZE_QWORD : SIZE_DWORD);
  5898. + if (ix.incr && (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END)) {
  5899. + regrw = &vrwp.vrwp_regs.vrs_gprs[ix.reg];
  5900. + mem_handler(ix.dir, gpa, ix.size, regrw);
  5901. + fprintf(stderr, "memhandler : %.16llx %d\n", (uint64_t)*regrw, ix.incr);
  5902. + /* skip this instruction when returning to vm */
  5903. + vrwp.vrwp_regs.vrs_gprs[VCPU_REGS_RIP] += ix.incr;
  5904. + if (ioctl(env->vmd_fd, VMM_IOC_WRITEREGS, &vrwp))
  5905. + fprintf(stderr,"writeregs fails\n");
  5906. + return 0;
  5907. + }
  5908. + fprintf(stderr, "nothandled\n");
  5909. +
  5910. /*
* the vm may be exiting to vmd to handle a pending interrupt
* but the last exit type may have been VMX_EXIT_EPT_VIOLATION,
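
To make the new fault path concrete: if the guest executes mov (%rbx),%eax (bytes 8b 03) against a registered BAR, dodis() fills struct insn with, roughly, dir = VEI_DIR_IN, reg = the %rax index, size = 4 and incr = 2 (the instruction length). mem_handler() then satisfies the 4-byte read from the mmap'd BAR via pci_memh2(), the result lands in the RAX slot of vrwp, RIP is advanced by incr to skip the instruction, and VMM_IOC_WRITEREGS pushes the registers back before the vcpu resumes. (Field meanings inferred from the usage above; the decoder itself is x86emu.c, added at the end of this patch.)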
  5913. @@ -1653,7 +1723,6 @@ vcpu_exit(struct vm_run_params *vrp)
  5914. case VMX_EXIT_CPUID:
  5915. case VMX_EXIT_EXTINT:
  5916. case SVM_VMEXIT_INTR:
  5917. - case SVM_VMEXIT_NPF:
  5918. case SVM_VMEXIT_MSR:
  5919. case SVM_VMEXIT_CPUID:
  5920. /*
  5921. @@ -1665,10 +1734,10 @@ vcpu_exit(struct vm_run_params *vrp)
  5922. */
  5923. break;
  5924. case VMX_EXIT_EPT_VIOLATION:
  5925. + case SVM_VMEXIT_NPF:
  5926. ret = vcpu_exit_eptviolation(vrp);
  5927. if (ret)
  5928. return (ret);
  5929. -
  5930. break;
  5931. case VMX_EXIT_IO:
  5932. case SVM_VMEXIT_IOIO:
  5933. @@ -1701,7 +1770,12 @@ vcpu_exit(struct vm_run_params *vrp)
  5934.  
  5935. /* Process any pending traffic */
  5936. vionet_process_rx(vrp->vrp_vm_id);
  5937. -
  5938. + {
  5939. + uint8_t intr;
  5940. + if ((intr = mem_chkint()) != 0xff) {
  5941. + vcpu_assert_pic_irq(vrp->vrp_vm_id, vrp->vrp_vcpu_id, intr);
  5942. + }
  5943. + }
  5944. vrp->vrp_continue = 1;
  5945.  
  5946. return (0);
  5947. @@ -2216,12 +2290,13 @@ translate_gva(struct vm_exit* exit, uint64_t va, uint64_t* pa, int mode)
  5948. return (EPERM);
  5949.  
  5950. pte = pte | PG_U;
  5951. - if (mode == PROT_WRITE)
  5952. + if (mode == PROT_WRITE) {
  5953. pte = pte | PG_M;
  5954. - if (write_mem(pte_paddr, &pte, pte_size)) {
  5955. - log_warn("%s: failed to write back flags to pte",
  5956. - __func__);
  5957. - return (EIO);
  5958. + if (write_mem(pte_paddr, &pte, pte_size)) {
  5959. + log_warn("%s: failed to write back flags to pte",
  5960. + __func__);
  5961. + return (EIO);
  5962. + }
  5963. }
  5964.  
  5965. /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
  5966. diff --git a/usr.sbin/vmd/vmm.h b/usr.sbin/vmd/vmm.h
  5967. index 214d41d01..de23fb924 100644
  5968. --- a/usr.sbin/vmd/vmm.h
  5969. +++ b/usr.sbin/vmd/vmm.h
  5970. @@ -22,3 +22,4 @@ void vcpu_assert_pic_irq(uint32_t, uint32_t, int);
  5971. void vcpu_deassert_pic_irq(uint32_t, uint32_t, int);
  5972. void set_return_data(struct vm_exit *, uint32_t);
  5973. void get_input_data(struct vm_exit *, uint32_t *);
  5974. +
  5975. diff --git a/usr.sbin/vmd/x86emu.c b/usr.sbin/vmd/x86emu.c
  5976. new file mode 100644
  5977. index 000000000..857de4710
  5978. --- /dev/null
  5979. +++ b/usr.sbin/vmd/x86emu.c
  5980. @@ -0,0 +1,819 @@
  5981. +/*
  5982. + * Copyright (c) 2020 Jordan Hargrave <[email protected]>
  5983. + *
  5984. + * Permission to use, copy, modify, and distribute this software for any
  5985. + * purpose with or without fee is hereby granted, provided that the above
  5986. + * copyright notice and this permission notice appear in all copies.
  5987. + *
  5988. + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  5989. + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
  5990. + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
  5991. + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  5992. + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  5993. + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
  5994. + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  5995. + */
  5996. +#include <stdio.h>
  5997. +#include <stdlib.h>
  5998. +#include <string.h>
  5999. +#include <fcntl.h>
  6000. +#include <inttypes.h>
  6001. +#include "x86emu.h"
  6002. +#include <machine/vmmvar.h>
  6003. +
  6004. +#define printf(x...) fprintf(stderr, x)
  6005. +
  6006. +#define _(m, a...) { .mnem=#m, a }
  6007. +#define _xxx { }
  6008. +#define __ 0
  6009. +
  6010. +struct opcode {
  6011. + const char *mnem;
  6012. + int arg0;
  6013. + int arg1;
  6014. + int arg2;
  6015. + int flag;
  6016. +};
  6017. +
  6018. +struct opcode hicodes[256] = {
  6019. + [0x30] =
  6020. + _(wrmsr),
  6021. + _(rdtsc),
  6022. + _(rdmsr),
  6023. + _(rdpmc),
  6024. + _(sysenter),
  6025. + _(sysexit),
  6026. +
  6027. + /* 0x40 */
  6028. + [0x40] =
  6029. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6030. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6031. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6032. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6033. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6034. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6035. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6036. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6037. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6038. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6039. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6040. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6041. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6042. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6043. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6044. + _(cmovcc, Gv, Ev, __, FLG_MRR),
  6045. +
  6046. + /* 0x80 */
  6047. + [0x80] =
  6048. + _(jcc, Jz, __, __, FLG_D64),
  6049. + _(jcc, Jz, __, __, FLG_D64),
  6050. + _(jcc, Jz, __, __, FLG_D64),
  6051. + _(jcc, Jz, __, __, FLG_D64),
  6052. + _(jcc, Jz, __, __, FLG_D64),
  6053. + _(jcc, Jz, __, __, FLG_D64),
  6054. + _(jcc, Jz, __, __, FLG_D64),
  6055. + _(jcc, Jz, __, __, FLG_D64),
  6056. + _(jcc, Jz, __, __, FLG_D64),
  6057. + _(jcc, Jz, __, __, FLG_D64),
  6058. + _(jcc, Jz, __, __, FLG_D64),
  6059. + _(jcc, Jz, __, __, FLG_D64),
  6060. + _(jcc, Jz, __, __, FLG_D64),
  6061. + _(jcc, Jz, __, __, FLG_D64),
  6062. + _(jcc, Jz, __, __, FLG_D64),
  6063. + _(jcc, Jz, __, __, FLG_D64),
  6064. +
  6065. + /* 0x90 */
  6066. + _(setcc, Eb, __, __, FLG_MRR),
  6067. + _(setcc, Eb, __, __, FLG_MRR),
  6068. + _(setcc, Eb, __, __, FLG_MRR),
  6069. + _(setcc, Eb, __, __, FLG_MRR),
  6070. + _(setcc, Eb, __, __, FLG_MRR),
  6071. + _(setcc, Eb, __, __, FLG_MRR),
  6072. + _(setcc, Eb, __, __, FLG_MRR),
  6073. + _(setcc, Eb, __, __, FLG_MRR),
  6074. + _(setcc, Eb, __, __, FLG_MRR),
  6075. + _(setcc, Eb, __, __, FLG_MRR),
  6076. + _(setcc, Eb, __, __, FLG_MRR),
  6077. + _(setcc, Eb, __, __, FLG_MRR),
  6078. + _(setcc, Eb, __, __, FLG_MRR),
  6079. + _(setcc, Eb, __, __, FLG_MRR),
  6080. + _(setcc, Eb, __, __, FLG_MRR),
  6081. + _(setcc, Eb, __, __, FLG_MRR),
  6082. +
  6083. + /* 0xa0 */
  6084. + _(push, rFS, __, __, FLG_D64),
  6085. + _(pop, rFS, __, __, FLG_D64),
  6086. + _(cpuid),
  6087. + _(bt, Ev, Gv, __, FLG_MRR),
  6088. + _(shld, Ev, Gv, Ib, FLG_MRR),
  6089. + _(shld, Ev, Gv, rCL,FLG_MRR),
  6090. + _xxx,
  6091. + _xxx,
  6092. + _(push, rGS, __, __, FLG_D64),
  6093. + _(pop, rGS, __, __, FLG_D64),
  6094. + _xxx,
  6095. + _(bts, Ev, Gv, __, FLG_MRR),
  6096. + _(shrd, Ev, Gv, Ib, FLG_MRR),
  6097. + _(shrd, Ev, Gv, rCL,FLG_MRR),
  6098. + _xxx,
  6099. + _(imul, Gv, Ev, __, FLG_MRR),
  6100. +
  6101. + /* 0xb0 */
  6102. + _(cmpxchg, Eb, Gb, __, FLG_MRR),
  6103. + _(cmpxchg, Ev, Gv, __, FLG_MRR),
  6104. + _(lss, Gv, Mp, __, FLG_MRR),
  6105. + _(btr, Ev, Gv, __, FLG_MRR),
  6106. + _(lfs, Gv, Mp, __, FLG_MRR),
  6107. + _(lgs, Gv, Mp, __, FLG_MRR),
  6108. + _(movzx, Gv, Eb, __, FLG_MRR),
  6109. + _(movzx, Gv, Ew, __, FLG_MRR),
  6110. + _xxx,
  6111. + _xxx,
  6112. + _xxx,
  6113. + _(btc, Ev, Gv, __, FLG_MRR),
  6114. + _(bsf, Gv, Ev, __, FLG_MRR),
  6115. + _(bsr, Gv, Ev, __, FLG_MRR),
  6116. + _(movsx, Gv, Eb, __, FLG_MRR),
  6117. + _(movsx, Gv, Ew, __, FLG_MRR),
  6118. +
  6119. + /* 0xc0 */
  6120. + _(xadd, Eb, Gb, __, FLG_MRR),
  6121. + _(xadd, Ev, Gv, __, FLG_MRR),
  6122. + _xxx,
  6123. + _xxx,
  6124. + _xxx,
  6125. + _xxx,
  6126. + _xxx,
  6127. + _xxx,
  6128. + _(bswap, gv),
  6129. + _(bswap, gv),
  6130. + _(bswap, gv),
  6131. + _(bswap, gv),
  6132. + _(bswap, gv),
  6133. + _(bswap, gv),
  6134. + _(bswap, gv),
  6135. + _(bswap, gv),
  6136. +};
  6137. +
  6138. +struct opcode locodes[256] = {
  6139. + _(add, Eb, Gb, __, FLG_MRR),
  6140. + _(add, Ev, Gv, __, FLG_MRR),
  6141. + _(add, Gb, Eb, __, FLG_MRR),
  6142. + _(add, Gv, Ev, __, FLG_MRR),
  6143. + _(add, rAL, Ib),
  6144. + _(add, rvAX, Iz),
  6145. + _(push, rES, __, __, FLG_NO64),
  6146. + _(pop, rES, __, __, FLG_NO64),
  6147. + _(or, Eb, Gb, __, FLG_MRR),
  6148. + _(or, Ev, Gv, __, FLG_MRR),
  6149. + _(or, Gb, Eb, __, FLG_MRR),
  6150. + _(or, Gv, Ev, __, FLG_MRR),
  6151. + _(or, rAL, Ib),
  6152. + _(or, rvAX, Iz),
  6153. + _(push, rCS, __, __, FLG_NO64),
  6154. + _xxx,
  6155. +
  6156. + /* 0x10 */
  6157. + _(adc, Eb, Gb, __, FLG_MRR),
  6158. + _(adc, Ev, Gv, __, FLG_MRR),
  6159. + _(adc, Gb, Eb, __, FLG_MRR),
  6160. + _(adc, Gv, Ev, __, FLG_MRR),
  6161. + _(adc, rAL, Ib),
  6162. + _(adc, rvAX, Iz),
  6163. + _(push, rSS, __, __, FLG_NO64),
  6164. + _(pop, rSS, __, __, FLG_NO64),
  6165. + _(sbb, Eb, Gb, __, FLG_MRR),
  6166. + _(sbb, Ev, Gv, __, FLG_MRR),
  6167. + _(sbb, Gb, Eb, __, FLG_MRR),
  6168. + _(sbb, Gv, Ev, __, FLG_MRR),
  6169. + _(sbb, rAL, Ib),
  6170. + _(sbb, rvAX, Iz),
  6171. + _(push, rDS, __, __, FLG_NO64),
  6172. + _(pop, rDS, __, __, FLG_NO64),
  6173. +
  6174. + /* 0x20 */
  6175. + _(and, Eb, Gb, __, FLG_MRR),
  6176. + _(and, Ev, Gv, __, FLG_MRR),
  6177. + _(and, Gb, Eb, __, FLG_MRR),
  6178. + _(and, Gv, Ev, __, FLG_MRR),
  6179. + _(and, rAL, Ib),
  6180. + _(and, rvAX, Iz),
  6181. + _(pfx, rES, __, __, FLG_SEG),
  6182. + _(daa, __, __, __, FLG_NO64),
  6183. + _(sub, Eb, Gb, __, FLG_MRR),
  6184. + _(sub, Ev, Gv, __, FLG_MRR),
  6185. + _(sub, Gb, Eb, __, FLG_MRR),
  6186. + _(sub, Gv, Ev, __, FLG_MRR),
  6187. + _(sub, rAL, Ib),
  6188. + _(sub, rvAX, Iz),
  6189. + _(pfx, rCS, __, __, FLG_SEG),
  6190. + _(das, __, __, __, FLG_NO64),
  6191. +
  6192. + /* 0x30 */
  6193. + _(xor, Eb, Gb, __, FLG_MRR),
  6194. + _(xor, Ev, Gv, __, FLG_MRR),
  6195. + _(xor, Gb, Eb, __, FLG_MRR),
  6196. + _(xor, Gv, Ev, __, FLG_MRR),
  6197. + _(xor, rAL, Ib),
  6198. + _(xor, rvAX, Iz),
  6199. + _(pfx, rSS, __, __, FLG_SEG),
  6200. + _(aaa, __, __, __, FLG_NO64),
  6201. + _(cmp, Eb, Gb, __, FLG_MRR),
  6202. + _(cmp, Ev, Gv, __, FLG_MRR),
  6203. + _(cmp, Gb, Eb, __, FLG_MRR),
  6204. + _(cmp, Gv, Ev, __, FLG_MRR),
  6205. + _(cmp, rAL, Ib),
  6206. + _(cmp, rvAX, Iz),
  6207. + _(pfx, rDS, __, __, FLG_SEG),
  6208. + _(aas, __, __, __, FLG_NO64),
  6209. +
  6210. + /* 0x40 */
  6211. + _(inc, gv, __, __, FLG_REX),
  6212. + _(inc, gv, __, __, FLG_REX),
  6213. + _(inc, gv, __, __, FLG_REX),
  6214. + _(inc, gv, __, __, FLG_REX),
  6215. + _(inc, gv, __, __, FLG_REX),
  6216. + _(inc, gv, __, __, FLG_REX),
  6217. + _(inc, gv, __, __, FLG_REX),
  6218. + _(inc, gv, __, __, FLG_REX),
  6219. + _(dec, gv, __, __, FLG_REX),
  6220. + _(dec, gv, __, __, FLG_REX),
  6221. + _(dec, gv, __, __, FLG_REX),
  6222. + _(dec, gv, __, __, FLG_REX),
  6223. + _(dec, gv, __, __, FLG_REX),
  6224. + _(dec, gv, __, __, FLG_REX),
  6225. + _(dec, gv, __, __, FLG_REX),
  6226. + _(dec, gv, __, __, FLG_REX),
  6227. +
  6228. + /* 0x50 */
  6229. + _(push, gv, __, __, FLG_D64),
  6230. + _(push, gv, __, __, FLG_D64),
  6231. + _(push, gv, __, __, FLG_D64),
  6232. + _(push, gv, __, __, FLG_D64),
  6233. + _(push, gv, __, __, FLG_D64),
  6234. + _(push, gv, __, __, FLG_D64),
  6235. + _(push, gv, __, __, FLG_D64),
  6236. + _(push, gv, __, __, FLG_D64),
  6237. + _(pop, gv, __, __, FLG_D64),
  6238. + _(pop, gv, __, __, FLG_D64),
  6239. + _(pop, gv, __, __, FLG_D64),
  6240. + _(pop, gv, __, __, FLG_D64),
  6241. + _(pop, gv, __, __, FLG_D64),
  6242. + _(pop, gv, __, __, FLG_D64),
  6243. + _(pop, gv, __, __, FLG_D64),
  6244. + _(pop, gv, __, __, FLG_D64),
  6245. +
  6246. + /* 0x60 */
  6247. + _(pusha, __, __, __, FLG_NO64),
  6248. + _(popa, __, __, __, FLG_NO64),
  6249. + _xxx, /* EVEX */
  6250. + _xxx, /* movsxd Gv, Rd */
  6251. + _(pfx, rFS, __, __, FLG_SEG),
  6252. + _(pfx, rGS, __, __, FLG_SEG),
  6253. + _(pfx, __, __, __, FLG_OSZ),
  6254. + _(pfx, __, __, __, FLG_ASZ),
  6255. + _(push, Iz, __, __, FLG_D64),
  6256. + _(imul, Gv, Ev, Iz, FLG_MRR),
  6257. + _(push, Ib, __, __, FLG_D64),
  6258. + _(imul, Gv, Ev, Ib, FLG_MRR),
  6259. + _(insb, Yb, rDX, __, FLG_MEM), /* rep */
  6260. + _(insv, Yv, rDX, __, FLG_MEM), /* rep */
  6261. + _(outsb, rDX, Xb, __, FLG_MEM), /* rep */
  6262. + _(outsv, rDX, Xv, __, FLG_MEM), /* rep */
  6263. +
  6264. + /* 0x70 */
  6265. + _(jcc, Jb, __, __, FLG_D64),
  6266. + _(jcc, Jb, __, __, FLG_D64),
  6267. + _(jcc, Jb, __, __, FLG_D64),
  6268. + _(jcc, Jb, __, __, FLG_D64),
  6269. + _(jcc, Jb, __, __, FLG_D64),
  6270. + _(jcc, Jb, __, __, FLG_D64),
  6271. + _(jcc, Jb, __, __, FLG_D64),
  6272. + _(jcc, Jb, __, __, FLG_D64),
  6273. + _(jcc, Jb, __, __, FLG_D64),
  6274. + _(jcc, Jb, __, __, FLG_D64),
  6275. + _(jcc, Jb, __, __, FLG_D64),
  6276. + _(jcc, Jb, __, __, FLG_D64),
  6277. + _(jcc, Jb, __, __, FLG_D64),
  6278. + _(jcc, Jb, __, __, FLG_D64),
  6279. + _(jcc, Jb, __, __, FLG_D64),
  6280. + _(jcc, Jb, __, __, FLG_D64),
  6281. +
  6282. + /* 0x80 */
  6283. + _(grp1, Eb, Ib, __, FLG_MRR|FLG_GRP),
  6284. + _(grp1, Ev, Iz, __, FLG_MRR|FLG_GRP),
  6285. + _(grp1, Eb, Ib, __, FLG_MRR|FLG_GRP|FLG_NO64),
  6286. + _(grp1, Ev, Ib, __, FLG_MRR|FLG_GRP),
  6287. + _(test, Eb, Gb, __, FLG_MRR),
  6288. + _(test, Ev, Gv, __, FLG_MRR),
  6289. + _(xchg, Eb, Gb, __, FLG_MRR),
  6290. + _(xchg, Ev, Gv, __, FLG_MRR),
  6291. + _(mov, Eb, Gb, __, FLG_MRR),
  6292. + _(mov, Ev, Gv, __, FLG_MRR),
  6293. + _(mov, Gb, Eb, __, FLG_MRR),
  6294. + _(mov, Gv, Ev, __, FLG_MRR),
  6295. + _(mov, Ew, Sw, __, FLG_MRR),
  6296. + _(lea, Gv, Mp, __, FLG_MRR),
  6297. + _(mov, Sw, Ew, __, FLG_MRR),
  6298. + _(pop, Ev, __, __, FLG_MRR), /* GRP1a [pop] */
  6299. +
  6300. + /* 0x90 */
  6301. + _(nop),
  6302. + _(xchg, rvAX, gv),
  6303. + _(xchg, rvAX, gv),
  6304. + _(xchg, rvAX, gv),
  6305. + _(xchg, rvAX, gv),
  6306. + _(xchg, rvAX, gv),
  6307. + _(xchg, rvAX, gv),
  6308. + _(xchg, rvAX, gv),
  6309. + _(cbw), /* AX=AL / EAX=AX / RAX=EAX */
  6310. + _(cwd), /* DX:AX=AX / EDX:EAX=EAX / RDX:RAX=RAX */
  6311. + _(call, Ap, __, __, FLG_NO64),
  6312. + _(wait),
  6313. + _(pushf, __, __, __, FLG_D64),
  6314. + _(popf, __, __, __, FLG_D64),
  6315. + _(sahf),
  6316. + _(lahf),
  6317. +
  6318. + /* 0xa0 */
  6319. + _(mov, rAL, Ob, __, FLG_MEM),
  6320. + _(mov, rvAX, Ov, __, FLG_MEM),
  6321. + _(mov, Ob, rAL, __, FLG_MEM),
  6322. + _(mov, Ov,rvAX, __, FLG_MEM),
  6323. + _(movsb, Yb, Xb, __, FLG_MEM), /* rep */
  6324. + _(movsv, Yv, Xv, __, FLG_MEM), /* rep */
  6325. + _(cmpsb, Yb, Xb, __, FLG_MEM), /* repz/repnz */
+ _(cmpsv, Yv, Xv, __, FLG_MEM), /* repz/repnz */
  6327. + _(test, rAL, Ib),
  6328. + _(test, rvAX, Iz),
  6329. + _(stosb, Yb, rAL, __, FLG_MEM), /* rep */
  6330. + _(stosv, Yv,rvAX, __, FLG_MEM), /* rep */
  6331. + _(lodsb, rAL, Xb, __, FLG_MEM),
  6332. + _(lodsv, rvAX, Xv, __, FLG_MEM),
  6333. + _(scasb, Yb, rAL, __, FLG_MEM), /* repz/repnz */
  6334. + _(scasv, Yv,rvAX, __, FLG_MEM), /* repz/repnz */
  6335. +
  6336. + /* 0xb0 */
  6337. + _(mov, gb, Ib),
  6338. + _(mov, gb, Ib),
  6339. + _(mov, gb, Ib),
  6340. + _(mov, gb, Ib),
  6341. + _(mov, gb, Ib),
  6342. + _(mov, gb, Ib),
  6343. + _(mov, gb, Ib),
  6344. + _(mov, gb, Ib),
  6345. + _(mov, gv, Iv),
  6346. + _(mov, gv, Iv),
  6347. + _(mov, gv, Iv),
  6348. + _(mov, gv, Iv),
  6349. + _(mov, gv, Iv),
  6350. + _(mov, gv, Iv),
  6351. + _(mov, gv, Iv),
  6352. + _(mov, gv, Iv),
  6353. +
  6354. + /* 0xc0 */
  6355. + _(grp2, Eb, Ib, __, FLG_MRR|FLG_GRP),
  6356. + _(grp2, Ev, Ib, __, FLG_MRR|FLG_GRP),
  6357. + _(ret, Iw, __, __, FLG_D64),
  6358. + _(ret, __, __, __, FLG_D64),
  6359. + _(les, Gv, Mp, __, FLG_MRR|FLG_NO64), /* VEX3 */
  6360. + _(lds, Gv, Mp, __, FLG_MRR|FLG_NO64), /* VEX2 */
  6361. + _(mov, Eb, Ib, __, FLG_MRR), /* GRP11 [mov] */
  6362. + _(mov, Ev, Iz, __, FLG_MRR), /* GRP11 [mov] */
  6363. + _(enter, Iw, Ib, __, FLG_D64),
  6364. + _(leave, __, __, __, FLG_D64),
  6365. + _(retf, Iw),
  6366. + _(retf),
  6367. + _(int, i3),
  6368. + _(int, Ib),
  6369. + _(into, __, __, __, FLG_NO64),
  6370. + _(iret),
  6371. +
  6372. + /* 0xd0 */
  6373. + _(grp2, Eb, i1, __, FLG_MRR|FLG_GRP),
  6374. + _(grp2, Ev, i1, __, FLG_MRR|FLG_GRP),
  6375. + _(grp2, Eb, rCL, __, FLG_MRR|FLG_GRP),
+ _(grp2, Ev, rCL, __, FLG_MRR|FLG_GRP),
  6377. + _(aam, Ib, __, __, FLG_NO64),
  6378. + _(aad, Ib, __, __, FLG_NO64),
  6379. + _(salc, __, __, __, FLG_NO64),
  6380. + _(xlat, __, __, __, FLG_MEM),
  6381. + _xxx,
  6382. + _xxx,
  6383. + _xxx,
  6384. + _xxx,
  6385. + _xxx,
  6386. + _xxx,
  6387. + _xxx,
  6388. + _xxx,
  6389. +
  6390. + /* 0xe0 */
  6391. + _(loopnz, Jb, __, __, FLG_D64),
  6392. + _(loopz, Jb, __, __, FLG_D64),
  6393. + _(loop, Jb, __, __, FLG_D64),
  6394. + _(jcxz, Jb, __, __, FLG_D64),
  6395. + _(in, rAL, Ib),
  6396. + _(in, rvAX, Ib),
  6397. + _(out, Ib, rAL),
  6398. + _(out, Ib, rvAX),
  6399. + _(call, Jz, __, __, FLG_D64),
  6400. + _(jmp, Jz, __, __, FLG_D64),
  6401. + _(jmp, Ap, __, __, FLG_NO64),
  6402. + _(jmp, Jb, __, __, FLG_D64),
  6403. + _(in, rAL, rDX),
  6404. + _(in, rvAX, rDX),
  6405. + _(out, rDX, rAL),
  6406. + _(out, rDX, rvAX),
  6407. +
  6408. + /* 0xf0 */
  6409. + _(pfx, __, __, __, FLG_LOCK),
  6410. + _(int, i1),
  6411. + _(pfx, __, __, __, FLG_REP),
  6412. + _(pfx, __, __, __, FLG_REP),
  6413. + _(hlt),
  6414. + _(cmc),
  6415. + _(grp3, __, __, __, FLG_MRR|FLG_GRP), /* Eb */
  6416. + _(grp3, __, __, __, FLG_MRR|FLG_GRP), /* Ev */
  6417. + _(clc),
  6418. + _(stc),
  6419. + _(cli),
  6420. + _(sti),
  6421. + _(cld),
  6422. + _(std),
  6423. + _(grp4, __, __, __, FLG_MRR|FLG_GRP),
  6424. + _(grp5, __, __, __, FLG_MRR|FLG_GRP),
  6425. +};
  6426. +
  6427. +/* instruction state */
  6428. +struct istate {
  6429. + uint32_t op;
  6430. + uint8_t rep;
  6431. + uint8_t rex;
  6432. + uint8_t mrr;
  6433. + uint8_t sib;
  6434. + uint32_t seg;
  6435. + uint32_t flag;
  6436. + uint32_t osz;
  6437. + uint32_t asz;
  6438. + uint32_t mode;
  6439. +
  6440. + /* number of instruction bytes */
  6441. + int nib;
  6442. +
  6443. + uint8_t *pc;
  6444. +};
  6445. +
  6446. +/* Get byte from code stream */
  6447. +static uint64_t
  6448. +get8(struct istate *i) {
  6449. + i->nib++;
  6450. + return *i->pc++;
  6451. +}
  6452. +
  6453. +/* Get operand size (16/32/64-bit) */
  6454. +static int
  6455. +osize(struct istate *i) {
  6456. + switch (i->mode) {
  6457. + case SIZE_QWORD:
  6458. + /* Default opsize or REX.W */
  6459. + if ((i->flag & FLG_D64) || (i->rex & REX_W))
  6460. + return SIZE_QWORD;
  6461. + return (i->flag & FLG_OSZ) ? SIZE_WORD : SIZE_DWORD;
  6462. + case SIZE_DWORD:
  6463. + return (i->flag & FLG_OSZ) ? SIZE_WORD : SIZE_DWORD;
  6464. + case SIZE_WORD:
  6465. + return (i->flag & FLG_OSZ) ? SIZE_DWORD : SIZE_WORD;
  6466. + }
  6467. + return 0;
  6468. +}
  6469. +
  6470. +/* Get address size (16/32/64-bit) */
  6471. +static int
  6472. +asize(struct istate *i) {
  6473. + switch (i->mode) {
  6474. + case SIZE_QWORD:
  6475. + return (i->flag & FLG_ASZ) ? SIZE_DWORD : SIZE_QWORD;
  6476. + case SIZE_DWORD:
  6477. + return (i->flag & FLG_ASZ) ? SIZE_WORD : SIZE_DWORD;
  6478. + case SIZE_WORD:
  6479. + return (i->flag & FLG_ASZ) ? SIZE_DWORD : SIZE_WORD;
  6480. + }
  6481. + return 0;
  6482. +}
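
A quick illustration of the precedence osize() encodes for 64-bit guests (a sketch; decodeop() normally fills in istate):

    	struct istate i = { .mode = SIZE_QWORD, .flag = FLG_OSZ };

    	osize(&i);		/* 0x66 prefix -> SIZE_WORD */
    	i.rex = REX_W;
    	osize(&i);		/* REX.W beats the 0x66 prefix -> SIZE_QWORD */
    	i.rex = 0; i.flag = FLG_D64;
    	osize(&i);		/* push/jmp-style defaults -> SIZE_QWORD */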
  6483. +
  6484. +/*============================*
  6485. + * Decode opcode
  6486. + *============================*/
  6487. +static struct opcode
  6488. +decodeop(struct istate *i)
  6489. +{
  6490. + struct opcode o;
  6491. + int op;
  6492. +
  6493. + for(;;) {
  6494. + op = get8(i);
  6495. + if (op == 0x0f) {
  6496. + /* Decode 2nd byte */
  6497. + op = (op << 8) | get8(i);
  6498. + o = hicodes[op & 0xFF];
  6499. + } else {
  6500. + o = locodes[op];
  6501. + }
  6502. + i->flag |= o.flag;
  6503. + i->op = op;
  6504. +
  6505. + /* Check if this is a prefix opcode */
  6506. + if (o.flag == FLG_SEG)
  6507. + i->seg = o.arg0;
  6508. + else if (o.flag == FLG_REP)
  6509. + i->rep = op;
  6510. + else if (o.flag == FLG_REX && (i->mode == SIZE_QWORD))
  6511. + i->rex = op;
  6512. + else if (!(o.flag & (FLG_OSZ|FLG_ASZ|FLG_LOCK))) {
  6513. + /* get Mod-Reg-RM byte */
  6514. + if (i->flag & FLG_MRR)
  6515. + i->mrr = get8(i);
  6516. + /* Get operand and address size */
  6517. + i->osz = osize(i);
  6518. + i->asz = asize(i);
  6519. + if (!o.mnem)
  6520. + o.mnem = "---";
  6521. + return o;
  6522. + }
  6523. + }
  6524. +}
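
A decode walk-through, assuming a 32-bit guest: for the byte sequence 66 89 03, the first loop iteration hits the 0x66 entry (FLG_OSZ) and continues; the second fetches 0x89 (mov Ev,Gv, FLG_MRR), reads ModRM byte 0x03, and osize() yields SIZE_WORD because of the prefix, so the caller sees a 2-byte store of %ax into (%ebx) that is 3 bytes long (i->nib == 3).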
+
+/*
+ * Register names
+ */
+static const char *bregs[] = {
+ "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
+ "r8b","r9b","r10b","r11b","r12b","r13b","r14b","r15b",
+ "spl","bpl","sil", "dil"
+};
+static const char *wregs[] = {
+ "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
+ "r8w","r9w","r10w","r11w","r12w","r13w","r14w","r15w",
+};
+static const char *dregs[] = {
+ "eax","ecx","edx", "ebx", "esp", "ebp", "esi", "edi",
+ "r8d","r9d","r10d","r11d","r12d","r13d","r14d","r15d",
+};
+static const char *qregs[] = {
+ "rax","rcx","rdx","rbx","rsp","rbp","rsi","rdi",
+ "r8", "r9", "r10","r11","r12","r13","r14","r15",
+};
+
+static const char *
+regname(int reg) {
+ int vv = reg & VAL_MASK;
+ int sz = reg & SIZE_MASK;
+
+ if ((sz != SIZE_BYTE && vv >= 16) || vv >= 20)
+ return "xx";
+ switch (sz) {
+ case SIZE_BYTE: return bregs[vv];
+ case SIZE_WORD: return wregs[vv];
+ case SIZE_DWORD:return dregs[vv];
+ case SIZE_QWORD:return qregs[vv];
+ }
+ return "--";
+}
+
+/* Make register */
+static uint32_t
+mkreg(struct istate *i, int sz, int vv, int mask) {
+ /* REX byte */
+ if (mask & i->rex)
+ vv += 8;
+ /* Special case for spl/bpl/sil/dil */
+ if (sz == SIZE_BYTE && i->rex && (vv >= 4 && vv <= 7))
+ vv += 12;
+ vv += TYPE_REG+sz;
+ printf("%%%s ", regname(vv));
+ return vv;
+}
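
The byte-register path above is the subtle one: any REX prefix, even an empty 0x40, remaps encodings 4..7 from ah/ch/dh/bh to spl/bpl/sil/dil (bregs indices 16..19). A standalone sketch of the index math (mine, not from the patch), modelling only the SIZE_BYTE case:

#include <stdio.h>

static const char *bregs[] = {
	"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
	"r8b", "r9b", "r10b", "r11b", "r12b", "r13b", "r14b", "r15b",
	"spl", "bpl", "sil", "dil"
};

/* byte-register index: a REX bit extends the field, any REX remaps 4..7 */
static int
bindex(int vv, int rex, int mask)
{
	if (mask & rex)
		vv += 8;
	if (rex && vv >= 4 && vv <= 7)
		vv += 12;
	return vv;
}

int
main(void)
{
	printf("%s\n", bregs[bindex(6, 0x00, 0x1)]);	/* dh: no REX */
	printf("%s\n", bregs[bindex(6, 0x40, 0x1)]);	/* sil: empty REX */
	printf("%s\n", bregs[bindex(6, 0x41, 0x1)]);	/* r14b: REX.B set */
	return 0;
}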
+
+/* Get immediate value: embedded in the arg or decoded from the instruction stream */
+static uint64_t
+mkimm(struct istate *i, int sz, uint64_t val, const char *fmt) {
+ switch (sz) {
+ case SIZE_BYTE:
+ val = get8(i);
+ break;
+ case SIZE_WORD:
+ val = get8(i);
+ val |= (get8(i) << 8);
+ break;
+ case SIZE_DWORD:
+ val = get8(i);
+ val |= get8(i) << 8;
+ val |= get8(i) << 16;
+ val |= (uint64_t)get8(i) << 24;
+ break;
+ case SIZE_QWORD:
+ val = get8(i);
+ val |= get8(i) << 8;
+ val |= get8(i) << 16;
+ val |= (uint64_t)get8(i) << 24;
+ val |= (uint64_t)get8(i) << 32;
+ val |= (uint64_t)get8(i) << 40;
+ val |= (uint64_t)get8(i) << 48;
+ val |= (uint64_t)get8(i) << 56;
+ break;
+ default:
+ /* val already contains an embedded value (e.g. i1/i3) */
+ break;
+ }
+ printf(fmt, val);
+ return val;
+}
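
The (uint64_t) casts above matter: get8() promotes to int, so shifting a byte by 24 can set the sign bit (which then sign-extends when widened to 64 bits), and shifting an int by 32 or more is undefined. A standalone sketch (mine, not from the patch) showing both the correct assembly and the sign-extension failure:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint8_t b[8] = { 0xef, 0xbe, 0xad, 0xde, 0x78, 0x56, 0x34, 0x12 };
	uint64_t good = 0, bad;
	int k;

	for (k = 0; k < 8; k++)
		good |= (uint64_t)b[k] << (8 * k);	/* 0x12345678deadbeef */

	/* int arithmetic: 0xde << 24 goes negative, sign-extends on widening */
	bad = b[0] | (b[1] << 8) | (b[2] << 16) | (b[3] << 24);

	printf("good = 0x%llx\n", (unsigned long long)good);
	printf("bad  = 0x%llx\n", (unsigned long long)bad);	/* 0xffffffffdeadbeef */
	return 0;
}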
+
+/* Decode effective address */
+static uint32_t
+mkea(struct istate *i, int sz) {
+ int mm, rrr;
+
+ mm = mrr_mm(i->mrr);
+ rrr = mrr_rrr(i->mrr);
+ if (mm == 3) {
+ /* register encoding */
+ return mkreg(i, sz, rrr, REX_B);
+ }
+ switch (i->asz) {
+ case SIZE_QWORD:
+ printf("(");
+ if (rrr == 4) {
+ i->sib = get8(i);
+ rrr = sib_bbb(i->sib);
+ printf("%d,", 1 << sib_ss(i->sib));
+ mkreg(i, SIZE_QWORD, sib_iii(i->sib), REX_X);
+ }
+ if (mm == 1) {
+ mkreg(i, SIZE_QWORD, rrr, REX_B);
+ mkimm(i, SIZE_BYTE, 0, "b[$0x%llx]");
+ }
+ else if (mm == 2) {
+ mkreg(i, SIZE_QWORD, rrr, REX_B);
+ mkimm(i, SIZE_DWORD, 0, "d[$0x%llx]");
+ }
+ else if (rrr == 5) {
+ /* Special case RIP-relative */
+ mkimm(i, SIZE_DWORD, 0, "%%rip[$0x%llx]");
+ }
+ else
+ mkreg(i, SIZE_QWORD, rrr, REX_B);
+ printf(") ");
+ break;
+ case SIZE_DWORD:
+ printf("(");
+ if (rrr == 4) {
+ i->sib = get8(i);
+ rrr = sib_bbb(i->sib);
+ printf("%d,", 1 << sib_ss(i->sib));
+ mkreg(i, SIZE_DWORD, sib_iii(i->sib), REX_X);
+ }
+ if (mm == 1) {
+ mkreg(i, SIZE_DWORD, rrr, REX_B);
+ mkimm(i, SIZE_BYTE, 0, "b[$0x%llx]");
+ }
+ else if (mm == 2) {
+ mkreg(i, SIZE_DWORD, rrr, REX_B);
+ mkimm(i, SIZE_DWORD, 0, "d[$0x%llx]");
+ }
+ else if (rrr == 5) {
+ /* Special case d32 */
+ mkimm(i, SIZE_DWORD, 0, "d32[$0x%llx]");
+ }
+ else
+ mkreg(i, SIZE_DWORD, rrr, REX_B);
+ printf(") ");
+ break;
+ }
+ return 0;
+}
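
To see the mm/rrr dispatch above on concrete bytes, here is a field-extraction sketch (mine, not from the patch) for 8b 44 24 08, i.e. mov 0x8(%rsp),%eax, using the same macros x86emu.h defines:

#include <stdio.h>

#define mrr_mm(x) (((x) >> 6) & 3)
#define mrr_ggg(x) (((x) >> 3) & 7)
#define mrr_rrr(x) (((x) >> 0) & 7)
#define sib_ss(x) (((x) >> 6) & 3)
#define sib_iii(x) (((x) >> 3) & 7)
#define sib_bbb(x) (((x) >> 0) & 7)

int
main(void)
{
	unsigned char mrr = 0x44, sib = 0x24, disp8 = 0x08;

	/* mm=1 (disp8 follows), ggg=0 (%eax), rrr=4 (SIB byte follows) */
	printf("mm=%d ggg=%d rrr=%d\n", mrr_mm(mrr), mrr_ggg(mrr), mrr_rrr(mrr));
	/* ss=0 (scale 1), iii=4 (no index), bbb=4 (%rsp base) */
	printf("scale=%d index=%d base=%d disp8=0x%x\n",
	    1 << sib_ss(sib), sib_iii(sib), sib_bbb(sib), disp8);
	return 0;
}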
+
+/* Decode opcode argument. Return register/immediate if applicable */
+static uint32_t
+decodearg(struct istate *i, int arg) {
+ int tt, sz, vv;
+
+ if (!arg)
+ return 0;
+ tt = arg & TYPE_MASK;
+ sz = arg & SIZE_MASK;
+ vv = arg & VAL_MASK;
+
+ if (sz == SIZE_VWORD)
+ sz = i->osz;
+ if (sz == SIZE_ZWORD)
+ sz = SIZE_DWORD;
+ switch (tt) {
+ case TYPE_REG: /* specific register */
+ return mkreg(i, sz, vv, 0);
+ case TYPE_EMBREG: /* embedded in opcode */
+ return mkreg(i, sz, i->op & 0x7, REX_B);
+ case TYPE_EAREG: /* embedded in mrr */
+ return mkreg(i, sz, mrr_ggg(i->mrr), REX_R);
+ case TYPE_EA:
+ case TYPE_EAMEM: /* effective address */
+ return mkea(i, sz);
+ case TYPE_IMM: /* immediate value */
+ return mkimm(i, sz, vv, "imm:$0x%llx ");
+ case TYPE_INDEX: /* string operations */
+ break;
+ default:
+ printf("Unknown arg: %.8x ", arg);
+ break;
+ }
+ return 0;
+}
+
+/* Get size of operand in bytes */
+static int
+sz(int arg) {
+ switch (arg & SIZE_MASK) {
+ case SIZE_BYTE: return 1;
+ case SIZE_WORD: return 2;
+ case SIZE_DWORD: return 4;
+ case SIZE_QWORD: return 8;
+ }
+ return 0;
+}
+
+/* Map X86 reg to vmm reg */
+static int vmmreg[] = {
+ VCPU_REGS_RAX,
+ VCPU_REGS_RCX,
+ VCPU_REGS_RDX,
+ VCPU_REGS_RBX,
+ VCPU_REGS_RSP,
+ VCPU_REGS_RBP,
+ VCPU_REGS_RSI,
+ VCPU_REGS_RDI,
+ VCPU_REGS_R8,
+ VCPU_REGS_R9,
+ VCPU_REGS_R10,
+ VCPU_REGS_R11,
+ VCPU_REGS_R12,
+ VCPU_REGS_R13,
+ VCPU_REGS_R14,
+ VCPU_REGS_R15,
+ VCPU_REGS_RSP, /* spl */
+ VCPU_REGS_RBP, /* bpl */
+ VCPU_REGS_RSI, /* sil */
+ VCPU_REGS_RDI, /* dil */
+};
+
+static int
+Vreg(int arg) {
+ if ((arg & VAL_MASK) < 20)
+ return vmmreg[arg & VAL_MASK];
+ printf("error bad reg: %x\n", arg);
+ return VCPU_REGS_RAX;
+}
+
+/*
+ * Disassemble the opcode at an MMIO fault.
+ * Returns the direction, size and register for the memory handler to
+ * read or write.
+ */
+int
+dodis(uint8_t *ib, struct insn *ix, int mode) {
+ struct istate i = { 0 };
+ struct opcode o;
+ int a0, a1;
+
+ /* Get opcode */
+ i.pc = ib;
+ i.mode = mode;
+ o = decodeop(&i);
+ printf("%c%c dis: %.2x %.2x %.2x %.2x | %-6s",
+ (i.osz >> 16), (i.asz >> 16), i.seg, i.rep, i.rex, i.op, o.mnem);
+
+ /* Decode opcode arguments to register/immediate/etc */
+ a0 = decodearg(&i, o.arg0);
+ a1 = decodearg(&i, o.arg1);
+ decodearg(&i, o.arg2);
+ printf(" : %d\n", i.nib);
+
+ /* Convert to the format the memory handler needs: number of
+ * instruction bytes, register to read/write, and size */
+ if (strncmp(o.mnem, "mov", 3))
+ return 0;
+ memset(ix, 0, sizeof(*ix));
+ if ((a0 & TYPE_MASK) == TYPE_REG) {
+ ix->dir = VEI_DIR_IN;
+ ix->size = sz(a0);
+ ix->reg = Vreg(a0);
+ ix->incr = i.nib;
+ }
+ else if ((a1 & TYPE_MASK) == TYPE_REG) {
+ ix->dir = VEI_DIR_OUT;
+ ix->size = sz(a1);
+ ix->reg = Vreg(a1);
+ ix->incr = i.nib;
+ }
+ printf("dir:%d size:%d reg:%d incr:%d\n", ix->dir, ix->size, ix->reg, ix->incr);
+ return 1;
+}
+
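For context, a hypothetical caller: how a vmd MMIO exit handler might drive dodis(). Only dodis(), struct insn and the SIZE_* constants come from this patch; read_guest_insn() and handle_mmio() are invented here for illustration, and the sketch links against the patch's x86emu code:

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "x86emu.h"

/* stub for illustration: pretend the guest executed 8b 04 24 */
static int
read_guest_insn(uint64_t rip, uint8_t *buf, size_t len)
{
	static const uint8_t insn[] = { 0x8b, 0x04, 0x24 };	/* mov (%rsp),%eax */

	(void)rip;
	memset(buf, 0x90, len);
	memcpy(buf, insn, sizeof(insn));
	return (0);
}

int
handle_mmio(uint64_t rip, uint64_t gpa)
{
	uint8_t ib[16];
	struct insn ix;

	if (read_guest_insn(rip, ib, sizeof(ib)) == -1)
		return (-1);
	if (!dodis(ib, &ix, SIZE_QWORD))	/* long-mode guest */
		return (-1);
	/*
	 * ix.dir says whether the guest is loading from (VEI_DIR_IN) or
	 * storing to (VEI_DIR_OUT) gpa; ix.reg/ix.size name the vcpu
	 * register and width, and rip advances by ix.incr on completion.
	 */
	printf("mmio dir %d gpa 0x%llx reg %d size %d incr %d\n",
	    ix.dir, (unsigned long long)gpa, ix.reg, ix.size, ix.incr);
	return (0);
}
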
diff --git a/usr.sbin/vmd/x86emu.h b/usr.sbin/vmd/x86emu.h
new file mode 100644
index 000000000..1f2997375
--- /dev/null
+++ b/usr.sbin/vmd/x86emu.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2020 Jordan Hargrave <[email protected]>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+#ifndef __x86emu_h__
+#define __x86emu_h__
+
+struct insn {
+ uint8_t sig[3];
+ int siglen;
+ int dir; /* VEI_DIR_IN (guest load) or VEI_DIR_OUT (guest store) */
+ int size; /* operand size in bytes */
+ int incr; /* instruction length: bytes to advance rip */
+ int reg; /* vmm register index (VCPU_REGS_xxx) */
+};
+
+/* decode mod-reg-rm byte */
+#define mrr_mm(x) (((x) >> 6) & 3)
+#define mrr_ggg(x) (((x) >> 3) & 7)
+#define mrr_rrr(x) (((x) >> 0) & 7)
+
+/* decode scaled-index-base byte */
+#define sib_ss(x) (((x) >> 6) & 3)
+#define sib_iii(x) (((x) >> 3) & 7)
+#define sib_bbb(x) (((x) >> 0) & 7)
+
+/* Opcode argument types: register, immediate, memory, etc */
+enum {
+ TYPE_SHIFT = 24,
+ SIZE_SHIFT = 16,
+
+ VAL_MASK = 0xFFFF,
+
+ TYPE_MASK = 0xFF << TYPE_SHIFT,
+ TYPE_REG = 'r' << TYPE_SHIFT,
+ TYPE_EMBREG = 'g' << TYPE_SHIFT,
+ TYPE_EA = 'E' << TYPE_SHIFT,
+ TYPE_EAMEM = 'M' << TYPE_SHIFT,
+ TYPE_EAREG = 'G' << TYPE_SHIFT,
+ TYPE_IMM = 'I' << TYPE_SHIFT,
+ TYPE_JMP = 'J' << TYPE_SHIFT,
+ TYPE_OFFSET = 'O' << TYPE_SHIFT,
+ TYPE_INDEX = '$' << TYPE_SHIFT,
+
+ SIZE_MASK = 0xFF << SIZE_SHIFT,
+ SIZE_BYTE = 'b' << SIZE_SHIFT,
+ SIZE_WORD = 'w' << SIZE_SHIFT,
+ SIZE_DWORD = 'd' << SIZE_SHIFT,
+ SIZE_QWORD = 'q' << SIZE_SHIFT,
+ SIZE_VWORD = 'v' << SIZE_SHIFT, /* 16/32/64-bit opsize */
+ SIZE_ZWORD = 'z' << SIZE_SHIFT, /* 16/32-bit opsize */
+ SIZE_PTR = 'p' << SIZE_SHIFT,
+ SIZE_SREG = 's' << SIZE_SHIFT,
+ SIZE_CREG = 'C' << SIZE_SHIFT,
+ SIZE_DREG = 'D' << SIZE_SHIFT,
+ SIZE_TREG = 'T' << SIZE_SHIFT,
+
+ Ap = TYPE_IMM+SIZE_PTR,
+ Mp = TYPE_EAMEM+SIZE_PTR,
+ Sw = TYPE_EAREG+SIZE_SREG,
+
+ Ob = TYPE_OFFSET+SIZE_BYTE,
+ Ov = TYPE_OFFSET+SIZE_VWORD,
+
+ Eb = TYPE_EA+SIZE_BYTE,
+ Ew = TYPE_EA+SIZE_WORD,
+ Ev = TYPE_EA+SIZE_VWORD,
+
+ Gb = TYPE_EAREG+SIZE_BYTE,
+ Gv = TYPE_EAREG+SIZE_VWORD,
+
+ gb = TYPE_EMBREG+SIZE_BYTE,
+ gv = TYPE_EMBREG+SIZE_VWORD,
+
+ Ib = TYPE_IMM+SIZE_BYTE,
+ Iw = TYPE_IMM+SIZE_WORD,
+ Iv = TYPE_IMM+SIZE_VWORD,
+ Iz = TYPE_IMM+SIZE_ZWORD,
+ i1 = TYPE_IMM+0x01,
+ i3 = TYPE_IMM+0x03,
+
+ Jb = TYPE_JMP+SIZE_BYTE,
+ Jz = TYPE_JMP+SIZE_ZWORD,
+
+ Xb = TYPE_INDEX+SIZE_BYTE,
+ Xv = TYPE_INDEX+SIZE_VWORD,
+ Xz = TYPE_INDEX+SIZE_ZWORD,
+ Yb = TYPE_INDEX+SIZE_BYTE+0x1,
+ Yv = TYPE_INDEX+SIZE_VWORD+0x1,
+ Yz = TYPE_INDEX+SIZE_ZWORD+0x1,
+
+ /* Registers */
+ rAL = TYPE_REG+SIZE_BYTE,
+ rCL,
+ rDL,
+ rBL,
+ rAH,
+ rCH,
+ rDH,
+ rBH,
+ rSPL = TYPE_REG+SIZE_BYTE+0x10, /* bregs[16..19]: spl/bpl/sil/dil */
+ rBPL,
+ rSIL,
+ rDIL,
+
+ rAX = TYPE_REG+SIZE_WORD,
+ rCX,
+ rDX,
+ rBX,
+ rSP,
+ rBP,
+ rSI,
+ rDI,
+
+ rEAX = TYPE_REG+SIZE_DWORD,
+ rECX,
+ rEDX,
+ rEBX,
+ rESP,
+ rEBP,
+ rESI,
+ rEDI,
+
+ rRAX = TYPE_REG+SIZE_QWORD,
+ rRCX,
+ rRDX,
+ rRBX,
+ rRSP,
+ rRBP,
+ rRSI,
+ rRDI,
+
+ rvAX = TYPE_REG+SIZE_VWORD,
+
+ rES = TYPE_REG+SIZE_SREG,
+ rCS,
+ rSS,
+ rDS,
+ rFS,
+ rGS,
+};
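
The enum above packs an argument's type, size and register number into a single integer; the decoder's masks pull them back apart. A quick standalone check (mine, not from the patch), using unsigned masks to avoid signed-shift overflow:

#include <stdio.h>

#define TYPE_SHIFT 24
#define SIZE_SHIFT 16
#define VAL_MASK 0xFFFF
#define TYPE_MASK (0xFFu << TYPE_SHIFT)
#define SIZE_MASK (0xFFu << SIZE_SHIFT)

int
main(void)
{
	/* rEDX = TYPE_REG + SIZE_DWORD + 2 in the header's enum */
	unsigned rEDX = ('r' << TYPE_SHIFT) + ('d' << SIZE_SHIFT) + 2;

	printf("type=%c size=%c val=%u\n",
	    (rEDX & TYPE_MASK) >> TYPE_SHIFT,	/* 'r': register */
	    (rEDX & SIZE_MASK) >> SIZE_SHIFT,	/* 'd': dword */
	    rEDX & VAL_MASK);			/* 2: edx */
	return 0;
}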
+
+enum {
+ REX_B = 0x1, /* mrr.rrr or sib.bbb or op.ggg */
+ REX_X = 0x2, /* sib.iii */
+ REX_R = 0x4, /* mrr.ggg */
+ REX_W = 0x8, /* operand size=64-bit */
+
+ /* Opcode prefix flags */
+ FLG_REX = 0x0001, /* REX byte */
+ FLG_SEG = 0x0002, /* segment prefix */
+ FLG_OSZ = 0x0004, /* operand size */
+ FLG_ASZ = 0x0008, /* address size */
+ FLG_LOCK = 0x0010, /* lock */
+ FLG_REP = 0x0020, /* repz/repnz/rep */
+
+ /* Additional opcode flags */
+ FLG_MRR = 0x0100, /* has mod-reg-rm byte */
+ FLG_GRP = 0x0200, /* opcode based on mrr.reg */
+ FLG_MEM = 0x0400, /* non-mrr memory */
+ FLG_D64 = 0x0800, /* default size = 64-bit */
+ FLG_NO64 = 0x1000, /* invalid in 64-bit mode */
+};
+
+int dodis(uint8_t *, struct insn *ix, int mode);
+
+#endif
