  1. diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
  2. index 9ba07d7e0b0..7ff1276f64a 100644
  3. --- a/lib/libvmmapi/vmmapi.c
  4. +++ b/lib/libvmmapi/vmmapi.c
  5. @@ -887,6 +887,57 @@ vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
  6. return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
  7. }
  8.  
  9. +/*
  10. + * Export the file descriptor associated with this VM, useful for external
  11. + * programs (e.g. to issue ioctl()).
  12. + */
  13. +int
  14. +vm_get_fd(struct vmctx *ctx)
  15. +{
  16. + return (ctx->fd);
  17. +}
  18. +
  19. +/*
  20. + * Map a user-space buffer into the VM at a given physical address.
  21. + * To be used for devices that expose internal memory.
  22. + */
  23. +int
  24. +vm_map_user_buf(struct vmctx *ctx, vm_paddr_t gpa, size_t len,
  25. + void *host_buf, int map)
  26. +{
  27. + struct vm_user_buf user_buf;
  28. +
  29. + bzero(&user_buf, sizeof(user_buf));
  30. + user_buf.gpa = gpa;
  31. + user_buf.len = len;
  32. + user_buf.addr = host_buf;
  33. + user_buf.map = map;
  34. +
  35. + return (ioctl(ctx->fd, VM_MAP_USER_BUF, &user_buf));
  36. +}
  37. +
  38. +/*
  39. + * Register a handler for guest I/O accesses on a given I/O port, optionally
  40. + * filtering on the data. QEMU/KVM implement similar functionality.
  41. + */
  42. +int
  43. +vm_io_reg_handler(struct vmctx *ctx, uint16_t port, uint16_t in,
  44. + uint32_t mask_data, uint32_t data,
  45. + enum vm_io_regh_type type, void *arg)
  46. +{
  47. + struct vm_io_reg_handler ioregh;
  48. +
  49. + bzero(&ioregh, sizeof(ioregh));
  50. + ioregh.port = port;
  51. + ioregh.in = in;
  52. + ioregh.mask_data = mask_data;
  53. + ioregh.data = data;
  54. + ioregh.type = type;
  55. + ioregh.arg = arg;
  56. +
  57. + return (ioctl(ctx->fd, VM_IO_REG_HANDLER, &ioregh));
  58. +}
  59. +
  60. int
  61. vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
  62. uint64_t addr, uint64_t msg, int numvec)
  63. @@ -1444,7 +1495,7 @@ vm_get_ioctls(size_t *len)
  64. VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
  65. VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SET_INTINFO, VM_GET_INTINFO,
  66. VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
  67. - VM_RESTART_INSTRUCTION };
  68. + VM_RESTART_INSTRUCTION, VM_MAP_USER_BUF, VM_IO_REG_HANDLER };
  69.  
  70. if (len == NULL) {
  71. cmds = malloc(sizeof(vm_ioctl_cmds));
  72. diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
  73. index df3a81b5aad..05dccd320fc 100644
  74. --- a/lib/libvmmapi/vmmapi.h
  75. +++ b/lib/libvmmapi/vmmapi.h
  76. @@ -163,6 +163,12 @@ int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
  77. int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2);
  78. int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo);
  79.  
  80. +/* The next three functions are documented in vmmapi.c */
  81. +int vm_get_fd(struct vmctx *ctx);
  82. +int vm_map_user_buf(struct vmctx *ctx, vm_paddr_t gpa, size_t len,
  83. + void *host_buf, int map);
  84. +int vm_io_reg_handler(struct vmctx *ctx, uint16_t port, uint16_t in,
  85. + uint32_t mask_data, uint32_t data, enum vm_io_regh_type type, void *arg);
  86. const cap_ioctl_t *vm_get_ioctls(size_t *len);
  87.  
  88. /*
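
The three libvmmapi additions above are thin wrappers around the new vmm ioctls. As a point of reference, here is a minimal sketch (not part of the patch) of how a bhyve device backend might install a guest "kick" notification with vm_io_reg_handler(); PTN_KICK_PORT, kick_event and the error handling are illustrative assumptions, and a companion sketch for vm_map_user_buf() follows the vmm_usermem files further down.

    #include <sys/types.h>
    #include <stdio.h>
    #include <machine/vmm.h>
    #include <vmmapi.h>

    #define PTN_KICK_PORT	0x2000	/* hypothetical "kick" I/O port */

    static int
    register_kick_handler(struct vmctx *ctx, void *kick_event)
    {
    	/*
    	 * Ask vmm.ko to wakeup(kick_event) whenever the guest writes any
    	 * value (mask_data == 0 matches anything) to PTN_KICK_PORT.
    	 */
    	if (vm_io_reg_handler(ctx, PTN_KICK_PORT, 0 /* out */, 0, 0,
    	    VM_IO_REGH_KWEVENTS, kick_event) != 0) {
    		fprintf(stderr, "VM_IO_REG_HANDLER failed\n");
    		return (-1);
    	}
    	printf("handler installed on vm fd %d\n", vm_get_fd(ctx));
    	return (0);
    }

The handler is removed by calling vm_io_reg_handler() again with the same port/mask/data and VM_IO_REGH_DELETE as the type.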
  89. diff --git a/share/man/man4/netmap.4 b/share/man/man4/netmap.4
  90. index e86d3d17a04..7cf58a20399 100644
  91. --- a/share/man/man4/netmap.4
  92. +++ b/share/man/man4/netmap.4
  93. @@ -103,13 +103,12 @@ virtual machines, NICs and the host stack.
  94. .Pp
  95. .Nm
  96. supports both non-blocking I/O through
  97. -.Xr ioctl 2 ,
  98. +.Xr ioctls() ,
  99. synchronization and blocking I/O through a file descriptor
  100. and standard OS mechanisms such as
  101. .Xr select 2 ,
  102. .Xr poll 2 ,
  103. .Xr epoll 2 ,
  104. -and
  105. .Xr kqueue 2 .
  106. All types of
  107. .Nm netmap ports
  108. @@ -156,7 +155,7 @@ All NICs operating in
  109. .Nm
  110. mode use the same memory region,
  111. accessible to all processes who own
  112. -.Pa /dev/netmap
  113. +.Nm /dev/netmap
  114. file descriptors bound to NICs.
  115. Independent
  116. .Nm VALE
  117. @@ -212,7 +211,7 @@ and the number, size and location of all the
  118. data structures, which can be accessed by mmapping the memory
  119. .Dl char *mem = mmap(0, arg.nr_memsize, fd);
  120. .Pp
  121. -Non-blocking I/O is done with special
  122. +Non blocking I/O is done with special
  123. .Xr ioctl 2
  124. .Xr select 2
  125. and
  126. @@ -238,11 +237,10 @@ and returns the NIC to normal mode (reconnecting the data path
  127. to the host stack), or destroys the virtual port.
  128. .Sh DATA STRUCTURES
  129. The data structures in the mmapped memory region are detailed in
  130. -.In sys/net/netmap.h ,
  131. +.Xr sys/net/netmap.h ,
  132. which is the ultimate reference for the
  133. .Nm
  134. -API.
  135. -The main structures and fields are indicated below:
  136. +API. The main structures and fields are indicated below:
  137. .Bl -tag -width XXX
  138. .It Dv struct netmap_if (one per interface)
  139. .Bd -literal
  140. @@ -271,9 +269,7 @@ to be used as temporary storage for packets.
  141. contains the index of the first of these free rings,
  142. which are connected in a list (the first uint32_t of each
  143. buffer being the index of the next buffer in the list).
  144. -A
  145. -.Dv 0
  146. -indicates the end of the list.
  147. +A 0 indicates the end of the list.
  148. .It Dv struct netmap_ring (one per ring)
  149. .Bd -literal
  150. struct netmap_ring {
  151. @@ -293,8 +289,8 @@ struct netmap_ring {
  152. .Ed
  153. .Pp
  154. Implements transmit and receive rings, with read/write
  155. -pointers, metadata and an array of
  156. -.Em slots
  157. +pointers, metadata and and an array of
  158. +.Pa slots
  159. describing the buffers.
  160. .It Dv struct netmap_slot (one per buffer)
  161. .Bd -literal
  162. @@ -317,11 +313,10 @@ The offset of the
  163. in the mmapped region is indicated by the
  164. .Pa nr_offset
  165. field in the structure returned by
  166. -.Dv NIOCREGIF .
  167. +.Pa NIOCREGIF .
  168. From there, all other objects are reachable through
  169. relative references (offsets or indexes).
  170. -Macros and functions in
  171. -.In net/netmap_user.h
  172. +Macros and functions in <net/netmap_user.h>
  173. help converting them into actual pointers:
  174. .Pp
  175. .Dl struct netmap_if *nifp = NETMAP_IF(mem, arg.nr_offset);
  176. @@ -351,9 +346,7 @@ passes
  177. .Va tail
  178. is the first slot reserved to the kernel.
  179. .Pp
  180. -Slot indexes
  181. -.Em must
  182. -only move forward;
  183. +Slot indexes MUST only move forward;
  184. for convenience, the function
  185. .Dl nm_ring_next(ring, index)
  186. returns the next index modulo the ring size.
  187. @@ -482,10 +475,7 @@ One packet is fully contained in a single buffer.
  188. The following flags affect slot and buffer processing:
  189. .Bl -tag -width XXX
  190. .It NS_BUF_CHANGED
  191. -.Em must
  192. -be used when the
  193. -.Va buf_idx
  194. -in the slot is changed.
  195. +it MUST be used when the buf_idx in the slot is changed.
  196. This can be used to implement
  197. zero-copy forwarding, see
  198. .Sx ZERO-COPY FORWARDING .
  199. @@ -494,20 +484,19 @@ reports when this buffer has been transmitted.
  200. Normally,
  201. .Nm
  202. notifies transmit completions in batches, hence signals
  203. -can be delayed indefinitely.
  204. -This flag helps detect
  205. +can be delayed indefinitely. This flag helps detecting
  206. when packets have been sent and a file descriptor can be closed.
  207. .It NS_FORWARD
  208. When a ring is in 'transparent' mode (see
  209. .Sx TRANSPARENT MODE ) ,
  210. -packets marked with this flag are forwarded to the other endpoint
  211. +packets marked with this flags are forwarded to the other endpoint
  212. at the next system call, thus restoring (in a selective way)
  213. the connection between a NIC and the host stack.
  214. .It NS_NO_LEARN
  215. -tells the forwarding code that the source MAC address for this
  216. +tells the forwarding code that the SRC MAC address for this
  217. packet must not be used in the learning bridge code.
  218. .It NS_INDIRECT
  219. -indicates that the packet's payload is in a user-supplied buffer
  220. +indicates that the packet's payload is in a user-supplied buffer,
  221. whose user virtual address is in the 'ptr' field of the slot.
  222. The size can reach 65535 bytes.
  223. .Pp
  224. @@ -540,8 +529,7 @@ Slots with a value greater than 1 also have NS_MOREFRAG set.
  225. .Sh IOCTLS
  226. .Nm
  227. uses two ioctls (NIOCTXSYNC, NIOCRXSYNC)
  228. -for non-blocking I/O.
  229. -They take no argument.
  230. +for non-blocking I/O. They take no argument.
  231. Two more ioctls (NIOCGINFO, NIOCREGIF) are used
  232. to query and configure ports, with the following argument:
  233. .Bd -literal
  234. @@ -553,7 +541,7 @@ struct nmreq {
  235. uint32_t nr_tx_slots; /* (i/o) slots in tx rings */
  236. uint32_t nr_rx_slots; /* (i/o) slots in rx rings */
  237. uint16_t nr_tx_rings; /* (i/o) number of tx rings */
  238. - uint16_t nr_rx_rings; /* (i/o) number of rx rings */
  239. + uint16_t nr_rx_rings; /* (i/o) number of tx rings */
  240. uint16_t nr_ringid; /* (i/o) ring(s) we care about */
  241. uint16_t nr_cmd; /* (i) special command */
  242. uint16_t nr_arg1; /* (i/o) extra arguments */
  243. @@ -579,8 +567,7 @@ interface is actually put in netmap mode.
  244. .It Pa nr_memsize
  245. indicates the size of the
  246. .Nm
  247. -memory region.
  248. -NICs in
  249. +memory region. NICs in
  250. .Nm
  251. mode all share the same memory region,
  252. whereas
  253. @@ -599,8 +586,7 @@ using interface-specific functions (e.g.,
  254. .It Dv NIOCREGIF
  255. binds the port named in
  256. .Va nr_name
  257. -to the file descriptor.
  258. -For a physical device this also switches it into
  259. +to the file descriptor. For a physical device this also switches it into
  260. .Nm
  261. mode, disconnecting
  262. it from the host stack.
  263. @@ -652,7 +638,7 @@ In the example below, "netmap:foo" is any valid netmap port name.
  264. (default) all hardware ring pairs
  265. .It NR_REG_SW "netmap:foo^"
  266. the ``host rings'', connecting to the host stack.
  267. -.It NR_REG_NIC_SW "netmap:foo+"
  268. +.It NR_REG_NIC_SW "netmap:foo+
  269. all hardware rings and the host rings
  270. .It NR_REG_ONE_NIC "netmap:foo-i"
  271. only the i-th hardware ring pair, where the number is in
  272. @@ -665,11 +651,9 @@ the slave side of the netmap pipe whose identifier (i) is in
  273. .Pa nr_ringid .
  274. .Pp
  275. The identifier of a pipe must be thought as part of the pipe name,
  276. -and does not need to be sequential.
  277. -On return the pipe
  278. +and does not need to be sequential. On return the pipe
  279. will only have a single ring pair with index 0,
  280. -irrespective of the value of
  281. -.Va i.
  282. +irrespective of the value of i.
  283. .El
  284. .Pp
  285. By default, a
  286. @@ -719,22 +703,13 @@ are supported too.
  287. .Pp
  288. Packets in transmit rings are normally pushed out
  289. (and buffers reclaimed) even without
  290. -requesting write events.
  291. -Passing the
  292. -.Dv NETMAP_NO_TX_POLL
  293. -flag to
  294. +requesting write events. Passing the NETMAP_NO_TX_POLL flag to
  295. .Em NIOCREGIF
  296. disables this feature.
  297. By default, receive rings are processed only if read
  298. -events are requested.
  299. -Passing the
  300. -.Dv NETMAP_DO_RX_POLL
  301. -flag to
  302. +events are requested. Passing the NETMAP_DO_RX_POLL flag to
  303. .Em NIOCREGIF updates receive rings even without read events.
  304. -Note that on epoll and kqueue,
  305. -.Dv NETMAP_NO_TX_POLL
  306. -and
  307. -.Dv NETMAP_DO_RX_POLL
  308. +Note that on epoll and kqueue, NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL
  309. only have an effect when some event is posted for the file descriptor.
  310. .Sh LIBRARIES
  311. The
  312. @@ -742,13 +717,12 @@ The
  313. API is supposed to be used directly, both because of its simplicity and
  314. for efficient integration with applications.
  315. .Pp
  316. -For convenience, the
  317. -.In net/netmap_user.h
  318. +For conveniency, the
  319. +.Va <net/netmap_user.h>
  320. header provides a few macros and functions to ease creating
  321. a file descriptor and doing I/O with a
  322. .Nm
  323. -port.
  324. -These are loosely modeled after the
  325. +port. These are loosely modeled after the
  326. .Xr pcap 3
  327. API, to ease porting of libpcap-based applications to
  328. .Nm .
  329. @@ -885,8 +859,7 @@ Verbose kernel messages
  330. .It Va dev.netmap.if_num: 100
  331. .It Va dev.netmap.if_size: 1024
  332. Sizes and number of objects (netmap_if, netmap_ring, buffers)
  333. -for the global memory region.
  334. -The only parameter worth modifying is
  335. +for the global memory region. The only parameter worth modifying is
  336. .Va dev.netmap.buf_num
  337. as it impacts the total amount of memory used by netmap.
  338. .It Va dev.netmap.buf_curr_num: 0
  339. @@ -899,8 +872,7 @@ Actual values in use.
  340. .It Va dev.netmap.bridge_batch: 1024
  341. Batch size used when moving packets across a
  342. .Nm VALE
  343. -switch.
  344. -Values above 64 generally guarantee good
  345. +switch. Values above 64 generally guarantee good
  346. performance.
  347. .El
  348. .Sh SYSTEM CALLS
  349. @@ -931,14 +903,12 @@ may be of use.
  350. comes with a few programs that can be used for testing or
  351. simple applications.
  352. See the
  353. -.Pa examples/
  354. +.Va examples/
  355. directory in
  356. .Nm
  357. distributions, or
  358. -.Pa tools/tools/netmap/
  359. -directory in
  360. -.Fx
  361. -distributions.
  362. +.Va tools/tools/netmap/
  363. +directory in FreeBSD distributions.
  364. .Pp
  365. .Xr pkt-gen 8
  366. is a general purpose traffic source/sink.
  367. @@ -958,8 +928,7 @@ rates, and use multiple send/receive threads and cores.
  368. .Xr bridge 4
  369. is another test program which interconnects two
  370. .Nm
  371. -ports.
  372. -It can be used for transparent forwarding between
  373. +ports. It can be used for transparent forwarding between
  374. interfaces, as in
  375. .Dl bridge -i ix0 -i ix1
  376. or even connect the NIC to the host stack using netmap
  377. @@ -1026,8 +995,7 @@ void receiver(void)
  378. .Ss ZERO-COPY FORWARDING
  379. Since physical interfaces share the same memory region,
  380. it is possible to do packet forwarding between ports
  381. -swapping buffers.
  382. -The buffer from the transmit ring is used
  383. +swapping buffers. The buffer from the transmit ring is used
  384. to replenish the receive ring:
  385. .Bd -literal -compact
  386. uint32_t tmp;
  387. @@ -1099,7 +1067,6 @@ and further extended with help from
  388. .An Matteo Landi ,
  389. .An Gaetano Catalli ,
  390. .An Giuseppe Lettieri ,
  391. -and
  392. .An Vincenzo Maffione .
  393. .Pp
  394. .Nm
  395. @@ -1112,8 +1079,7 @@ No matter how fast the CPU and OS are,
  396. achieving line rate on 10G and faster interfaces
  397. requires hardware with sufficient performance.
  398. Several NICs are unable to sustain line rate with
  399. -small packet sizes.
  400. -Insufficient PCIe or memory bandwidth
  401. +small packet sizes. Insufficient PCIe or memory bandwidth
  402. can also cause reduced performance.
  403. .Pp
  404. Another frequent reason for low performance is the use
  405. @@ -1121,6 +1087,7 @@ of flow control on the link: a slow receiver can limit
  406. the transmit speed.
  407. Be sure to disable flow control when running high
  408. speed experiments.
  409. +.Pp
  410. .Ss SPECIAL NIC FEATURES
  411. .Nm
  412. is orthogonal to some NIC features such as
  413. @@ -1140,6 +1107,6 @@ and filtering of incoming traffic.
  414. features such as
  415. .Em checksum offloading , TCP segmentation offloading ,
  416. .Em encryption , VLAN encapsulation/decapsulation ,
  417. -etc.
  418. +etc. .
  419. When using netmap to exchange packets with the host stack,
  420. make sure to disable these features.
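
The netmap.4 hunks above touch the sections that describe rings, slots and the <net/netmap_user.h> helpers (NETMAP_IF, nm_ring_next, ...). For reference, a minimal receive loop built on those helpers could look like the sketch below; this is only a sketch: "netmap:em0" is a placeholder port name, error handling is omitted, and the NETMAP_WITH_LIBS nm_open() wrapper is assumed to be available.

    #define NETMAP_WITH_LIBS
    #include <net/netmap_user.h>
    #include <poll.h>
    #include <stdio.h>
    #include <stdint.h>

    static void
    rx_loop(void)
    {
    	struct nm_desc *d = nm_open("netmap:em0", NULL, 0, NULL);
    	struct pollfd pfd = { .fd = NETMAP_FD(d), .events = POLLIN };

    	for (;;) {
    		poll(&pfd, 1, -1);
    		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, 0);
    		while (!nm_ring_empty(ring)) {
    			uint32_t i = ring->cur;
    			struct netmap_slot *slot = &ring->slot[i];
    			char *buf = NETMAP_BUF(ring, slot->buf_idx);

    			printf("got %u bytes\n", (unsigned)slot->len);
    			(void)buf;	/* process the packet payload here */
    			/* slot indexes must only move forward */
    			ring->head = ring->cur = nm_ring_next(ring, i);
    		}
    	}
    }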
  421. diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
  422. index bdfff1f8408..46536912b36 100644
  423. --- a/sys/amd64/include/vmm.h
  424. +++ b/sys/amd64/include/vmm.h
  425. @@ -183,6 +183,8 @@ int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
  426. int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
  427. void vm_free_memseg(struct vm *vm, int ident);
  428. int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
  429. +int vm_map_usermem(struct vm *vm, vm_paddr_t gpa, size_t len, void *buf,
  430. + int map, struct thread *td);
  431. int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
  432. int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
  433. int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
  434. @@ -321,6 +323,7 @@ struct vatpic *vm_atpic(struct vm *vm);
  435. struct vatpit *vm_atpit(struct vm *vm);
  436. struct vpmtmr *vm_pmtmr(struct vm *vm);
  437. struct vrtc *vm_rtc(struct vm *vm);
  438. +struct ioregh *vm_ioregh(struct vm *vm);
  439.  
  440. /*
  441. * Inject exception 'vector' into the guest vcpu. This function returns 0 on
  442. @@ -417,7 +420,14 @@ enum vm_intr_trigger {
  443. EDGE_TRIGGER,
  444. LEVEL_TRIGGER
  445. };
  446. -
  447. +
  448. +/* Operations supported on VM_IO_REG_HANDLER ioctl. */
  449. +enum vm_io_regh_type {
  450. + VM_IO_REGH_DELETE,
  451. + VM_IO_REGH_KWEVENTS, /* kernel wait events */
  452. + VM_IO_REGH_MAX
  453. +};
  454. +
  455. /*
  456. * The 'access' field has the format specified in Table 21-2 of the Intel
  457. * Architecture Manual vol 3b.
  458. diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
  459. index 1af75a3c065..7dd4c72ed6e 100644
  460. --- a/sys/amd64/include/vmm_dev.h
  461. +++ b/sys/amd64/include/vmm_dev.h
  462. @@ -123,6 +123,24 @@ struct vm_pptdev_mmio {
  463. size_t len;
  464. };
  465.  
  466. +/* Argument for VM_MAP_USER_BUF ioctl in vmmapi.c */
  467. +struct vm_user_buf {
  468. + vm_paddr_t gpa;
  469. + void *addr;
  470. + size_t len;
  471. + int map; /* boolean */
  472. +};
  473. +
  474. +/* Argument for VM_IO_REG_HANDLER ioctl in vmmapi.c */
  475. +struct vm_io_reg_handler {
  476. + uint16_t port; /* I/O address */
  477. + uint16_t in; /* 0 out, 1 in */
  478. + uint32_t mask_data; /* 0 means match anything */
  479. + uint32_t data; /* data to match */
  480. + enum vm_io_regh_type type; /* handler type */
  481. + void *arg; /* handler argument */
  482. +};
  483. +
  484. struct vm_pptdev_msi {
  485. int vcpu;
  486. int bus;
  487. @@ -286,6 +304,10 @@ enum {
  488. IOCNUM_RTC_WRITE = 101,
  489. IOCNUM_RTC_SETTIME = 102,
  490. IOCNUM_RTC_GETTIME = 103,
  491. +
  492. + /* host mmap and IO handler */
  493. + IOCNUM_MAP_USER_BUF = 104,
  494. + IOCNUM_IO_REG_HANDLER = 105,
  495. };
  496.  
  497. #define VM_RUN \
  498. @@ -344,6 +366,10 @@ enum {
  499. _IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
  500. #define VM_MAP_PPTDEV_MMIO \
  501. _IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
  502. +#define VM_MAP_USER_BUF \
  503. + _IOW('v', IOCNUM_MAP_USER_BUF, struct vm_user_buf)
  504. +#define VM_IO_REG_HANDLER \
  505. + _IOW('v', IOCNUM_IO_REG_HANDLER, struct vm_io_reg_handler)
  506. #define VM_PPTDEV_MSI \
  507. _IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
  508. #define VM_PPTDEV_MSIX \
  509. diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
  510. index 537454a48e7..dda2e3b0660 100644
  511. --- a/sys/amd64/vmm/vmm.c
  512. +++ b/sys/amd64/vmm/vmm.c
  513. @@ -66,6 +66,7 @@ __FBSDID("$FreeBSD$");
  514. #include "vmm_ktr.h"
  515. #include "vmm_host.h"
  516. #include "vmm_mem.h"
  517. +#include "vmm_usermem.h"
  518. #include "vmm_util.h"
  519. #include "vatpic.h"
  520. #include "vatpit.h"
  521. @@ -148,6 +149,7 @@ struct vm {
  522. struct vatpit *vatpit; /* (i) virtual atpit */
  523. struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */
  524. struct vrtc *vrtc; /* (o) virtual RTC */
  525. + struct ioregh *ioregh; /* () I/O reg handler */
  526. volatile cpuset_t active_cpus; /* (i) active vcpus */
  527. int suspend; /* (i) stop VM execution */
  528. volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
  529. @@ -409,6 +411,7 @@ vm_init(struct vm *vm, bool create)
  530. vm->vpmtmr = vpmtmr_init(vm);
  531. if (create)
  532. vm->vrtc = vrtc_init(vm);
  533. + vm->ioregh = ioregh_init(vm);
  534.  
  535. CPU_ZERO(&vm->active_cpus);
  536.  
  537. @@ -465,11 +468,13 @@ vm_cleanup(struct vm *vm, bool destroy)
  538. vrtc_cleanup(vm->vrtc);
  539. else
  540. vrtc_reset(vm->vrtc);
  541. + ioregh_cleanup(vm->ioregh);
  542. vpmtmr_cleanup(vm->vpmtmr);
  543. vatpit_cleanup(vm->vatpit);
  544. vhpet_cleanup(vm->vhpet);
  545. vatpic_cleanup(vm->vatpic);
  546. vioapic_cleanup(vm->vioapic);
  547. + vmm_usermem_cleanup(vm->vmspace);
  548.  
  549. for (i = 0; i < VM_MAXCPU; i++)
  550. vcpu_cleanup(vm, i, destroy);
  551. @@ -542,6 +547,18 @@ vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
  552. return (0);
  553. }
  554.  
  555. +/* Handler function for VM_MAP_USER_BUF ioctl. */
  556. +int
  557. +vm_map_usermem(struct vm *vm, vm_paddr_t gpa, size_t len, void *buf,
  558. + int map, struct thread *td)
  559. +{
  560. + if (!map) /* this is an unmapping request */
  561. + return vmm_usermem_free(vm->vmspace, gpa, len);
  562. +
  563. + /* this is a mapping request */
  564. + return vmm_usermem_alloc(vm->vmspace, gpa, len, buf, td);
  565. +}
  566. +
  567. int
  568. vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
  569. {
  570. @@ -578,6 +595,9 @@ vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
  571. if (ppt_is_mmio(vm, gpa))
  572. return (true); /* 'gpa' is pci passthru mmio */
  573.  
  574. + if (usermem_mapped(vm->vmspace, gpa))
  575. + return (true); /* 'gpa' is mapped to a user-space buffer */
  576. +
  577. return (false);
  578. }
  579.  
  580. @@ -2449,6 +2469,12 @@ vm_rtc(struct vm *vm)
  581. return (vm->vrtc);
  582. }
  583.  
  584. +struct ioregh *
  585. +vm_ioregh(struct vm *vm)
  586. +{
  587. + return (vm->ioregh);
  588. +}
  589. +
  590. enum vm_reg_name
  591. vm_segment_name(int seg)
  592. {
  593. diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
  594. index 53a8bdc660a..cd333cd9cf8 100644
  595. --- a/sys/amd64/vmm/vmm_dev.c
  596. +++ b/sys/amd64/vmm/vmm_dev.c
  597. @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
  598. #include "vmm_lapic.h"
  599. #include "vmm_stat.h"
  600. #include "vmm_mem.h"
  601. +#include "vmm_ioport.h"
  602. #include "io/ppt.h"
  603. #include "io/vatpic.h"
  604. #include "io/vioapic.h"
  605. @@ -300,6 +301,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
  606. struct vm_pptdev_mmio *pptmmio;
  607. struct vm_pptdev_msi *pptmsi;
  608. struct vm_pptdev_msix *pptmsix;
  609. + struct vm_user_buf *usermem;
  610. + struct vm_io_reg_handler *ioregh;
  611. struct vm_nmi *vmnmi;
  612. struct vm_stats *vmstats;
  613. struct vm_stat_desc *statdesc;
  614. @@ -358,6 +361,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
  615. case VM_UNBIND_PPTDEV:
  616. case VM_ALLOC_MEMSEG:
  617. case VM_MMAP_MEMSEG:
  618. + case VM_MAP_USER_BUF:
  619. case VM_REINIT:
  620. /*
  621. * ioctls that operate on the entire virtual machine must
  622. @@ -433,6 +437,16 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
  623. pptmmio->func, pptmmio->gpa, pptmmio->len,
  624. pptmmio->hpa);
  625. break;
  626. + case VM_MAP_USER_BUF:
  627. + usermem = (struct vm_user_buf *)data;
  628. + error = vm_map_usermem(sc->vm, usermem->gpa, usermem->len,
  629. + usermem->addr, usermem->map, td);
  630. + break;
  631. + case VM_IO_REG_HANDLER:
  632. + ioregh = (struct vm_io_reg_handler *)data;
  633. + error = vmm_ioport_reg_handler(sc->vm, ioregh->port, ioregh->in, ioregh->mask_data,
  634. + ioregh->data, ioregh->type, ioregh->arg);
  635. + break;
  636. case VM_BIND_PPTDEV:
  637. pptdev = (struct vm_pptdev *)data;
  638. error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
  639. diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
  640. index 63044e81402..5d37561dcf9 100644
  641. --- a/sys/amd64/vmm/vmm_ioport.c
  642. +++ b/sys/amd64/vmm/vmm_ioport.c
  643. @@ -97,31 +97,267 @@ inout_instruction(struct vm_exit *vmexit)
  644. }
  645. #endif /* KTR */
  646.  
  647. +#ifdef VMM_IOPORT_REG_HANDLER
  648. +#include <sys/kernel.h>
  649. +#include <sys/param.h>
  650. +#include <sys/lock.h>
  651. +#include <sys/sx.h>
  652. +#include <sys/malloc.h>
  653. +#include <sys/systm.h>
  654. +
  655. +static MALLOC_DEFINE(M_IOREGH, "ioregh", "bhyve ioport reg handlers");
  656. +
  657. +#define IOPORT_MAX_REG_HANDLER 16
  658. +
  659. +/*
  660. + * The ioport_reg_handler functions allow us to catch VM writes/reads
  661. + * on a specific I/O address and send a notification.
  662. + *
  663. + * When the VM writes or reads a specific value at an I/O address, and the
  664. + * address and value match the info stored during handler registration,
  665. + * we send a notification (multiple types of notification are possible,
  666. + * but for now only the VM_IO_REGH_KWEVENTS handler is implemented).
  667. + */
  668. +
  669. +typedef int (*ioport_reg_handler_func_t)(struct vm *vm,
  670. + struct ioport_reg_handler *regh, uint32_t *val);
  671. +
  672. +struct ioport_reg_handler {
  673. + uint16_t port; /* I/O address */
  674. + uint16_t in; /* 0 out, 1 in */
  675. + uint32_t mask_data; /* 0 means match anything */
  676. + uint32_t data; /* data to match */
  677. + ioport_reg_handler_func_t handler; /* handler pointer */
  678. + void *handler_arg; /* handler argument */
  679. +};
  680. +
  681. +struct ioregh {
  682. + struct sx lock;
  683. + /* TODO: use hash table */
  684. + struct ioport_reg_handler handlers[IOPORT_MAX_REG_HANDLER];
  685. +};
  686. +
  687. +/* ----- I/O reg handlers ----- */
  688. +
  689. +/*
  690. + * VM_IO_REGH_KWEVENTS handler
  691. + *
  692. + * wakeup() on a specified address that uniquely identifies the event
  693. + *
  694. + */
  695. +static int
  696. +vmm_ioport_reg_wakeup(struct vm *vm, struct ioport_reg_handler *regh, uint32_t *val)
  697. +{
  698. + wakeup(regh->handler_arg);
  699. + return (0);
  700. +}
  701. +
  702. +/* call with ioregh->lock held */
  703. +static struct ioport_reg_handler *
  704. +vmm_ioport_find_handler(struct ioregh *ioregh, uint16_t port, uint16_t in,
  705. + uint32_t mask_data, uint32_t data)
  706. +{
  707. + struct ioport_reg_handler *regh;
  708. + uint32_t mask;
  709. + int i;
  710. +
  711. + regh = ioregh->handlers;
  712. + for (i = 0; i < IOPORT_MAX_REG_HANDLER; i++) {
  713. + if (regh[i].handler != NULL) {
  714. + mask = regh[i].mask_data & mask_data;
  715. + if ((regh[i].port == port) && (regh[i].in == in)
  716. + && ((mask & regh[i].data) == (mask & data))) {
  717. + return &regh[i];
  718. + }
  719. + }
  720. + }
  721. +
  722. + return (NULL);
  723. +}
  724. +
  725. +/* call with ioregh->lock held */
  726. +static struct ioport_reg_handler *
  727. +vmm_ioport_empty_handler(struct ioregh *ioregh)
  728. +{
  729. + struct ioport_reg_handler *regh;
  730. + int i;
  731. +
  732. + regh = ioregh->handlers;
  733. + for (i = 0; i < IOPORT_MAX_REG_HANDLER; i++) {
  734. + if (regh[i].handler == NULL) {
  735. + return &regh[i];
  736. + }
  737. + }
  738. +
  739. + return (NULL);
  740. +}
  741. +
  742. +
  743. +static int
  744. +vmm_ioport_add_handler(struct vm *vm, uint16_t port, uint16_t in, uint32_t mask_data,
  745. + uint32_t data, ioport_reg_handler_func_t handler, void *handler_arg)
  746. +{
  747. + struct ioport_reg_handler *regh;
  748. + struct ioregh *ioregh;
  749. + int ret = 0;
  750. +
  751. + ioregh = vm_ioregh(vm);
  752. +
  753. + sx_xlock(&ioregh->lock);
  754. +
  755. + regh = vmm_ioport_find_handler(ioregh, port, in, mask_data, data);
  756. + if (regh != NULL) {
  757. + printf("%s: handler for port %d in %d mask_data %d data %d "
  758. +     "already registered\n",
  759. + __FUNCTION__, port, in, mask_data, data);
  760. + ret = EEXIST;
  761. + goto err;
  762. + }
  763. +
  764. + regh = vmm_ioport_empty_handler(ioregh);
  765. + if (regh == NULL) {
  766. + printf("%s: empty reg_handler slot not found\n", __FUNCTION__);
  767. + ret = ENOMEM;
  768. + goto err;
  769. + }
  770. +
  771. + regh->port = port;
  772. + regh->in = in;
  773. + regh->mask_data = mask_data;
  774. + regh->data = data;
  775. + regh->handler = handler;
  776. + regh->handler_arg = handler_arg;
  777. +
  778. +err:
  779. + sx_xunlock(&ioregh->lock);
  780. + return (ret);
  781. +}
  782. +
  783. +static int
  784. +vmm_ioport_del_handler(struct vm *vm, uint16_t port, uint16_t in,
  785. + uint32_t mask_data, uint32_t data)
  786. +{
  787. + struct ioport_reg_handler *regh;
  788. + struct ioregh *ioregh;
  789. + int ret = 0;
  790. +
  791. + ioregh = vm_ioregh(vm);
  792. +
  793. + sx_xlock(&ioregh->lock);
  794. +
  795. + regh = vmm_ioport_find_handler(ioregh, port, in, mask_data, data);
  796. +
  797. + if (regh == NULL) {
  798. + ret = EINVAL;
  799. + goto err;
  800. + }
  801. +
  802. + bzero(regh, sizeof(struct ioport_reg_handler));
  803. +err:
  804. + sx_xunlock(&ioregh->lock);
  805. + return (ret);
  806. +}
  807. +
  808. +/*
  809. + * Register or delete an I/O event handler.
  810. + */
  811. +int
  812. +vmm_ioport_reg_handler(struct vm *vm, uint16_t port, uint16_t in,
  813. + uint32_t mask_data, uint32_t data, enum vm_io_regh_type type, void *arg)
  814. +{
  815. + int ret = 0;
  816. +
  817. + switch (type) {
  818. + case VM_IO_REGH_DELETE:
  819. + ret = vmm_ioport_del_handler(vm, port, in, mask_data, data);
  820. + break;
  821. + case VM_IO_REGH_KWEVENTS:
  822. + ret = vmm_ioport_add_handler(vm, port, in, mask_data, data,
  823. + vmm_ioport_reg_wakeup, arg);
  824. + break;
  825. + default:
  826. + printf("%s: unknown reg_handler type\n", __FUNCTION__);
  827. + ret = EINVAL;
  828. + break;
  829. + }
  830. +
  831. + return (ret);
  832. +}
  833. +
  834. +/*
  835. + * Invoke a handler if the data matches.
  836. + */
  837. +static int
  838. +invoke_reg_handler(struct vm *vm, int vcpuid, struct vm_exit *vmexit,
  839. + uint32_t *val, int *error)
  840. +{
  841. + struct ioport_reg_handler *regh;
  842. + struct ioregh *ioregh;
  843. + uint32_t mask_data;
  844. +
  845. + mask_data = vie_size2mask(vmexit->u.inout.bytes);
  846. + ioregh = vm_ioregh(vm);
  847. +
  848. + sx_slock(&ioregh->lock);
  849. + regh = vmm_ioport_find_handler(ioregh, vmexit->u.inout.port,
  850. + vmexit->u.inout.in, mask_data, vmexit->u.inout.eax);
  851. + if (regh != NULL) {
  852. + *error = (*(regh->handler))(vm, regh, val);
  853. + }
  854. + sx_sunlock(&ioregh->lock);
  855. + return (regh != NULL);
  856. +}
  857. +
  858. +struct ioregh *
  859. +ioregh_init(struct vm *vm)
  860. +{
  861. + struct ioregh *ioregh;
  862. +
  863. + ioregh = malloc(sizeof(struct ioregh), M_IOREGH, M_WAITOK | M_ZERO);
  864. + sx_init(&ioregh->lock, "ioregh lock");
  865. +
  866. + return (ioregh);
  867. +}
  868. +
  869. +void
  870. +ioregh_cleanup(struct ioregh *ioregh)
  871. +{
  872. + sx_destroy(&ioregh->lock);
  873. + free(ioregh, M_IOREGH);
  874. +}
  875. +#else /* !VMM_IOPORT_REG_HANDLER */
  876. +#define invoke_reg_handler(_1, _2, _3, _4, _5) (0)
  877. +#endif /* VMM_IOPORT_REG_HANDLER */
  878. +
  879. static int
  880. emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit,
  881. bool *retu)
  882. {
  883. ioport_handler_func_t handler;
  884. uint32_t mask, val;
  885. - int error;
  886. + int regh = 0, error = 0;
  887.  
  888. /*
  889. * If there is no handler for the I/O port then punt to userspace.
  890. */
  891. - if (vmexit->u.inout.port >= MAX_IOPORTS ||
  892. - (handler = ioport_handler[vmexit->u.inout.port]) == NULL) {
  893. + if ((vmexit->u.inout.port >= MAX_IOPORTS ||
  894. + (handler = ioport_handler[vmexit->u.inout.port]) == NULL) &&
  895. + (regh = invoke_reg_handler(vm, vcpuid, vmexit, &val, &error)) == 0) {
  896. *retu = true;
  897. return (0);
  898. }
  899.  
  900. - mask = vie_size2mask(vmexit->u.inout.bytes);
  901. + if (!regh) {
  902. + mask = vie_size2mask(vmexit->u.inout.bytes);
  903. +
  904. + if (!vmexit->u.inout.in) {
  905. + val = vmexit->u.inout.eax & mask;
  906. + }
  907.  
  908. - if (!vmexit->u.inout.in) {
  909. - val = vmexit->u.inout.eax & mask;
  910. + error = (*handler)(vm, vcpuid, vmexit->u.inout.in,
  911. + vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
  912. }
  913.  
  914. - error = (*handler)(vm, vcpuid, vmexit->u.inout.in,
  915. - vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
  916. if (error) {
  917. /*
  918. * The value returned by this function is also the return value
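
On the kernel side, a VM_IO_REGH_KWEVENTS registration pairs naturally with a thread sleeping on the same opaque pointer: vmm_ioport_reg_wakeup() above simply calls wakeup() on the registered arg. A minimal sketch of such a consumer (not part of the patch; backend_worker, kick_event and the "ioregh" wmesg are illustrative):

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/proc.h>

    static void
    backend_worker(void *kick_event)
    {
    	for (;;) {
    		/* sleep until the guest writes the registered I/O port */
    		tsleep(kick_event, PZERO, "ioregh", 0);

    		/* ... drain the shared-memory queues here, then loop ... */
    	}
    }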
  919. diff --git a/sys/amd64/vmm/vmm_ioport.h b/sys/amd64/vmm/vmm_ioport.h
  920. index ba51989b1af..5ef0d16d17b 100644
  921. --- a/sys/amd64/vmm/vmm_ioport.h
  922. +++ b/sys/amd64/vmm/vmm_ioport.h
  923. @@ -29,6 +29,22 @@
  924. #ifndef _VMM_IOPORT_H_
  925. #define _VMM_IOPORT_H_
  926.  
  927. +#define VMM_IOPORT_REG_HANDLER
  928. +#ifdef VMM_IOPORT_REG_HANDLER
  929. +struct ioport_reg_handler;
  930. +struct ioregh;
  931. +
  932. +struct ioregh *ioregh_init(struct vm *vm);
  933. +void ioregh_cleanup(struct ioregh *ioregh);
  934. +
  935. +int vmm_ioport_reg_handler(struct vm *vm, uint16_t port, uint16_t in,
  936. + uint32_t mask_data, uint32_t data, enum vm_io_regh_type type, void *arg);
  937. +#else /* !VMM_IOPORT_REG_HANDLER */
  938. +#define ioregh_init(_1) (NULL)
  939. +#define ioregh_cleanup(_1)
  940. +#define vmm_ioport_reg_handler(_1, _2, _3, _4,_5, _6, _7) (EINVAL)
  941. +#endif /* VMM_IOPORT_REG_HANDLER */
  942. +
  943. typedef int (*ioport_handler_func_t)(struct vm *vm, int vcpuid,
  944. bool in, int port, int bytes, uint32_t *val);
  945.  
  946. diff --git a/sys/amd64/vmm/vmm_usermem.c b/sys/amd64/vmm/vmm_usermem.c
  947. new file mode 100644
  948. index 00000000000..1449e18ef58
  949. --- /dev/null
  950. +++ b/sys/amd64/vmm/vmm_usermem.c
  951. @@ -0,0 +1,188 @@
  952. +/*
  953. + * Copyright (C) 2015 Stefano Garzarella (stefano.garzarella@gmail.com)
  954. + * All rights reserved.
  955. + *
  956. + * Redistribution and use in source and binary forms, with or without
  957. + * modification, are permitted provided that the following conditions
  958. + * are met:
  959. + * 1. Redistributions of source code must retain the above copyright
  960. + * notice, this list of conditions and the following disclaimer.
  961. + * 2. Redistributions in binary form must reproduce the above copyright
  962. + * notice, this list of conditions and the following disclaimer in the
  963. + * documentation and/or other materials provided with the distribution.
  964. + *
  965. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  966. + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  967. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  968. + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  969. + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  970. + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  971. + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  972. + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  973. + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  974. + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  975. + * SUCH DAMAGE.
  976. + *
  977. + * $FreeBSD$
  978. + */
  979. +
  980. +#include <sys/cdefs.h>
  981. +__FBSDID("$FreeBSD$");
  982. +
  983. +#include <sys/param.h>
  984. +#include <sys/systm.h>
  985. +#include <sys/malloc.h>
  986. +#include <sys/sglist.h>
  987. +#include <sys/lock.h>
  988. +#include <sys/rwlock.h>
  989. +#include <sys/proc.h>
  990. +
  991. +#include <vm/vm.h>
  992. +#include <vm/vm_param.h>
  993. +#include <vm/pmap.h>
  994. +#include <vm/vm_map.h>
  995. +#include <vm/vm_object.h>
  996. +#include <vm/vm_page.h>
  997. +#include <vm/vm_pager.h>
  998. +
  999. +#include <machine/md_var.h>
  1000. +
  1001. +#include "vmm_mem.h"
  1002. +#include "vmm_usermem.h"
  1003. +
  1004. +/*
   1005. + * usermem functions allow us to map a host userspace buffer (e.g. from bhyve)
   1006. + * into the guest VM.
   1007. + *
   1008. + * This feature is used to implement ptnetmap on bhyve, mapping the netmap memory
   1009. + * (returned by mmap() in the bhyve userspace application) into the guest VM.
  1010. + */
  1011. +
   1012. +/* TODO: we could use a dynamic list of usermems */
  1013. +#define MAX_USERMEMS 64
  1014. +
  1015. +static struct usermem {
  1016. + struct vmspace *vmspace; /* guest address space */
  1017. + vm_paddr_t gpa; /* guest physical address */
  1018. + size_t len;
  1019. +} usermems[MAX_USERMEMS];
  1020. +
  1021. +static int
  1022. +vmm_usermem_add(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
  1023. +{
  1024. + int i;
  1025. +
  1026. + for (i = 0; i < MAX_USERMEMS; i++) {
  1027. + if (usermems[i].len == 0) {
  1028. + usermems[i].vmspace = vmspace;
  1029. + usermems[i].gpa = gpa;
  1030. + usermems[i].len = len;
  1031. + break;
  1032. + }
  1033. + }
  1034. +
  1035. + if (i == MAX_USERMEMS) {
  1036. + printf("vmm_usermem_add: empty usermem slot not found\n");
  1037. + return (ENOMEM);
  1038. + }
  1039. +
  1040. + return 0;
  1041. +}
  1042. +
  1043. +static int
  1044. +vmm_usermem_del(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
  1045. +{
  1046. + int i;
  1047. +
  1048. + for (i = 0; i < MAX_USERMEMS; i++) {
  1049. + if (usermems[i].vmspace == vmspace && usermems[i].gpa == gpa
  1050. + && usermems[i].len == len) {
  1051. + bzero(&usermems[i], sizeof(struct usermem));
  1052. + return 1;
  1053. + }
  1054. + }
  1055. +
  1056. + return 0;
  1057. +}
  1058. +
  1059. +boolean_t
  1060. +usermem_mapped(struct vmspace *vmspace, vm_paddr_t gpa)
  1061. +{
  1062. + int i;
  1063. +
  1064. + for (i = 0; i < MAX_USERMEMS; i++) {
  1065. + if (usermems[i].vmspace != vmspace || usermems[i].len == 0)
  1066. + continue;
  1067. + if (gpa >= usermems[i].gpa &&
  1068. + gpa < usermems[i].gpa + usermems[i].len)
  1069. + return (TRUE);
  1070. + }
  1071. + return (FALSE);
  1072. +}
  1073. +
  1074. +int
  1075. +vmm_usermem_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len,
  1076. + void *buf, struct thread *td)
  1077. +{
  1078. + vm_object_t obj = NULL;
  1079. + vm_map_t map;
  1080. + vm_map_entry_t entry;
  1081. + vm_pindex_t index;
  1082. + vm_prot_t prot;
  1083. + boolean_t wired;
  1084. + int error;
  1085. +
  1086. + map = &td->td_proc->p_vmspace->vm_map;
  1087. +
   1088. + /* look up the vm_object that describes the user address */
  1089. + error = vm_map_lookup(&map, (unsigned long)buf, VM_PROT_RW, &entry,
  1090. + &obj, &index, &prot, &wired);
  1091. + if (error != KERN_SUCCESS)
  1092. + return EINVAL;
  1093. +
   1094. + /* map the vm_object into the guest vmspace */
  1095. + error = vm_map_find(&vmspace->vm_map, obj, index, &gpa, len, 0,
  1096. + VMFS_NO_SPACE, VM_PROT_RW, VM_PROT_RW, 0);
  1097. + if (error != KERN_SUCCESS) {
  1098. + vm_object_deallocate(obj);
  1099. + obj = NULL;
  1100. + }
  1101. + vm_map_lookup_done(map, entry);
  1102. +
  1103. + if (error)
  1104. + return EINVAL;
  1105. +
  1106. + /* acquire the reference to the vm_object */
  1107. + vm_object_reference(obj);
  1108. + vmm_usermem_add(vmspace, gpa, len);
  1109. +
  1110. + return 0;
  1111. +}
  1112. +
  1113. +int
  1114. +vmm_usermem_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
  1115. +{
  1116. + int found;
  1117. +
  1118. + found = vmm_usermem_del(vmspace, gpa, len);
  1119. + if (!found)
  1120. + return EINVAL;
  1121. +
  1122. + //TODO should we call vm_object_deallocate ?
  1123. + return vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
  1124. +}
  1125. +
  1126. +void
  1127. +vmm_usermem_cleanup(struct vmspace *vmspace)
  1128. +{
  1129. + int i;
  1130. +
  1131. + for (i = 0; i < MAX_USERMEMS; i++) {
  1132. + if (usermems[i].vmspace == vmspace) {
  1133. + //TODO same as above
  1134. + vm_map_remove(&vmspace->vm_map, usermems[i].gpa,
  1135. + usermems[i].gpa + usermems[i].len);
  1136. + bzero(&usermems[i], sizeof(struct usermem));
  1137. + }
  1138. + }
  1139. +}
  1140. diff --git a/sys/amd64/vmm/vmm_usermem.h b/sys/amd64/vmm/vmm_usermem.h
  1141. new file mode 100644
  1142. index 00000000000..c55d9006dc6
  1143. --- /dev/null
  1144. +++ b/sys/amd64/vmm/vmm_usermem.h
  1145. @@ -0,0 +1,40 @@
  1146. +/*
  1147. + * Copyright (C) 2015 Stefano Garzarella (stefano.garzarella@gmail.com)
  1148. + * All rights reserved.
  1149. + *
  1150. + * Redistribution and use in source and binary forms, with or without
  1151. + * modification, are permitted provided that the following conditions
  1152. + * are met:
  1153. + * 1. Redistributions of source code must retain the above copyright
  1154. + * notice, this list of conditions and the following disclaimer.
  1155. + * 2. Redistributions in binary form must reproduce the above copyright
  1156. + * notice, this list of conditions and the following disclaimer in the
  1157. + * documentation and/or other materials provided with the distribution.
  1158. + *
  1159. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  1160. + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  1161. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  1162. + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  1163. + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  1164. + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  1165. + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  1166. + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  1167. + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  1168. + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  1169. + * SUCH DAMAGE.
  1170. + *
  1171. + * $FreeBSD$
  1172. + */
  1173. +
  1174. +#ifndef _VMM_USERMEM_H_
  1175. +#define _VMM_USERMEM_H_
  1176. +
  1177. +struct vm;
  1178. +
  1179. +int vmm_usermem_alloc(struct vmspace *, vm_paddr_t gpa,
  1180. + size_t len, void *buf, struct thread *td);
  1181. +int vmm_usermem_free(struct vmspace *, vm_paddr_t gpa, size_t len);
  1182. +void vmm_usermem_cleanup(struct vmspace *);
  1183. +boolean_t usermem_mapped(struct vmspace *, vm_paddr_t gpa);
  1184. +
  1185. +#endif
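
To close the loop on the ptnetmap use case mentioned in the vmm_usermem.c comment, the bhyve side would mmap() the netmap memory region and hand it to the guest through vm_map_user_buf(), which ends up in vmm_usermem_alloc() above. A minimal sketch, with nm_fd, memsize and gpa as illustrative parameters and error handling reduced to the essentials:

    #include <sys/types.h>
    #include <sys/mman.h>
    #include <machine/vmm.h>
    #include <vmmapi.h>

    static void *
    map_netmap_mem_into_guest(struct vmctx *ctx, int nm_fd, size_t memsize,
        vm_paddr_t gpa)
    {
    	void *mem;

    	/* netmap memory, as obtained by mmap()ing the netmap file descriptor */
    	mem = mmap(NULL, memsize, PROT_READ | PROT_WRITE, MAP_SHARED, nm_fd, 0);
    	if (mem == MAP_FAILED)
    		return (NULL);

    	/* VM_MAP_USER_BUF with map=1: expose the buffer to the guest at gpa */
    	if (vm_map_user_buf(ctx, gpa, memsize, mem, 1) != 0) {
    		munmap(mem, memsize);
    		return (NULL);
    	}
    	return (mem);
    }

Passing map=0 later unmaps the region again, reaching vmm_usermem_free() in the new file above.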
  1186. diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h
  1187. index 1fe7563348c..5a66f0e0499 100644
  1188. --- a/sys/dev/netmap/if_em_netmap.h
  1189. +++ b/sys/dev/netmap/if_em_netmap.h
  1190. @@ -24,7 +24,7 @@
  1191. */
  1192.  
  1193. /*
  1194. - * $FreeBSD$
  1195. + * $FreeBSD: head/sys/dev/netmap/if_em_netmap.h 238985 2012-08-02 11:59:43Z luigi $
  1196. *
  1197. * netmap support for: em.
  1198. *
  1199. diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h
  1200. index 33b7b3b6654..884785f719a 100644
  1201. --- a/sys/dev/netmap/if_igb_netmap.h
  1202. +++ b/sys/dev/netmap/if_igb_netmap.h
  1203. @@ -24,7 +24,7 @@
  1204. */
  1205.  
  1206. /*
  1207. - * $FreeBSD$
  1208. + * $FreeBSD: head/sys/dev/netmap/if_igb_netmap.h 256200 2013-10-09 17:32:52Z jfv $
  1209. *
  1210. * Netmap support for igb, partly contributed by Ahmed Kooli
  1211. * For details on netmap support please see ixgbe_netmap.h
  1212. diff --git a/sys/dev/netmap/if_ixl_netmap.h b/sys/dev/netmap/if_ixl_netmap.h
  1213. index 223dc06e36a..14f21e93853 100644
  1214. --- a/sys/dev/netmap/if_ixl_netmap.h
  1215. +++ b/sys/dev/netmap/if_ixl_netmap.h
  1216. @@ -24,7 +24,7 @@
  1217. */
  1218.  
  1219. /*
  1220. - * $FreeBSD$
  1221. + * $FreeBSD: head/sys/dev/netmap/if_ixl_netmap.h 279232 2015-02-24 06:20:50Z luigi $
  1222. *
  1223. * netmap support for: ixl
  1224. *
  1225. @@ -129,7 +129,7 @@ ixl_netmap_attach(struct ixl_vsi *vsi)
  1226. na.ifp = vsi->ifp;
  1227. na.na_flags = NAF_BDG_MAYSLEEP;
  1228. // XXX check that queues is set.
  1229. - printf("queues is %p\n", vsi->queues);
  1230. + nm_prinf("queues is %p\n", vsi->queues);
  1231. if (vsi->queues) {
  1232. na.num_tx_desc = vsi->queues[0].num_desc;
  1233. na.num_rx_desc = vsi->queues[0].num_desc;
  1234. diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h
  1235. index 91c637a8b3f..d8c59014512 100644
  1236. --- a/sys/dev/netmap/if_lem_netmap.h
  1237. +++ b/sys/dev/netmap/if_lem_netmap.h
  1238. @@ -25,7 +25,7 @@
  1239.  
  1240.  
  1241. /*
  1242. - * $FreeBSD$
  1243. + * $FreeBSD: head/sys/dev/netmap/if_lem_netmap.h 271849 2014-09-19 03:51:26Z glebius $
  1244. *
  1245. * netmap support for: lem
  1246. *
  1247. diff --git a/sys/dev/netmap/if_nfe_netmap.h b/sys/dev/netmap/if_nfe_netmap.h
  1248. new file mode 100644
  1249. index 00000000000..c5db32f1323
  1250. --- /dev/null
  1251. +++ b/sys/dev/netmap/if_nfe_netmap.h
  1252. @@ -0,0 +1,384 @@
  1253. +/*
  1254. + * Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved.
  1255. + *
  1256. + * Redistribution and use in source and binary forms, with or without
  1257. + * modification, are permitted provided that the following conditions
  1258. + * are met:
  1259. + * 1. Redistributions of source code must retain the above copyright
  1260. + * notice, this list of conditions and the following disclaimer.
  1261. + * 2. Redistributions in binary form must reproduce the above copyright
  1262. + * notice, this list of conditions and the following disclaimer in the
  1263. + * documentation and/or other materials provided with the distribution.
  1264. + *
  1265. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  1266. + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  1267. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  1268. + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  1269. + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  1270. + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  1271. + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  1272. + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  1273. + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  1274. + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  1275. + * SUCH DAMAGE.
  1276. + */
  1277. +
  1278. +/*
  1279. + * $FreeBSD: head/sys/dev/netmap/if_em_netmap.h 231881 2012-02-17 14:09:04Z luigi $
  1280. + *
  1281. + * netmap support for: nfe XXX not yet tested.
  1282. + *
  1283. + * For more details on netmap support please see ixgbe_netmap.h
  1284. + */
  1285. +
  1286. +
  1287. +#include <net/netmap.h>
  1288. +#include <sys/selinfo.h>
  1289. +#include <vm/vm.h>
  1290. +#include <vm/pmap.h>
  1291. +
  1292. +#include <dev/netmap/netmap_kern.h>
  1293. +
  1294. +
  1295. +static int
  1296. +nfe_netmap_init_buffers(struct nfe_softc *sc)
  1297. +{
  1298. + struct netmap_adapter *na = NA(sc->nfe_ifp);
  1299. + struct netmap_slot *slot;
  1300. + int i, l, n, max_avail;
  1301. + struct nfe_desc32 *desc32 = NULL;
  1302. + struct nfe_desc64 *desc64 = NULL;
  1303. + void *addr;
  1304. + uint64_t paddr;
  1305. +
  1306. + slot = netmap_reset(na, NR_TX, 0, 0);
  1307. + if (!slot)
  1308. + return 0; // not in native mode
  1309. + // XXX init the tx ring
  1310. + n = NFE_TX_RING_COUNT;
  1311. + for (i = 0; i < n; i++) {
  1312. + l = netmap_idx_n2k(&na->tx_rings[0], i);
  1313. + addr = PNMB(na, slot + l, &paddr);
  1314. + netmap_reload_map(sc->txq.tx_data_tag,
  1315. + sc->txq.data[l].tx_data_map, addr);
  1316. + slot[l].flags = 0;
  1317. + if (sc->nfe_flags & NFE_40BIT_ADDR) {
  1318. + desc64 = &sc->txq.desc64[l];
  1319. + desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
  1320. + desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
  1321. + desc64->vtag = 0;
  1322. + desc64->length = htole16(0);
  1323. + desc64->flags = htole16(0);
  1324. + } else {
  1325. + desc32 = &sc->txq.desc32[l];
  1326. + desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
  1327. + desc32->length = htole16(0);
  1328. + desc32->flags = htole16(0);
  1329. + }
  1330. + }
  1331. +
  1332. + slot = netmap_reset(na, NR_RX, 0, 0);
  1333. + // XXX init the rx ring
  1334. + /*
  1335. + * preserve buffers still owned by the driver (and keep one empty).
  1336. + */
  1337. + n = NFE_RX_RING_COUNT;
  1338. + max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]);
  1339. + for (i = 0; i < n; i++) {
  1340. + uint16_t flags;
  1341. + l = netmap_idx_n2k(&na->rx_rings[0], i);
  1342. + addr = PNMB(na, slot + l, &paddr);
  1343. + flags = (i < max_avail) ? NFE_RX_READY : 0;
  1344. + if (sc->nfe_flags & NFE_40BIT_ADDR) {
  1345. + desc64 = &sc->rxq.desc64[l];
  1346. + desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
  1347. + desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
  1348. + desc64->vtag = 0;
  1349. + desc64->length = htole16(NETMAP_BUF_SIZE);
  1350. + desc64->flags = htole16(NFE_RX_READY);
  1351. + } else {
  1352. + desc32 = &sc->rxq.desc32[l];
  1353. + desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
  1354. + desc32->length = htole16(NETMAP_BUF_SIZE);
  1355. + desc32->flags = htole16(NFE_RX_READY);
  1356. + }
  1357. +
  1358. + netmap_reload_map(sc->rxq.rx_data_tag,
  1359. + sc->rxq.data[l].rx_data_map, addr);
  1360. + bus_dmamap_sync(sc->rxq.rx_data_tag,
  1361. + sc->rxq.data[l].rx_data_map, BUS_DMASYNC_PREREAD);
  1362. + }
  1363. +
  1364. + return 1;
  1365. +}
  1366. +
  1367. +
  1368. +/*
  1369. + * Register/unregister. We are already under netmap lock.
  1370. + */
  1371. +static int
  1372. +nfe_netmap_reg(struct netmap_adapter *na, int onoff)
  1373. +{
  1374. + struct ifnet *ifp = na->ifp;
  1375. + struct nfe_softc *sc = ifp->if_softc;
  1376. +
  1377. + NFE_LOCK(sc);
  1378. + nfe_stop(ifp); /* also clear IFF_DRV_RUNNING */
  1379. + if (onoff) {
  1380. + nm_set_native_flags(na);
  1381. + } else {
  1382. + nm_clear_native_flags(na);
  1383. + }
  1384. + nfe_init_locked(sc); /* also enable intr */
  1385. + NFE_UNLOCK(sc);
  1386. + return (0);
  1387. +}
  1388. +
  1389. +
  1390. +/*
  1391. + * Reconcile kernel and user view of the transmit ring.
  1392. + */
  1393. +static int
  1394. +nfe_netmap_txsync(struct netmap_kring *kring, int flags)
  1395. +{
  1396. + struct netmap_adapter *na = kring->na;
  1397. + struct ifnet *ifp = na->ifp;
  1398. + struct netmap_ring *ring = kring->ring;
  1399. + u_int nm_i; /* index into the netmap ring */
  1400. + u_int nic_i; /* index into the NIC ring */
  1401. + u_int n;
  1402. + u_int const lim = kring->nkr_num_slots - 1;
  1403. + u_int const head = kring->rhead;
  1404. + /* generate an interrupt approximately every half ring */
  1405. + u_int report_frequency = kring->nkr_num_slots >> 1;
  1406. +
  1407. + /* device-specific */
  1408. + struct nfe_softc *sc = ifp->if_softc;
  1409. + struct nfe_desc32 *desc32 = NULL;
  1410. + struct nfe_desc64 *desc64 = NULL;
  1411. +
  1412. + bus_dmamap_sync(sc->txq.tx_desc_tag, sc->txq.tx_desc_map,
  1413. + BUS_DMASYNC_POSTREAD);
  1414. +
  1415. + /*
  1416. + * First part: process new packets to send.
  1417. + */
  1418. +
  1419. + nm_i = kring->nr_hwcur;
  1420. + if (nm_i != head) { /* we have new packets to send */
  1421. + nic_i = netmap_idx_k2n(kring, nm_i);
  1422. + for (n = 0; nm_i != head; n++) {
  1423. + /* slot is the current slot in the netmap ring */
  1424. + struct netmap_slot *slot = &ring->slot[nm_i];
  1425. + u_int len = slot->len;
  1426. + uint64_t paddr;
  1427. + void *addr = PNMB(na, slot, &paddr);
  1428. +
  1429. + NM_CHECK_ADDR_LEN(addr, len);
  1430. +
  1431. + if (slot->flags & NS_BUF_CHANGED) {
  1432. + /* buffer has changed, reload map */
  1433. + netmap_reload_map(sc->txq.tx_data_tag,
  1434. + sc->txq.data[l].tx_data_map, addr);
  1435. + }
  1436. + slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
  1437. +
  1438. + if (sc->nfe_flags & NFE_40BIT_ADDR) {
  1439. + desc64 = &sc->txq.desc64[l];
  1440. + desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
  1441. + desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
  1442. + desc64->vtag = 0;
  1443. + desc64->length = htole16(len - 1);
  1444. + desc64->flags =
  1445. + htole16(NFE_TX_VALID | NFE_TX_LASTFRAG_V2);
  1446. + } else {
  1447. + desc32 = &sc->txq.desc32[l];
  1448. + desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
  1449. + desc32->length = htole16(len - 1);
  1450. + desc32->flags =
  1451. + htole16(NFE_TX_VALID | NFE_TX_LASTFRAG_V1);
  1452. + }
  1453. +
  1454. + bus_dmamap_sync(sc->txq.tx_data_tag,
  1455. + sc->txq.data[l].tx_data_map, BUS_DMASYNC_PREWRITE);
  1456. + nm_i = nm_next(nm_i, lim);
  1457. + nic_i = nm_next(nic_i, lim);
  1458. + }
  1459. + kring->nr_hwcur = head;
  1460. + sc->txq.cur = nic_i;
  1461. +
  1462. + bus_dmamap_sync(sc->txq.tx_desc_tag, sc->txq.tx_desc_map,
  1463. + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
  1464. +
  1465. + /* XXX something missing ? where is the last pkt marker ? */
  1466. + NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_KICKTX | sc->rxtxctl);
  1467. + }
  1468. +
  1469. + /*
  1470. + * Second part: reclaim buffers for completed transmissions.
  1471. + */
  1472. + if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
  1473. + u_int nic_cur = sc->txq.cur;
  1474. + nic_i = sc->txq.next;
  1475. + for (n = 0; nic_i != nic_cur; n++, NFE_INC(nic_i, NFE_TX_RING_COUNT)) {
  1476. + uint16_t flags;
  1477. + if (sc->nfe_flags & NFE_40BIT_ADDR) {
  1478. + desc64 = &sc->txq.desc64[l];
  1479. + flags = le16toh(desc64->flags);
  1480. + } else {
  1481. + desc32 = &sc->txq.desc32[l];
  1482. + flags = le16toh(desc32->flags);
  1483. + }
  1484. + if (flags & NFE_TX_VALID)
  1485. + break;
  1486. + }
  1487. + if (n > 0) {
  1488. + sc->txq.next = nic_i;
  1489. + kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
  1490. + }
  1491. + }
  1492. +
  1493. +
  1494. + return 0;
  1495. +}
  1496. +
  1497. +
  1498. +/*
  1499. + * Reconcile kernel and user view of the receive ring.
  1500. + */
  1501. +static int
  1502. +nfe_netmap_rxsync(struct netmap_kring *kring, int flags)
  1503. +{
  1504. + struct netmap_adapter *na = kring->na;
  1505. + struct ifnet *ifp = na->ifp;
  1506. + struct netmap_ring *ring = kring->ring;
  1507. + u_int nm_i; /* index into the netmap ring */
  1508. + u_int nic_i; /* index into the NIC ring */
  1509. + u_int n;
  1510. + u_int const lim = kring->nkr_num_slots - 1;
  1511. + u_int const head = kring->rhead;
  1512. + int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
  1513. +
  1514. + /* device-specific */
  1515. + struct nfe_softc *sc = ifp->if_softc;
  1516. + struct nfe_desc32 *desc32;
  1517. + struct nfe_desc64 *desc64;
  1518. +
  1519. + if (head > lim)
  1520. + return netmap_ring_reinit(kring);
  1521. +
  1522. + bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map,
  1523. + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
  1524. +
  1525. + /*
  1526. + * First part: import newly received packets.
  1527. + */
  1528. + if (netmap_no_pendintr || force_update) {
  1529. + uint16_t flags, len;
  1530. + uint16_t slot_flags = kring->nkr_slot_flags;
  1531. +
  1532. + nic_i = sc->rxq.cur;
  1533. + nm_i = netmap_idx_n2k(kring, nic_i);
  1534. + for (n = 0; ; n++) {
  1535. + if (sc->nfe_flags & NFE_40BIT_ADDR) {
  1536. + desc64 = &sc->rxq.desc64[sc->rxq.cur];
  1537. + flags = le16toh(desc64->flags);
  1538. + len = le16toh(desc64->length) & NFE_RX_LEN_MASK;
  1539. + } else {
  1540. + desc32 = &sc->rxq.desc32[nic_i];
  1541. + flags = le16toh(desc32->flags);
  1542. + len = le16toh(desc32->length) & NFE_RX_LEN_MASK;
  1543. + }
  1544. +
  1545. + if (flags & NFE_RX_READY)
  1546. + break;
  1547. +
  1548. + ring->slot[nm_i].len = len;
  1549. + ring->slot[nm_i].flags = slot_flags;
  1550. + bus_dmamap_sync(sc->rxq.rx_data_tag,
  1551. + sc->rxq.data[nic_i].rx_data_map,
  1552. + BUS_DMASYNC_POSTREAD);
  1553. + nm_i = nm_next(nm_i, lim);
  1554. + nic_i = nm_next(nic_i, lim);
  1555. + }
  1556. + if (n) { /* update the state variables */
  1557. + sc->rxq.cur = nic_i;
  1558. + kring->nr_hwtail = nm_i;
  1559. + }
  1560. + kring->nr_kflags &= ~NKR_PENDINTR;
  1561. + }
  1562. +
  1563. + /*
  1564. + * Second part: skip past packets that userspace has released.
  1565. + */
  1566. + nm_i = kring->nr_hwcur;
  1567. + if (nm_i != head) {
  1568. + nic_i = netmap_idx_k2n(kring, nm_i);
  1569. + for (n = 0; nm_i != head; n++) {
  1570. + struct netmap_slot *slot = &ring->slot[nm_i];
  1571. + uint64_t paddr;
  1572. + void *addr = PNMB(na, slot, &paddr);
  1573. +
  1574. + if (addr == netmap_buffer_base) /* bad buf */
  1575. + goto ring_reset;
  1576. +
  1577. + if (slot->flags & NS_BUF_CHANGED) {
  1578. + /* buffer has changed, reload map */
  1579. + netmap_reload_map(sc->rxq.rx_data_tag,
  1580. + sc->rxq.data[nic_i].rx_data_map, addr);
  1581. + slot->flags &= ~NS_BUF_CHANGED;
  1582. + }
  1583. + if (sc->nfe_flags & NFE_40BIT_ADDR) {
  1584. + desc64 = &sc->rxq.desc64[nic_i];
  1585. + desc64->physaddr[0] =
  1586. + htole32(NFE_ADDR_HI(paddr));
  1587. + desc64->physaddr[1] =
  1588. + htole32(NFE_ADDR_LO(paddr));
  1589. + desc64->length = htole16(NETMAP_BUF_SIZE);
  1590. + desc64->flags = htole16(NFE_RX_READY);
  1591. + } else {
  1592. + desc32 = &sc->rxq.desc32[nic_i];
  1593. + desc32->physaddr =
  1594. + htole32(NFE_ADDR_LO(paddr));
  1595. + desc32->length = htole16(NETMAP_BUF_SIZE);
  1596. + desc32->flags = htole16(NFE_RX_READY);
  1597. + }
  1598. +
  1599. + bus_dmamap_sync(sc->rxq.rx_data_tag,
  1600. + sc->rxq.data[nic_i].rx_data_map,
  1601. + BUS_DMASYNC_PREREAD);
  1602. + nm_i = nm_next(nm_i, lim);
  1603. + nic_i = nm_next(nic_i, lim);
  1604. + }
  1605. + kring->nr_hwcur = head;
  1606. + bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map,
  1607. + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
  1608. + }
  1609. +
  1610. +
  1611. + return 0;
  1612. +
  1613. +ring_reset:
  1614. + return netmap_ring_reinit(kring);
  1615. +}
  1616. +
  1617. +
  1618. +static void
  1619. +nfe_netmap_attach(struct nfe_softc *sc)
  1620. +{
  1621. + struct netmap_adapter na;
  1622. +
  1623. + bzero(&na, sizeof(na));
  1624. +
  1625. + na.ifp = sc->nfe_ifp;
  1626. + na.na_flags = NAF_BDG_MAYSLEEP;
  1627. + na.num_tx_desc = NFE_TX_RING_COUNT;
  1628. + na.num_rx_desc = NFE_RX_RING_COUNT;
  1629. + na.nm_txsync = nfe_netmap_txsync;
  1630. + na.nm_rxsync = nfe_netmap_rxsync;
  1631. + na.nm_register = nfe_netmap_reg;
  1632. + na.num_tx_rings = na.num_rx_rings = 1;
  1633. + netmap_attach(&na);
  1634. +}
  1635. +
  1636. +/* end of file */
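The header above only takes effect once the nfe(4) driver itself includes it and calls the two hooks; that driver-side hunk is not part of this paste. A minimal sketch of the conventional hook-up, modeled on re(4) and the other netmap-enabled drivers (the header path, the softc/ifp variable names and the exact placement are assumptions):

#ifdef DEV_NETMAP
#include <dev/netmap/if_nfe_netmap.h>
MODULE_DEPEND(nfe, netmap, 1, 1, 1);
#endif

	/* in nfe_attach(), once the ifnet has been set up */
#ifdef DEV_NETMAP
	nfe_netmap_attach(sc);
#endif

	/* in nfe_detach(), before the ifnet is released */
#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif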
  1637. diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h
  1638. index ac08aedd796..28971cb7d93 100644
  1639. --- a/sys/dev/netmap/if_re_netmap.h
  1640. +++ b/sys/dev/netmap/if_re_netmap.h
  1641. @@ -24,7 +24,7 @@
  1642. */
  1643.  
  1644. /*
  1645. - * $FreeBSD$
  1646. + * $FreeBSD: head/sys/dev/netmap/if_re_netmap.h 234225 2012-04-13 15:33:12Z luigi $
  1647. *
  1648. * netmap support for: re
  1649. *
  1650. diff --git a/sys/dev/netmap/if_vtnet_netmap.h b/sys/dev/netmap/if_vtnet_netmap.h
  1651. index 4bed0e718dd..4d8d9e36749 100644
  1652. --- a/sys/dev/netmap/if_vtnet_netmap.h
  1653. +++ b/sys/dev/netmap/if_vtnet_netmap.h
  1654. @@ -24,7 +24,7 @@
  1655. */
  1656.  
  1657. /*
  1658. - * $FreeBSD$
  1659. + * $FreeBSD: head/sys/dev/netmap/if_vtnet_netmap.h 270097 2014-08-17 10:25:27Z luigi $
  1660. */
  1661.  
  1662. #include <net/netmap.h>
  1663. diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
  1664. index 7986c996517..ddfed4a44a5 100644
  1665. --- a/sys/dev/netmap/ixgbe_netmap.h
  1666. +++ b/sys/dev/netmap/ixgbe_netmap.h
  1667. @@ -24,7 +24,7 @@
  1668. */
  1669.  
  1670. /*
  1671. - * $FreeBSD$
  1672. + * $FreeBSD: head/sys/dev/netmap/ixgbe_netmap.h 244514 2012-12-20 22:26:03Z luigi $
  1673. *
  1674. * netmap support for: ixgbe (both ix and ixv)
  1675. *
  1676. diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
  1677. index 15e44815acc..3a3ae0ee0ff 100644
  1678. --- a/sys/dev/netmap/netmap.c
  1679. +++ b/sys/dev/netmap/netmap.c
  1680. @@ -388,7 +388,7 @@ ports attached to the switch)
  1681. *
  1682. * - VALE ports:
  1683. * concurrently:
  1684. - * 1) ioctlNIOCRXSYNC)/netmap_poll() in process context
  1685. + * 1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
  1686. * kring->nm_sync() == netmap_vp_rxsync()
  1687. * 2) from nm_bdg_flush()
  1688. * na->nm_notify() == netmap_notify()
  1689. @@ -484,7 +484,7 @@ int netmap_mitigate = 1;
  1690. int netmap_no_pendintr = 1;
  1691. int netmap_txsync_retry = 2;
  1692. int netmap_flags = 0; /* debug flags */
  1693. -static int netmap_fwd = 0; /* force transparent mode */
  1694. +static int netmap_fwd = 0; /* force transparent forwarding */
  1695.  
  1696. /*
  1697. * netmap_admode selects the netmap mode to use.
  1698. @@ -522,6 +522,9 @@ int netmap_generic_rings = 1;
  1699. /* Non-zero if ptnet devices are allowed to use virtio-net headers. */
  1700. int ptnet_vnet_hdr = 1;
  1701.  
  1702. +/* 0 if ptnetmap should not use worker threads for TX processing */
  1703. +int ptnetmap_tx_workers = 1;
  1704. +
  1705. /*
  1706. * SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated
  1707. * in some other operating systems
  1708. @@ -548,6 +551,7 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_
  1709. SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , "");
  1710. SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, &netmap_generic_txqdisc, 0 , "");
  1711. SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0 , "");
  1712. +SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW, &ptnetmap_tx_workers, 0 , "");
  1713.  
  1714. SYSEND;
  1715.  
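Assuming the usual dev.netmap prefix produced by the SYSCTL_INT() declaration above, the new knob shows up as dev.netmap.ptnetmap_tx_workers; setting it to 0, e.g. with

	sysctl dev.netmap.ptnetmap_tx_workers=0

tells ptnetmap not to spawn kernel worker threads for TX processing, matching the comment where the variable is introduced.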
  1716. @@ -669,7 +673,7 @@ nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
  1717. op = "Clamp";
  1718. }
  1719. if (op && msg)
  1720. - printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
  1721. + nm_prinf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
  1722. return *v;
  1723. }
  1724.  
  1725. @@ -801,13 +805,18 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
  1726. u_int n[NR_TXRX];
  1727. enum txrx t;
  1728.  
  1729. + if (na->tx_rings != NULL) {
  1730. + D("warning: krings were already created");
  1731. + return 0;
  1732. + }
  1733. +
  1734. /* account for the (possibly fake) host rings */
  1735. n[NR_TX] = na->num_tx_rings + 1;
  1736. n[NR_RX] = na->num_rx_rings + 1;
  1737.  
  1738. len = (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring) + tailroom;
  1739.  
  1740. - na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
  1741. + na->tx_rings = nm_os_malloc((size_t)len);
  1742. if (na->tx_rings == NULL) {
  1743. D("Cannot allocate krings");
  1744. return ENOMEM;
  1745. @@ -866,6 +875,11 @@ netmap_krings_delete(struct netmap_adapter *na)
  1746. struct netmap_kring *kring = na->tx_rings;
  1747. enum txrx t;
  1748.  
  1749. + if (na->tx_rings == NULL) {
  1750. + D("warning: krings were already deleted");
  1751. + return;
  1752. + }
  1753. +
  1754. for_rx_tx(t)
  1755. nm_os_selinfo_uninit(&na->si[t]);
  1756.  
  1757. @@ -874,7 +888,7 @@ netmap_krings_delete(struct netmap_adapter *na)
  1758. mtx_destroy(&kring->q_lock);
  1759. nm_os_selinfo_uninit(&kring->si);
  1760. }
  1761. - free(na->tx_rings, M_DEVBUF);
  1762. + nm_os_free(na->tx_rings);
  1763. na->tx_rings = na->rx_rings = na->tailroom = NULL;
  1764. }
  1765.  
  1766. @@ -983,8 +997,7 @@ netmap_priv_new(void)
  1767. {
  1768. struct netmap_priv_d *priv;
  1769.  
  1770. - priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
  1771. - M_NOWAIT | M_ZERO);
  1772. + priv = nm_os_malloc(sizeof(struct netmap_priv_d));
  1773. if (priv == NULL)
  1774. return NULL;
  1775. priv->np_refs = 1;
  1776. @@ -1016,7 +1029,7 @@ netmap_priv_delete(struct netmap_priv_d *priv)
  1777. }
  1778. netmap_unget_na(na, priv->np_ifp);
  1779. bzero(priv, sizeof(*priv)); /* for safety */
  1780. - free(priv, M_DEVBUF);
  1781. + nm_os_free(priv);
  1782. }
  1783.  
  1784.  
  1785. @@ -1032,20 +1045,27 @@ netmap_dtor(void *data)
  1786. }
  1787.  
  1788.  
  1789. -
  1790. -
  1791. /*
  1792. - * Handlers for synchronization of the queues from/to the host.
  1793. - * Netmap has two operating modes:
  1794. - * - in the default mode, the rings connected to the host stack are
  1795. - * just another ring pair managed by userspace;
  1796. - * - in transparent mode (XXX to be defined) incoming packets
  1797. - * (from the host or the NIC) are marked as NS_FORWARD upon
  1798. - * arrival, and the user application has a chance to reset the
  1799. - * flag for packets that should be dropped.
  1800. - * On the RXSYNC or poll(), packets in RX rings between
  1801. - * kring->nr_kcur and ring->cur with NS_FORWARD still set are moved
  1802. - * to the other side.
  1803. + * Handlers for synchronization of the rings from/to the host stack.
  1804. + * These are associated with a network interface and are just another
  1805. + * ring pair managed by userspace.
  1806. + *
  1807. + * Netmap also supports transparent forwarding (NS_FORWARD and NR_FORWARD
  1808. + * flags):
  1809. + *
  1810. + * - Before releasing buffers on hw RX rings, the application can mark
  1811. + * them with the NS_FORWARD flag. During the next RXSYNC or poll(), they
  1812. + * will be forwarded to the host stack, similarly to what would happen if
  1813. + * the application moved them to the host TX ring.
  1814. + *
  1815. + * - Before releasing buffers on the host RX ring, the application can
  1816. + * mark them with the NS_FORWARD flag. During the next RXSYNC or poll(),
  1817. + * they will be forwarded to the hw TX rings, saving the application
  1818. + * from doing the same task in user-space.
  1819. + *
  1820. + * Transparent forwarding can be enabled per-ring, by setting the NR_FORWARD
  1821. + * flag, or globally with the netmap_fwd sysctl.
  1822. + *
  1823. * The transfer NIC --> host is relatively easy, just encapsulate
  1824. * into mbufs and we are done. The host --> NIC side is slightly
  1825. * harder because there might not be room in the tx ring so it
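To make the new NS_FORWARD semantics concrete, here is a minimal user-space sketch; it assumes the nm_open() helpers from net/netmap_user.h and an interface named em0, with error handling omitted. Every buffer received on the hw RX rings is marked NS_FORWARD before being released, so the next sync on the descriptor (a poll(), or the NIOCRXSYNC path changed later in this patch) pushes a copy up to the host stack:

#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>
#include <poll.h>

static void
forward_rx_to_host(struct nm_desc *d)
{
	struct pollfd pfd = { .fd = NETMAP_FD(d), .events = POLLIN };
	int r;

	poll(&pfd, 1, 1000);
	for (r = d->first_rx_ring; r <= d->last_rx_ring; r++) {
		struct netmap_ring *ring = NETMAP_RXRING(d->nifp, r);

		ring->flags |= NR_FORWARD;	/* enable forwarding on this ring */
		while (!nm_ring_empty(ring)) {
			/* mark the buffer before releasing it */
			ring->slot[ring->cur].flags |= NS_FORWARD;
			ring->cur = nm_ring_next(ring, ring->cur);
		}
		ring->head = ring->cur;		/* release the marked buffers */
	}
	/* the next sync on this fd copies the marked buffers to the host stack */
}

int
main(void)
{
	struct nm_desc *d = nm_open("netmap:em0", NULL, 0, NULL);
	int i;

	if (d == NULL)
		return 1;
	for (i = 0; i < 1000; i++)
		forward_rx_to_host(d);
	nm_close(d);
	return 0;
}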
  1826. @@ -1054,8 +1074,9 @@ netmap_dtor(void *data)
  1827.  
  1828.  
  1829. /*
  1830. - * pass a chain of buffers to the host stack as coming from 'dst'
  1831. + * Pass a whole queue of mbufs to the host stack as coming from 'dst'
  1832. * We do not need to lock because the queue is private.
  1833. + * After this call the queue is empty.
  1834. */
  1835. static void
  1836. netmap_send_up(struct ifnet *dst, struct mbq *q)
  1837. @@ -1063,7 +1084,8 @@ netmap_send_up(struct ifnet *dst, struct mbq *q)
  1838. struct mbuf *m;
  1839. struct mbuf *head = NULL, *prev = NULL;
  1840.  
  1841. - /* send packets up, outside the lock */
  1842. + /* Send packets up, outside the lock; head/prev machinery
  1843. + * is only useful for Windows. */
  1844. while ((m = mbq_dequeue(q)) != NULL) {
  1845. if (netmap_verbose & NM_VERB_HOST)
  1846. D("sending up pkt %p size %d", m, MBUF_LEN(m));
  1847. @@ -1078,9 +1100,9 @@ netmap_send_up(struct ifnet *dst, struct mbq *q)
  1848.  
  1849.  
  1850. /*
  1851. - * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
  1852. - * Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
  1853. - * and pass them up. Drop remaining packets in the unlikely event
  1854. + * Scan the buffers from hwcur to ring->head, and put a copy of those
  1855. + * marked NS_FORWARD (or all of them if forced) into a queue of mbufs.
  1856. + * Drop remaining packets in the unlikely event
  1857. * of an mbuf shortage.
  1858. */
  1859. static void
  1860. @@ -1127,16 +1149,24 @@ nm_may_forward_up(struct netmap_kring *kring)
  1861. }
  1862.  
  1863. static inline int
  1864. -nm_may_forward_down(struct netmap_kring *kring)
  1865. +nm_may_forward_down(struct netmap_kring *kring, int sync_flags)
  1866. {
  1867. return _nm_may_forward(kring) &&
  1868. + (sync_flags & NAF_CAN_FORWARD_DOWN) &&
  1869. kring->ring_id == kring->na->num_rx_rings;
  1870. }
  1871.  
  1872. /*
  1873. * Send to the NIC rings packets marked NS_FORWARD between
  1874. - * kring->nr_hwcur and kring->rhead
  1875. - * Called under kring->rx_queue.lock on the sw rx ring,
  1876. + * kring->nr_hwcur and kring->rhead.
  1877. + * Called under kring->rx_queue.lock on the sw rx ring.
  1878. + *
  1879. + * It can only be called if the user opened all the TX hw rings,
  1880. + * see NAF_CAN_FORWARD_DOWN flag.
  1881. + * We can touch the TX netmap rings (slots, head and cur) since
  1882. + * we are in poll/ioctl system call context, and the application
  1883. + * is not supposed to touch the ring (using a different thread)
  1884. + * during the execution of the system call.
  1885. */
  1886. static u_int
  1887. netmap_sw_to_nic(struct netmap_adapter *na)
  1888. @@ -1179,7 +1209,7 @@ netmap_sw_to_nic(struct netmap_adapter *na)
  1889.  
  1890. rdst->head = rdst->cur = nm_next(dst_head, dst_lim);
  1891. }
  1892. - /* if (sent) XXX txsync ? */
  1893. + /* if (sent) XXX txsync ? it would be just an optimization */
  1894. }
  1895. return sent;
  1896. }
  1897. @@ -1200,9 +1230,7 @@ netmap_txsync_to_host(struct netmap_kring *kring, int flags)
  1898. struct mbq q;
  1899.  
  1900. /* Take packets from hwcur to head and pass them up.
  1901. - * force head = cur since netmap_grab_packets() stops at head
  1902. - * In case of no buffers we give up. At the end of the loop,
  1903. - * the queue is drained in all cases.
  1904. + * Force hwcur = head since netmap_grab_packets() stops at head
  1905. */
  1906. mbq_init(&q);
  1907. netmap_grab_packets(kring, &q, 1 /* force */);
  1908. @@ -1222,11 +1250,9 @@ netmap_txsync_to_host(struct netmap_kring *kring, int flags)
  1909. * They have been put in kring->rx_queue by netmap_transmit().
  1910. * We protect access to the kring using kring->rx_queue.lock
  1911. *
  1912. - * This routine also does the selrecord if called from the poll handler
  1913. - * (we know because sr != NULL).
  1914. - *
  1915. - * returns the number of packets delivered to tx queues in
  1916. - * transparent mode, or a negative value if error
  1917. + * Also moves to the nic hw rings any packet the user has marked
  1918. + * for transparent-mode forwarding, then sets the NR_FORWARD
  1919. + * flag in the kring to let the caller push them out
  1920. */
  1921. static int
  1922. netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
  1923. @@ -1250,7 +1276,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
  1924. uint32_t stop_i;
  1925.  
  1926. nm_i = kring->nr_hwtail;
  1927. - stop_i = nm_prev(nm_i, lim);
  1928. + stop_i = nm_prev(kring->nr_hwcur, lim);
  1929. while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
  1930. int len = MBUF_LEN(m);
  1931. struct netmap_slot *slot = &ring->slot[nm_i];
  1932. @@ -1273,7 +1299,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
  1933. */
  1934. nm_i = kring->nr_hwcur;
  1935. if (nm_i != head) { /* something was released */
  1936. - if (nm_may_forward_down(kring)) {
  1937. + if (nm_may_forward_down(kring, flags)) {
  1938. ret = netmap_sw_to_nic(na);
  1939. if (ret > 0) {
  1940. kring->nr_kflags |= NR_FORWARD;
  1941. @@ -1317,7 +1343,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
  1942. */
  1943. static void netmap_hw_dtor(struct netmap_adapter *); /* needed by NM_IS_NATIVE() */
  1944. int
  1945. -netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
  1946. +netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adapter **na)
  1947. {
  1948. /* generic support */
  1949. int i = netmap_admode; /* Take a snapshot. */
  1950. @@ -1348,7 +1374,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
  1951. #endif
  1952. ) {
  1953. *na = prev_na;
  1954. - return 0;
  1955. + goto assign_mem;
  1956. }
  1957. }
  1958.  
  1959. @@ -1377,10 +1403,17 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
  1960. return error;
  1961.  
  1962. *na = NA(ifp);
  1963. +
  1964. +assign_mem:
  1965. + if (nmd != NULL && !((*na)->na_flags & NAF_MEM_OWNER) &&
  1966. + (*na)->active_fds == 0 && ((*na)->nm_mem != nmd)) {
  1967. + netmap_mem_put((*na)->nm_mem);
  1968. + (*na)->nm_mem = netmap_mem_get(nmd);
  1969. + }
  1970. +
  1971. return 0;
  1972. }
  1973.  
  1974. -
  1975. /*
  1976. * MUST BE CALLED UNDER NMG_LOCK()
  1977. *
  1978. @@ -1400,16 +1433,28 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
  1979. */
  1980. int
  1981. netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
  1982. - struct ifnet **ifp, int create)
  1983. + struct ifnet **ifp, struct netmap_mem_d *nmd, int create)
  1984. {
  1985. int error = 0;
  1986. struct netmap_adapter *ret = NULL;
  1987. + int nmd_ref = 0;
  1988.  
  1989. *na = NULL; /* default return value */
  1990. *ifp = NULL;
  1991.  
  1992. NMG_LOCK_ASSERT();
  1993.  
  1994. + /* if the request contains a memid, try to find the
  1995. + * corresponding memory region
  1996. + */
  1997. + if (nmd == NULL && nmr->nr_arg2) {
  1998. + nmd = netmap_mem_find(nmr->nr_arg2);
  1999. + if (nmd == NULL)
  2000. + return EINVAL;
  2001. + /* keep the reference */
  2002. + nmd_ref = 1;
  2003. + }
  2004. +
  2005. /* We cascade through all possible types of netmap adapter.
  2006. * All netmap_get_*_na() functions return an error and an na,
  2007. * with the following combinations:
  2008. @@ -1422,24 +1467,24 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
  2009. */
  2010.  
  2011. /* try to see if this is a ptnetmap port */
  2012. - error = netmap_get_pt_host_na(nmr, na, create);
  2013. + error = netmap_get_pt_host_na(nmr, na, nmd, create);
  2014. if (error || *na != NULL)
  2015. - return error;
  2016. + goto out;
  2017.  
  2018. /* try to see if this is a monitor port */
  2019. - error = netmap_get_monitor_na(nmr, na, create);
  2020. + error = netmap_get_monitor_na(nmr, na, nmd, create);
  2021. if (error || *na != NULL)
  2022. - return error;
  2023. + goto out;
  2024.  
  2025. /* try to see if this is a pipe port */
  2026. - error = netmap_get_pipe_na(nmr, na, create);
  2027. + error = netmap_get_pipe_na(nmr, na, nmd, create);
  2028. if (error || *na != NULL)
  2029. - return error;
  2030. + goto out;
  2031.  
  2032. /* try to see if this is a bridge port */
  2033. - error = netmap_get_bdg_na(nmr, na, create);
  2034. + error = netmap_get_bdg_na(nmr, na, nmd, create);
  2035. if (error)
  2036. - return error;
  2037. + goto out;
  2038.  
  2039. if (*na != NULL) /* valid match in netmap_get_bdg_na() */
  2040. goto out;
  2041. @@ -1452,10 +1497,11 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
  2042. */
  2043. *ifp = ifunit_ref(nmr->nr_name);
  2044. if (*ifp == NULL) {
  2045. - return ENXIO;
  2046. + error = ENXIO;
  2047. + goto out;
  2048. }
  2049.  
  2050. - error = netmap_get_hw_na(*ifp, &ret);
  2051. + error = netmap_get_hw_na(*ifp, nmd, &ret);
  2052. if (error)
  2053. goto out;
  2054.  
  2055. @@ -1471,6 +1517,8 @@ out:
  2056. *ifp = NULL;
  2057. }
  2058. }
  2059. + if (nmd_ref)
  2060. + netmap_mem_put(nmd);
  2061.  
  2062. return error;
  2063. }
  2064. @@ -1712,7 +1760,8 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
  2065. D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
  2066. }
  2067.  
  2068. - if ((flags & NR_PTNETMAP_HOST) && (reg != NR_REG_ALL_NIC ||
  2069. + if ((flags & NR_PTNETMAP_HOST) && ((reg != NR_REG_ALL_NIC &&
  2070. + reg != NR_REG_PIPE_MASTER && reg != NR_REG_PIPE_SLAVE) ||
  2071. flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
  2072. D("Error: only NR_REG_ALL_NIC supported with netmap passthrough");
  2073. return EINVAL;
  2074. @@ -1766,6 +1815,13 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
  2075. }
  2076. priv->np_flags = (flags & ~NR_REG_MASK) | reg;
  2077.  
  2078. + /* Allow transparent forwarding mode in the host --> nic
  2079. + * direction only if all the TX hw rings have been opened. */
  2080. + if (priv->np_qfirst[NR_TX] == 0 &&
  2081. + priv->np_qlast[NR_TX] >= na->num_tx_rings) {
  2082. + priv->np_sync_flags |= NAF_CAN_FORWARD_DOWN;
  2083. + }
  2084. +
  2085. if (netmap_verbose) {
  2086. D("%s: tx [%d,%d) rx [%d,%d) id %d",
  2087. na->name,
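In practice this means the host-to-NIC direction of transparent forwarding is only available when the file descriptor covers every hw TX ring. With the usual nm_open()/netmap(4) port-name suffixes (used here only as an illustration), "netmap:nfe0*" (all hw rings plus the host rings) qualifies, while "netmap:nfe0-2" (a single ring pair) leaves NAF_CAN_FORWARD_DOWN unset, so NS_FORWARD marks on the host RX ring are not pushed down.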
  2088. @@ -2029,7 +2085,7 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
  2089. goto err_rel_excl;
  2090.  
  2091. /* in all cases, create a new netmap if */
  2092. - nifp = netmap_mem_if_new(na);
  2093. + nifp = netmap_mem_if_new(na, priv);
  2094. if (nifp == NULL) {
  2095. error = ENOMEM;
  2096. goto err_del_rings;
  2097. @@ -2103,6 +2159,16 @@ nm_sync_finalize(struct netmap_kring *kring)
  2098. kring->rhead, kring->rcur, kring->rtail);
  2099. }
  2100.  
  2101. +/* set ring timestamp */
  2102. +static inline void
  2103. +ring_timestamp_set(struct netmap_ring *ring)
  2104. +{
  2105. + if (netmap_no_timestamp == 0 || ring->flags & NR_TIMESTAMP) {
  2106. + microtime(&ring->ts);
  2107. + }
  2108. +}
  2109. +
  2110. +
  2111. /*
  2112. * ioctl(2) support for the "netmap" device.
  2113. *
  2114. @@ -2118,13 +2184,16 @@ nm_sync_finalize(struct netmap_kring *kring)
  2115. int
  2116. netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *td)
  2117. {
  2118. + struct mbq q; /* packets from RX hw queues to host stack */
  2119. struct nmreq *nmr = (struct nmreq *) data;
  2120. struct netmap_adapter *na = NULL;
  2121. + struct netmap_mem_d *nmd = NULL;
  2122. struct ifnet *ifp = NULL;
  2123. int error = 0;
  2124. u_int i, qfirst, qlast;
  2125. struct netmap_if *nifp;
  2126. struct netmap_kring *krings;
  2127. + int sync_flags;
  2128. enum txrx t;
  2129.  
  2130. if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
  2131. @@ -2152,19 +2221,24 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
  2132. NMG_LOCK();
  2133. do {
  2134. /* memsize is always valid */
  2135. - struct netmap_mem_d *nmd = &nm_mem;
  2136. u_int memflags;
  2137.  
  2138. if (nmr->nr_name[0] != '\0') {
  2139.  
  2140. /* get a refcount */
  2141. - error = netmap_get_na(nmr, &na, &ifp, 1 /* create */);
  2142. + error = netmap_get_na(nmr, &na, &ifp, NULL, 1 /* create */);
  2143. if (error) {
  2144. na = NULL;
  2145. ifp = NULL;
  2146. break;
  2147. }
  2148. nmd = na->nm_mem; /* get memory allocator */
  2149. + } else {
  2150. + nmd = netmap_mem_find(nmr->nr_arg2 ? nmr->nr_arg2 : 1);
  2151. + if (nmd == NULL) {
  2152. + error = EINVAL;
  2153. + break;
  2154. + }
  2155. }
  2156.  
  2157. error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
  2158. @@ -2210,7 +2284,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
  2159. struct ifnet *ifp;
  2160.  
  2161. NMG_LOCK();
  2162. - error = netmap_get_na(nmr, &na, &ifp, 0);
  2163. + error = netmap_get_na(nmr, &na, &ifp, NULL, 0);
  2164. if (na && !error) {
  2165. nmr->nr_arg1 = na->virt_hdr_len;
  2166. }
  2167. @@ -2219,7 +2293,14 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
  2168. break;
  2169. } else if (i == NETMAP_POOLS_INFO_GET) {
  2170. /* get information from the memory allocator */
  2171. - error = netmap_mem_pools_info_get(nmr, priv->np_na);
  2172. + NMG_LOCK();
  2173. + if (priv->np_na && priv->np_na->nm_mem) {
  2174. + struct netmap_mem_d *nmd = priv->np_na->nm_mem;
  2175. + error = netmap_mem_pools_info_get(nmr, nmd);
  2176. + } else {
  2177. + error = EINVAL;
  2178. + }
  2179. + NMG_UNLOCK();
  2180. break;
  2181. } else if (i != 0) {
  2182. D("nr_cmd must be 0 not %d", i);
  2183. @@ -2237,26 +2318,32 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
  2184. error = EBUSY;
  2185. break;
  2186. }
  2187. +
  2188. + if (nmr->nr_arg2) {
  2189. + /* find the allocator and get a reference */
  2190. + nmd = netmap_mem_find(nmr->nr_arg2);
  2191. + if (nmd == NULL) {
  2192. + error = EINVAL;
  2193. + break;
  2194. + }
  2195. + }
  2196. /* find the interface and a reference */
  2197. - error = netmap_get_na(nmr, &na, &ifp,
  2198. + error = netmap_get_na(nmr, &na, &ifp, nmd,
  2199. 1 /* create */); /* keep reference */
  2200. if (error)
  2201. break;
  2202. if (NETMAP_OWNED_BY_KERN(na)) {
  2203. - netmap_unget_na(na, ifp);
  2204. error = EBUSY;
  2205. break;
  2206. }
  2207.  
  2208. if (na->virt_hdr_len && !(nmr->nr_flags & NR_ACCEPT_VNET_HDR)) {
  2209. - netmap_unget_na(na, ifp);
  2210. error = EIO;
  2211. break;
  2212. }
  2213.  
  2214. error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags);
  2215. if (error) { /* reg. failed, release priv and ref */
  2216. - netmap_unget_na(na, ifp);
  2217. break;
  2218. }
  2219. nifp = priv->np_nifp;
  2220. @@ -2271,7 +2358,6 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
  2221. &nmr->nr_arg2);
  2222. if (error) {
  2223. netmap_do_unregif(priv);
  2224. - netmap_unget_na(na, ifp);
  2225. break;
  2226. }
  2227. if (memflags & NETMAP_MEM_PRIVATE) {
  2228. @@ -2295,6 +2381,14 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
  2229. /* store ifp reference so that priv destructor may release it */
  2230. priv->np_ifp = ifp;
  2231. } while (0);
  2232. + if (error) {
  2233. + netmap_unget_na(na, ifp);
  2234. + }
  2235. + /* release the reference from netmap_mem_find() or
  2236. + * netmap_mem_ext_create()
  2237. + */
  2238. + if (nmd)
  2239. + netmap_mem_put(nmd);
  2240. NMG_UNLOCK();
  2241. break;
  2242.  
  2243. @@ -2316,10 +2410,12 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
  2244. break;
  2245. }
  2246.  
  2247. + mbq_init(&q);
  2248. t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
  2249. krings = NMR(na, t);
  2250. qfirst = priv->np_qfirst[t];
  2251. qlast = priv->np_qlast[t];
  2252. + sync_flags = priv->np_sync_flags;
  2253.  
  2254. for (i = qfirst; i < qlast; i++) {
  2255. struct netmap_kring *kring = krings + i;
  2256. @@ -2337,7 +2433,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
  2257. kring->nr_hwcur);
  2258. if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
  2259. netmap_ring_reinit(kring);
  2260. - } else if (kring->nm_sync(kring, NAF_FORCE_RECLAIM) == 0) {
  2261. + } else if (kring->nm_sync(kring, sync_flags | NAF_FORCE_RECLAIM) == 0) {
  2262. nm_sync_finalize(kring);
  2263. }
  2264. if (netmap_verbose & NM_VERB_TXSYNC)
  2265. @@ -2347,14 +2443,23 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
  2266. } else {
  2267. if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
  2268. netmap_ring_reinit(kring);
  2269. - } else if (kring->nm_sync(kring, NAF_FORCE_READ) == 0) {
  2270. + }
  2271. + if (nm_may_forward_up(kring)) {
  2272. + /* transparent forwarding, see netmap_poll() */
  2273. + netmap_grab_packets(kring, &q, netmap_fwd);
  2274. + }
  2275. + if (kring->nm_sync(kring, sync_flags | NAF_FORCE_READ) == 0) {
  2276. nm_sync_finalize(kring);
  2277. }
  2278. - microtime(&ring->ts);
  2279. + ring_timestamp_set(ring);
  2280. }
  2281. nm_kr_put(kring);
  2282. }
  2283.  
  2284. + if (mbq_peek(&q)) {
  2285. + netmap_send_up(na->ifp, &q);
  2286. + }
  2287. +
  2288. break;
  2289.  
  2290. #ifdef WITH_VALE
  2291. @@ -2425,7 +2530,7 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
  2292. u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0;
  2293. #define want_tx want[NR_TX]
  2294. #define want_rx want[NR_RX]
  2295. - struct mbq q; /* packets from hw queues to host stack */
  2296. + struct mbq q; /* packets from RX hw queues to host stack */
  2297. enum txrx t;
  2298.  
  2299. /*
  2300. @@ -2435,11 +2540,14 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
  2301. */
  2302. int retry_tx = 1, retry_rx = 1;
  2303.  
  2304. - /* transparent mode: send_down is 1 if we have found some
  2305. - * packets to forward during the rx scan and we have not
  2306. - * sent them down to the nic yet
  2307. + /* Transparent mode: send_down is 1 if we have found some
  2308. + * packets to forward (host RX ring --> NIC) during the rx
  2309. + * scan and we have not sent them down to the NIC yet.
  2310. + * Transparent mode requires binding all rings to a single
  2311. + * file descriptor.
  2312. */
  2313. int send_down = 0;
  2314. + int sync_flags = priv->np_sync_flags;
  2315.  
  2316. mbq_init(&q);
  2317.  
  2318. @@ -2549,7 +2657,7 @@ flush_tx:
  2319. netmap_ring_reinit(kring);
  2320. revents |= POLLERR;
  2321. } else {
  2322. - if (kring->nm_sync(kring, 0))
  2323. + if (kring->nm_sync(kring, sync_flags))
  2324. revents |= POLLERR;
  2325. else
  2326. nm_sync_finalize(kring);
  2327. @@ -2602,25 +2710,23 @@ do_retry_rx:
  2328. /* now we can use kring->rcur, rtail */
  2329.  
  2330. /*
  2331. - * transparent mode support: collect packets
  2332. - * from the rxring(s).
  2333. + * transparent mode support: collect packets from
  2334. + * hw rxring(s) that have been released by the user
  2335. */
  2336. if (nm_may_forward_up(kring)) {
  2337. - ND(10, "forwarding some buffers up %d to %d",
  2338. - kring->nr_hwcur, ring->cur);
  2339. netmap_grab_packets(kring, &q, netmap_fwd);
  2340. }
  2341.  
  2342. + /* Clear the NR_FORWARD flag anyway, it may be set by
  2343. + * the nm_sync() below, and only for the host RX ring (see
  2344. + * netmap_rxsync_from_host()). */
  2345. kring->nr_kflags &= ~NR_FORWARD;
  2346. - if (kring->nm_sync(kring, 0))
  2347. + if (kring->nm_sync(kring, sync_flags))
  2348. revents |= POLLERR;
  2349. else
  2350. nm_sync_finalize(kring);
  2351. - send_down |= (kring->nr_kflags & NR_FORWARD); /* host ring only */
  2352. - if (netmap_no_timestamp == 0 ||
  2353. - ring->flags & NR_TIMESTAMP) {
  2354. - microtime(&ring->ts);
  2355. - }
  2356. + send_down |= (kring->nr_kflags & NR_FORWARD);
  2357. + ring_timestamp_set(ring);
  2358. found = kring->rcur != kring->rtail;
  2359. nm_kr_put(kring);
  2360. if (found) {
  2361. @@ -2634,7 +2740,7 @@ do_retry_rx:
  2362. nm_os_selrecord(sr, check_all_rx ?
  2363. &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
  2364. }
  2365. - if (send_down > 0 || retry_rx) {
  2366. + if (send_down || retry_rx) {
  2367. retry_rx = 0;
  2368. if (send_down)
  2369. goto flush_tx; /* and retry_rx */
  2370. @@ -2644,17 +2750,13 @@ do_retry_rx:
  2371. }
  2372.  
  2373. /*
  2374. - * Transparent mode: marked bufs on rx rings between
  2375. - * kring->nr_hwcur and ring->head
  2376. - * are passed to the other endpoint.
  2377. - *
  2378. - * Transparent mode requires to bind all
  2379. - * rings to a single file descriptor.
  2380. + * Transparent mode: released bufs (i.e. between kring->nr_hwcur and
  2381. + * ring->head) marked with NS_FORWARD on hw rx rings are passed up
  2382. + * to the host stack.
  2383. */
  2384.  
  2385. - if (q.head && !nm_kr_tryget(&na->tx_rings[na->num_tx_rings], 1, &revents)) {
  2386. + if (mbq_peek(&q)) {
  2387. netmap_send_up(na->ifp, &q);
  2388. - nm_kr_put(&na->tx_rings[na->num_tx_rings]);
  2389. }
  2390.  
  2391. return (revents);
  2392. @@ -2683,22 +2785,6 @@ netmap_notify(struct netmap_kring *kring, int flags)
  2393. return NM_IRQ_COMPLETED;
  2394. }
  2395.  
  2396. -#if 0
  2397. -static int
  2398. -netmap_notify(struct netmap_adapter *na, u_int n_ring,
  2399. -enum txrx tx, int flags)
  2400. -{
  2401. - if (tx == NR_TX) {
  2402. - KeSetEvent(notes->TX_EVENT, 0, FALSE);
  2403. - }
  2404. - else
  2405. - {
  2406. - KeSetEvent(notes->RX_EVENT, 0, FALSE);
  2407. - }
  2408. - return 0;
  2409. -}
  2410. -#endif
  2411. -
  2412. /* called by all routines that create netmap_adapters.
  2413. * provide some defaults and get a reference to the
  2414. * memory allocator
  2415. @@ -2729,10 +2815,10 @@ netmap_attach_common(struct netmap_adapter *na)
  2416. na->nm_notify = netmap_notify;
  2417. na->active_fds = 0;
  2418.  
  2419. - if (na->nm_mem == NULL)
  2420. + if (na->nm_mem == NULL) {
  2421. /* use the global allocator */
  2422. - na->nm_mem = &nm_mem;
  2423. - netmap_mem_get(na->nm_mem);
  2424. + na->nm_mem = netmap_mem_get(&nm_mem);
  2425. + }
  2426. #ifdef WITH_VALE
  2427. if (na->nm_bdg_attach == NULL)
  2428. /* no special nm_bdg_attach callback. On VALE
  2429. @@ -2757,7 +2843,7 @@ netmap_detach_common(struct netmap_adapter *na)
  2430. if (na->nm_mem)
  2431. netmap_mem_put(na->nm_mem);
  2432. bzero(na, sizeof(*na));
  2433. - free(na, M_DEVBUF);
  2434. + nm_os_free(na);
  2435. }
  2436.  
  2437. /* Wrapper for the register callback provided netmap-enabled
  2438. @@ -2804,26 +2890,28 @@ netmap_hw_dtor(struct netmap_adapter *na)
  2439.  
  2440.  
  2441. /*
  2442. - * Allocate a ``netmap_adapter`` object, and initialize it from the
  2443. + * Allocate a netmap_adapter object, and initialize it from the
  2444. * 'arg' passed by the driver on attach.
  2445. - * We allocate a block of memory with room for a struct netmap_adapter
  2446. - * plus two sets of N+2 struct netmap_kring (where N is the number
  2447. - * of hardware rings):
  2448. - * krings 0..N-1 are for the hardware queues.
  2449. - * kring N is for the host stack queue
  2450. - * kring N+1 is only used for the selinfo for all queues. // XXX still true ?
  2451. + * We allocate a block of memory of 'size' bytes, which has room
  2452. + * for struct netmap_adapter plus additional room private to
  2453. + * the caller.
  2454. * Return 0 on success, ENOMEM otherwise.
  2455. */
  2456. -static int
  2457. -_netmap_attach(struct netmap_adapter *arg, size_t size)
  2458. +int
  2459. +netmap_attach_ext(struct netmap_adapter *arg, size_t size)
  2460. {
  2461. struct netmap_hw_adapter *hwna = NULL;
  2462. struct ifnet *ifp = NULL;
  2463.  
  2464. + if (size < sizeof(struct netmap_hw_adapter)) {
  2465. + D("Invalid netmap adapter size %d", (int)size);
  2466. + return EINVAL;
  2467. + }
  2468. +
  2469. if (arg == NULL || arg->ifp == NULL)
  2470. goto fail;
  2471. ifp = arg->ifp;
  2472. - hwna = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
  2473. + hwna = nm_os_malloc(size);
  2474. if (hwna == NULL)
  2475. goto fail;
  2476. hwna->up = *arg;
  2477. @@ -2832,7 +2920,7 @@ _netmap_attach(struct netmap_adapter *arg, size_t size)
  2478. hwna->nm_hw_register = hwna->up.nm_register;
  2479. hwna->up.nm_register = netmap_hw_reg;
  2480. if (netmap_attach_common(&hwna->up)) {
  2481. - free(hwna, M_DEVBUF);
  2482. + nm_os_free(hwna);
  2483. goto fail;
  2484. }
  2485. netmap_adapter_get(&hwna->up);
  2486. @@ -2878,46 +2966,8 @@ fail:
  2487. int
  2488. netmap_attach(struct netmap_adapter *arg)
  2489. {
  2490. - return _netmap_attach(arg, sizeof(struct netmap_hw_adapter));
  2491. -}
  2492. -
  2493. -
  2494. -#ifdef WITH_PTNETMAP_GUEST
  2495. -int
  2496. -netmap_pt_guest_attach(struct netmap_adapter *arg, void *csb,
  2497. - unsigned int nifp_offset, unsigned int memid)
  2498. -{
  2499. - struct netmap_pt_guest_adapter *ptna;
  2500. - struct ifnet *ifp = arg ? arg->ifp : NULL;
  2501. - int error;
  2502. -
  2503. - /* get allocator */
  2504. - arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
  2505. - if (arg->nm_mem == NULL)
  2506. - return ENOMEM;
  2507. - arg->na_flags |= NAF_MEM_OWNER;
  2508. - error = _netmap_attach(arg, sizeof(struct netmap_pt_guest_adapter));
  2509. - if (error)
  2510. - return error;
  2511. -
  2512. - /* get the netmap_pt_guest_adapter */
  2513. - ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
  2514. - ptna->csb = csb;
  2515. -
  2516. - /* Initialize a separate pass-through netmap adapter that is going to
  2517. - * be used by the ptnet driver only, and so never exposed to netmap
  2518. - * applications. We only need a subset of the available fields. */
  2519. - memset(&ptna->dr, 0, sizeof(ptna->dr));
  2520. - ptna->dr.up.ifp = ifp;
  2521. - ptna->dr.up.nm_mem = ptna->hwup.up.nm_mem;
  2522. - netmap_mem_get(ptna->dr.up.nm_mem);
  2523. - ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
  2524. -
  2525. - ptna->backend_regifs = 0;
  2526. -
  2527. - return 0;
  2528. + return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter));
  2529. }
  2530. -#endif /* WITH_PTNETMAP_GUEST */
  2531.  
  2532.  
  2533. void
  2534. @@ -3019,7 +3069,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
  2535. u_int error = ENOBUFS;
  2536. unsigned int txr;
  2537. struct mbq *q;
  2538. - int space;
  2539. + int busy;
  2540.  
  2541. kring = &na->rx_rings[na->num_rx_rings];
  2542. // XXX [Linux] we do not need this lock
  2543. @@ -3052,28 +3102,27 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
  2544. }
  2545.  
  2546. if (nm_os_mbuf_has_offld(m)) {
  2547. - RD(1, "%s drop mbuf requiring offloadings", na->name);
  2548. + RD(1, "%s drop mbuf that needs offloading", na->name);
  2549. goto done;
  2550. }
  2551.  
  2552. - /* protect against rxsync_from_host(), netmap_sw_to_nic()
  2553. + /* protect against netmap_rxsync_from_host(), netmap_sw_to_nic()
  2554. * and maybe other instances of netmap_transmit (the latter
  2555. * not possible on Linux).
  2556. - * Also avoid overflowing the queue.
  2557. + * We enqueue the mbuf only if we are sure there is going to be
  2558. + * enough room in the host RX ring, otherwise we drop it.
  2559. */
  2560. mbq_lock(q);
  2561.  
  2562. - space = kring->nr_hwtail - kring->nr_hwcur;
  2563. - if (space < 0)
  2564. - space += kring->nkr_num_slots;
  2565. - if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX
  2566. - RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p",
  2567. - na->name, kring->nr_hwcur, kring->nr_hwtail, mbq_len(q),
  2568. - len, m);
  2569. + busy = kring->nr_hwtail - kring->nr_hwcur;
  2570. + if (busy < 0)
  2571. + busy += kring->nkr_num_slots;
  2572. + if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) {
  2573. + RD(2, "%s full hwcur %d hwtail %d qlen %d", na->name,
  2574. + kring->nr_hwcur, kring->nr_hwtail, mbq_len(q));
  2575. } else {
  2576. mbq_enqueue(q, m);
  2577. - ND(10, "%s %d bufs in queue len %d m %p",
  2578. - na->name, mbq_len(q), len, m);
  2579. + ND(2, "%s %d bufs in queue", na->name, mbq_len(q));
  2580. /* notify outside the lock */
  2581. m = NULL;
  2582. error = 0;
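A worked example of the new check: with a 1024-slot host RX ring, nr_hwcur = 1000 and nr_hwtail = 10 give busy = 10 - 1000 + 1024 = 34 slots already in use; the mbuf is enqueued only while mbq_len(q) stays below 1023 - 34 = 989, and is dropped otherwise rather than overflowing the ring.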
  2583. @@ -3293,7 +3342,7 @@ netmap_fini(void)
  2584. netmap_uninit_bridges();
  2585. netmap_mem_fini();
  2586. NMG_LOCK_DESTROY();
  2587. - printf("netmap: unloaded module.\n");
  2588. + nm_prinf("netmap: unloaded module.\n");
  2589. }
  2590.  
  2591.  
  2592. @@ -3330,7 +3379,7 @@ netmap_init(void)
  2593. if (error)
  2594. goto fail;
  2595.  
  2596. - printf("netmap: loaded module\n");
  2597. + nm_prinf("netmap: loaded module\n");
  2598. return (0);
  2599. fail:
  2600. netmap_fini();
  2601. diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
  2602. index fbbd9b35734..6d0453d3b82 100644
  2603. --- a/sys/dev/netmap/netmap_freebsd.c
  2604. +++ b/sys/dev/netmap/netmap_freebsd.c
  2605. @@ -23,7 +23,7 @@
  2606. * SUCH DAMAGE.
  2607. */
  2608.  
  2609. -/* $FreeBSD$ */
  2610. +/* $FreeBSD: head/sys/dev/netmap/netmap_freebsd.c 307706 2016-10-21 06:32:45Z sephe $ */
  2611. #include "opt_inet.h"
  2612. #include "opt_inet6.h"
  2613.  
  2614. @@ -89,6 +89,24 @@ nm_os_selinfo_uninit(NM_SELINFO_T *si)
  2615. mtx_destroy(&si->m);
  2616. }
  2617.  
  2618. +void *
  2619. +nm_os_malloc(size_t size)
  2620. +{
  2621. + return malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
  2622. +}
  2623. +
  2624. +void *
  2625. +nm_os_realloc(void *addr, size_t new_size, size_t old_size __unused)
  2626. +{
  2627. + return realloc(addr, new_size, M_DEVBUF, M_NOWAIT | M_ZERO);
  2628. +}
  2629. +
  2630. +void
  2631. +nm_os_free(void *addr)
  2632. +{
  2633. + free(addr, M_DEVBUF);
  2634. +}
  2635. +
  2636. void
  2637. nm_os_ifnet_lock(void)
  2638. {
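These three wrappers give the OS-independent parts of netmap a single allocation API; on FreeBSD they just forward to malloc(9)/realloc(9)/free(9) with M_DEVBUF and M_NOWAIT | M_ZERO. The rest of the patch converts the call sites mechanically, for example (pattern copied from the netmap_krings_create()/netmap_krings_delete() hunks earlier in this diff):

	/* before: FreeBSD-specific */
	na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
	/* ... */
	free(na->tx_rings, M_DEVBUF);

	/* after: portable across the FreeBSD, Linux and Windows back-ends */
	na->tx_rings = nm_os_malloc((size_t)len);
	/* ... */
	nm_os_free(na->tx_rings);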
  2639. @@ -235,7 +253,6 @@ nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
  2640. void *
  2641. nm_os_send_up(struct ifnet *ifp, struct mbuf *m, struct mbuf *prev)
  2642. {
  2643. -
  2644. NA(ifp)->if_input(ifp, m);
  2645. return NULL;
  2646. }
  2647. @@ -251,11 +268,17 @@ nm_os_mbuf_has_offld(struct mbuf *m)
  2648. static void
  2649. freebsd_generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
  2650. {
  2651. - struct netmap_generic_adapter *gna =
  2652. - (struct netmap_generic_adapter *)NA(ifp);
  2653. - int stolen = generic_rx_handler(ifp, m);
  2654. + int stolen;
  2655.  
  2656. + if (!NM_NA_VALID(ifp)) {
  2657. + RD(1, "Warning: got RX packet for invalid emulated adapter");
  2658. + return;
  2659. + }
  2660. +
  2661. + stolen = generic_rx_handler(ifp, m);
  2662. if (!stolen) {
  2663. + struct netmap_generic_adapter *gna =
  2664. + (struct netmap_generic_adapter *)NA(ifp);
  2665. gna->save_if_input(ifp, m);
  2666. }
  2667. }
  2668. @@ -386,7 +409,6 @@ netmap_getna(if_t ifp)
  2669. int
  2670. nm_os_generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
  2671. {
  2672. - D("called, in tx %d rx %d", *tx, *rx);
  2673. return 0;
  2674. }
  2675.  
  2676. @@ -394,9 +416,10 @@ nm_os_generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *r
  2677. void
  2678. nm_os_generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
  2679. {
  2680. - D("called, in txq %d rxq %d", *txq, *rxq);
  2681. - *txq = netmap_generic_rings;
  2682. - *rxq = netmap_generic_rings;
  2683. + unsigned num_rings = netmap_generic_rings ? netmap_generic_rings : 1;
  2684. +
  2685. + *txq = num_rings;
  2686. + *rxq = num_rings;
  2687. }
  2688.  
  2689. void
  2690. @@ -648,7 +671,7 @@ nm_os_pt_memdev_iomap(struct ptnetmap_memdev *ptn_dev, vm_paddr_t *nm_paddr,
  2691. &rid, 0, ~0, *mem_size, RF_ACTIVE);
  2692. if (ptn_dev->pci_mem == NULL) {
  2693. *nm_paddr = 0;
  2694. - *nm_addr = NULL;
  2695. + *nm_addr = 0;
  2696. return ENOMEM;
  2697. }
  2698.  
  2699. @@ -985,32 +1008,32 @@ nm_os_ncpus(void)
  2700. return mp_maxid + 1;
  2701. }
  2702.  
  2703. -struct nm_kthread_ctx {
  2704. +struct nm_kctx_ctx {
  2705. struct thread *user_td; /* thread user-space (kthread creator) to send ioctl */
  2706. struct ptnetmap_cfgentry_bhyve cfg;
  2707.  
  2708. /* worker function and parameter */
  2709. - nm_kthread_worker_fn_t worker_fn;
  2710. + nm_kctx_worker_fn_t worker_fn;
  2711. void *worker_private;
  2712.  
  2713. - struct nm_kthread *nmk;
  2714. + struct nm_kctx *nmk;
  2715.  
  2716. /* integer to manage multiple worker contexts (e.g., RX or TX on ptnetmap) */
  2717. long type;
  2718. };
  2719.  
  2720. -struct nm_kthread {
  2721. +struct nm_kctx {
  2722. struct thread *worker;
  2723. struct mtx worker_lock;
  2724. uint64_t scheduled; /* pending wake_up request */
  2725. - struct nm_kthread_ctx worker_ctx;
  2726. + struct nm_kctx_ctx worker_ctx;
  2727. int run; /* used to stop kthread */
  2728. int attach_user; /* kthread attached to user_process */
  2729. int affinity;
  2730. };
  2731.  
  2732. void inline
  2733. -nm_os_kthread_wakeup_worker(struct nm_kthread *nmk)
  2734. +nm_os_kctx_worker_wakeup(struct nm_kctx *nmk)
  2735. {
  2736. /*
  2737. * There may be a race between FE and BE,
  2738. @@ -1030,9 +1053,9 @@ nm_os_kthread_wakeup_worker(struct nm_kthread *nmk)
  2739. }
  2740.  
  2741. void inline
  2742. -nm_os_kthread_send_irq(struct nm_kthread *nmk)
  2743. +nm_os_kctx_send_irq(struct nm_kctx *nmk)
  2744. {
  2745. - struct nm_kthread_ctx *ctx = &nmk->worker_ctx;
  2746. + struct nm_kctx_ctx *ctx = &nmk->worker_ctx;
  2747. int err;
  2748.  
  2749. if (ctx->user_td && ctx->cfg.ioctl_fd > 0) {
  2750. @@ -1047,10 +1070,10 @@ nm_os_kthread_send_irq(struct nm_kthread *nmk)
  2751. }
  2752.  
  2753. static void
  2754. -nm_kthread_worker(void *data)
  2755. +nm_kctx_worker(void *data)
  2756. {
  2757. - struct nm_kthread *nmk = data;
  2758. - struct nm_kthread_ctx *ctx = &nmk->worker_ctx;
  2759. + struct nm_kctx *nmk = data;
  2760. + struct nm_kctx_ctx *ctx = &nmk->worker_ctx;
  2761. uint64_t old_scheduled = nmk->scheduled;
  2762.  
  2763. if (nmk->affinity >= 0) {
  2764. @@ -1077,7 +1100,7 @@ nm_kthread_worker(void *data)
  2765. * mechanism and we continually execute worker_fn()
  2766. */
  2767. if (!ctx->cfg.wchan) {
  2768. - ctx->worker_fn(ctx->worker_private); /* worker body */
  2769. + ctx->worker_fn(ctx->worker_private, 1); /* worker body */
  2770. } else {
  2771. /* checks if there is a pending notification */
  2772. mtx_lock(&nmk->worker_lock);
  2773. @@ -1085,13 +1108,13 @@ nm_kthread_worker(void *data)
  2774. old_scheduled = nmk->scheduled;
  2775. mtx_unlock(&nmk->worker_lock);
  2776.  
  2777. - ctx->worker_fn(ctx->worker_private); /* worker body */
  2778. + ctx->worker_fn(ctx->worker_private, 1); /* worker body */
  2779.  
  2780. continue;
  2781. } else if (nmk->run) {
  2782. /* wait on event with one second timeout */
  2783. - msleep((void *)(uintptr_t)ctx->cfg.wchan,
  2784. - &nmk->worker_lock, 0, "nmk_ev", hz);
  2785. + msleep((void *)(uintptr_t)ctx->cfg.wchan, &nmk->worker_lock,
  2786. + 0, "nmk_ev", hz);
  2787. nmk->scheduled++;
  2788. }
  2789. mtx_unlock(&nmk->worker_lock);
  2790. @@ -1102,16 +1125,16 @@ nm_kthread_worker(void *data)
  2791. }
  2792.  
  2793. void
  2794. -nm_os_kthread_set_affinity(struct nm_kthread *nmk, int affinity)
  2795. +nm_os_kctx_worker_setaff(struct nm_kctx *nmk, int affinity)
  2796. {
  2797. nmk->affinity = affinity;
  2798. }
  2799.  
  2800. -struct nm_kthread *
  2801. -nm_os_kthread_create(struct nm_kthread_cfg *cfg, unsigned int cfgtype,
  2802. +struct nm_kctx *
  2803. +nm_os_kctx_create(struct nm_kctx_cfg *cfg, unsigned int cfgtype,
  2804. void *opaque)
  2805. {
  2806. - struct nm_kthread *nmk = NULL;
  2807. + struct nm_kctx *nmk = NULL;
  2808.  
  2809. if (cfgtype != PTNETMAP_CFGTYPE_BHYVE) {
  2810. D("Unsupported cfgtype %u", cfgtype);
  2811. @@ -1140,7 +1163,7 @@ nm_os_kthread_create(struct nm_kthread_cfg *cfg, unsigned int cfgtype,
  2812. }
  2813.  
  2814. int
  2815. -nm_os_kthread_start(struct nm_kthread *nmk)
  2816. +nm_os_kctx_worker_start(struct nm_kctx *nmk)
  2817. {
  2818. struct proc *p = NULL;
  2819. int error = 0;
  2820. @@ -1158,7 +1181,7 @@ nm_os_kthread_start(struct nm_kthread *nmk)
  2821. /* enable kthread main loop */
  2822. nmk->run = 1;
  2823. /* create kthread */
  2824. - if((error = kthread_add(nm_kthread_worker, nmk, p,
  2825. + if((error = kthread_add(nm_kctx_worker, nmk, p,
  2826. &nmk->worker, RFNOWAIT /* to be checked */, 0, "nm-kthread-%ld",
  2827. nmk->worker_ctx.type))) {
  2828. goto err;
  2829. @@ -1174,7 +1197,7 @@ err:
  2830. }
  2831.  
  2832. void
  2833. -nm_os_kthread_stop(struct nm_kthread *nmk)
  2834. +nm_os_kctx_worker_stop(struct nm_kctx *nmk)
  2835. {
  2836. if (!nmk->worker) {
  2837. return;
  2838. @@ -1184,18 +1207,18 @@ nm_os_kthread_stop(struct nm_kthread *nmk)
  2839.  
  2840. /* wake up kthread if it sleeps */
  2841. kthread_resume(nmk->worker);
  2842. - nm_os_kthread_wakeup_worker(nmk);
  2843. + nm_os_kctx_worker_wakeup(nmk);
  2844.  
  2845. nmk->worker = NULL;
  2846. }
  2847.  
  2848. void
  2849. -nm_os_kthread_delete(struct nm_kthread *nmk)
  2850. +nm_os_kctx_destroy(struct nm_kctx *nmk)
  2851. {
  2852. if (!nmk)
  2853. return;
  2854. if (nmk->worker) {
  2855. - nm_os_kthread_stop(nmk);
  2856. + nm_os_kctx_worker_stop(nmk);
  2857. }
  2858.  
  2859. memset(&nmk->worker_ctx.cfg, 0, sizeof(nmk->worker_ctx.cfg));
  2860. diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
  2861. index 45dc9918498..f148b228115 100644
  2862. --- a/sys/dev/netmap/netmap_generic.c
  2863. +++ b/sys/dev/netmap/netmap_generic.c
  2864. @@ -65,7 +65,7 @@
  2865. #ifdef __FreeBSD__
  2866.  
  2867. #include <sys/cdefs.h> /* prerequisite */
  2868. -__FBSDID("$FreeBSD$");
  2869. +__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap_generic.c 274353 2014-11-10 20:19:58Z luigi $");
  2870.  
  2871. #include <sys/types.h>
  2872. #include <sys/errno.h>
  2873. @@ -109,13 +109,10 @@ __FBSDID("$FreeBSD$");
  2874. * chain into uma_zfree(zone_pack, mf)
  2875. * (or reinstall the buffer ?)
  2876. */
  2877. -static inline void
  2878. -set_mbuf_destructor(struct mbuf *m, void *fn)
  2879. -{
  2880. -
  2881. - m->m_ext.ext_free = fn;
  2882. - m->m_ext.ext_type = EXT_EXTREF;
  2883. -}
  2884. +#define SET_MBUF_DESTRUCTOR(m, fn) do { \
  2885. + (m)->m_ext.ext_free = (void *)fn; \
  2886. + (m)->m_ext.ext_type = EXT_EXTREF; \
  2887. +} while (0)
  2888.  
  2889. static int
  2890. void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2)
  2891. @@ -170,12 +167,9 @@ nm_os_get_mbuf(struct ifnet *ifp, int len)
  2892.  
  2893. static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { }
  2894.  
  2895. -static inline void
  2896. -set_mbuf_destructor(struct mbuf *m, void *fn)
  2897. -{
  2898. -
  2899. - m->m_ext.ext_free = (fn != NULL) ? fn : (void *)void_mbuf_dtor;
  2900. -}
  2901. +#define SET_MBUF_DESTRUCTOR(m, fn) do { \
  2902. + (m)->m_ext.ext_free = fn ? (void *)fn : (void *)void_mbuf_dtor; \
  2903. +} while (0)
  2904.  
  2905. static inline struct mbuf *
  2906. nm_os_get_mbuf(struct ifnet *ifp, int len)
  2907. @@ -311,7 +305,7 @@ void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi)
  2908. #endif /* !RATE */
  2909.  
  2910.  
  2911. -/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
  2912. +/* ========== GENERIC (EMULATED) NETMAP ADAPTER SUPPORT ============= */
  2913.  
  2914. /*
  2915. * Wrapper used by the generic adapter layer to notify
  2916. @@ -341,7 +335,6 @@ generic_netmap_unregister(struct netmap_adapter *na)
  2917. int i, r;
  2918.  
  2919. if (na->active_fds == 0) {
  2920. - D("Generic adapter %p goes off", na);
  2921. rtnl_lock();
  2922.  
  2923. na->na_flags &= ~NAF_NETMAP_ON;
  2924. @@ -357,14 +350,14 @@ generic_netmap_unregister(struct netmap_adapter *na)
  2925.  
  2926. for_each_rx_kring_h(r, kring, na) {
  2927. if (nm_kring_pending_off(kring)) {
  2928. - D("RX ring %d of generic adapter %p goes off", r, na);
  2929. + D("Emulated adapter: ring '%s' deactivated", kring->name);
  2930. kring->nr_mode = NKR_NETMAP_OFF;
  2931. }
  2932. }
  2933. for_each_tx_kring_h(r, kring, na) {
  2934. if (nm_kring_pending_off(kring)) {
  2935. kring->nr_mode = NKR_NETMAP_OFF;
  2936. - D("TX ring %d of generic adapter %p goes off", r, na);
  2937. + D("Emulated adapter: ring '%s' deactivated", kring->name);
  2938. }
  2939. }
  2940.  
  2941. @@ -387,14 +380,14 @@ generic_netmap_unregister(struct netmap_adapter *na)
  2942. * TX event is consumed. */
  2943. mtx_lock_spin(&kring->tx_event_lock);
  2944. if (kring->tx_event) {
  2945. - set_mbuf_destructor(kring->tx_event, NULL);
  2946. + SET_MBUF_DESTRUCTOR(kring->tx_event, NULL);
  2947. }
  2948. kring->tx_event = NULL;
  2949. mtx_unlock_spin(&kring->tx_event_lock);
  2950. }
  2951.  
  2952. if (na->active_fds == 0) {
  2953. - free(gna->mit, M_DEVBUF);
  2954. + nm_os_free(gna->mit);
  2955.  
  2956. for_each_rx_kring(r, kring, na) {
  2957. mbq_safe_fini(&kring->rx_queue);
  2958. @@ -411,7 +404,7 @@ generic_netmap_unregister(struct netmap_adapter *na)
  2959. m_freem(kring->tx_pool[i]);
  2960. }
  2961. }
  2962. - free(kring->tx_pool, M_DEVBUF);
  2963. + nm_os_free(kring->tx_pool);
  2964. kring->tx_pool = NULL;
  2965. }
  2966.  
  2967. @@ -421,6 +414,7 @@ generic_netmap_unregister(struct netmap_adapter *na)
  2968. del_timer(&rate_ctx.timer);
  2969. }
  2970. #endif
  2971. + D("Emulated adapter for %s deactivated", na->name);
  2972. }
  2973.  
  2974. return 0;
  2975. @@ -445,13 +439,12 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
  2976. }
  2977.  
  2978. if (na->active_fds == 0) {
  2979. - D("Generic adapter %p goes on", na);
  2980. + D("Emulated adapter for %s activated", na->name);
  2981. /* Do all memory allocations when (na->active_fds == 0), to
  2982. * simplify error management. */
  2983.  
  2984. /* Allocate memory for mitigation support on all the rx queues. */
  2985. - gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
  2986. - M_DEVBUF, M_NOWAIT | M_ZERO);
  2987. + gna->mit = nm_os_malloc(na->num_rx_rings * sizeof(struct nm_generic_mit));
  2988. if (!gna->mit) {
  2989. D("mitigation allocation failed");
  2990. error = ENOMEM;
  2991. @@ -478,8 +471,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
  2992. }
  2993. for_each_tx_kring(r, kring, na) {
  2994. kring->tx_pool =
  2995. - malloc(na->num_tx_desc * sizeof(struct mbuf *),
  2996. - M_DEVBUF, M_NOWAIT | M_ZERO);
  2997. + nm_os_malloc(na->num_tx_desc * sizeof(struct mbuf *));
  2998. if (!kring->tx_pool) {
  2999. D("tx_pool allocation failed");
  3000. error = ENOMEM;
  3001. @@ -492,14 +484,14 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
  3002.  
  3003. for_each_rx_kring_h(r, kring, na) {
  3004. if (nm_kring_pending_on(kring)) {
  3005. - D("RX ring %d of generic adapter %p goes on", r, na);
  3006. + D("Emulated adapter: ring '%s' activated", kring->name);
  3007. kring->nr_mode = NKR_NETMAP_ON;
  3008. }
  3009.  
  3010. }
  3011. for_each_tx_kring_h(r, kring, na) {
  3012. if (nm_kring_pending_on(kring)) {
  3013. - D("TX ring %d of generic adapter %p goes on", r, na);
  3014. + D("Emulated adapter: ring '%s' activated", kring->name);
  3015. kring->nr_mode = NKR_NETMAP_ON;
  3016. }
  3017. }
  3018. @@ -560,13 +552,13 @@ free_tx_pools:
  3019. if (kring->tx_pool == NULL) {
  3020. continue;
  3021. }
  3022. - free(kring->tx_pool, M_DEVBUF);
  3023. + nm_os_free(kring->tx_pool);
  3024. kring->tx_pool = NULL;
  3025. }
  3026. for_each_rx_kring(r, kring, na) {
  3027. mbq_safe_fini(&kring->rx_queue);
  3028. }
  3029. - free(gna->mit, M_DEVBUF);
  3030. + nm_os_free(gna->mit);
  3031. out:
  3032.  
  3033. return error;
  3034. @@ -768,7 +760,7 @@ generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
  3035. return;
  3036. }
  3037.  
  3038. - set_mbuf_destructor(m, generic_mbuf_destructor);
  3039. + SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
  3040. kring->tx_event = m;
  3041. mtx_unlock_spin(&kring->tx_event_lock);
  3042.  
  3043. @@ -1161,7 +1153,6 @@ generic_netmap_dtor(struct netmap_adapter *na)
  3044. struct netmap_adapter *prev_na = gna->prev;
  3045.  
  3046. if (prev_na != NULL) {
  3047. - D("Released generic NA %p", gna);
  3048. netmap_adapter_put(prev_na);
  3049. if (nm_iszombie(na)) {
  3050. /*
  3051. @@ -1170,6 +1161,7 @@ generic_netmap_dtor(struct netmap_adapter *na)
  3052. */
  3053. netmap_adapter_put(prev_na);
  3054. }
  3055. + D("Native netmap adapter %p restored", prev_na);
  3056. }
  3057. NM_ATTACH_NA(ifp, prev_na);
  3058. /*
  3059. @@ -1177,7 +1169,13 @@ generic_netmap_dtor(struct netmap_adapter *na)
  3060. * overrides WNA(ifp) if na->ifp is not NULL.
  3061. */
  3062. na->ifp = NULL;
  3063. - D("Restored native NA %p", prev_na);
  3064. + D("Emulated netmap adapter for %s destroyed", na->name);
  3065. +}
  3066. +
  3067. +int
  3068. +na_is_generic(struct netmap_adapter *na)
  3069. +{
  3070. + return na->nm_register == generic_netmap_register;
  3071. }
  3072.  
  3073. /*
  3074. @@ -1208,7 +1206,7 @@ generic_netmap_attach(struct ifnet *ifp)
  3075. return EINVAL;
  3076. }
  3077.  
  3078. - gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
  3079. + gna = nm_os_malloc(sizeof(*gna));
  3080. if (gna == NULL) {
  3081. D("no memory on attach, give up");
  3082. return ENOMEM;
  3083. @@ -1237,7 +1235,7 @@ generic_netmap_attach(struct ifnet *ifp)
  3084.  
  3085. retval = netmap_attach_common(na);
  3086. if (retval) {
  3087. - free(gna, M_DEVBUF);
  3088. + nm_os_free(gna);
  3089. return retval;
  3090. }
  3091.  
  3092. @@ -1249,7 +1247,7 @@ generic_netmap_attach(struct ifnet *ifp)
  3093.  
  3094. nm_os_generic_set_features(gna);
  3095.  
  3096. - D("Created generic NA %p (prev %p)", gna, gna->prev);
  3097. + D("Emulated adapter for %s created (prev was %p)", na->name, gna->prev);
  3098.  
  3099. return retval;
  3100. }
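The hunks above convert the emulated (generic) adapter to the OS-independent allocation wrappers declared further down in netmap_kern.h, instead of calling malloc(9)/free(9) with M_DEVBUF directly. As a rough sketch (assuming the FreeBSD bodies, which live in the per-OS support file and are not shown in this excerpt, keep the old M_NOWAIT | M_ZERO semantics), the three wrappers reduce to:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>

void *
nm_os_malloc(size_t size)
{
	return malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
}

void *
nm_os_realloc(void *addr, size_t new_size, size_t old_size __unused)
{
	/* old_size only matters on platforms (e.g. Windows) whose
	 * realloc cannot recover the original allocation size */
	return realloc(addr, new_size, M_DEVBUF, M_NOWAIT | M_ZERO);
}

void
nm_os_free(void *addr)
{
	free(addr, M_DEVBUF);
}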
  3101. diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
  3102. index f904476721b..3972f82d6fe 100644
  3103. --- a/sys/dev/netmap/netmap_kern.h
  3104. +++ b/sys/dev/netmap/netmap_kern.h
  3105. @@ -26,7 +26,7 @@
  3106. */
  3107.  
  3108. /*
  3109. - * $FreeBSD$
  3110. + * $FreeBSD: head/sys/dev/netmap/netmap_kern.h 238985 2012-08-02 11:59:43Z luigi $
  3111. *
  3112. * The header contains the definitions of constants and function
  3113. * prototypes used only in kernelspace.
  3114. @@ -55,6 +55,9 @@
  3115. #if defined(CONFIG_NETMAP_PTNETMAP_HOST)
  3116. #define WITH_PTNETMAP_HOST
  3117. #endif
  3118. +#if defined(CONFIG_NETMAP_SINK)
  3119. +#define WITH_SINK
  3120. +#endif
  3121.  
  3122. #elif defined (_WIN32)
  3123. #define WITH_VALE // comment out to disable VALE support
  3124. @@ -240,12 +243,23 @@ typedef struct hrtimer{
  3125. #define NMG_UNLOCK() NM_MTX_UNLOCK(netmap_global_lock)
  3126. #define NMG_LOCK_ASSERT() NM_MTX_ASSERT(netmap_global_lock)
  3127.  
  3128. +#if defined(__FreeBSD__)
  3129. +#define nm_prerr printf
  3130. +#define nm_prinf printf
  3131. +#elif defined (_WIN32)
  3132. +#define nm_prerr DbgPrint
  3133. +#define nm_prinf DbgPrint
  3134. +#elif defined(linux)
  3135. +#define nm_prerr(fmt, arg...) printk(KERN_ERR fmt, ##arg)
  3136. +#define nm_prinf(fmt, arg...) printk(KERN_INFO fmt, ##arg)
  3137. +#endif
  3138. +
  3139. #define ND(format, ...)
  3140. #define D(format, ...) \
  3141. do { \
  3142. struct timeval __xxts; \
  3143. microtime(&__xxts); \
  3144. - printf("%03d.%06d [%4d] %-25s " format "\n", \
  3145. + nm_prerr("%03d.%06d [%4d] %-25s " format "\n", \
  3146. (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
  3147. __LINE__, __FUNCTION__, ##__VA_ARGS__); \
  3148. } while (0)
  3149. @@ -287,6 +301,11 @@ void nm_os_put_module(void);
  3150. void netmap_make_zombie(struct ifnet *);
  3151. void netmap_undo_zombie(struct ifnet *);
  3152.  
  3153. +/* os independent alloc/realloc/free */
  3154. +void *nm_os_malloc(size_t);
  3155. +void *nm_os_realloc(void *, size_t new_size, size_t old_size);
  3156. +void nm_os_free(void *);
  3157. +
  3158. /* passes a packet up to the host stack.
  3159. * If the packet is sent (or dropped) immediately it returns NULL,
  3160. * otherwise it links the packet to prev and returns m.
  3161. @@ -317,6 +336,12 @@ nm_txrx_swap(enum txrx t)
  3162.  
  3163. #define for_rx_tx(t) for ((t) = 0; (t) < NR_TXRX; (t)++)
  3164.  
  3165. +#ifdef WITH_MONITOR
  3166. +struct netmap_zmon_list {
  3167. + struct netmap_kring *next;
  3168. + struct netmap_kring *prev;
  3169. +};
  3170. +#endif /* WITH_MONITOR */
  3171.  
  3172. /*
  3173. * private, kernel view of a ring. Keeps track of the status of
  3174. @@ -491,6 +516,12 @@ struct netmap_kring {
  3175. struct netmap_kring **monitors;
  3176. uint32_t max_monitors; /* current size of the monitors array */
  3177. uint32_t n_monitors; /* next unused entry in the monitor array */
  3178. + uint32_t mon_pos[NR_TXRX]; /* index of this ring in the monitored ring array */
  3179. + uint32_t mon_tail; /* last seen slot on rx */
  3180. +
  3181. + /* circular list of zero-copy monitors */
  3182. + struct netmap_zmon_list zmon_list[NR_TXRX];
  3183. +
  3184. /*
  3185. * Monitors work by intercepting the sync and notify callbacks of the
  3186. * monitored krings. This is implemented by replacing the pointers
  3187. @@ -499,8 +530,6 @@ struct netmap_kring {
  3188. int (*mon_sync)(struct netmap_kring *kring, int flags);
  3189. int (*mon_notify)(struct netmap_kring *kring, int flags);
  3190.  
  3191. - uint32_t mon_tail; /* last seen slot on rx */
  3192. - uint32_t mon_pos; /* index of this ring in the monitored ring array */
  3193. #endif
  3194. }
  3195. #ifdef _WIN32
  3196. @@ -731,8 +760,9 @@ struct netmap_adapter {
  3197. int (*nm_txsync)(struct netmap_kring *kring, int flags);
  3198. int (*nm_rxsync)(struct netmap_kring *kring, int flags);
  3199. int (*nm_notify)(struct netmap_kring *kring, int flags);
  3200. -#define NAF_FORCE_READ 1
  3201. -#define NAF_FORCE_RECLAIM 2
  3202. +#define NAF_FORCE_READ 1
  3203. +#define NAF_FORCE_RECLAIM 2
  3204. +#define NAF_CAN_FORWARD_DOWN 4
  3205. /* return configuration information */
  3206. int (*nm_config)(struct netmap_adapter *,
  3207. u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
  3208. @@ -854,6 +884,7 @@ struct netmap_vp_adapter { /* VALE software port */
  3209. int bdg_port;
  3210. struct nm_bridge *na_bdg;
  3211. int retry;
  3212. + int autodelete; /* remove the ifp on last reference */
  3213.  
  3214. /* Maximum Frame Size, used in bdg_mismatch_datapath() */
  3215. u_int mfs;
  3216. @@ -977,7 +1008,10 @@ struct netmap_bwrap_adapter {
  3217. struct nm_bdg_polling_state *na_polling_state;
  3218. };
  3219. int netmap_bwrap_attach(const char *name, struct netmap_adapter *);
  3220. +int netmap_vi_create(struct nmreq *, int);
  3221.  
  3222. +#else /* !WITH_VALE */
  3223. +#define netmap_vi_create(nmr, a) (EOPNOTSUPP)
  3224. #endif /* WITH_VALE */
  3225.  
  3226. #ifdef WITH_PIPES
  3227. @@ -993,6 +1027,7 @@ struct netmap_pipe_adapter {
  3228. struct netmap_adapter *parent; /* adapter that owns the memory */
  3229. struct netmap_pipe_adapter *peer; /* the other end of the pipe */
  3230. int peer_ref; /* 1 iff we are holding a ref to the peer */
  3231. + struct ifnet *parent_ifp; /* maybe null */
  3232.  
  3233. u_int parent_slot; /* index in the parent pipe array */
  3234. };
  3235. @@ -1149,6 +1184,7 @@ static __inline void nm_kr_start(struct netmap_kring *kr)
  3236. * virtual ports (vale, pipes, monitor)
  3237. */
  3238. int netmap_attach(struct netmap_adapter *);
  3239. +int netmap_attach_ext(struct netmap_adapter *, size_t size);
  3240. void netmap_detach(struct ifnet *);
  3241. int netmap_transmit(struct ifnet *, struct mbuf *);
  3242. struct netmap_slot *netmap_reset(struct netmap_adapter *na,
  3243. @@ -1380,9 +1416,10 @@ void netmap_do_unregif(struct netmap_priv_d *priv);
  3244.  
  3245. u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
  3246. int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
  3247. - struct ifnet **ifp, int create);
  3248. + struct ifnet **ifp, struct netmap_mem_d *nmd, int create);
  3249. void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp);
  3250. -int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
  3251. +int netmap_get_hw_na(struct ifnet *ifp,
  3252. + struct netmap_mem_d *nmd, struct netmap_adapter **na);
  3253.  
  3254.  
  3255. #ifdef WITH_VALE
  3256. @@ -1414,7 +1451,8 @@ u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
  3257. #define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
  3258.  
  3259. /* these are redefined in case of no VALE support */
  3260. -int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
  3261. +int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
  3262. + struct netmap_mem_d *nmd, int create);
  3263. struct nm_bridge *netmap_init_bridges2(u_int);
  3264. void netmap_uninit_bridges2(struct nm_bridge *, u_int);
  3265. int netmap_init_bridges(void);
  3266. @@ -1423,7 +1461,7 @@ int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
  3267. int netmap_bdg_config(struct nmreq *nmr);
  3268.  
  3269. #else /* !WITH_VALE */
  3270. -#define netmap_get_bdg_na(_1, _2, _3) 0
  3271. +#define netmap_get_bdg_na(_1, _2, _3, _4) 0
  3272. #define netmap_init_bridges(_1) 0
  3273. #define netmap_uninit_bridges()
  3274. #define netmap_bdg_ctl(_1, _2) EINVAL
  3275. @@ -1433,22 +1471,24 @@ int netmap_bdg_config(struct nmreq *nmr);
  3276. /* max number of pipes per device */
  3277. #define NM_MAXPIPES 64 /* XXX how many? */
  3278. void netmap_pipe_dealloc(struct netmap_adapter *);
  3279. -int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
  3280. +int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
  3281. + struct netmap_mem_d *nmd, int create);
  3282. #else /* !WITH_PIPES */
  3283. #define NM_MAXPIPES 0
  3284. #define netmap_pipe_alloc(_1, _2) 0
  3285. #define netmap_pipe_dealloc(_1)
  3286. -#define netmap_get_pipe_na(nmr, _2, _3) \
  3287. +#define netmap_get_pipe_na(nmr, _2, _3, _4) \
  3288. ({ int role__ = (nmr)->nr_flags & NR_REG_MASK; \
  3289. (role__ == NR_REG_PIPE_MASTER || \
  3290. role__ == NR_REG_PIPE_SLAVE) ? EOPNOTSUPP : 0; })
  3291. #endif
  3292.  
  3293. #ifdef WITH_MONITOR
  3294. -int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
  3295. +int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
  3296. + struct netmap_mem_d *nmd, int create);
  3297. void netmap_monitor_stop(struct netmap_adapter *na);
  3298. #else
  3299. -#define netmap_get_monitor_na(nmr, _2, _3) \
  3300. +#define netmap_get_monitor_na(nmr, _2, _3, _4) \
  3301. ((nmr)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
  3302. #endif
  3303.  
  3304. @@ -1532,6 +1572,7 @@ extern int netmap_generic_mit;
  3305. extern int netmap_generic_ringsize;
  3306. extern int netmap_generic_rings;
  3307. extern int netmap_generic_txqdisc;
  3308. +extern int ptnetmap_tx_workers;
  3309.  
  3310. /*
  3311. * NA returns a pointer to the struct netmap adapter from the ifp,
  3312. @@ -1781,6 +1822,7 @@ struct netmap_priv_d {
  3313. u_int np_qfirst[NR_TXRX],
  3314. np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
  3315. uint16_t np_txpoll; /* XXX and also np_rxpoll ? */
  3316. + int np_sync_flags; /* to be passed to nm_sync */
  3317.  
  3318. int np_refs; /* use with NMG_LOCK held */
  3319.  
  3320. @@ -1812,6 +1854,11 @@ static inline int nm_kring_pending(struct netmap_priv_d *np)
  3321. return 0;
  3322. }
  3323.  
  3324. +#ifdef WITH_PIPES
  3325. +int netmap_pipe_txsync(struct netmap_kring *txkring, int flags);
  3326. +int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags);
  3327. +#endif /* WITH_PIPES */
  3328. +
  3329. #ifdef WITH_MONITOR
  3330.  
  3331. struct netmap_monitor_adapter {
  3332. @@ -1835,6 +1882,8 @@ int generic_rx_handler(struct ifnet *ifp, struct mbuf *m);;
  3333. int nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept);
  3334. int nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept);
  3335.  
  3336. +int na_is_generic(struct netmap_adapter *na);
  3337. +
  3338. /*
  3339. * the generic transmit routine is passed a structure to optionally
  3340. * build a queue of descriptors, in an OS-specific way.
  3341. @@ -1891,6 +1940,7 @@ int nm_os_mitigation_active(struct nm_generic_mit *mit);
  3342. void nm_os_mitigation_cleanup(struct nm_generic_mit *mit);
  3343. #else /* !WITH_GENERIC */
  3344. #define generic_netmap_attach(ifp) (EOPNOTSUPP)
  3345. +#define na_is_generic(na) (0)
  3346. #endif /* WITH_GENERIC */
  3347.  
  3348. /* Shared declarations for the VALE switch. */
  3349. @@ -2003,26 +2053,29 @@ void nm_os_vi_init_index(void);
  3350. /*
  3351. * kernel thread routines
  3352. */
  3353. -struct nm_kthread; /* OS-specific kthread - opaque */
  3354. -typedef void (*nm_kthread_worker_fn_t)(void *data);
  3355. +struct nm_kctx; /* OS-specific kernel context - opaque */
  3356. +typedef void (*nm_kctx_worker_fn_t)(void *data, int is_kthread);
  3357. +typedef void (*nm_kctx_notify_fn_t)(void *data);
  3358.  
  3359. /* kthread configuration */
  3360. -struct nm_kthread_cfg {
  3361. - long type; /* kthread type/identifier */
  3362. - nm_kthread_worker_fn_t worker_fn; /* worker function */
  3363. - void *worker_private;/* worker parameter */
  3364. - int attach_user; /* attach kthread to user process */
  3365. +struct nm_kctx_cfg {
  3366. + long type; /* kthread type/identifier */
  3367. + nm_kctx_worker_fn_t worker_fn; /* worker function */
  3368. + void *worker_private;/* worker parameter */
  3369. + nm_kctx_notify_fn_t notify_fn; /* notify function */
  3370. + int attach_user; /* attach kthread to user process */
  3371. + int use_kthread; /* use a kthread for the context */
  3372. };
  3373. /* kthread configuration */
  3374. -struct nm_kthread *nm_os_kthread_create(struct nm_kthread_cfg *cfg,
  3375. +struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg,
  3376. unsigned int cfgtype,
  3377. void *opaque);
  3378. -int nm_os_kthread_start(struct nm_kthread *);
  3379. -void nm_os_kthread_stop(struct nm_kthread *);
  3380. -void nm_os_kthread_delete(struct nm_kthread *);
  3381. -void nm_os_kthread_wakeup_worker(struct nm_kthread *nmk);
  3382. -void nm_os_kthread_send_irq(struct nm_kthread *);
  3383. -void nm_os_kthread_set_affinity(struct nm_kthread *, int);
  3384. +int nm_os_kctx_worker_start(struct nm_kctx *);
  3385. +void nm_os_kctx_worker_stop(struct nm_kctx *);
  3386. +void nm_os_kctx_destroy(struct nm_kctx *);
  3387. +void nm_os_kctx_worker_wakeup(struct nm_kctx *nmk);
  3388. +void nm_os_kctx_send_irq(struct nm_kctx *);
  3389. +void nm_os_kctx_worker_setaff(struct nm_kctx *, int);
  3390. u_int nm_os_ncpus(void);
  3391.  
  3392. #ifdef WITH_PTNETMAP_HOST
  3393. @@ -2032,12 +2085,18 @@ u_int nm_os_ncpus(void);
  3394. struct netmap_pt_host_adapter {
  3395. struct netmap_adapter up;
  3396.  
  3397. + /* the passed-through adapter */
  3398. struct netmap_adapter *parent;
  3399. + /* parent->na_flags, saved at NETMAP_PT_HOST_CREATE time,
  3400. + * and restored at NETMAP_PT_HOST_DELETE time */
  3401. + uint32_t parent_na_flags;
  3402. +
  3403. int (*parent_nm_notify)(struct netmap_kring *kring, int flags);
  3404. void *ptns;
  3405. };
  3406. /* ptnetmap HOST routines */
  3407. -int netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
  3408. +int netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na,
  3409. + struct netmap_mem_d * nmd, int create);
  3410. int ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na);
  3411. static inline int
  3412. nm_ptnetmap_host_on(struct netmap_adapter *na)
  3413. @@ -2045,7 +2104,7 @@ nm_ptnetmap_host_on(struct netmap_adapter *na)
  3414. return na && na->na_flags & NAF_PTNETMAP_HOST;
  3415. }
  3416. #else /* !WITH_PTNETMAP_HOST */
  3417. -#define netmap_get_pt_host_na(nmr, _2, _3) \
  3418. +#define netmap_get_pt_host_na(nmr, _2, _3, _4) \
  3419. ((nmr)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0)
  3420. #define ptnetmap_ctl(_1, _2) EINVAL
  3421. #define nm_ptnetmap_host_on(_1) EINVAL
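The netmap_kern.h hunks above also rename the opaque kthread abstraction to a more general "kernel context" (nm_kctx), whose worker may optionally run without a dedicated kthread. A minimal sketch of how a consumer such as the ptnetmap host backend might drive the renamed API follows; the worker body, the cfgtype value and the error handling are illustrative, not taken from this patch:

/* assumes netmap_kern.h has been included */
static void
my_worker(void *data, int is_kthread)
{
	/* process pending work for 'data'; is_kthread says whether we
	 * run in the dedicated kthread or inline in the notify path */
}

static struct nm_kctx *
my_worker_create(void *priv)
{
	struct nm_kctx_cfg cfg;
	struct nm_kctx *kctx;

	memset(&cfg, 0, sizeof(cfg));
	cfg.worker_fn = my_worker;
	cfg.worker_private = priv;
	cfg.use_kthread = 1;	/* back the context with a kthread */

	kctx = nm_os_kctx_create(&cfg, 0 /* cfgtype */, NULL /* opaque */);
	if (kctx == NULL)
		return NULL;
	if (nm_os_kctx_worker_start(kctx)) {
		nm_os_kctx_destroy(kctx);
		return NULL;
	}
	return kctx;
}

static void
my_worker_delete(struct nm_kctx *kctx)
{
	nm_os_kctx_worker_stop(kctx);
	nm_os_kctx_destroy(kctx);
}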
  3422. diff --git a/sys/dev/netmap/netmap_mbq.h b/sys/dev/netmap/netmap_mbq.h
  3423. index 9dafa8b1149..8ba0947b570 100644
  3424. --- a/sys/dev/netmap/netmap_mbq.h
  3425. +++ b/sys/dev/netmap/netmap_mbq.h
  3426. @@ -29,8 +29,8 @@
  3427. */
  3428.  
  3429.  
  3430. -#ifndef __NETMAP_MBQ_H__
  3431. -#define __NETMAP_MBQ_H__
  3432. +#ifndef _NET_NETMAP_MBQ_H__
  3433. +#define _NET_NETMAP_MBQ_H__
  3434.  
  3435. /*
  3436. * These function implement an mbuf tailq with an optional lock.
  3437. @@ -67,7 +67,7 @@ void mbq_purge(struct mbq *q);
  3438. static inline struct mbuf *
  3439. mbq_peek(struct mbq *q)
  3440. {
  3441. - return q->head ? q->head : NULL;
  3442. + return q->head;
  3443. }
  3444.  
  3445. static inline void
  3446. @@ -94,4 +94,4 @@ static inline unsigned int mbq_len(struct mbq *q)
  3447. return q->count;
  3448. }
  3449.  
  3450. -#endif /* __NETMAP_MBQ_H_ */
  3451. +#endif /* _NET_NETMAP_MBQ_H__ */
  3452. diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
  3453. index 922e5f32ff0..ad990f0618c 100644
  3454. --- a/sys/dev/netmap/netmap_mem2.c
  3455. +++ b/sys/dev/netmap/netmap_mem2.c
  3456. @@ -36,7 +36,7 @@
  3457.  
  3458. #ifdef __FreeBSD__
  3459. #include <sys/cdefs.h> /* prerequisite */
  3460. -__FBSDID("$FreeBSD$");
  3461. +__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 241723 2012-10-19 09:41:45Z glebius $");
  3462.  
  3463. #include <sys/types.h>
  3464. #include <sys/malloc.h>
  3465. @@ -87,6 +87,9 @@ enum {
  3466. struct netmap_obj_params {
  3467. u_int size;
  3468. u_int num;
  3469. +
  3470. + u_int last_size;
  3471. + u_int last_num;
  3472. };
  3473.  
  3474. struct netmap_obj_pool {
  3475. @@ -139,20 +142,20 @@ struct netmap_mem_ops {
  3476. ssize_t (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr);
  3477. void (*nmd_delete)(struct netmap_mem_d *);
  3478.  
  3479. - struct netmap_if * (*nmd_if_new)(struct netmap_adapter *);
  3480. + struct netmap_if * (*nmd_if_new)(struct netmap_adapter *,
  3481. + struct netmap_priv_d *);
  3482. void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *);
  3483. int (*nmd_rings_create)(struct netmap_adapter *);
  3484. void (*nmd_rings_delete)(struct netmap_adapter *);
  3485. };
  3486.  
  3487. -typedef uint16_t nm_memid_t;
  3488. -
  3489. struct netmap_mem_d {
  3490. NMA_LOCK_T nm_mtx; /* protect the allocator */
  3491. u_int nm_totalsize; /* shorthand */
  3492.  
  3493. u_int flags;
  3494. #define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */
  3495. +#define NETMAP_MEM_HIDDEN 0x8 /* being prepared */
  3496. int lasterr; /* last error for curr config */
  3497. int active; /* active users */
  3498. int refcount;
  3499. @@ -166,6 +169,11 @@ struct netmap_mem_d {
  3500. struct netmap_mem_d *prev, *next;
  3501.  
  3502. struct netmap_mem_ops *ops;
  3503. +
  3504. + struct netmap_obj_params params[NETMAP_POOLS_NR];
  3505. +
  3506. +#define NM_MEM_NAMESZ 16
  3507. + char name[NM_MEM_NAMESZ];
  3508. };
  3509.  
  3510. /*
  3511. @@ -214,7 +222,7 @@ NMD_DEFCB(int, config);
  3512. NMD_DEFCB1(ssize_t, if_offset, const void *);
  3513. NMD_DEFCB(void, delete);
  3514.  
  3515. -NMD_DEFNACB(struct netmap_if *, if_new);
  3516. +NMD_DEFNACB1(struct netmap_if *, if_new, struct netmap_priv_d *);
  3517. NMD_DEFNACB1(void, if_delete, struct netmap_if *);
  3518. NMD_DEFNACB(int, rings_create);
  3519. NMD_DEFNACB(void, rings_delete);
  3520. @@ -222,6 +230,13 @@ NMD_DEFNACB(void, rings_delete);
  3521. static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
  3522. static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
  3523. static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
  3524. +static void nm_mem_release_id(struct netmap_mem_d *);
  3525. +
  3526. +nm_memid_t
  3527. +netmap_mem_get_id(struct netmap_mem_d *nmd)
  3528. +{
  3529. + return nmd->nm_id;
  3530. +}
  3531.  
  3532. #define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx)
  3533. #define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx)
  3534. @@ -230,34 +245,35 @@ static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
  3535.  
  3536. #ifdef NM_DEBUG_MEM_PUTGET
  3537. #define NM_DBG_REFC(nmd, func, line) \
  3538. - printf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
  3539. + nm_prinf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
  3540. #else
  3541. #define NM_DBG_REFC(nmd, func, line)
  3542. #endif
  3543.  
  3544. -#ifdef NM_DEBUG_MEM_PUTGET
  3545. -void __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
  3546. -#else
  3547. -void netmap_mem_get(struct netmap_mem_d *nmd)
  3548. -#endif
  3549. +/* circular list of all existing allocators */
  3550. +static struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
  3551. +NM_MTX_T nm_mem_list_lock;
  3552. +
  3553. +struct netmap_mem_d *
  3554. +__netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
  3555. {
  3556. - NMA_LOCK(nmd);
  3557. + NM_MTX_LOCK(nm_mem_list_lock);
  3558. nmd->refcount++;
  3559. NM_DBG_REFC(nmd, func, line);
  3560. - NMA_UNLOCK(nmd);
  3561. + NM_MTX_UNLOCK(nm_mem_list_lock);
  3562. + return nmd;
  3563. }
  3564.  
  3565. -#ifdef NM_DEBUG_MEM_PUTGET
  3566. -void __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line)
  3567. -#else
  3568. -void netmap_mem_put(struct netmap_mem_d *nmd)
  3569. -#endif
  3570. +void
  3571. +__netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line)
  3572. {
  3573. int last;
  3574. - NMA_LOCK(nmd);
  3575. + NM_MTX_LOCK(nm_mem_list_lock);
  3576. last = (--nmd->refcount == 0);
  3577. + if (last)
  3578. + nm_mem_release_id(nmd);
  3579. NM_DBG_REFC(nmd, func, line);
  3580. - NMA_UNLOCK(nmd);
  3581. + NM_MTX_UNLOCK(nm_mem_list_lock);
  3582. if (last)
  3583. netmap_mem_delete(nmd);
  3584. }
  3585. @@ -349,21 +365,6 @@ netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
  3586. return 0;
  3587. }
  3588.  
  3589. -static struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
  3590. - [NETMAP_IF_POOL] = {
  3591. - .size = 1024,
  3592. - .num = 100,
  3593. - },
  3594. - [NETMAP_RING_POOL] = {
  3595. - .size = 9*PAGE_SIZE,
  3596. - .num = 200,
  3597. - },
  3598. - [NETMAP_BUF_POOL] = {
  3599. - .size = 2048,
  3600. - .num = NETMAP_BUF_MAX_NUM,
  3601. - },
  3602. -};
  3603. -
  3604. static struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
  3605. [NETMAP_IF_POOL] = {
  3606. .size = 1024,
  3607. @@ -411,17 +412,32 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
  3608. },
  3609. },
  3610.  
  3611. + .params = {
  3612. + [NETMAP_IF_POOL] = {
  3613. + .size = 1024,
  3614. + .num = 100,
  3615. + },
  3616. + [NETMAP_RING_POOL] = {
  3617. + .size = 9*PAGE_SIZE,
  3618. + .num = 200,
  3619. + },
  3620. + [NETMAP_BUF_POOL] = {
  3621. + .size = 2048,
  3622. + .num = NETMAP_BUF_MAX_NUM,
  3623. + },
  3624. + },
  3625. +
  3626. .nm_id = 1,
  3627. .nm_grp = -1,
  3628.  
  3629. .prev = &nm_mem,
  3630. .next = &nm_mem,
  3631.  
  3632. - .ops = &netmap_mem_global_ops
  3633. -};
  3634. + .ops = &netmap_mem_global_ops,
  3635.  
  3636. + .name = "1"
  3637. +};
  3638.  
  3639. -static struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
  3640.  
  3641. /* blueprint for the private memory allocators */
  3642. extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */
  3643. @@ -451,9 +467,11 @@ static const struct netmap_mem_d nm_blueprint = {
  3644. },
  3645. },
  3646.  
  3647. + .nm_grp = -1,
  3648. +
  3649. .flags = NETMAP_MEM_PRIVATE,
  3650.  
  3651. - .ops = &netmap_mem_private_ops
  3652. + .ops = &netmap_mem_global_ops,
  3653. };
  3654.  
  3655. /* memory allocator related sysctls */
  3656. @@ -464,11 +482,11 @@ static const struct netmap_mem_d nm_blueprint = {
  3657. #define DECLARE_SYSCTLS(id, name) \
  3658. SYSBEGIN(mem2_ ## name); \
  3659. SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
  3660. - CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
  3661. + CTLFLAG_RW, &nm_mem.params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
  3662. SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
  3663. CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
  3664. SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
  3665. - CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
  3666. + CTLFLAG_RW, &nm_mem.params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
  3667. SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
  3668. CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \
  3669. SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \
  3670. @@ -484,7 +502,7 @@ DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
  3671. DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
  3672. DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
  3673.  
  3674. -/* call with NMA_LOCK(&nm_mem) held */
  3675. +/* call with nm_mem_list_lock held */
  3676. static int
  3677. nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
  3678. {
  3679. @@ -505,6 +523,8 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
  3680. scan->prev->next = nmd;
  3681. scan->prev = nmd;
  3682. netmap_last_mem_d = nmd;
  3683. + nmd->refcount = 1;
  3684. + NM_DBG_REFC(nmd, __FUNCTION__, __LINE__);
  3685. error = 0;
  3686. break;
  3687. }
  3688. @@ -513,24 +533,23 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
  3689. return error;
  3690. }
  3691.  
  3692. -/* call with NMA_LOCK(&nm_mem) *not* held */
  3693. +/* call with nm_mem_list_lock *not* held */
  3694. static int
  3695. nm_mem_assign_id(struct netmap_mem_d *nmd)
  3696. {
  3697. int ret;
  3698.  
  3699. - NMA_LOCK(&nm_mem);
  3700. + NM_MTX_LOCK(nm_mem_list_lock);
  3701. ret = nm_mem_assign_id_locked(nmd);
  3702. - NMA_UNLOCK(&nm_mem);
  3703. + NM_MTX_UNLOCK(nm_mem_list_lock);
  3704.  
  3705. return ret;
  3706. }
  3707.  
  3708. +/* call with nm_mem_list_lock held */
  3709. static void
  3710. nm_mem_release_id(struct netmap_mem_d *nmd)
  3711. {
  3712. - NMA_LOCK(&nm_mem);
  3713. -
  3714. nmd->prev->next = nmd->next;
  3715. nmd->next->prev = nmd->prev;
  3716.  
  3717. @@ -538,8 +557,26 @@ nm_mem_release_id(struct netmap_mem_d *nmd)
  3718. netmap_last_mem_d = nmd->prev;
  3719.  
  3720. nmd->prev = nmd->next = NULL;
  3721. +}
  3722.  
  3723. - NMA_UNLOCK(&nm_mem);
  3724. +struct netmap_mem_d *
  3725. +netmap_mem_find(nm_memid_t id)
  3726. +{
  3727. + struct netmap_mem_d *nmd;
  3728. +
  3729. + NM_MTX_LOCK(nm_mem_list_lock);
  3730. + nmd = netmap_last_mem_d;
  3731. + do {
  3732. + if (!(nmd->flags & NETMAP_MEM_HIDDEN) && nmd->nm_id == id) {
  3733. + nmd->refcount++;
  3734. + NM_DBG_REFC(nmd, __FUNCTION__, __LINE__);
  3735. + NM_MTX_UNLOCK(nm_mem_list_lock);
  3736. + return nmd;
  3737. + }
  3738. + nmd = nmd->next;
  3739. + } while (nmd != netmap_last_mem_d);
  3740. + NM_MTX_UNLOCK(nm_mem_list_lock);
  3741. + return NULL;
  3742. }
  3743.  
  3744. static int
  3745. @@ -1032,7 +1069,7 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p)
  3746. if (p == NULL)
  3747. return;
  3748. if (p->bitmap)
  3749. - free(p->bitmap, M_NETMAP);
  3750. + nm_os_free(p->bitmap);
  3751. p->bitmap = NULL;
  3752. if (p->lut) {
  3753. u_int i;
  3754. @@ -1051,7 +1088,7 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p)
  3755. #ifdef linux
  3756. vfree(p->lut);
  3757. #else
  3758. - free(p->lut, M_NETMAP);
  3759. + nm_os_free(p->lut);
  3760. #endif
  3761. }
  3762. p->lut = NULL;
  3763. @@ -1170,7 +1207,7 @@ nm_alloc_lut(u_int nobj)
  3764. #ifdef linux
  3765. lut = vmalloc(n);
  3766. #else
  3767. - lut = malloc(n, M_NETMAP, M_NOWAIT | M_ZERO);
  3768. + lut = nm_os_malloc(n);
  3769. #endif
  3770. return lut;
  3771. }
  3772. @@ -1194,7 +1231,7 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
  3773.  
  3774. /* Allocate the bitmap */
  3775. n = (p->objtotal + 31) / 32;
  3776. - p->bitmap = malloc(sizeof(uint32_t) * n, M_NETMAP, M_NOWAIT | M_ZERO);
  3777. + p->bitmap = nm_os_malloc(sizeof(uint32_t) * n);
  3778. if (p->bitmap == NULL) {
  3779. D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
  3780. p->name);
  3781. @@ -1278,16 +1315,18 @@ clean:
  3782.  
  3783. /* call with lock held */
  3784. static int
  3785. -netmap_memory_config_changed(struct netmap_mem_d *nmd)
  3786. +netmap_mem_params_changed(struct netmap_obj_params* p)
  3787. {
  3788. - int i;
  3789. + int i, rv = 0;
  3790.  
  3791. for (i = 0; i < NETMAP_POOLS_NR; i++) {
  3792. - if (nmd->pools[i].r_objsize != netmap_params[i].size ||
  3793. - nmd->pools[i].r_objtotal != netmap_params[i].num)
  3794. - return 1;
  3795. + if (p[i].last_size != p[i].size || p[i].last_num != p[i].num) {
  3796. + p[i].last_size = p[i].size;
  3797. + p[i].last_num = p[i].num;
  3798. + rv = 1;
  3799. + }
  3800. }
  3801. - return 0;
  3802. + return rv;
  3803. }
  3804.  
  3805. static void
  3806. @@ -1308,7 +1347,7 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
  3807. {
  3808. int i, lim = p->_objtotal;
  3809.  
  3810. - if (na->pdev == NULL)
  3811. + if (na == NULL || na->pdev == NULL)
  3812. return 0;
  3813.  
  3814. #if defined(__FreeBSD__)
  3815. @@ -1386,66 +1425,16 @@ error:
  3816. return nmd->lasterr;
  3817. }
  3818.  
  3819. -
  3820. -
  3821. -static void
  3822. -netmap_mem_private_delete(struct netmap_mem_d *nmd)
  3823. -{
  3824. - if (nmd == NULL)
  3825. - return;
  3826. - if (netmap_verbose)
  3827. - D("deleting %p", nmd);
  3828. - if (nmd->active > 0)
  3829. - D("bug: deleting mem allocator with active=%d!", nmd->active);
  3830. - nm_mem_release_id(nmd);
  3831. - if (netmap_verbose)
  3832. - D("done deleting %p", nmd);
  3833. - NMA_LOCK_DESTROY(nmd);
  3834. - free(nmd, M_DEVBUF);
  3835. -}
  3836. -
  3837. -static int
  3838. -netmap_mem_private_config(struct netmap_mem_d *nmd)
  3839. -{
  3840. - /* nothing to do, we are configured on creation
  3841. - * and configuration never changes thereafter
  3842. - */
  3843. - return 0;
  3844. -}
  3845. -
  3846. -static int
  3847. -netmap_mem_private_finalize(struct netmap_mem_d *nmd)
  3848. -{
  3849. - int err;
  3850. - err = netmap_mem_finalize_all(nmd);
  3851. - if (!err)
  3852. - nmd->active++;
  3853. - return err;
  3854. -
  3855. -}
  3856. -
  3857. -static void
  3858. -netmap_mem_private_deref(struct netmap_mem_d *nmd)
  3859. -{
  3860. - if (--nmd->active <= 0)
  3861. - netmap_mem_reset_all(nmd);
  3862. -}
  3863. -
  3864. -
  3865. /*
  3866. * allocator for private memory
  3867. */
  3868. -struct netmap_mem_d *
  3869. -netmap_mem_private_new(const char *name, u_int txr, u_int txd,
  3870. - u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int *perr)
  3871. +static struct netmap_mem_d *
  3872. +_netmap_mem_private_new(struct netmap_obj_params *p, int *perr)
  3873. {
  3874. struct netmap_mem_d *d = NULL;
  3875. - struct netmap_obj_params p[NETMAP_POOLS_NR];
  3876. - int i, err;
  3877. - u_int v, maxd;
  3878. + int i, err = 0;
  3879.  
  3880. - d = malloc(sizeof(struct netmap_mem_d),
  3881. - M_DEVBUF, M_NOWAIT | M_ZERO);
  3882. + d = nm_os_malloc(sizeof(struct netmap_mem_d));
  3883. if (d == NULL) {
  3884. err = ENOMEM;
  3885. goto error;
  3886. @@ -1456,7 +1445,41 @@ netmap_mem_private_new(const char *name, u_int txr, u_int txd,
  3887. err = nm_mem_assign_id(d);
  3888. if (err)
  3889. goto error;
  3890. + snprintf(d->name, NM_MEM_NAMESZ, "%d", d->nm_id);
  3891. +
  3892. + for (i = 0; i < NETMAP_POOLS_NR; i++) {
  3893. + snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
  3894. + nm_blueprint.pools[i].name,
  3895. + d->name);
  3896. + d->params[i].num = p[i].num;
  3897. + d->params[i].size = p[i].size;
  3898. + }
  3899.  
  3900. + NMA_LOCK_INIT(d);
  3901. +
  3902. + err = netmap_mem_config(d);
  3903. + if (err)
  3904. + goto error;
  3905. +
  3906. + d->flags &= ~NETMAP_MEM_FINALIZED;
  3907. +
  3908. + return d;
  3909. +
  3910. +error:
  3911. + netmap_mem_delete(d);
  3912. + if (perr)
  3913. + *perr = err;
  3914. + return NULL;
  3915. +}
  3916. +
  3917. +struct netmap_mem_d *
  3918. +netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd,
  3919. + u_int extra_bufs, u_int npipes, int *perr)
  3920. +{
  3921. + struct netmap_mem_d *d = NULL;
  3922. + struct netmap_obj_params p[NETMAP_POOLS_NR];
  3923. + int i, err = 0;
  3924. + u_int v, maxd;
  3925. /* account for the fake host rings */
  3926. txr++;
  3927. rxr++;
  3928. @@ -1502,23 +1525,13 @@ netmap_mem_private_new(const char *name, u_int txr, u_int txd,
  3929. p[NETMAP_BUF_POOL].num,
  3930. p[NETMAP_BUF_POOL].size);
  3931.  
  3932. - for (i = 0; i < NETMAP_POOLS_NR; i++) {
  3933. - snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
  3934. - nm_blueprint.pools[i].name,
  3935. - name);
  3936. - err = netmap_config_obj_allocator(&d->pools[i],
  3937. - p[i].num, p[i].size);
  3938. - if (err)
  3939. - goto error;
  3940. - }
  3941. -
  3942. - d->flags &= ~NETMAP_MEM_FINALIZED;
  3943. -
  3944. - NMA_LOCK_INIT(d);
  3945. + d = _netmap_mem_private_new(p, perr);
  3946. + if (d == NULL)
  3947. + goto error;
  3948.  
  3949. return d;
  3950. error:
  3951. - netmap_mem_private_delete(d);
  3952. + netmap_mem_delete(d);
  3953. if (perr)
  3954. *perr = err;
  3955. return NULL;
  3956. @@ -1527,7 +1540,7 @@ error:
  3957.  
  3958. /* call with lock held */
  3959. static int
  3960. -netmap_mem_global_config(struct netmap_mem_d *nmd)
  3961. +netmap_mem2_config(struct netmap_mem_d *nmd)
  3962. {
  3963. int i;
  3964.  
  3965. @@ -1535,7 +1548,7 @@ netmap_mem_global_config(struct netmap_mem_d *nmd)
  3966. /* already in use, we cannot change the configuration */
  3967. goto out;
  3968.  
  3969. - if (!netmap_memory_config_changed(nmd))
  3970. + if (!netmap_mem_params_changed(nmd->params))
  3971. goto out;
  3972.  
  3973. ND("reconfiguring");
  3974. @@ -1550,7 +1563,7 @@ netmap_mem_global_config(struct netmap_mem_d *nmd)
  3975.  
  3976. for (i = 0; i < NETMAP_POOLS_NR; i++) {
  3977. nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i],
  3978. - netmap_params[i].num, netmap_params[i].size);
  3979. + nmd->params[i].num, nmd->params[i].size);
  3980. if (nmd->lasterr)
  3981. goto out;
  3982. }
  3983. @@ -1561,13 +1574,13 @@ out:
  3984. }
  3985.  
  3986. static int
  3987. -netmap_mem_global_finalize(struct netmap_mem_d *nmd)
  3988. +netmap_mem2_finalize(struct netmap_mem_d *nmd)
  3989. {
  3990. int err;
  3991.  
  3992. /* update configuration if changed */
  3993. - if (netmap_mem_global_config(nmd))
  3994. - return nmd->lasterr;
  3995. + if (netmap_mem2_config(nmd))
  3996. + goto out1;
  3997.  
  3998. nmd->active++;
  3999.  
  4000. @@ -1585,6 +1598,7 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd)
  4001. out:
  4002. if (nmd->lasterr)
  4003. nmd->active--;
  4004. +out1:
  4005. err = nmd->lasterr;
  4006.  
  4007. return err;
  4008. @@ -1592,20 +1606,23 @@ out:
  4009. }
  4010.  
  4011. static void
  4012. -netmap_mem_global_delete(struct netmap_mem_d *nmd)
  4013. +netmap_mem2_delete(struct netmap_mem_d *nmd)
  4014. {
  4015. int i;
  4016.  
  4017. for (i = 0; i < NETMAP_POOLS_NR; i++) {
  4018. - netmap_destroy_obj_allocator(&nm_mem.pools[i]);
  4019. + netmap_destroy_obj_allocator(&nmd->pools[i]);
  4020. }
  4021.  
  4022. - NMA_LOCK_DESTROY(&nm_mem);
  4023. + NMA_LOCK_DESTROY(nmd);
  4024. + if (nmd != &nm_mem)
  4025. + nm_os_free(nmd);
  4026. }
  4027.  
  4028. int
  4029. netmap_mem_init(void)
  4030. {
  4031. + NM_MTX_INIT(nm_mem_list_lock);
  4032. NMA_LOCK_INIT(&nm_mem);
  4033. netmap_mem_get(&nm_mem);
  4034. return (0);
  4035. @@ -1742,7 +1759,7 @@ netmap_mem2_rings_delete(struct netmap_adapter *na)
  4036. * the interface is in netmap mode.
  4037. */
  4038. static struct netmap_if *
  4039. -netmap_mem2_if_new(struct netmap_adapter *na)
  4040. +netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
  4041. {
  4042. struct netmap_if *nifp;
  4043. ssize_t base; /* handy for relative offsets between rings and nifp */
  4044. @@ -1781,24 +1798,28 @@ netmap_mem2_if_new(struct netmap_adapter *na)
  4045. */
  4046. base = netmap_if_offset(na->nm_mem, nifp);
  4047. for (i = 0; i < n[NR_TX]; i++) {
  4048. - if (na->tx_rings[i].ring == NULL) {
  4049. - // XXX maybe use the offset of an error ring,
  4050. - // like we do for buffers?
  4051. - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = 0;
  4052. - continue;
  4053. + /* XXX instead of ofs == 0 maybe use the offset of an error
  4054. + * ring, like we do for buffers? */
  4055. + ssize_t ofs = 0;
  4056. +
  4057. + if (na->tx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_TX]
  4058. + && i < priv->np_qlast[NR_TX]) {
  4059. + ofs = netmap_ring_offset(na->nm_mem,
  4060. + na->tx_rings[i].ring) - base;
  4061. }
  4062. - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] =
  4063. - netmap_ring_offset(na->nm_mem, na->tx_rings[i].ring) - base;
  4064. + *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = ofs;
  4065. }
  4066. for (i = 0; i < n[NR_RX]; i++) {
  4067. - if (na->rx_rings[i].ring == NULL) {
  4068. - // XXX maybe use the offset of an error ring,
  4069. - // like we do for buffers?
  4070. - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = 0;
  4071. - continue;
  4072. + /* XXX instead of ofs == 0 maybe use the offset of an error
  4073. + * ring, like we do for buffers? */
  4074. + ssize_t ofs = 0;
  4075. +
  4076. + if (na->rx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_RX]
  4077. + && i < priv->np_qlast[NR_RX]) {
  4078. + ofs = netmap_ring_offset(na->nm_mem,
  4079. + na->rx_rings[i].ring) - base;
  4080. }
  4081. - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] =
  4082. - netmap_ring_offset(na->nm_mem, na->rx_rings[i].ring) - base;
  4083. + *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = ofs;
  4084. }
  4085.  
  4086. NMA_UNLOCK(na->nm_mem);
  4087. @@ -1821,7 +1842,7 @@ netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
  4088. }
  4089.  
  4090. static void
  4091. -netmap_mem_global_deref(struct netmap_mem_d *nmd)
  4092. +netmap_mem2_deref(struct netmap_mem_d *nmd)
  4093. {
  4094.  
  4095. nmd->active--;
  4096. @@ -1836,25 +1857,11 @@ struct netmap_mem_ops netmap_mem_global_ops = {
  4097. .nmd_get_lut = netmap_mem2_get_lut,
  4098. .nmd_get_info = netmap_mem2_get_info,
  4099. .nmd_ofstophys = netmap_mem2_ofstophys,
  4100. - .nmd_config = netmap_mem_global_config,
  4101. - .nmd_finalize = netmap_mem_global_finalize,
  4102. - .nmd_deref = netmap_mem_global_deref,
  4103. - .nmd_delete = netmap_mem_global_delete,
  4104. - .nmd_if_offset = netmap_mem2_if_offset,
  4105. - .nmd_if_new = netmap_mem2_if_new,
  4106. - .nmd_if_delete = netmap_mem2_if_delete,
  4107. - .nmd_rings_create = netmap_mem2_rings_create,
  4108. - .nmd_rings_delete = netmap_mem2_rings_delete
  4109. -};
  4110. -struct netmap_mem_ops netmap_mem_private_ops = {
  4111. - .nmd_get_lut = netmap_mem2_get_lut,
  4112. - .nmd_get_info = netmap_mem2_get_info,
  4113. - .nmd_ofstophys = netmap_mem2_ofstophys,
  4114. - .nmd_config = netmap_mem_private_config,
  4115. - .nmd_finalize = netmap_mem_private_finalize,
  4116. - .nmd_deref = netmap_mem_private_deref,
  4117. + .nmd_config = netmap_mem2_config,
  4118. + .nmd_finalize = netmap_mem2_finalize,
  4119. + .nmd_deref = netmap_mem2_deref,
  4120. + .nmd_delete = netmap_mem2_delete,
  4121. .nmd_if_offset = netmap_mem2_if_offset,
  4122. - .nmd_delete = netmap_mem_private_delete,
  4123. .nmd_if_new = netmap_mem2_if_new,
  4124. .nmd_if_delete = netmap_mem2_if_delete,
  4125. .nmd_rings_create = netmap_mem2_rings_create,
  4126. @@ -1862,20 +1869,15 @@ struct netmap_mem_ops netmap_mem_private_ops = {
  4127. };
  4128.  
  4129. int
  4130. -netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_adapter *na)
  4131. +netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd)
  4132. {
  4133. uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1;
  4134. struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp);
  4135. - struct netmap_mem_d *nmd = na->nm_mem;
  4136. struct netmap_pools_info pi;
  4137. unsigned int memsize;
  4138. uint16_t memid;
  4139. int ret;
  4140.  
  4141. - if (!nmd) {
  4142. - return -1;
  4143. - }
  4144. -
  4145. ret = netmap_mem_get_info(nmd, &memsize, NULL, &memid);
  4146. if (ret) {
  4147. return ret;
  4148. @@ -1883,6 +1885,7 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_adapter *na)
  4149.  
  4150. pi.memsize = memsize;
  4151. pi.memid = memid;
  4152. + NMA_LOCK(nmd);
  4153. pi.if_pool_offset = 0;
  4154. pi.if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal;
  4155. pi.if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize;
  4156. @@ -1895,6 +1898,7 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_adapter *na)
  4157. nmd->pools[NETMAP_RING_POOL].memtotal;
  4158. pi.buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
  4159. pi.buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
  4160. + NMA_UNLOCK(nmd);
  4161.  
  4162. ret = copyout(&pi, upi, sizeof(pi));
  4163. if (ret) {
  4164. @@ -1929,8 +1933,7 @@ netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp,
  4165. unsigned int nifp_offset)
  4166. {
  4167. struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
  4168. - struct mem_pt_if *ptif = malloc(sizeof(*ptif), M_NETMAP,
  4169. - M_NOWAIT | M_ZERO);
  4170. + struct mem_pt_if *ptif = nm_os_malloc(sizeof(*ptif));
  4171.  
  4172. if (!ptif) {
  4173. return ENOMEM;
  4174. @@ -1989,7 +1992,7 @@ netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *nmd, struct ifnet *ifp)
  4175. }
  4176. D("removed (ifp=%p,nifp_offset=%u)",
  4177. curr->ifp, curr->nifp_offset);
  4178. - free(curr, M_NETMAP);
  4179. + nm_os_free(curr);
  4180. ret = 0;
  4181. break;
  4182. }
  4183. @@ -2143,7 +2146,7 @@ netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd)
  4184. if (ptnmd->ptn_dev) {
  4185. nm_os_pt_memdev_iounmap(ptnmd->ptn_dev);
  4186. }
  4187. - ptnmd->nm_addr = NULL;
  4188. + ptnmd->nm_addr = 0;
  4189. ptnmd->nm_paddr = 0;
  4190. }
  4191. }
  4192. @@ -2165,15 +2168,14 @@ netmap_mem_pt_guest_delete(struct netmap_mem_d *nmd)
  4193. D("deleting %p", nmd);
  4194. if (nmd->active > 0)
  4195. D("bug: deleting mem allocator with active=%d!", nmd->active);
  4196. - nm_mem_release_id(nmd);
  4197. if (netmap_verbose)
  4198. D("done deleting %p", nmd);
  4199. NMA_LOCK_DESTROY(nmd);
  4200. - free(nmd, M_DEVBUF);
  4201. + nm_os_free(nmd);
  4202. }
  4203.  
  4204. static struct netmap_if *
  4205. -netmap_mem_pt_guest_if_new(struct netmap_adapter *na)
  4206. +netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
  4207. {
  4208. struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
  4209. struct mem_pt_if *ptif;
  4210. @@ -2275,7 +2277,7 @@ static struct netmap_mem_ops netmap_mem_pt_guest_ops = {
  4211. .nmd_rings_delete = netmap_mem_pt_guest_rings_delete
  4212. };
  4213.  
  4214. -/* Called with NMA_LOCK(&nm_mem) held. */
  4215. +/* Called with nm_mem_list_lock held. */
  4216. static struct netmap_mem_d *
  4217. netmap_mem_pt_guest_find_memid(nm_memid_t mem_id)
  4218. {
  4219. @@ -2287,6 +2289,8 @@ netmap_mem_pt_guest_find_memid(nm_memid_t mem_id)
  4220. if (scan->ops->nmd_deref == netmap_mem_pt_guest_deref &&
  4221. ((struct netmap_mem_ptg *)(scan))->host_mem_id == mem_id) {
  4222. mem = scan;
  4223. + mem->refcount++;
  4224. + NM_DBG_REFC(mem, __FUNCTION__, __LINE__);
  4225. break;
  4226. }
  4227. scan = scan->next;
  4228. @@ -2295,15 +2299,14 @@ netmap_mem_pt_guest_find_memid(nm_memid_t mem_id)
  4229. return mem;
  4230. }
  4231.  
  4232. -/* Called with NMA_LOCK(&nm_mem) held. */
  4233. +/* Called with nm_mem_list_lock held. */
  4234. static struct netmap_mem_d *
  4235. netmap_mem_pt_guest_create(nm_memid_t mem_id)
  4236. {
  4237. struct netmap_mem_ptg *ptnmd;
  4238. int err = 0;
  4239.  
  4240. - ptnmd = malloc(sizeof(struct netmap_mem_ptg),
  4241. - M_DEVBUF, M_NOWAIT | M_ZERO);
  4242. + ptnmd = nm_os_malloc(sizeof(struct netmap_mem_ptg));
  4243. if (ptnmd == NULL) {
  4244. err = ENOMEM;
  4245. goto error;
  4246. @@ -2323,6 +2326,9 @@ netmap_mem_pt_guest_create(nm_memid_t mem_id)
  4247.  
  4248. NMA_LOCK_INIT(&ptnmd->up);
  4249.  
  4250. + snprintf(ptnmd->up.name, NM_MEM_NAMESZ, "%d", ptnmd->up.nm_id);
  4251. +
  4252. +
  4253. return &ptnmd->up;
  4254. error:
  4255. netmap_mem_pt_guest_delete(&ptnmd->up);
  4256. @@ -2338,12 +2344,12 @@ netmap_mem_pt_guest_get(nm_memid_t mem_id)
  4257. {
  4258. struct netmap_mem_d *nmd;
  4259.  
  4260. - NMA_LOCK(&nm_mem);
  4261. + NM_MTX_LOCK(nm_mem_list_lock);
  4262. nmd = netmap_mem_pt_guest_find_memid(mem_id);
  4263. if (nmd == NULL) {
  4264. nmd = netmap_mem_pt_guest_create(mem_id);
  4265. }
  4266. - NMA_UNLOCK(&nm_mem);
  4267. + NM_MTX_UNLOCK(nm_mem_list_lock);
  4268.  
  4269. return nmd;
  4270. }
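Much of the netmap_mem2.c churn above replaces the NMA_LOCK(&nm_mem) based bookkeeping with a global list of allocators protected by nm_mem_list_lock, and turns netmap_mem_get()/netmap_mem_put() into a reference-counting pair that also drives allocator destruction and id release. A hedged sketch of the intended calling pattern when a memory-allocator id comes in from user space (the wrapper function and its error handling are illustrative only):

/* assumes net/netmap.h and netmap_mem2.h have been included */
static int
resolve_mem_id(nm_memid_t req_memid, struct nmreq *nmr)
{
	struct netmap_mem_d *nmd;
	int error;

	nmd = netmap_mem_find(req_memid);	/* takes a reference, or NULL */
	if (nmd == NULL)
		return EINVAL;

	error = netmap_mem_pools_info_get(nmr, nmd);

	netmap_mem_put(nmd);	/* drop the reference; the last put
				 * releases the id and deletes the allocator */
	return error;
}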
  4271. diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
  4272. index f170df9d549..66e688afd39 100644
  4273. --- a/sys/dev/netmap/netmap_mem2.h
  4274. +++ b/sys/dev/netmap/netmap_mem2.h
  4275. @@ -27,7 +27,7 @@
  4276. */
  4277.  
  4278. /*
  4279. - * $FreeBSD$
  4280. + * $FreeBSD: head/sys/dev/netmap/netmap_mem2.c 234290 2012-04-14 16:44:18Z luigi $
  4281. *
  4282. * (New) memory allocator for netmap
  4283. */
  4284. @@ -119,8 +119,10 @@
  4285. */
  4286.  
  4287. extern struct netmap_mem_d nm_mem;
  4288. +typedef uint16_t nm_memid_t;
  4289.  
  4290. int netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *);
  4291. +nm_memid_t netmap_mem_get_id(struct netmap_mem_d *);
  4292. vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
  4293. #ifdef _WIN32
  4294. PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd);
  4295. @@ -128,7 +130,7 @@ PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd);
  4296. int netmap_mem_finalize(struct netmap_mem_d *, struct netmap_adapter *);
  4297. int netmap_mem_init(void);
  4298. void netmap_mem_fini(void);
  4299. -struct netmap_if * netmap_mem_if_new(struct netmap_adapter *);
  4300. +struct netmap_if * netmap_mem_if_new(struct netmap_adapter *, struct netmap_priv_d *);
  4301. void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
  4302. int netmap_mem_rings_create(struct netmap_adapter *);
  4303. void netmap_mem_rings_delete(struct netmap_adapter *);
  4304. @@ -136,33 +138,15 @@ void netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
  4305. int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *);
  4306. int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
  4307. ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
  4308. -struct netmap_mem_d* netmap_mem_private_new(const char *name,
  4309. - u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes,
  4310. - int* error);
  4311. +struct netmap_mem_d* netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd,
  4312. + u_int extra_bufs, u_int npipes, int* error);
  4313. void netmap_mem_delete(struct netmap_mem_d *);
  4314.  
  4315. -//#define NM_DEBUG_MEM_PUTGET 1
  4316. -
  4317. -#ifdef NM_DEBUG_MEM_PUTGET
  4318. -
  4319. -#define netmap_mem_get(nmd) \
  4320. - do { \
  4321. - __netmap_mem_get(nmd, __FUNCTION__, __LINE__); \
  4322. - } while (0)
  4323. -
  4324. -#define netmap_mem_put(nmd) \
  4325. - do { \
  4326. - __netmap_mem_put(nmd, __FUNCTION__, __LINE__); \
  4327. - } while (0)
  4328. -
  4329. -void __netmap_mem_get(struct netmap_mem_d *, const char *, int);
  4330. +#define netmap_mem_get(d) __netmap_mem_get(d, __FUNCTION__, __LINE__)
  4331. +#define netmap_mem_put(d) __netmap_mem_put(d, __FUNCTION__, __LINE__)
  4332. +struct netmap_mem_d* __netmap_mem_get(struct netmap_mem_d *, const char *, int);
  4333. void __netmap_mem_put(struct netmap_mem_d *, const char *, int);
  4334. -#else /* !NM_DEBUG_MEM_PUTGET */
  4335. -
  4336. -void netmap_mem_get(struct netmap_mem_d *);
  4337. -void netmap_mem_put(struct netmap_mem_d *);
  4338. -
  4339. -#endif /* !NM_DEBUG_PUTGET */
  4340. +struct netmap_mem_d* netmap_mem_find(nm_memid_t);
  4341.  
  4342. #ifdef WITH_PTNETMAP_GUEST
  4343. struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *,
  4344. @@ -173,7 +157,7 @@ struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16
  4345. int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *);
  4346. #endif /* WITH_PTNETMAP_GUEST */
  4347.  
  4348. -int netmap_mem_pools_info_get(struct nmreq *, struct netmap_adapter *);
  4349. +int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *);
  4350.  
  4351. #define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
  4352. #define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */
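Note that netmap_mem_private_new() above loses its name argument: private allocators now name themselves after their numeric id. A sketch of a call site under the new prototype (the ring and slot counts, and the assignment to na->nm_mem, are illustrative):

/* assumes netmap_kern.h and netmap_mem2.h have been included */
static int
attach_private_mem(struct netmap_adapter *na)
{
	int error = 0;

	na->nm_mem = netmap_mem_private_new(2 /* txr */, 1024 /* txd */,
			2 /* rxr */, 1024 /* rxd */,
			0 /* extra_bufs */, 0 /* npipes */, &error);
	if (na->nm_mem == NULL)
		return error;
	/* the allocator is created with one reference already held */
	return 0;
}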
  4353. diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c
  4354. index bf6e23f5546..174f35e5c6c 100644
  4355. --- a/sys/dev/netmap/netmap_monitor.c
  4356. +++ b/sys/dev/netmap/netmap_monitor.c
  4357. @@ -25,7 +25,7 @@
  4358. */
  4359.  
  4360. /*
  4361. - * $FreeBSD$
  4362. + * $FreeBSD: head/sys/dev/netmap/netmap_zmon.c 270063 2014-08-16 15:00:01Z luigi $
  4363. *
  4364. * Monitors
  4365. *
  4366. @@ -128,6 +128,13 @@
  4367. ********************************************************************
  4368. */
  4369.  
  4370. +static int netmap_zmon_reg(struct netmap_adapter *, int);
  4371. +static int
  4372. +nm_is_zmon(struct netmap_adapter *na)
  4373. +{
  4374. + return na->nm_register == netmap_zmon_reg;
  4375. +}
  4376. +
  4377. /* nm_sync callback for the monitor's own tx rings.
  4378. * This makes no sense and always returns error
  4379. */
  4380. @@ -148,7 +155,7 @@ static int
  4381. netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
  4382. {
  4383. ND("%s %x", kring->name, flags);
  4384. - kring->nr_hwcur = kring->rcur;
  4385. + kring->nr_hwcur = kring->rhead;
  4386. mb();
  4387. return 0;
  4388. }
  4389. @@ -185,19 +192,16 @@ nm_txrx2flag(enum txrx t)
  4390. static int
  4391. nm_monitor_alloc(struct netmap_kring *kring, u_int n)
  4392. {
  4393. - size_t len;
  4394. + size_t old_len, len;
  4395. struct netmap_kring **nm;
  4396.  
  4397. if (n <= kring->max_monitors)
  4398. /* we already have more entries that requested */
  4399. return 0;
  4400.  
  4401. + old_len = sizeof(struct netmap_kring *)*kring->max_monitors;
  4402. len = sizeof(struct netmap_kring *) * n;
  4403. -#ifndef _WIN32
  4404. - nm = realloc(kring->monitors, len, M_DEVBUF, M_NOWAIT | M_ZERO);
  4405. -#else
  4406. - nm = realloc(kring->monitors, len, sizeof(struct netmap_kring *)*kring->max_monitors);
  4407. -#endif
  4408. + nm = nm_os_realloc(kring->monitors, len, old_len);
  4409. if (nm == NULL)
  4410. return ENOMEM;
  4411.  
  4412. @@ -216,13 +220,22 @@ nm_monitor_dealloc(struct netmap_kring *kring)
  4413. D("freeing not empty monitor array for %s (%d dangling monitors)!", kring->name,
  4414. kring->n_monitors);
  4415. }
  4416. - free(kring->monitors, M_DEVBUF);
  4417. + nm_os_free(kring->monitors);
  4418. kring->monitors = NULL;
  4419. kring->max_monitors = 0;
  4420. kring->n_monitors = 0;
  4421. }
  4422. }
  4423.  
  4424. +/* returns 1 iff kring has no monitors */
  4425. +static inline int
  4426. +nm_monitor_none(struct netmap_kring *kring)
  4427. +{
  4428. + return kring->n_monitors == 0 &&
  4429. + kring->zmon_list[NR_TX].next == NULL &&
  4430. + kring->zmon_list[NR_RX].next == NULL;
  4431. +}
  4432. +
  4433. /*
  4434. * monitors work by replacing the nm_sync() and possibly the
  4435. * nm_notify() callbacks in the monitored rings.
  4436. @@ -233,71 +246,122 @@ static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
  4437. static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
  4438. static int netmap_monitor_parent_notify(struct netmap_kring *, int);
  4439.  
  4440. -
  4441. /* add the monitor mkring to the list of monitors of kring.
  4442. * If this is the first monitor, intercept the callbacks
  4443. */
  4444. static int
  4445. -netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zcopy)
  4446. +netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zmon)
  4447. {
  4448. int error = NM_IRQ_COMPLETED;
  4449. + enum txrx t = kring->tx;
  4450. + struct netmap_zmon_list *z = &kring->zmon_list[t];
  4451. + struct netmap_zmon_list *mz = &mkring->zmon_list[t];
  4452. +
  4453. + /* a zero-copy monitor which is not the first in the list
  4454. + * must monitor the previous monitor
  4455. + */
  4456. + if (zmon && z->prev != NULL)
  4457. + kring = z->prev;
  4458.  
  4459. /* sinchronize with concurrently running nm_sync()s */
  4460. nm_kr_stop(kring, NM_KR_LOCKED);
  4461. - /* make sure the monitor array exists and is big enough */
  4462. - error = nm_monitor_alloc(kring, kring->n_monitors + 1);
  4463. - if (error)
  4464. - goto out;
  4465. - kring->monitors[kring->n_monitors] = mkring;
  4466. - mkring->mon_pos = kring->n_monitors;
  4467. - kring->n_monitors++;
  4468. - if (kring->n_monitors == 1) {
  4469. +
  4470. + if (nm_monitor_none(kring)) {
  4471. /* this is the first monitor, intercept callbacks */
  4472. - ND("%s: intercept callbacks on %s", mkring->name, kring->name);
  4473. + ND("intercept callbacks on %s", kring->name);
  4474. kring->mon_sync = kring->nm_sync;
  4475. - /* zcopy monitors do not override nm_notify(), but
  4476. - * we save the original one regardless, so that
  4477. - * netmap_monitor_del() does not need to know the
  4478. - * monitor type
  4479. - */
  4480. kring->mon_notify = kring->nm_notify;
  4481. if (kring->tx == NR_TX) {
  4482. - kring->nm_sync = (zcopy ? netmap_zmon_parent_txsync :
  4483. - netmap_monitor_parent_txsync);
  4484. + kring->nm_sync = netmap_monitor_parent_txsync;
  4485. } else {
  4486. - kring->nm_sync = (zcopy ? netmap_zmon_parent_rxsync :
  4487. - netmap_monitor_parent_rxsync);
  4488. - if (!zcopy) {
  4489. - /* also intercept notify */
  4490. - kring->nm_notify = netmap_monitor_parent_notify;
  4491. - kring->mon_tail = kring->nr_hwtail;
  4492. - }
  4493. + kring->nm_sync = netmap_monitor_parent_rxsync;
  4494. + kring->nm_notify = netmap_monitor_parent_notify;
  4495. + kring->mon_tail = kring->nr_hwtail;
  4496. }
  4497. }
  4498.  
  4499. + if (zmon) {
  4500. + /* append the zmon to the list */
  4501. + struct netmap_monitor_adapter *mna =
  4502. + (struct netmap_monitor_adapter *)mkring->na;
  4503. + struct netmap_adapter *pna;
  4504. +
  4505. + if (z->prev != NULL)
  4506. + z->prev->zmon_list[t].next = mkring;
  4507. + mz->prev = z->prev;
  4508. + z->prev = mkring;
  4509. + if (z->next == NULL)
  4510. + z->next = mkring;
  4511. +
  4512. + /* grab a reference to the previous netmap adapter
  4513. + * in the chain (this may be the monitored port
  4514. + * or another zero-copy monitor)
  4515. + */
  4516. + pna = kring->na;
  4517. + netmap_adapter_get(pna);
  4518. + netmap_adapter_put(mna->priv.np_na);
  4519. + mna->priv.np_na = pna;
  4520. + } else {
  4521. + /* make sure the monitor array exists and is big enough */
  4522. + error = nm_monitor_alloc(kring, kring->n_monitors + 1);
  4523. + if (error)
  4524. + goto out;
  4525. + kring->monitors[kring->n_monitors] = mkring;
  4526. + mkring->mon_pos[kring->tx] = kring->n_monitors;
  4527. + kring->n_monitors++;
  4528. + }
  4529. +
  4530. out:
  4531. nm_kr_start(kring);
  4532. return error;
  4533. }
  4534.  
  4535. -
  4536. /* remove the monitor mkring from the list of monitors of kring.
  4537. * If this is the last monitor, restore the original callbacks
  4538. */
  4539. static void
  4540. netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
  4541. {
  4542. + struct netmap_zmon_list *mz = &mkring->zmon_list[kring->tx];
  4543. + int zmon = nm_is_zmon(mkring->na);
  4544. +
  4545. +
  4546. + if (zmon && mz->prev != NULL)
  4547. + kring = mz->prev;
  4548. +
  4549. /* sinchronize with concurrently running nm_sync()s */
  4550. nm_kr_stop(kring, NM_KR_LOCKED);
  4551. - kring->n_monitors--;
  4552. - if (mkring->mon_pos != kring->n_monitors) {
  4553. - kring->monitors[mkring->mon_pos] = kring->monitors[kring->n_monitors];
  4554. - kring->monitors[mkring->mon_pos]->mon_pos = mkring->mon_pos;
  4555. +
  4556. + if (zmon) {
  4557. + /* remove the monitor from the list */
  4558. + if (mz->prev != NULL)
  4559. + mz->prev->zmon_list[kring->tx].next = mz->next;
  4560. + else
  4561. + kring->zmon_list[kring->tx].next = mz->next;
  4562. + if (mz->next != NULL) {
  4563. + mz->next->zmon_list[kring->tx].prev = mz->prev;
  4564. + } else {
  4565. + kring->zmon_list[kring->tx].prev = mz->prev;
  4566. + }
  4567. + } else {
  4568. + /* this is a copy monitor */
  4569. + uint32_t mon_pos = mkring->mon_pos[kring->tx];
  4570. + kring->n_monitors--;
  4571. + if (mon_pos != kring->n_monitors) {
  4572. + kring->monitors[mon_pos] =
  4573. + kring->monitors[kring->n_monitors];
  4574. + kring->monitors[mon_pos]->mon_pos[kring->tx] = mon_pos;
  4575. + }
  4576. + kring->monitors[kring->n_monitors] = NULL;
  4577. + if (kring->n_monitors == 0) {
  4578. + nm_monitor_dealloc(kring);
  4579. + }
  4580. }
  4581. - kring->monitors[kring->n_monitors] = NULL;
  4582. - if (kring->n_monitors == 0) {
  4583. - /* this was the last monitor, restore callbacks and delete monitor array */
  4584. - ND("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync);
  4585. +
  4586. + if (nm_monitor_none(kring)) {
  4587. + /* this was the last monitor, restore the callbacks */
  4588. + ND("%s: restoring sync on %s: %p", mkring->name, kring->name,
  4589. + kring->mon_sync);
  4590. kring->nm_sync = kring->mon_sync;
  4591. kring->mon_sync = NULL;
  4592. if (kring->tx == NR_RX) {
  4593. @@ -306,8 +370,8 @@ netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
  4594. kring->nm_notify = kring->mon_notify;
  4595. kring->mon_notify = NULL;
  4596. }
  4597. - nm_monitor_dealloc(kring);
  4598. }
  4599. +
  4600. nm_kr_start(kring);
  4601. }
  4602.  
  4603. @@ -329,6 +393,7 @@ netmap_monitor_stop(struct netmap_adapter *na)
  4604.  
  4605. for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
  4606. struct netmap_kring *kring = &NMR(na, t)[i];
  4607. + struct netmap_kring *zkring;
  4608. u_int j;
  4609.  
  4610. for (j = 0; j < kring->n_monitors; j++) {
  4611. @@ -337,8 +402,33 @@ netmap_monitor_stop(struct netmap_adapter *na)
  4612. struct netmap_monitor_adapter *mna =
  4613. (struct netmap_monitor_adapter *)mkring->na;
  4614. /* forget about this adapter */
  4615. - netmap_adapter_put(mna->priv.np_na);
  4616. - mna->priv.np_na = NULL;
  4617. + if (mna->priv.np_na != NULL) {
  4618. + netmap_adapter_put(mna->priv.np_na);
  4619. + mna->priv.np_na = NULL;
  4620. + }
  4621. + }
  4622. +
  4623. + zkring = kring->zmon_list[kring->tx].next;
  4624. + if (zkring != NULL) {
  4625. + struct netmap_monitor_adapter *next =
  4626. + (struct netmap_monitor_adapter *)zkring->na;
  4627. + struct netmap_monitor_adapter *this =
  4628. + (struct netmap_monitor_adapter *)na;
  4629. + struct netmap_adapter *pna = this->priv.np_na;
  4630. + /* let the next monitor forget about us */
  4631. + if (next->priv.np_na != NULL) {
  4632. + netmap_adapter_put(next->priv.np_na);
  4633. + }
  4634. + if (pna != NULL && nm_is_zmon(na)) {
  4635. + /* we are a monitor ourselves and we may
  4636. + * need to pass down the reference to
  4637. + * the previous adapter in the chain
  4638. + */
  4639. + netmap_adapter_get(pna);
  4640. + next->priv.np_na = pna;
  4641. + continue;
  4642. + }
  4643. + next->priv.np_na = NULL;
  4644. }
  4645. }
  4646. }
  4647. @@ -357,7 +447,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
  4648. struct netmap_adapter *pna = priv->np_na;
  4649. struct netmap_kring *kring, *mkring;
  4650. int i;
  4651. - enum txrx t;
  4652. + enum txrx t, s;
  4653.  
  4654. ND("%p: onoff %d", na, onoff);
  4655. if (onoff) {
  4656. @@ -367,13 +457,19 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
  4657. return ENXIO;
  4658. }
  4659. for_rx_tx(t) {
  4660. - if (mna->flags & nm_txrx2flag(t)) {
  4661. - for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
  4662. - kring = &NMR(pna, t)[i];
  4663. - mkring = &na->rx_rings[i];
  4664. - if (nm_kring_pending_on(mkring)) {
  4665. + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
  4666. + mkring = &NMR(na, t)[i];
  4667. + if (!nm_kring_pending_on(mkring))
  4668. + continue;
  4669. + mkring->nr_mode = NKR_NETMAP_ON;
  4670. + if (t == NR_TX)
  4671. + continue;
  4672. + for_rx_tx(s) {
  4673. + if (i > nma_get_nrings(pna, s))
  4674. + continue;
  4675. + if (mna->flags & nm_txrx2flag(s)) {
  4676. + kring = &NMR(pna, s)[i];
  4677. netmap_monitor_add(mkring, kring, zmon);
  4678. - mkring->nr_mode = NKR_NETMAP_ON;
  4679. }
  4680. }
  4681. }
  4682. @@ -383,19 +479,25 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
  4683. if (na->active_fds == 0)
  4684. na->na_flags &= ~NAF_NETMAP_ON;
  4685. for_rx_tx(t) {
  4686. - if (mna->flags & nm_txrx2flag(t)) {
  4687. - for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
  4688. - mkring = &na->rx_rings[i];
  4689. - if (nm_kring_pending_off(mkring)) {
  4690. - mkring->nr_mode = NKR_NETMAP_OFF;
  4691. - /* we cannot access the parent krings if the parent
  4692. - * has left netmap mode. This is signaled by a NULL
  4693. - * pna pointer
  4694. - */
  4695. - if (pna) {
  4696. - kring = &NMR(pna, t)[i];
  4697. - netmap_monitor_del(mkring, kring);
  4698. - }
  4699. + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
  4700. + mkring = &NMR(na, t)[i];
  4701. + if (!nm_kring_pending_off(mkring))
  4702. + continue;
  4703. + mkring->nr_mode = NKR_NETMAP_OFF;
  4704. + if (t == NR_TX)
  4705. + continue;
  4706. + /* we cannot access the parent krings if the parent
  4707. + * has left netmap mode. This is signaled by a NULL
  4708. + * pna pointer
  4709. + */
  4710. + if (pna == NULL)
  4711. + continue;
  4712. + for_rx_tx(s) {
  4713. + if (i > nma_get_nrings(pna, s))
  4714. + continue;
  4715. + if (mna->flags & nm_txrx2flag(s)) {
  4716. + kring = &NMR(pna, s)[i];
  4717. + netmap_monitor_del(mkring, kring);
  4718. }
  4719. }
  4720. }
  4721. @@ -417,7 +519,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
  4722. static int
  4723. netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
  4724. {
  4725. - struct netmap_kring *mkring = kring->monitors[0];
  4726. + struct netmap_kring *mkring = kring->zmon_list[tx].next;
  4727. struct netmap_ring *ring = kring->ring, *mring;
  4728. int error = 0;
  4729. int rel_slots, free_slots, busy, sent = 0;
  4730. @@ -434,11 +536,11 @@ netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
  4731.  
  4732. /* get the released slots (rel_slots) */
  4733. if (tx == NR_TX) {
  4734. - beg = kring->nr_hwtail;
  4735. + beg = kring->nr_hwtail + 1;
  4736. error = kring->mon_sync(kring, flags);
  4737. if (error)
  4738. return error;
  4739. - end = kring->nr_hwtail;
  4740. + end = kring->nr_hwtail + 1;
  4741. } else { /* NR_RX */
  4742. beg = kring->nr_hwcur;
  4743. end = kring->rhead;
  4744. @@ -473,10 +575,10 @@ netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
  4745. /* swap min(free_slots, rel_slots) slots */
  4746. if (free_slots < rel_slots) {
  4747. beg += (rel_slots - free_slots);
  4748. - if (beg >= kring->nkr_num_slots)
  4749. - beg -= kring->nkr_num_slots;
  4750. rel_slots = free_slots;
  4751. }
  4752. + if (unlikely(beg >= kring->nkr_num_slots))
  4753. + beg -= kring->nkr_num_slots;
  4754.  
  4755. sent = rel_slots;
  4756. for ( ; rel_slots; rel_slots--) {
  4757. @@ -521,7 +623,6 @@ out_rxsync:
  4758. static int
  4759. netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
  4760. {
  4761. - ND("%s %x", kring->name, flags);
  4762. return netmap_zmon_parent_sync(kring, flags, NR_TX);
  4763. }
  4764.  
  4765. @@ -529,11 +630,9 @@ netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
  4766. static int
  4767. netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
  4768. {
  4769. - ND("%s %x", kring->name, flags);
  4770. return netmap_zmon_parent_sync(kring, flags, NR_RX);
  4771. }
  4772.  
  4773. -
  4774. static int
  4775. netmap_zmon_reg(struct netmap_adapter *na, int onoff)
  4776. {
  4777. @@ -638,12 +737,17 @@ netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
  4778. int new_slots;
  4779.  
  4780. /* get the new slots */
  4781. - first_new = kring->nr_hwcur;
  4782. - new_slots = kring->rhead - first_new;
  4783. - if (new_slots < 0)
  4784. - new_slots += kring->nkr_num_slots;
  4785. - if (new_slots)
  4786. - netmap_monitor_parent_sync(kring, first_new, new_slots);
  4787. + if (kring->n_monitors > 0) {
  4788. + first_new = kring->nr_hwcur;
  4789. + new_slots = kring->rhead - first_new;
  4790. + if (new_slots < 0)
  4791. + new_slots += kring->nkr_num_slots;
  4792. + if (new_slots)
  4793. + netmap_monitor_parent_sync(kring, first_new, new_slots);
  4794. + }
  4795. + if (kring->zmon_list[NR_TX].next != NULL) {
  4796. + return netmap_zmon_parent_txsync(kring, flags);
  4797. + }
  4798. return kring->mon_sync(kring, flags);
  4799. }
  4800.  
  4801. @@ -655,16 +759,22 @@ netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
  4802. int new_slots, error;
  4803.  
  4804. /* get the new slots */
  4805. - error = kring->mon_sync(kring, flags);
  4806. + if (kring->zmon_list[NR_RX].next != NULL) {
  4807. + error = netmap_zmon_parent_rxsync(kring, flags);
  4808. + } else {
  4809. + error = kring->mon_sync(kring, flags);
  4810. + }
  4811. if (error)
  4812. return error;
  4813. - first_new = kring->mon_tail;
  4814. - new_slots = kring->nr_hwtail - first_new;
  4815. - if (new_slots < 0)
  4816. - new_slots += kring->nkr_num_slots;
  4817. - if (new_slots)
  4818. - netmap_monitor_parent_sync(kring, first_new, new_slots);
  4819. - kring->mon_tail = kring->nr_hwtail;
  4820. + if (kring->n_monitors > 0) {
  4821. + first_new = kring->mon_tail;
  4822. + new_slots = kring->nr_hwtail - first_new;
  4823. + if (new_slots < 0)
  4824. + new_slots += kring->nkr_num_slots;
  4825. + if (new_slots)
  4826. + netmap_monitor_parent_sync(kring, first_new, new_slots);
  4827. + kring->mon_tail = kring->nr_hwtail;
  4828. + }
  4829. return 0;
  4830. }
  4831.  
  4832. @@ -684,12 +794,14 @@ netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
  4833. }
  4834. if (kring->n_monitors > 0) {
  4835. netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
  4836. - notify = kring->mon_notify;
  4837. - } else {
  4838. + }
  4839. + if (nm_monitor_none(kring)) {
  4840. /* we are no longer monitoring this ring, so both
  4841. * mon_sync and mon_notify are NULL
  4842. */
  4843. notify = kring->nm_notify;
  4844. + } else {
  4845. + notify = kring->mon_notify;
  4846. }
  4847. nm_kr_put(kring);
  4848. return notify(kring, flags);
  4849. @@ -716,24 +828,21 @@ netmap_monitor_dtor(struct netmap_adapter *na)
  4850.  
  4851. /* check if nmr is a request for a monitor adapter that we can satisfy */
  4852. int
  4853. -netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  4854. +netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
  4855. + struct netmap_mem_d *nmd, int create)
  4856. {
  4857. struct nmreq pnmr;
  4858. struct netmap_adapter *pna; /* parent adapter */
  4859. struct netmap_monitor_adapter *mna;
  4860. struct ifnet *ifp = NULL;
  4861. - int i, error;
  4862. - enum txrx t;
  4863. + int error;
  4864. int zcopy = (nmr->nr_flags & NR_ZCOPY_MON);
  4865. char monsuff[10] = "";
  4866.  
  4867. + if (zcopy) {
  4868. + nmr->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
  4869. + }
  4870. if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
  4871. - if (nmr->nr_flags & NR_ZCOPY_MON) {
  4872. - /* the flag makes no sense unless you are
  4873. - * creating a monitor
  4874. - */
  4875. - return EINVAL;
  4876. - }
  4877. ND("not a monitor");
  4878. return 0;
  4879. }
  4880. @@ -741,12 +850,6 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  4881.  
  4882. ND("flags %x", nmr->nr_flags);
  4883.  
  4884. - mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
  4885. - if (mna == NULL) {
  4886. - D("memory error");
  4887. - return ENOMEM;
  4888. - }
  4889. -
  4890. /* first, try to find the adapter that we want to monitor
  4891. * We use the same nmr, after we have turned off the monitor flags.
  4892. * In this way we can potentially monitor everything netmap understands,
  4893. @@ -754,10 +857,9 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  4894. */
  4895. memcpy(&pnmr, nmr, sizeof(pnmr));
  4896. pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
  4897. - error = netmap_get_na(&pnmr, &pna, &ifp, create);
  4898. + error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create);
  4899. if (error) {
  4900. D("parent lookup failed: %d", error);
  4901. - free(mna, M_DEVBUF);
  4902. return error;
  4903. }
  4904. ND("found parent: %s", pna->name);
  4905. @@ -772,12 +874,19 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  4906. goto put_out;
  4907. }
  4908.  
  4909. - /* grab all the rings we need in the parent */
  4910. + mna = nm_os_malloc(sizeof(*mna));
  4911. + if (mna == NULL) {
  4912. + D("memory error");
  4913. + error = ENOMEM;
  4914. + goto put_out;
  4915. + }
  4916. mna->priv.np_na = pna;
  4917. +
  4918. + /* grab all the rings we need in the parent */
  4919. error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
  4920. if (error) {
  4921. D("ringid error");
  4922. - goto put_out;
  4923. + goto free_out;
  4924. }
  4925. if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) {
  4926. snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]);
  4927. @@ -788,57 +897,14 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  4928. (nmr->nr_flags & NR_MONITOR_RX) ? "r" : "",
  4929. (nmr->nr_flags & NR_MONITOR_TX) ? "t" : "");
  4930.  
  4931. - if (zcopy) {
  4932. - /* zero copy monitors need exclusive access to the monitored rings */
  4933. - for_rx_tx(t) {
  4934. - if (! (nmr->nr_flags & nm_txrx2flag(t)))
  4935. - continue;
  4936. - for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
  4937. - struct netmap_kring *kring = &NMR(pna, t)[i];
  4938. - if (kring->n_monitors > 0) {
  4939. - error = EBUSY;
  4940. - D("ring %s already monitored by %s", kring->name,
  4941. - kring->monitors[0]->name);
  4942. - goto put_out;
  4943. - }
  4944. - }
  4945. - }
  4946. - mna->up.nm_register = netmap_zmon_reg;
  4947. - mna->up.nm_dtor = netmap_zmon_dtor;
  4948. - /* to have zero copy, we need to use the same memory allocator
  4949. - * as the monitored port
  4950. - */
  4951. - mna->up.nm_mem = pna->nm_mem;
  4952. - mna->up.na_lut = pna->na_lut;
  4953. - } else {
  4954. - /* normal monitors are incompatible with zero copy ones */
  4955. - for_rx_tx(t) {
  4956. - if (! (nmr->nr_flags & nm_txrx2flag(t)))
  4957. - continue;
  4958. - for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
  4959. - struct netmap_kring *kring = &NMR(pna, t)[i];
  4960. - if (kring->n_monitors > 0 &&
  4961. - kring->monitors[0]->na->nm_register == netmap_zmon_reg)
  4962. - {
  4963. - error = EBUSY;
  4964. - D("ring busy");
  4965. - goto put_out;
  4966. - }
  4967. - }
  4968. - }
  4969. - mna->up.nm_rxsync = netmap_monitor_rxsync;
  4970. - mna->up.nm_register = netmap_monitor_reg;
  4971. - mna->up.nm_dtor = netmap_monitor_dtor;
  4972. - }
  4973. -
  4974. /* the monitor supports the host rings iff the parent does */
  4975. - mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
  4976. + mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS);
  4977. /* a do-nothing txsync: monitors cannot be used to inject packets */
  4978. mna->up.nm_txsync = netmap_monitor_txsync;
  4979. mna->up.nm_rxsync = netmap_monitor_rxsync;
  4980. mna->up.nm_krings_create = netmap_monitor_krings_create;
  4981. mna->up.nm_krings_delete = netmap_monitor_krings_delete;
  4982. - mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
  4983. + mna->up.num_tx_rings = 1; // XXX what should we do here with chained zmons?
  4984. /* we set the number of our rx_rings to be max(num_rx_rings, num_tx_rings)
  4985. * in the parent
  4986. */
  4987. @@ -855,14 +921,38 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  4988. mna->up.num_rx_desc = nmr->nr_rx_slots;
  4989. nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
  4990. 1, NM_MONITOR_MAXSLOTS, NULL);
  4991. + if (zcopy) {
  4992. + mna->up.nm_register = netmap_zmon_reg;
  4993. + mna->up.nm_dtor = netmap_zmon_dtor;
  4994. + /* to have zero copy, we need to use the same memory allocator
  4995. + * as the monitored port
  4996. + */
  4997. + mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
  4998. + /* and the allocator cannot be changed */
  4999. + mna->up.na_flags |= NAF_MEM_OWNER;
  5000. + } else {
  5001. + mna->up.nm_register = netmap_monitor_reg;
  5002. + mna->up.nm_dtor = netmap_monitor_dtor;
  5003. + mna->up.nm_mem = netmap_mem_private_new(
  5004. + mna->up.num_tx_rings,
  5005. + mna->up.num_tx_desc,
  5006. + mna->up.num_rx_rings,
  5007. + mna->up.num_rx_desc,
  5008. + 0, /* extra bufs */
  5009. + 0, /* pipes */
  5010. + &error);
  5011. + if (mna->up.nm_mem == NULL)
  5012. + goto put_out;
  5013. + }
  5014. +
  5015. error = netmap_attach_common(&mna->up);
  5016. if (error) {
  5017. D("attach_common error");
  5018. - goto put_out;
  5019. + goto mem_put_out;
  5020. }
  5021.  
  5022. /* remember the traffic directions we have to monitor */
  5023. - mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX));
  5024. + mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON));
  5025.  
  5026. *na = &mna->up;
  5027. netmap_adapter_get(*na);
  5028. @@ -876,9 +966,12 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5029.  
  5030. return 0;
  5031.  
  5032. +mem_put_out:
  5033. + netmap_mem_put(mna->up.nm_mem);
  5034. +free_out:
  5035. + nm_os_free(mna);
  5036. put_out:
  5037. netmap_unget_na(pna, ifp);
  5038. - free(mna, M_DEVBUF);
  5039. return error;
  5040. }
  5041.  
  5042. diff --git a/sys/dev/netmap/netmap_offloadings.c b/sys/dev/netmap/netmap_offloadings.c
  5043. index f8da672ffa5..8e5de7f7a9f 100644
  5044. --- a/sys/dev/netmap/netmap_offloadings.c
  5045. +++ b/sys/dev/netmap/netmap_offloadings.c
  5046. @@ -24,7 +24,7 @@
  5047. * SUCH DAMAGE.
  5048. */
  5049.  
  5050. -/* $FreeBSD$ */
  5051. +/* $FreeBSD: head/sys/dev/netmap/netmap_offloadings.c 261909 2014-02-15 04:53:04Z luigi $ */
  5052.  
  5053. #if defined(__FreeBSD__)
  5054. #include <sys/cdefs.h> /* prerequisite */
  5055. diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c
  5056. index f00f73f8b9b..36f5a3c9d9b 100644
  5057. --- a/sys/dev/netmap/netmap_pipe.c
  5058. +++ b/sys/dev/netmap/netmap_pipe.c
  5059. @@ -24,7 +24,7 @@
  5060. * SUCH DAMAGE.
  5061. */
  5062.  
  5063. -/* $FreeBSD$ */
  5064. +/* $FreeBSD: head/sys/dev/netmap/netmap_pipe.c 261909 2014-02-15 04:53:04Z luigi $ */
  5065.  
  5066. #if defined(__FreeBSD__)
  5067. #include <sys/cdefs.h> /* prerequisite */
  5068. @@ -86,7 +86,7 @@ SYSEND;
  5069. static int
  5070. nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
  5071. {
  5072. - size_t len;
  5073. + size_t old_len, len;
  5074. struct netmap_pipe_adapter **npa;
  5075.  
  5076. if (npipes <= na->na_max_pipes)
  5077. @@ -96,12 +96,9 @@ nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
  5078. if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES)
  5079. return EINVAL;
  5080.  
  5081. + old_len = sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes;
  5082. len = sizeof(struct netmap_pipe_adapter *) * npipes;
  5083. -#ifndef _WIN32
  5084. - npa = realloc(na->na_pipes, len, M_DEVBUF, M_NOWAIT | M_ZERO);
  5085. -#else
  5086. - npa = realloc(na->na_pipes, len, sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes);
  5087. -#endif
  5088. + npa = nm_os_realloc(na->na_pipes, len, old_len);
  5089. if (npa == NULL)
  5090. return ENOMEM;
  5091.  
  5092. @@ -120,7 +117,7 @@ netmap_pipe_dealloc(struct netmap_adapter *na)
  5093. D("freeing not empty pipe array for %s (%d dangling pipes)!", na->name,
  5094. na->na_next_pipe);
  5095. }
  5096. - free(na->na_pipes, M_DEVBUF);
  5097. + nm_os_free(na->na_pipes);
  5098. na->na_pipes = NULL;
  5099. na->na_max_pipes = 0;
  5100. na->na_next_pipe = 0;
  5101. @@ -175,7 +172,7 @@ netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na
  5102. parent->na_pipes[n] = NULL;
  5103. }
  5104.  
  5105. -static int
  5106. +int
  5107. netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
  5108. {
  5109. struct netmap_kring *rxkring = txkring->pipe;
  5110. @@ -240,7 +237,7 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
  5111. return 0;
  5112. }
  5113.  
  5114. -static int
  5115. +int
  5116. netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
  5117. {
  5118. struct netmap_kring *txkring = rxkring->pipe;
  5119. @@ -289,7 +286,7 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
  5120. */
  5121.  
  5122.  
  5123. -/* netmap_pipe_krings_delete.
  5124. +/* netmap_pipe_krings_create.
  5125. *
  5126. * There are two cases:
  5127. *
  5128. @@ -320,7 +317,7 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
  5129. int i;
  5130.  
  5131. /* case 1) above */
  5132. - D("%p: case 1, create both ends", na);
  5133. + ND("%p: case 1, create both ends", na);
  5134. error = netmap_krings_create(na, 0);
  5135. if (error)
  5136. goto err;
  5137. @@ -334,8 +331,8 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
  5138. for_rx_tx(t) {
  5139. enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
  5140. for (i = 0; i < nma_get_nrings(na, t); i++) {
  5141. - NMR(na, t)[i].pipe = NMR(&pna->peer->up, r) + i;
  5142. - NMR(&pna->peer->up, r)[i].pipe = NMR(na, t) + i;
  5143. + NMR(na, t)[i].pipe = NMR(ona, r) + i;
  5144. + NMR(ona, r)[i].pipe = NMR(na, t) + i;
  5145. }
  5146. }
  5147.  
  5148. @@ -393,11 +390,11 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
  5149. ND("%p: onoff %d", na, onoff);
  5150. if (onoff) {
  5151. for_rx_tx(t) {
  5152. - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
  5153. + for (i = 0; i < nma_get_nrings(na, t); i++) {
  5154. struct netmap_kring *kring = &NMR(na, t)[i];
  5155.  
  5156. if (nm_kring_pending_on(kring)) {
  5157. - /* mark the partner ring as needed */
  5158. + /* mark the peer ring as needed */
  5159. kring->pipe->nr_kflags |= NKR_NEEDRING;
  5160. }
  5161. }
  5162. @@ -432,7 +429,9 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
  5163. /* mark the peer ring as no longer needed by us
  5164. * (it may still be kept if somebody else is using it)
  5165. */
  5166. - kring->pipe->nr_kflags &= ~NKR_NEEDRING;
  5167. + if (kring->pipe) {
  5168. + kring->pipe->nr_kflags &= ~NKR_NEEDRING;
  5169. + }
  5170. }
  5171. }
  5172. }
  5173. @@ -441,7 +440,7 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
  5174. }
  5175.  
  5176. if (na->active_fds) {
  5177. - D("active_fds %d", na->active_fds);
  5178. + ND("active_fds %d", na->active_fds);
  5179. return 0;
  5180. }
  5181.  
  5182. @@ -494,7 +493,7 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
  5183. return;
  5184. }
  5185. /* case 1) above */
  5186. - ND("%p: case 1, deleting everyhing", na);
  5187. + ND("%p: case 1, deleting everything", na);
  5188. netmap_krings_delete(na); /* also zeroes tx_rings etc. */
  5189. ona = &pna->peer->up;
  5190. if (ona->tx_rings == NULL) {
  5191. @@ -511,7 +510,7 @@ netmap_pipe_dtor(struct netmap_adapter *na)
  5192. {
  5193. struct netmap_pipe_adapter *pna =
  5194. (struct netmap_pipe_adapter *)na;
  5195. - ND("%p", na);
  5196. + ND("%p %p", na, pna->parent_ifp);
  5197. if (pna->peer_ref) {
  5198. ND("%p: clean up peer", na);
  5199. pna->peer_ref = 0;
  5200. @@ -519,12 +518,15 @@ netmap_pipe_dtor(struct netmap_adapter *na)
  5201. }
  5202. if (pna->role == NR_REG_PIPE_MASTER)
  5203. netmap_pipe_remove(pna->parent, pna);
  5204. + if (pna->parent_ifp)
  5205. + if_rele(pna->parent_ifp);
  5206. netmap_adapter_put(pna->parent);
  5207. pna->parent = NULL;
  5208. }
  5209.  
  5210. int
  5211. -netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5212. +netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
  5213. + struct netmap_mem_d *nmd, int create)
  5214. {
  5215. struct nmreq pnmr;
  5216. struct netmap_adapter *pna; /* parent adapter */
  5217. @@ -532,7 +534,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5218. struct ifnet *ifp = NULL;
  5219. u_int pipe_id;
  5220. int role = nmr->nr_flags & NR_REG_MASK;
  5221. - int error;
  5222. + int error, retries = 0;
  5223.  
  5224. ND("flags %x", nmr->nr_flags);
  5225.  
  5226. @@ -547,12 +549,28 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5227. memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
  5228. /* pass to parent the requested number of pipes */
  5229. pnmr.nr_arg1 = nmr->nr_arg1;
  5230. - error = netmap_get_na(&pnmr, &pna, &ifp, create);
  5231. - if (error) {
  5232. - ND("parent lookup failed: %d", error);
  5233. - return error;
  5234. + for (;;) {
  5235. + int create_error;
  5236. +
  5237. + error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create);
  5238. + if (!error)
  5239. + break;
  5240. + if (error != ENXIO || retries++) {
  5241. + ND("parent lookup failed: %d", error);
  5242. + return error;
  5243. + }
  5244. + ND("try to create a persistent vale port");
  5245. + /* create a persistent vale port and try again */
  5246. + NMG_UNLOCK();
  5247. + create_error = netmap_vi_create(&pnmr, 1 /* autodelete */);
  5248. + NMG_LOCK();
  5249. + if (create_error && create_error != EEXIST) {
  5250. + if (create_error != EOPNOTSUPP) {
  5251. + D("failed to create a persistent vale port: %d", create_error);
  5252. + }
  5253. + return error;
  5254. + }
  5255. }
  5256. - ND("found parent: %s", na->name);
  5257.  
  5258. if (NETMAP_OWNED_BY_KERN(pna)) {
  5259. ND("parent busy");
  5260. @@ -575,7 +593,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5261. /* the pipe we have found already holds a ref to the parent,
  5262. * so we need to drop the one we got from netmap_get_na()
  5263. */
  5264. - netmap_adapter_put(pna);
  5265. + netmap_unget_na(pna, ifp);
  5266. goto found;
  5267. }
  5268. ND("pipe %d not found, create %d", pipe_id, create);
  5269. @@ -587,7 +605,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5270. * The endpoint we were asked for holds a reference to
  5271. * the other one.
  5272. */
  5273. - mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
  5274. + mna = nm_os_malloc(sizeof(*mna));
  5275. if (mna == NULL) {
  5276. error = ENOMEM;
  5277. goto put_out;
  5278. @@ -597,6 +615,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5279. mna->id = pipe_id;
  5280. mna->role = NR_REG_PIPE_MASTER;
  5281. mna->parent = pna;
  5282. + mna->parent_ifp = ifp;
  5283.  
  5284. mna->up.nm_txsync = netmap_pipe_txsync;
  5285. mna->up.nm_rxsync = netmap_pipe_rxsync;
  5286. @@ -604,7 +623,8 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5287. mna->up.nm_dtor = netmap_pipe_dtor;
  5288. mna->up.nm_krings_create = netmap_pipe_krings_create;
  5289. mna->up.nm_krings_delete = netmap_pipe_krings_delete;
  5290. - mna->up.nm_mem = pna->nm_mem;
  5291. + mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
  5292. + mna->up.na_flags |= NAF_MEM_OWNER;
  5293. mna->up.na_lut = pna->na_lut;
  5294.  
  5295. mna->up.num_tx_rings = 1;
  5296. @@ -624,13 +644,14 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5297. goto free_mna;
  5298.  
  5299. /* create the slave */
  5300. - sna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
  5301. + sna = nm_os_malloc(sizeof(*mna));
  5302. if (sna == NULL) {
  5303. error = ENOMEM;
  5304. goto unregister_mna;
  5305. }
  5306. /* most fields are the same, copy from master and then fix */
  5307. *sna = *mna;
  5308. + sna->up.nm_mem = netmap_mem_get(mna->up.nm_mem);
  5309. snprintf(sna->up.name, sizeof(sna->up.name), "%s}%d", pna->name, pipe_id);
  5310. sna->role = NR_REG_PIPE_SLAVE;
  5311. error = netmap_attach_common(&sna->up);
  5312. @@ -645,6 +666,9 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5313. * need another one for the other endpoint we created
  5314. */
  5315. netmap_adapter_get(pna);
  5316. + /* likewise for the ifp, if any */
  5317. + if (ifp)
  5318. + if_ref(ifp);
  5319.  
  5320. if (role == NR_REG_PIPE_MASTER) {
  5321. req = mna;
  5322. @@ -667,19 +691,14 @@ found:
  5323. * It will be released by the req destructor
  5324. */
  5325.  
  5326. - /* drop the ifp reference, if any */
  5327. - if (ifp) {
  5328. - if_rele(ifp);
  5329. - }
  5330. -
  5331. return 0;
  5332.  
  5333. free_sna:
  5334. - free(sna, M_DEVBUF);
  5335. + nm_os_free(sna);
  5336. unregister_mna:
  5337. netmap_pipe_remove(pna, mna);
  5338. free_mna:
  5339. - free(mna, M_DEVBUF);
  5340. + nm_os_free(mna);
  5341. put_out:
  5342. netmap_unget_na(pna, ifp);
  5343. return error;
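
Editor's note (not part of the patch): the retry loop added to
netmap_get_pipe_na() above means that opening a pipe endpoint whose parent
port does not exist is now expected to create an auto-deleted persistent
VALE port on the fly (via netmap_vi_create()) and retry the lookup, instead
of failing with ENXIO. A minimal sketch, assuming the usual nm_open() helper
and a hypothetical, previously non-existing port name "x":

    #define NETMAP_WITH_LIBS
    #include <net/netmap_user.h>

    static struct nm_desc *
    open_pipe_master(void)
    {
        /*
         * "netmap:x{0" asks for the master side of pipe 0 hanging off
         * port "x".  Before this change the request failed if "x" did
         * not already exist; now the kernel creates "x" as an
         * auto-delete persistent port and is expected to tear it down
         * again once the last pipe endpoint referencing it goes away.
         */
        return nm_open("netmap:x{0", NULL, 0, NULL);
    }
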
  5344. diff --git a/sys/dev/netmap/netmap_pt.c b/sys/dev/netmap/netmap_pt.c
  5345. index 3913f4b957f..27eaa0232ae 100644
  5346. --- a/sys/dev/netmap/netmap_pt.c
  5347. +++ b/sys/dev/netmap/netmap_pt.c
  5348. @@ -170,7 +170,7 @@ rate_batch_stats_update(struct rate_batch_stats *bf, uint32_t pre_tail,
  5349.  
  5350. struct ptnetmap_state {
  5351. /* Kthreads. */
  5352. - struct nm_kthread **kthreads;
  5353. + struct nm_kctx **kctxs;
  5354.  
  5355. /* Shared memory with the guest (TX/RX) */
  5356. struct ptnet_ring __user *ptrings;
  5357. @@ -186,11 +186,11 @@ struct ptnetmap_state {
  5358. static inline void
  5359. ptnetmap_kring_dump(const char *title, const struct netmap_kring *kring)
  5360. {
  5361. - RD(1, "%s - name: %s hwcur: %d hwtail: %d rhead: %d rcur: %d \
  5362. - rtail: %d head: %d cur: %d tail: %d",
  5363. - title, kring->name, kring->nr_hwcur,
  5364. - kring->nr_hwtail, kring->rhead, kring->rcur, kring->rtail,
  5365. - kring->ring->head, kring->ring->cur, kring->ring->tail);
  5366. + D("%s - name: %s hwcur: %d hwtail: %d rhead: %d rcur: %d"
  5367. + " rtail: %d head: %d cur: %d tail: %d",
  5368. + title, kring->name, kring->nr_hwcur,
  5369. + kring->nr_hwtail, kring->rhead, kring->rcur, kring->rtail,
  5370. + kring->ring->head, kring->ring->cur, kring->ring->tail);
  5371. }
  5372.  
  5373. /*
  5374. @@ -225,7 +225,7 @@ ptring_intr_enable(struct ptnet_ring __user *ptring, uint32_t val)
  5375.  
  5376. /* Handle TX events: from the guest or from the backend */
  5377. static void
  5378. -ptnetmap_tx_handler(void *data)
  5379. +ptnetmap_tx_handler(void *data, int is_kthread)
  5380. {
  5381. struct netmap_kring *kring = data;
  5382. struct netmap_pt_host_adapter *pth_na =
  5383. @@ -234,7 +234,7 @@ ptnetmap_tx_handler(void *data)
  5384. struct ptnet_ring __user *ptring;
  5385. struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
  5386. bool more_txspace = false;
  5387. - struct nm_kthread *kth;
  5388. + struct nm_kctx *kth;
  5389. uint32_t num_slots;
  5390. int batch;
  5391. IFRATE(uint32_t pre_tail);
  5392. @@ -259,7 +259,7 @@ ptnetmap_tx_handler(void *data)
  5393.  
  5394. /* Get TX ptring pointer from the CSB. */
  5395. ptring = ptns->ptrings + kring->ring_id;
  5396. - kth = ptns->kthreads[kring->ring_id];
  5397. + kth = ptns->kctxs[kring->ring_id];
  5398.  
  5399. num_slots = kring->nkr_num_slots;
  5400. shadow_ring.head = kring->rhead;
  5401. @@ -337,10 +337,10 @@ ptnetmap_tx_handler(void *data)
  5402.  
  5403. #ifndef BUSY_WAIT
  5404. /* Interrupt the guest if needed. */
  5405. - if (more_txspace && ptring_intr_enabled(ptring)) {
  5406. + if (more_txspace && ptring_intr_enabled(ptring) && is_kthread) {
  5407. /* Disable guest kick to avoid sending unnecessary kicks */
  5408. ptring_intr_enable(ptring, 0);
  5409. - nm_os_kthread_send_irq(kth);
  5410. + nm_os_kctx_send_irq(kth);
  5411. IFRATE(ptns->rate_ctx.new.htxk++);
  5412. more_txspace = false;
  5413. }
  5414. @@ -354,7 +354,9 @@ ptnetmap_tx_handler(void *data)
  5415. * go to sleep, waiting for a kick from the guest when
  5416. * new slots are ready for transmission.
  5417. */
  5418. - usleep_range(1,1);
  5419. + if (is_kthread) {
  5420. + usleep_range(1,1);
  5421. + }
  5422. /* Reenable notifications. */
  5423. ptring_kick_enable(ptring, 1);
  5424. /* Doublecheck. */
  5425. @@ -383,13 +385,40 @@ ptnetmap_tx_handler(void *data)
  5426.  
  5427. nm_kr_put(kring);
  5428.  
  5429. - if (more_txspace && ptring_intr_enabled(ptring)) {
  5430. + if (more_txspace && ptring_intr_enabled(ptring) && is_kthread) {
  5431. ptring_intr_enable(ptring, 0);
  5432. - nm_os_kthread_send_irq(kth);
  5433. + nm_os_kctx_send_irq(kth);
  5434. IFRATE(ptns->rate_ctx.new.htxk++);
  5435. }
  5436. }
  5437.  
  5438. +/* Called on backend nm_notify when there is no worker thread. */
  5439. +static void
  5440. +ptnetmap_tx_nothread_notify(void *data)
  5441. +{
  5442. + struct netmap_kring *kring = data;
  5443. + struct netmap_pt_host_adapter *pth_na =
  5444. + (struct netmap_pt_host_adapter *)kring->na->na_private;
  5445. + struct ptnetmap_state *ptns = pth_na->ptns;
  5446. +
  5447. + if (unlikely(!ptns)) {
  5448. + D("ERROR ptnetmap state is NULL");
  5449. + return;
  5450. + }
  5451. +
  5452. + if (unlikely(ptns->stopped)) {
  5453. + D("backend netmap is being stopped");
  5454. + return;
  5455. + }
  5456. +
  5457. + /* We cannot access the CSB here (to check ptring->guest_need_kick),
  5458. + * unless we switch address space to the one of the guest. For now
  5459. + * we unconditionally inject an interrupt. */
  5460. + nm_os_kctx_send_irq(ptns->kctxs[kring->ring_id]);
  5461. + IFRATE(ptns->rate_ctx.new.htxk++);
  5462. + ND(1, "%s interrupt", kring->name);
  5463. +}
  5464. +
  5465. /*
  5466. * We need RX kicks from the guest when (tail == head-1), where we wait
  5467. * for the guest to refill.
  5468. @@ -405,7 +434,7 @@ ptnetmap_norxslots(struct netmap_kring *kring, uint32_t g_head)
  5469.  
  5470. /* Handle RX events: from the guest or from the backend */
  5471. static void
  5472. -ptnetmap_rx_handler(void *data)
  5473. +ptnetmap_rx_handler(void *data, int is_kthread)
  5474. {
  5475. struct netmap_kring *kring = data;
  5476. struct netmap_pt_host_adapter *pth_na =
  5477. @@ -413,7 +442,7 @@ ptnetmap_rx_handler(void *data)
  5478. struct ptnetmap_state *ptns = pth_na->ptns;
  5479. struct ptnet_ring __user *ptring;
  5480. struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
  5481. - struct nm_kthread *kth;
  5482. + struct nm_kctx *kth;
  5483. uint32_t num_slots;
  5484. int dry_cycles = 0;
  5485. bool some_recvd = false;
  5486. @@ -440,7 +469,7 @@ ptnetmap_rx_handler(void *data)
  5487.  
  5488. /* Get RX ptring pointer from the CSB. */
  5489. ptring = ptns->ptrings + (pth_na->up.num_tx_rings + kring->ring_id);
  5490. - kth = ptns->kthreads[pth_na->up.num_tx_rings + kring->ring_id];
  5491. + kth = ptns->kctxs[pth_na->up.num_tx_rings + kring->ring_id];
  5492.  
  5493. num_slots = kring->nkr_num_slots;
  5494. shadow_ring.head = kring->rhead;
  5495. @@ -500,7 +529,7 @@ ptnetmap_rx_handler(void *data)
  5496. if (some_recvd && ptring_intr_enabled(ptring)) {
  5497. /* Disable guest kick to avoid sending unnecessary kicks */
  5498. ptring_intr_enable(ptring, 0);
  5499. - nm_os_kthread_send_irq(kth);
  5500. + nm_os_kctx_send_irq(kth);
  5501. IFRATE(ptns->rate_ctx.new.hrxk++);
  5502. some_recvd = false;
  5503. }
  5504. @@ -549,7 +578,7 @@ ptnetmap_rx_handler(void *data)
  5505. /* Interrupt the guest if needed. */
  5506. if (some_recvd && ptring_intr_enabled(ptring)) {
  5507. ptring_intr_enable(ptring, 0);
  5508. - nm_os_kthread_send_irq(kth);
  5509. + nm_os_kctx_send_irq(kth);
  5510. IFRATE(ptns->rate_ctx.new.hrxk++);
  5511. }
  5512. }
  5513. @@ -597,14 +626,14 @@ ptnetmap_print_configuration(struct ptnetmap_cfg *cfg)
  5514. static int
  5515. ptnetmap_kring_snapshot(struct netmap_kring *kring, struct ptnet_ring __user *ptring)
  5516. {
  5517. - if(CSB_WRITE(ptring, head, kring->rhead))
  5518. + if (CSB_WRITE(ptring, head, kring->rhead))
  5519. goto err;
  5520. - if(CSB_WRITE(ptring, cur, kring->rcur))
  5521. + if (CSB_WRITE(ptring, cur, kring->rcur))
  5522. goto err;
  5523.  
  5524. - if(CSB_WRITE(ptring, hwcur, kring->nr_hwcur))
  5525. + if (CSB_WRITE(ptring, hwcur, kring->nr_hwcur))
  5526. goto err;
  5527. - if(CSB_WRITE(ptring, hwtail, NM_ACCESS_ONCE(kring->nr_hwtail)))
  5528. + if (CSB_WRITE(ptring, hwtail, NM_ACCESS_ONCE(kring->nr_hwtail)))
  5529. goto err;
  5530.  
  5531. DBG(ptnetmap_kring_dump("ptnetmap_kring_snapshot", kring);)
  5532. @@ -643,15 +672,15 @@ ptnetmap_krings_snapshot(struct netmap_pt_host_adapter *pth_na)
  5533. }
  5534.  
  5535. /*
  5536. - * Functions to create, start and stop the kthreads
  5537. + * Functions to create kernel contexts, and start/stop the workers.
  5538. */
  5539.  
  5540. static int
  5541. -ptnetmap_create_kthreads(struct netmap_pt_host_adapter *pth_na,
  5542. - struct ptnetmap_cfg *cfg)
  5543. +ptnetmap_create_kctxs(struct netmap_pt_host_adapter *pth_na,
  5544. + struct ptnetmap_cfg *cfg, int use_tx_kthreads)
  5545. {
  5546. struct ptnetmap_state *ptns = pth_na->ptns;
  5547. - struct nm_kthread_cfg nmk_cfg;
  5548. + struct nm_kctx_cfg nmk_cfg;
  5549. unsigned int num_rings;
  5550. uint8_t *cfg_entries = (uint8_t *)(cfg + 1);
  5551. int k;
  5552. @@ -665,13 +694,16 @@ ptnetmap_create_kthreads(struct netmap_pt_host_adapter *pth_na,
  5553. nmk_cfg.type = k;
  5554. if (k < pth_na->up.num_tx_rings) {
  5555. nmk_cfg.worker_fn = ptnetmap_tx_handler;
  5556. + nmk_cfg.use_kthread = use_tx_kthreads;
  5557. + nmk_cfg.notify_fn = ptnetmap_tx_nothread_notify;
  5558. } else {
  5559. nmk_cfg.worker_fn = ptnetmap_rx_handler;
  5560. + nmk_cfg.use_kthread = 1;
  5561. }
  5562.  
  5563. - ptns->kthreads[k] = nm_os_kthread_create(&nmk_cfg,
  5564. + ptns->kctxs[k] = nm_os_kctx_create(&nmk_cfg,
  5565. cfg->cfgtype, cfg_entries + k * cfg->entry_size);
  5566. - if (ptns->kthreads[k] == NULL) {
  5567. + if (ptns->kctxs[k] == NULL) {
  5568. goto err;
  5569. }
  5570. }
  5571. @@ -679,16 +711,16 @@ ptnetmap_create_kthreads(struct netmap_pt_host_adapter *pth_na,
  5572. return 0;
  5573. err:
  5574. for (k = 0; k < num_rings; k++) {
  5575. - if (ptns->kthreads[k]) {
  5576. - nm_os_kthread_delete(ptns->kthreads[k]);
  5577. - ptns->kthreads[k] = NULL;
  5578. + if (ptns->kctxs[k]) {
  5579. + nm_os_kctx_destroy(ptns->kctxs[k]);
  5580. + ptns->kctxs[k] = NULL;
  5581. }
  5582. }
  5583. return EFAULT;
  5584. }
  5585.  
  5586. static int
  5587. -ptnetmap_start_kthreads(struct netmap_pt_host_adapter *pth_na)
  5588. +ptnetmap_start_kctx_workers(struct netmap_pt_host_adapter *pth_na)
  5589. {
  5590. struct ptnetmap_state *ptns = pth_na->ptns;
  5591. int num_rings;
  5592. @@ -705,8 +737,8 @@ ptnetmap_start_kthreads(struct netmap_pt_host_adapter *pth_na)
  5593. num_rings = ptns->pth_na->up.num_tx_rings +
  5594. ptns->pth_na->up.num_rx_rings;
  5595. for (k = 0; k < num_rings; k++) {
  5596. - //nm_os_kthread_set_affinity(ptns->kthreads[k], xxx);
  5597. - error = nm_os_kthread_start(ptns->kthreads[k]);
  5598. + //nm_os_kctx_worker_setaff(ptns->kctxs[k], xxx);
  5599. + error = nm_os_kctx_worker_start(ptns->kctxs[k]);
  5600. if (error) {
  5601. return error;
  5602. }
  5603. @@ -716,7 +748,7 @@ ptnetmap_start_kthreads(struct netmap_pt_host_adapter *pth_na)
  5604. }
  5605.  
  5606. static void
  5607. -ptnetmap_stop_kthreads(struct netmap_pt_host_adapter *pth_na)
  5608. +ptnetmap_stop_kctx_workers(struct netmap_pt_host_adapter *pth_na)
  5609. {
  5610. struct ptnetmap_state *ptns = pth_na->ptns;
  5611. int num_rings;
  5612. @@ -732,7 +764,7 @@ ptnetmap_stop_kthreads(struct netmap_pt_host_adapter *pth_na)
  5613. num_rings = ptns->pth_na->up.num_tx_rings +
  5614. ptns->pth_na->up.num_rx_rings;
  5615. for (k = 0; k < num_rings; k++) {
  5616. - nm_os_kthread_stop(ptns->kthreads[k]);
  5617. + nm_os_kctx_worker_stop(ptns->kctxs[k]);
  5618. }
  5619. }
  5620.  
  5621. @@ -750,14 +782,14 @@ ptnetmap_read_cfg(struct nmreq *nmr)
  5622. }
  5623.  
  5624. cfglen = sizeof(tmp) + tmp.num_rings * tmp.entry_size;
  5625. - cfg = malloc(cfglen, M_DEVBUF, M_NOWAIT | M_ZERO);
  5626. + cfg = nm_os_malloc(cfglen);
  5627. if (!cfg) {
  5628. return NULL;
  5629. }
  5630.  
  5631. if (copyin((const void *)*nmr_ptncfg, cfg, cfglen)) {
  5632. D("Full copyin() failed");
  5633. - free(cfg, M_DEVBUF);
  5634. + nm_os_free(cfg);
  5635. return NULL;
  5636. }
  5637.  
  5638. @@ -772,6 +804,7 @@ static int
  5639. ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
  5640. struct ptnetmap_cfg *cfg)
  5641. {
  5642. + int use_tx_kthreads = ptnetmap_tx_workers; /* snapshot */
  5643. struct ptnetmap_state *ptns;
  5644. unsigned int num_rings;
  5645. int ret, i;
  5646. @@ -790,13 +823,18 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
  5647. return EINVAL;
  5648. }
  5649.  
  5650. - ptns = malloc(sizeof(*ptns) + num_rings * sizeof(*ptns->kthreads),
  5651. - M_DEVBUF, M_NOWAIT | M_ZERO);
  5652. + if (!use_tx_kthreads && na_is_generic(pth_na->parent)) {
  5653. + D("ERROR ptnetmap direct transmission not supported with "
  5654. + "passed-through emulated adapters");
  5655. + return EOPNOTSUPP;
  5656. + }
  5657. +
  5658. + ptns = nm_os_malloc(sizeof(*ptns) + num_rings * sizeof(*ptns->kctxs));
  5659. if (!ptns) {
  5660. return ENOMEM;
  5661. }
  5662.  
  5663. - ptns->kthreads = (struct nm_kthread **)(ptns + 1);
  5664. + ptns->kctxs = (struct nm_kctx **)(ptns + 1);
  5665. ptns->stopped = true;
  5666.  
  5667. /* Cross-link data structures. */
  5668. @@ -808,9 +846,9 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
  5669.  
  5670. DBG(ptnetmap_print_configuration(cfg));
  5671.  
  5672. - /* Create kthreads */
  5673. - if ((ret = ptnetmap_create_kthreads(pth_na, cfg))) {
  5674. - D("ERROR ptnetmap_create_kthreads()");
  5675. + /* Create kernel contexts. */
  5676. + if ((ret = ptnetmap_create_kctxs(pth_na, cfg, use_tx_kthreads))) {
  5677. + D("ERROR ptnetmap_create_kctxs()");
  5678. goto err;
  5679. }
  5680. /* Copy krings state into the CSB for the guest initialization */
  5681. @@ -819,10 +857,17 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
  5682. goto err;
  5683. }
  5684.  
  5685. - /* Overwrite parent nm_notify krings callback. */
  5686. + /* Overwrite parent nm_notify krings callback, and
  5687. + * clear NAF_BDG_MAYSLEEP if needed. */
  5688. pth_na->parent->na_private = pth_na;
  5689. pth_na->parent_nm_notify = pth_na->parent->nm_notify;
  5690. pth_na->parent->nm_notify = nm_unused_notify;
  5691. + pth_na->parent_na_flags = pth_na->parent->na_flags;
  5692. + if (!use_tx_kthreads) {
  5693. + /* VALE port txsync is executed under spinlock on Linux, so
  5694. + * we need to make sure the bridge cannot sleep. */
  5695. + pth_na->parent->na_flags &= ~NAF_BDG_MAYSLEEP;
  5696. + }
  5697.  
  5698. for (i = 0; i < pth_na->parent->num_rx_rings; i++) {
  5699. pth_na->up.rx_rings[i].save_notify =
  5700. @@ -849,7 +894,7 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
  5701.  
  5702. err:
  5703. pth_na->ptns = NULL;
  5704. - free(ptns, M_DEVBUF);
  5705. + nm_os_free(ptns);
  5706. return ret;
  5707. }
  5708.  
  5709. @@ -870,6 +915,7 @@ ptnetmap_delete(struct netmap_pt_host_adapter *pth_na)
  5710. /* Restore parent adapter callbacks. */
  5711. pth_na->parent->nm_notify = pth_na->parent_nm_notify;
  5712. pth_na->parent->na_private = NULL;
  5713. + pth_na->parent->na_flags = pth_na->parent_na_flags;
  5714.  
  5715. for (i = 0; i < pth_na->parent->num_rx_rings; i++) {
  5716. pth_na->up.rx_rings[i].nm_notify =
  5717. @@ -882,17 +928,17 @@ ptnetmap_delete(struct netmap_pt_host_adapter *pth_na)
  5718. pth_na->up.tx_rings[i].save_notify = NULL;
  5719. }
  5720.  
  5721. - /* Delete kthreads. */
  5722. + /* Destroy kernel contexts. */
  5723. num_rings = ptns->pth_na->up.num_tx_rings +
  5724. ptns->pth_na->up.num_rx_rings;
  5725. for (i = 0; i < num_rings; i++) {
  5726. - nm_os_kthread_delete(ptns->kthreads[i]);
  5727. - ptns->kthreads[i] = NULL;
  5728. + nm_os_kctx_destroy(ptns->kctxs[i]);
  5729. + ptns->kctxs[i] = NULL;
  5730. }
  5731.  
  5732. IFRATE(del_timer(&ptns->rate_ctx.timer));
  5733.  
  5734. - free(ptns, M_DEVBUF);
  5735. + nm_os_free(ptns);
  5736.  
  5737. pth_na->ptns = NULL;
  5738.  
  5739. @@ -932,21 +978,21 @@ ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na)
  5740. cfg = ptnetmap_read_cfg(nmr);
  5741. if (!cfg)
  5742. break;
  5743. - /* Create ptnetmap state (kthreads, ...) and switch parent
  5744. + /* Create ptnetmap state (kctxs, ...) and switch parent
  5745. * adapter to ptnetmap mode. */
  5746. error = ptnetmap_create(pth_na, cfg);
  5747. - free(cfg, M_DEVBUF);
  5748. + nm_os_free(cfg);
  5749. if (error)
  5750. break;
  5751. /* Start kthreads. */
  5752. - error = ptnetmap_start_kthreads(pth_na);
  5753. + error = ptnetmap_start_kctx_workers(pth_na);
  5754. if (error)
  5755. ptnetmap_delete(pth_na);
  5756. break;
  5757.  
  5758. case NETMAP_PT_HOST_DELETE:
  5759. /* Stop kthreads. */
  5760. - ptnetmap_stop_kthreads(pth_na);
  5761. + ptnetmap_stop_kctx_workers(pth_na);
  5762. /* Switch parent adapter back to normal mode and destroy
  5763. * ptnetmap state (kthreads, ...). */
  5764. ptnetmap_delete(pth_na);
  5765. @@ -994,7 +1040,7 @@ nm_pt_host_notify(struct netmap_kring *kring, int flags)
  5766. ND(1, "RX backend irq");
  5767. IFRATE(ptns->rate_ctx.new.brxwu++);
  5768. }
  5769. - nm_os_kthread_wakeup_worker(ptns->kthreads[k]);
  5770. + nm_os_kctx_worker_wakeup(ptns->kctxs[k]);
  5771.  
  5772. return NM_IRQ_COMPLETED;
  5773. }
  5774. @@ -1136,7 +1182,7 @@ nm_pt_host_dtor(struct netmap_adapter *na)
  5775.  
  5776. /* The equivalent of NETMAP_PT_HOST_DELETE if the hypervisor
  5777. * didn't do it. */
  5778. - ptnetmap_stop_kthreads(pth_na);
  5779. + ptnetmap_stop_kctx_workers(pth_na);
  5780. ptnetmap_delete(pth_na);
  5781.  
  5782. parent->na_flags &= ~NAF_BUSY;
  5783. @@ -1147,7 +1193,8 @@ nm_pt_host_dtor(struct netmap_adapter *na)
  5784.  
  5785. /* check if nmr is a request for a ptnetmap adapter that we can satisfy */
  5786. int
  5787. -netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5788. +netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na,
  5789. + struct netmap_mem_d *nmd, int create)
  5790. {
  5791. struct nmreq parent_nmr;
  5792. struct netmap_adapter *parent; /* target adapter */
  5793. @@ -1162,7 +1209,7 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5794.  
  5795. D("Requesting a ptnetmap host adapter");
  5796.  
  5797. - pth_na = malloc(sizeof(*pth_na), M_DEVBUF, M_NOWAIT | M_ZERO);
  5798. + pth_na = nm_os_malloc(sizeof(*pth_na));
  5799. if (pth_na == NULL) {
  5800. D("ERROR malloc");
  5801. return ENOMEM;
  5802. @@ -1174,7 +1221,7 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5803. */
  5804. memcpy(&parent_nmr, nmr, sizeof(parent_nmr));
  5805. parent_nmr.nr_flags &= ~(NR_PTNETMAP_HOST);
  5806. - error = netmap_get_na(&parent_nmr, &parent, &ifp, create);
  5807. + error = netmap_get_na(&parent_nmr, &parent, &ifp, nmd, create);
  5808. if (error) {
  5809. D("parent lookup failed: %d", error);
  5810. goto put_out_noputparent;
  5811. @@ -1216,7 +1263,7 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  5812. * directly. */
  5813. pth_na->up.nm_notify = nm_unused_notify;
  5814.  
  5815. - pth_na->up.nm_mem = parent->nm_mem;
  5816. + pth_na->up.nm_mem = netmap_mem_get(parent->nm_mem);
  5817.  
  5818. pth_na->up.na_flags |= NAF_HOST_RINGS;
  5819.  
  5820. @@ -1248,7 +1295,7 @@ put_out:
  5821. if (ifp)
  5822. if_rele(ifp);
  5823. put_out_noputparent:
  5824. - free(pth_na, M_DEVBUF);
  5825. + nm_os_free(pth_na);
  5826. return error;
  5827. }
  5828. #endif /* WITH_PTNETMAP_HOST */
  5829. @@ -1290,8 +1337,8 @@ netmap_pt_guest_txsync(struct ptnet_ring *ptring, struct netmap_kring *kring,
  5830. ptnetmap_guest_write_kring_csb(ptring, kring->rcur, kring->rhead);
  5831.  
  5832. /* Ask for a kick from a guest to the host if needed. */
  5833. - if ((kring->rhead != kring->nr_hwcur &&
  5834. - NM_ACCESS_ONCE(ptring->host_need_kick)) ||
  5835. + if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring))
  5836. + && NM_ACCESS_ONCE(ptring->host_need_kick)) ||
  5837. (flags & NAF_FORCE_RECLAIM)) {
  5838. ptring->sync_flags = flags;
  5839. notify = true;
  5840. @@ -1320,9 +1367,9 @@ netmap_pt_guest_txsync(struct ptnet_ring *ptring, struct netmap_kring *kring,
  5841. }
  5842. }
  5843.  
  5844. - ND(1, "TX - CSB: head:%u cur:%u hwtail:%u - KRING: head:%u cur:%u tail: %u",
  5845. - ptring->head, ptring->cur, ptring->hwtail,
  5846. - kring->rhead, kring->rcur, kring->nr_hwtail);
  5847. + ND(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
  5848. + kring->name, ptring->head, ptring->cur, ptring->hwtail,
  5849. + kring->rhead, kring->rcur, kring->nr_hwtail);
  5850.  
  5851. return notify;
  5852. }
  5853. @@ -1385,9 +1432,9 @@ netmap_pt_guest_rxsync(struct ptnet_ring *ptring, struct netmap_kring *kring,
  5854. }
  5855. }
  5856.  
  5857. - ND(1, "RX - CSB: head:%u cur:%u hwtail:%u - KRING: head:%u cur:%u",
  5858. - ptring->head, ptring->cur, ptring->hwtail,
  5859. - kring->rhead, kring->rcur);
  5860. + ND(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
  5861. + kring->name, ptring->head, ptring->cur, ptring->hwtail,
  5862. + kring->rhead, kring->rcur, kring->nr_hwtail);
  5863.  
  5864. return notify;
  5865. }
  5866. @@ -1445,9 +1492,43 @@ ptnet_nm_dtor(struct netmap_adapter *na)
  5867. struct netmap_pt_guest_adapter *ptna =
  5868. (struct netmap_pt_guest_adapter *)na;
  5869.  
  5870. - netmap_mem_put(ptna->dr.up.nm_mem);
  5871. + netmap_mem_put(ptna->dr.up.nm_mem); // XXX is this needed?
  5872. memset(&ptna->dr, 0, sizeof(ptna->dr));
  5873. netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
  5874. }
  5875.  
  5876. +int
  5877. +netmap_pt_guest_attach(struct netmap_adapter *arg, void *csb,
  5878. + unsigned int nifp_offset, unsigned int memid)
  5879. +{
  5880. + struct netmap_pt_guest_adapter *ptna;
  5881. + struct ifnet *ifp = arg ? arg->ifp : NULL;
  5882. + int error;
  5883. +
  5884. + /* get allocator */
  5885. + arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
  5886. + if (arg->nm_mem == NULL)
  5887. + return ENOMEM;
  5888. + arg->na_flags |= NAF_MEM_OWNER;
  5889. + error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter));
  5890. + if (error)
  5891. + return error;
  5892. +
  5893. + /* get the netmap_pt_guest_adapter */
  5894. + ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
  5895. + ptna->csb = csb;
  5896. +
  5897. + /* Initialize a separate pass-through netmap adapter that is going to
  5898. + * be used by the ptnet driver only, and so never exposed to netmap
  5899. + * applications. We only need a subset of the available fields. */
  5900. + memset(&ptna->dr, 0, sizeof(ptna->dr));
  5901. + ptna->dr.up.ifp = ifp;
  5902. + ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
  5903. + ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
  5904. +
  5905. + ptna->backend_regifs = 0;
  5906. +
  5907. + return 0;
  5908. +}
  5909. +
  5910. #endif /* WITH_PTNETMAP_GUEST */
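
Editor's note (not part of the patch): a sketch of how a guest-side driver
could use the netmap_pt_guest_attach() helper introduced above. Only the
netmap_pt_guest_attach() signature and the struct netmap_adapter fields come
from the patch and the netmap headers; the function name, the ring sizes and
the placeholder sync callbacks are illustrative assumptions.

    static int
    ptnet_attach_sketch(struct ifnet *ifp, void *csb,
        unsigned int nifp_offset, unsigned int memid)
    {
        struct netmap_adapter na_arg;

        bzero(&na_arg, sizeof(na_arg));
        na_arg.ifp = ifp;
        na_arg.num_tx_desc = 1024;      /* example ring sizes */
        na_arg.num_rx_desc = 1024;
        na_arg.num_tx_rings = 1;
        na_arg.num_rx_rings = 1;
        na_arg.nm_txsync = NULL;        /* driver-specific kring sync */
        na_arg.nm_rxsync = NULL;        /* callbacks would go here */

        /*
         * netmap_pt_guest_attach() builds the pass-through guest memory
         * allocator from (nifp_offset, memid), attaches an extended
         * netmap_pt_guest_adapter to ifp, records the CSB pointer and
         * initializes the second, driver-private adapter (ptna->dr).
         */
        return (netmap_pt_guest_attach(&na_arg, csb, nifp_offset, memid));
    }
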
  5911. diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
  5912. index 71b3aedddd4..a018f60ecb9 100644
  5913. --- a/sys/dev/netmap/netmap_vale.c
  5914. +++ b/sys/dev/netmap/netmap_vale.c
  5915. @@ -58,7 +58,7 @@ ports attached to the switch)
  5916.  
  5917. #if defined(__FreeBSD__)
  5918. #include <sys/cdefs.h> /* prerequisite */
  5919. -__FBSDID("$FreeBSD$");
  5920. +__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257176 2013-10-26 17:58:36Z glebius $");
  5921.  
  5922. #include <sys/types.h>
  5923. #include <sys/errno.h>
  5924. @@ -161,7 +161,8 @@ SYSCTL_DECL(_dev_netmap);
  5925. SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
  5926. SYSEND;
  5927.  
  5928. -static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
  5929. +static int netmap_vp_create(struct nmreq *, struct ifnet *,
  5930. + struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
  5931. static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
  5932. static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
  5933.  
  5934. @@ -393,7 +394,7 @@ nm_free_bdgfwd(struct netmap_adapter *na)
  5935. kring = na->tx_rings;
  5936. for (i = 0; i < nrings; i++) {
  5937. if (kring[i].nkr_ft) {
  5938. - free(kring[i].nkr_ft, M_DEVBUF);
  5939. + nm_os_free(kring[i].nkr_ft);
  5940. kring[i].nkr_ft = NULL; /* protect from freeing twice */
  5941. }
  5942. }
  5943. @@ -423,7 +424,7 @@ nm_alloc_bdgfwd(struct netmap_adapter *na)
  5944. struct nm_bdg_q *dstq;
  5945. int j;
  5946.  
  5947. - ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
  5948. + ft = nm_os_malloc(l);
  5949. if (!ft) {
  5950. nm_free_bdgfwd(na);
  5951. return ENOMEM;
  5952. @@ -538,6 +539,13 @@ netmap_vp_dtor(struct netmap_adapter *na)
  5953. if (b) {
  5954. netmap_bdg_detach_common(b, vpna->bdg_port, -1);
  5955. }
  5956. +
  5957. + if (vpna->autodelete && na->ifp != NULL) {
  5958. + ND("releasing %s", na->ifp->if_xname);
  5959. + NMG_UNLOCK();
  5960. + nm_os_vi_detach(na->ifp);
  5961. + NMG_LOCK();
  5962. + }
  5963. }
  5964.  
  5965. /* remove a persistent VALE port from the system */
  5966. @@ -545,6 +553,7 @@ static int
  5967. nm_vi_destroy(const char *name)
  5968. {
  5969. struct ifnet *ifp;
  5970. + struct netmap_vp_adapter *vpna;
  5971. int error;
  5972.  
  5973. ifp = ifunit_ref(name);
  5974. @@ -557,18 +566,29 @@ nm_vi_destroy(const char *name)
  5975. goto err;
  5976. }
  5977.  
  5978. - if (NA(ifp)->na_refcount > 1) {
  5979. + vpna = (struct netmap_vp_adapter *)NA(ifp);
  5980. +
  5981. + /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
  5982. + if (vpna->autodelete) {
  5983. + error = EINVAL;
  5984. + goto err;
  5985. + }
  5986. +
  5987. + /* also make sure that nobody is using the interface */
  5988. + if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
  5989. + vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
  5990. error = EBUSY;
  5991. goto err;
  5992. }
  5993. +
  5994. NMG_UNLOCK();
  5995.  
  5996. D("destroying a persistent vale interface %s", ifp->if_xname);
  5997. /* Linux requires all the references are released
  5998. * before unregister
  5999. */
  6000. - if_rele(ifp);
  6001. netmap_detach(ifp);
  6002. + if_rele(ifp);
  6003. nm_os_vi_detach(ifp);
  6004. return 0;
  6005.  
  6006. @@ -578,15 +598,26 @@ err:
  6007. return error;
  6008. }
  6009.  
  6010. +static int
  6011. +nm_update_info(struct nmreq *nmr, struct netmap_adapter *na)
  6012. +{
  6013. + nmr->nr_rx_rings = na->num_rx_rings;
  6014. + nmr->nr_tx_rings = na->num_tx_rings;
  6015. + nmr->nr_rx_slots = na->num_rx_desc;
  6016. + nmr->nr_tx_slots = na->num_tx_desc;
  6017. + return netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, NULL, &nmr->nr_arg2);
  6018. +}
  6019. +
  6020. /*
  6021. * Create a virtual interface registered to the system.
  6022. * The interface will be attached to a bridge later.
  6023. */
  6024. -static int
  6025. -nm_vi_create(struct nmreq *nmr)
  6026. +int
  6027. +netmap_vi_create(struct nmreq *nmr, int autodelete)
  6028. {
  6029. struct ifnet *ifp;
  6030. struct netmap_vp_adapter *vpna;
  6031. + struct netmap_mem_d *nmd = NULL;
  6032. int error;
  6033.  
  6034. /* don't include VALE prefix */
  6035. @@ -594,28 +625,64 @@ nm_vi_create(struct nmreq *nmr)
  6036. return EINVAL;
  6037. ifp = ifunit_ref(nmr->nr_name);
  6038. if (ifp) { /* already exist, cannot create new one */
  6039. + error = EEXIST;
  6040. + NMG_LOCK();
  6041. + if (NM_NA_VALID(ifp)) {
  6042. + int update_err = nm_update_info(nmr, NA(ifp));
  6043. + if (update_err)
  6044. + error = update_err;
  6045. + }
  6046. + NMG_UNLOCK();
  6047. if_rele(ifp);
  6048. - return EEXIST;
  6049. + return error;
  6050. }
  6051. error = nm_os_vi_persist(nmr->nr_name, &ifp);
  6052. if (error)
  6053. return error;
  6054.  
  6055. NMG_LOCK();
  6056. + if (nmr->nr_arg2) {
  6057. + nmd = netmap_mem_find(nmr->nr_arg2);
  6058. + if (nmd == NULL) {
  6059. + error = EINVAL;
  6060. + goto err_1;
  6061. + }
  6062. + }
  6063. /* netmap_vp_create creates a struct netmap_vp_adapter */
  6064. - error = netmap_vp_create(nmr, ifp, &vpna);
  6065. + error = netmap_vp_create(nmr, ifp, nmd, &vpna);
  6066. if (error) {
  6067. D("error %d", error);
  6068. - nm_os_vi_detach(ifp);
  6069. - return error;
  6070. + goto err_1;
  6071. }
  6072. /* persist-specific routines */
  6073. vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
  6074. - netmap_adapter_get(&vpna->up);
  6075. + if (!autodelete) {
  6076. + netmap_adapter_get(&vpna->up);
  6077. + } else {
  6078. + vpna->autodelete = 1;
  6079. + }
  6080. NM_ATTACH_NA(ifp, &vpna->up);
  6081. + /* return the updated info */
  6082. + error = nm_update_info(nmr, &vpna->up);
  6083. + if (error) {
  6084. + goto err_2;
  6085. + }
  6086. + D("returning nr_arg2 %d", nmr->nr_arg2);
  6087. + if (nmd)
  6088. + netmap_mem_put(nmd);
  6089. NMG_UNLOCK();
  6090. D("created %s", ifp->if_xname);
  6091. return 0;
  6092. +
  6093. +err_2:
  6094. + netmap_detach(ifp);
  6095. +err_1:
  6096. + if (nmd)
  6097. + netmap_mem_put(nmd);
  6098. + NMG_UNLOCK();
  6099. + nm_os_vi_detach(ifp);
  6100. +
  6101. + return error;
  6102. }
  6103.  
  6104. /* Try to get a reference to a netmap adapter attached to a VALE switch.
  6105. @@ -628,11 +695,12 @@ nm_vi_create(struct nmreq *nmr)
  6106. * (*na != NULL && return == 0).
  6107. */
  6108. int
  6109. -netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  6110. +netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
  6111. + struct netmap_mem_d *nmd, int create)
  6112. {
  6113. char *nr_name = nmr->nr_name;
  6114. const char *ifname;
  6115. - struct ifnet *ifp;
  6116. + struct ifnet *ifp = NULL;
  6117. int error = 0;
  6118. struct netmap_vp_adapter *vpna, *hostna = NULL;
  6119. struct nm_bridge *b;
  6120. @@ -702,15 +770,15 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  6121. */
  6122. if (nmr->nr_cmd) {
  6123. /* nr_cmd must be 0 for a virtual port */
  6124. - return EINVAL;
  6125. + error = EINVAL;
  6126. + goto out;
  6127. }
  6128.  
  6129. /* bdg_netmap_attach creates a struct netmap_adapter */
  6130. - error = netmap_vp_create(nmr, NULL, &vpna);
  6131. + error = netmap_vp_create(nmr, NULL, nmd, &vpna);
  6132. if (error) {
  6133. D("error %d", error);
  6134. - free(ifp, M_DEVBUF);
  6135. - return error;
  6136. + goto out;
  6137. }
  6138. /* shortcut - we can skip get_hw_na(),
  6139. * ownership check and nm_bdg_attach()
  6140. @@ -718,7 +786,7 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  6141. } else {
  6142. struct netmap_adapter *hw;
  6143.  
  6144. - error = netmap_get_hw_na(ifp, &hw);
  6145. + error = netmap_get_hw_na(ifp, nmd, &hw);
  6146. if (error || hw == NULL)
  6147. goto out;
  6148.  
  6149. @@ -751,10 +819,10 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
  6150. BDG_WUNLOCK(b);
  6151. *na = &vpna->up;
  6152. netmap_adapter_get(*na);
  6153. - return 0;
  6154.  
  6155. out:
  6156. - if_rele(ifp);
  6157. + if (ifp)
  6158. + if_rele(ifp);
  6159.  
  6160. return error;
  6161. }
  6162. @@ -765,11 +833,20 @@ static int
  6163. nm_bdg_ctl_attach(struct nmreq *nmr)
  6164. {
  6165. struct netmap_adapter *na;
  6166. + struct netmap_mem_d *nmd = NULL;
  6167. int error;
  6168.  
  6169. NMG_LOCK();
  6170.  
  6171. - error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
  6172. + if (nmr->nr_arg2) {
  6173. + nmd = netmap_mem_find(nmr->nr_arg2);
  6174. + if (nmd == NULL) {
  6175. + error = EINVAL;
  6176. + goto unlock_exit;
  6177. + }
  6178. + }
  6179. +
  6180. + error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */);
  6181. if (error) /* no device */
  6182. goto unlock_exit;
  6183.  
  6184. @@ -816,7 +893,7 @@ nm_bdg_ctl_detach(struct nmreq *nmr)
  6185. int error;
  6186.  
  6187. NMG_LOCK();
  6188. - error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
  6189. + error = netmap_get_bdg_na(nmr, &na, NULL, 0 /* don't create */);
  6190. if (error) { /* no device, or another bridge or user owns the device */
  6191. goto unlock_exit;
  6192. }
  6193. @@ -848,7 +925,7 @@ unlock_exit:
  6194. struct nm_bdg_polling_state;
  6195. struct
  6196. nm_bdg_kthread {
  6197. - struct nm_kthread *nmk;
  6198. + struct nm_kctx *nmk;
  6199. u_int qfirst;
  6200. u_int qlast;
  6201. struct nm_bdg_polling_state *bps;
  6202. @@ -867,7 +944,7 @@ struct nm_bdg_polling_state {
  6203. };
  6204.  
  6205. static void
  6206. -netmap_bwrap_polling(void *data)
  6207. +netmap_bwrap_polling(void *data, int is_kthread)
  6208. {
  6209. struct nm_bdg_kthread *nbk = data;
  6210. struct netmap_bwrap_adapter *bna;
  6211. @@ -890,16 +967,16 @@ netmap_bwrap_polling(void *data)
  6212. static int
  6213. nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
  6214. {
  6215. - struct nm_kthread_cfg kcfg;
  6216. + struct nm_kctx_cfg kcfg;
  6217. int i, j;
  6218.  
  6219. - bps->kthreads = malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus,
  6220. - M_DEVBUF, M_NOWAIT | M_ZERO);
  6221. + bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
  6222. if (bps->kthreads == NULL)
  6223. return ENOMEM;
  6224.  
  6225. bzero(&kcfg, sizeof(kcfg));
  6226. kcfg.worker_fn = netmap_bwrap_polling;
  6227. + kcfg.use_kthread = 1;
  6228. for (i = 0; i < bps->ncpus; i++) {
  6229. struct nm_bdg_kthread *t = bps->kthreads + i;
  6230. int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC);
  6231. @@ -913,24 +990,24 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
  6232.  
  6233. kcfg.type = i;
  6234. kcfg.worker_private = t;
  6235. - t->nmk = nm_os_kthread_create(&kcfg, 0, NULL);
  6236. + t->nmk = nm_os_kctx_create(&kcfg, 0, NULL);
  6237. if (t->nmk == NULL) {
  6238. goto cleanup;
  6239. }
  6240. - nm_os_kthread_set_affinity(t->nmk, affinity);
  6241. + nm_os_kctx_worker_setaff(t->nmk, affinity);
  6242. }
  6243. return 0;
  6244.  
  6245. cleanup:
  6246. for (j = 0; j < i; j++) {
  6247. struct nm_bdg_kthread *t = bps->kthreads + i;
  6248. - nm_os_kthread_delete(t->nmk);
  6249. + nm_os_kctx_destroy(t->nmk);
  6250. }
  6251. - free(bps->kthreads, M_DEVBUF);
  6252. + nm_os_free(bps->kthreads);
  6253. return EFAULT;
  6254. }
  6255.  
  6256. -/* a version of ptnetmap_start_kthreads() */
  6257. +/* A variant of ptnetmap_start_kthreads() */
  6258. static int
  6259. nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
  6260. {
  6261. @@ -944,7 +1021,7 @@ nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
  6262.  
  6263. for (i = 0; i < bps->ncpus; i++) {
  6264. struct nm_bdg_kthread *t = bps->kthreads + i;
  6265. - error = nm_os_kthread_start(t->nmk);
  6266. + error = nm_os_kctx_worker_start(t->nmk);
  6267. if (error) {
  6268. D("error in nm_kthread_start()");
  6269. goto cleanup;
  6270. @@ -955,7 +1032,7 @@ nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
  6271. cleanup:
  6272. for (j = 0; j < i; j++) {
  6273. struct nm_bdg_kthread *t = bps->kthreads + i;
  6274. - nm_os_kthread_stop(t->nmk);
  6275. + nm_os_kctx_worker_stop(t->nmk);
  6276. }
  6277. bps->stopped = true;
  6278. return error;
  6279. @@ -971,8 +1048,8 @@ nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
  6280.  
  6281. for (i = 0; i < bps->ncpus; i++) {
  6282. struct nm_bdg_kthread *t = bps->kthreads + i;
  6283. - nm_os_kthread_stop(t->nmk);
  6284. - nm_os_kthread_delete(t->nmk);
  6285. + nm_os_kctx_worker_stop(t->nmk);
  6286. + nm_os_kctx_destroy(t->nmk);
  6287. }
  6288. bps->stopped = true;
  6289. }
  6290. @@ -1050,19 +1127,19 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
  6291. return EFAULT;
  6292. }
  6293.  
  6294. - bps = malloc(sizeof(*bps), M_DEVBUF, M_NOWAIT | M_ZERO);
  6295. + bps = nm_os_malloc(sizeof(*bps));
  6296. if (!bps)
  6297. return ENOMEM;
  6298. bps->configured = false;
  6299. bps->stopped = true;
  6300.  
  6301. if (get_polling_cfg(nmr, na, bps)) {
  6302. - free(bps, M_DEVBUF);
  6303. + nm_os_free(bps);
  6304. return EINVAL;
  6305. }
  6306.  
  6307. if (nm_bdg_create_kthreads(bps)) {
  6308. - free(bps, M_DEVBUF);
  6309. + nm_os_free(bps);
  6310. return EFAULT;
  6311. }
  6312.  
  6313. @@ -1077,8 +1154,8 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
  6314. error = nm_bdg_polling_start_kthreads(bps);
  6315. if (error) {
  6316. D("ERROR nm_bdg_polling_start_kthread()");
  6317. - free(bps->kthreads, M_DEVBUF);
  6318. - free(bps, M_DEVBUF);
  6319. + nm_os_free(bps->kthreads);
  6320. + nm_os_free(bps);
  6321. bna->na_polling_state = NULL;
  6322. if (bna->hwna->nm_intr)
  6323. bna->hwna->nm_intr(bna->hwna, 1);
  6324. @@ -1099,7 +1176,7 @@ nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na)
  6325. bps = bna->na_polling_state;
  6326. nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
  6327. bps->configured = false;
  6328. - free(bps, M_DEVBUF);
  6329. + nm_os_free(bps);
  6330. bna->na_polling_state = NULL;
  6331. /* reenable interrupt */
  6332. if (bna->hwna->nm_intr)
  6333. @@ -1130,7 +1207,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
  6334.  
  6335. switch (cmd) {
  6336. case NETMAP_BDG_NEWIF:
  6337. - error = nm_vi_create(nmr);
  6338. + error = netmap_vi_create(nmr, 0 /* no autodelete */);
  6339. break;
  6340.  
  6341. case NETMAP_BDG_DELIF:
  6342. @@ -1193,18 +1270,19 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
  6343. NMG_LOCK();
  6344. for (error = ENOENT; i < NM_BRIDGES; i++) {
  6345. b = bridges + i;
  6346. - if (j >= b->bdg_active_ports) {
  6347. - j = 0; /* following bridges scan from 0 */
  6348. - continue;
  6349. + for ( ; j < NM_BDG_MAXPORTS; j++) {
  6350. + if (b->bdg_ports[j] == NULL)
  6351. + continue;
  6352. + vpna = b->bdg_ports[j];
  6353. + strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
  6354. + error = 0;
  6355. + goto out;
  6356. }
  6357. - nmr->nr_arg1 = i;
  6358. - nmr->nr_arg2 = j;
  6359. - j = b->bdg_port_index[j];
  6360. - vpna = b->bdg_ports[j];
  6361. - strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
  6362. - error = 0;
  6363. - break;
  6364. + j = 0; /* following bridges scan from 0 */
  6365. }
  6366. + out:
  6367. + nmr->nr_arg1 = i;
  6368. + nmr->nr_arg2 = j;
  6369. NMG_UNLOCK();
  6370. }
  6371. break;
  6372. @@ -1238,7 +1316,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
  6373. break;
  6374. }
  6375. NMG_LOCK();
  6376. - error = netmap_get_bdg_na(nmr, &na, 0);
  6377. + error = netmap_get_bdg_na(nmr, &na, NULL, 0);
  6378. if (na && !error) {
  6379. vpna = (struct netmap_vp_adapter *)na;
  6380. na->virt_hdr_len = nmr->nr_arg1;
  6381. @@ -1256,7 +1334,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
  6382. case NETMAP_BDG_POLLING_ON:
  6383. case NETMAP_BDG_POLLING_OFF:
  6384. NMG_LOCK();
  6385. - error = netmap_get_bdg_na(nmr, &na, 0);
  6386. + error = netmap_get_bdg_na(nmr, &na, NULL, 0);
  6387. if (na && !error) {
  6388. if (!nm_is_bwrap(na)) {
  6389. error = EOPNOTSUPP;
  6390. @@ -1384,7 +1462,7 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end)
  6391. if (na->up.na_flags & NAF_BDG_MAYSLEEP)
  6392. BDG_RLOCK(b);
  6393. else if (!BDG_RTRYLOCK(b))
  6394. - return 0;
  6395. + return j;
  6396. ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
  6397. ft = kring->nkr_ft;
  6398.  
  6399. @@ -1802,8 +1880,10 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
  6400. needed = d->bq_len + brddst->bq_len;
  6401.  
  6402. if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
  6403. - RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
  6404. - dst_na->up.virt_hdr_len);
  6405. + if (netmap_verbose) {
  6406. + RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
  6407. + dst_na->up.virt_hdr_len);
  6408. + }
  6409. /* There is a virtio-net header/offloadings mismatch between
  6410. * source and destination. The slower mismatch datapath will
  6411. * be used to cope with all the mismatches.
  6412. @@ -2125,14 +2205,16 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
  6413. * Only persistent VALE ports have a non-null ifp.
  6414. */
  6415. static int
  6416. -netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
  6417. +netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp,
  6418. + struct netmap_mem_d *nmd,
  6419. + struct netmap_vp_adapter **ret)
  6420. {
  6421. struct netmap_vp_adapter *vpna;
  6422. struct netmap_adapter *na;
  6423. - int error;
  6424. + int error = 0;
  6425. u_int npipes = 0;
  6426.  
  6427. - vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
  6428. + vpna = nm_os_malloc(sizeof(*vpna));
  6429. if (vpna == NULL)
  6430. return ENOMEM;
  6431.  
  6432. @@ -2183,7 +2265,10 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter
  6433. na->nm_krings_create = netmap_vp_krings_create;
  6434. na->nm_krings_delete = netmap_vp_krings_delete;
  6435. na->nm_dtor = netmap_vp_dtor;
  6436. - na->nm_mem = netmap_mem_private_new(na->name,
  6437. + D("nr_arg2 %d", nmr->nr_arg2);
  6438. + na->nm_mem = nmd ?
  6439. + netmap_mem_get(nmd):
  6440. + netmap_mem_private_new(
  6441. na->num_tx_rings, na->num_tx_desc,
  6442. na->num_rx_rings, na->num_rx_desc,
  6443. nmr->nr_arg3, npipes, &error);
  6444. @@ -2199,8 +2284,8 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter
  6445.  
  6446. err:
  6447. if (na->nm_mem != NULL)
  6448. - netmap_mem_delete(na->nm_mem);
  6449. - free(vpna, M_DEVBUF);
  6450. + netmap_mem_put(na->nm_mem);
  6451. + nm_os_free(vpna);
  6452. return error;
  6453. }
  6454.  
  6455. @@ -2243,6 +2328,8 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
  6456. struct nm_bridge *b = bna->up.na_bdg,
  6457. *bh = bna->host.na_bdg;
  6458.  
  6459. + netmap_mem_put(bna->host.up.nm_mem);
  6460. +
  6461. if (b) {
  6462. netmap_bdg_detach_common(b, bna->up.bdg_port,
  6463. (bh ? bna->host.bdg_port : -1));
  6464. @@ -2644,7 +2731,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
  6465. return EBUSY;
  6466. }
  6467.  
  6468. - bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
  6469. + bna = nm_os_malloc(sizeof(*bna));
  6470. if (bna == NULL) {
  6471. return ENOMEM;
  6472. }
  6473. @@ -2652,6 +2739,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
  6474. na = &bna->up.up;
  6475. /* make bwrap ifp point to the real ifp */
  6476. na->ifp = hwna->ifp;
  6477. + if_ref(na->ifp);
  6478. na->na_private = bna;
  6479. strncpy(na->name, nr_name, sizeof(na->name));
  6480. /* fill the ring data for the bwrap adapter with rx/tx meanings
  6481. @@ -2673,7 +2761,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
  6482. na->nm_notify = netmap_bwrap_notify;
  6483. na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
  6484. na->pdev = hwna->pdev;
  6485. - na->nm_mem = hwna->nm_mem;
  6486. + na->nm_mem = netmap_mem_get(hwna->nm_mem);
  6487. na->virt_hdr_len = hwna->virt_hdr_len;
  6488. bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
  6489.  
  6490. @@ -2697,7 +2785,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
  6491. // hostna->nm_txsync = netmap_bwrap_host_txsync;
  6492. // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
  6493. hostna->nm_notify = netmap_bwrap_notify;
  6494. - hostna->nm_mem = na->nm_mem;
  6495. + hostna->nm_mem = netmap_mem_get(na->nm_mem);
  6496. hostna->na_private = bna;
  6497. hostna->na_vp = &bna->up;
  6498. na->na_hostvp = hwna->na_hostvp =
  6499. @@ -2720,7 +2808,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
  6500. err_free:
  6501. hwna->na_vp = hwna->na_hostvp = NULL;
  6502. netmap_adapter_put(hwna);
  6503. - free(bna, M_DEVBUF);
  6504. + nm_os_free(bna);
  6505. return error;
  6506.  
  6507. }
  6508. @@ -2731,8 +2819,7 @@ netmap_init_bridges2(u_int n)
  6509. int i;
  6510. struct nm_bridge *b;
  6511.  
  6512. - b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF,
  6513. - M_NOWAIT | M_ZERO);
  6514. + b = nm_os_malloc(sizeof(struct nm_bridge) * n);
  6515. if (b == NULL)
  6516. return NULL;
  6517. for (i = 0; i < n; i++)
  6518. @@ -2750,7 +2837,7 @@ netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
  6519.  
  6520. for (i = 0; i < n; i++)
  6521. BDG_RWDESTROY(&b[i]);
  6522. - free(b, M_DEVBUF);
  6523. + nm_os_free(b);
  6524. }
  6525.  
  6526. int
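
For context on the NETMAP_BDG_NEWIF path reworked above: netmap_vi_create() now honors nmr->nr_arg2 as a memory-allocator id and reports the resulting ring/slot/memory geometry back through nm_update_info(). The following user-space sketch of creating a persistent VALE port is illustrative only and not part of the patch; the name "persist0" and memid 2 are made up, and error handling is minimal.

#include <stdint.h>
#include <sys/ioctl.h>
#include <net/if.h>          /* IFNAMSIZ, needed by net/netmap.h */
#include <net/netmap.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>

static int
create_persistent_vale_port(void)
{
	struct nmreq nmr;
	int fd = open("/dev/netmap", O_RDWR);

	if (fd < 0)
		return (-1);
	memset(&nmr, 0, sizeof(nmr));
	nmr.nr_version = NETMAP_API;
	/* NETMAP_BDG_NEWIF takes a bare name, i.e. no "valeX:" prefix. */
	strncpy(nmr.nr_name, "persist0", sizeof(nmr.nr_name) - 1);
	nmr.nr_cmd = NETMAP_BDG_NEWIF;
	nmr.nr_arg2 = 2;	/* optional: allocate from netmap memory region 2 */
	if (ioctl(fd, NIOCREGIF, &nmr) < 0) {
		perror("NETMAP_BDG_NEWIF");
		return (-1);
	}
	/* The kernel now reports the port geometry and the memid back. */
	printf("created %s: %d tx / %d rx rings, memid %d\n", nmr.nr_name,
	    (int)nmr.nr_tx_rings, (int)nmr.nr_rx_rings, (int)nmr.nr_arg2);
	return (fd);
}

The created port can later be attached to a switch with NETMAP_BDG_ATTACH using its "valeX:"-prefixed name, and removed again with NETMAP_BDG_DELIF (the nm_vi_destroy() path above).
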
  6527. diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
  6528. index cb9152f3d56..33855c709c8 100644
  6529. --- a/sys/modules/vmm/Makefile
  6530. +++ b/sys/modules/vmm/Makefile
  6531. @@ -21,6 +21,7 @@ SRCS+= vmm.c \
  6532. vmm_ioport.c \
  6533. vmm_lapic.c \
  6534. vmm_mem.c \
  6535. + vmm_usermem.c \
  6536. vmm_stat.c \
  6537. vmm_util.c \
  6538. x86.c
  6539. diff --git a/sys/net/netmap.h b/sys/net/netmap.h
  6540. index 3e0cdab4248..3543426b680 100644
  6541. --- a/sys/net/netmap.h
  6542. +++ b/sys/net/netmap.h
  6543. @@ -25,7 +25,7 @@
  6544. */
  6545.  
  6546. /*
  6547. - * $FreeBSD$
  6548. + * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $
  6549. *
  6550. * Definitions of constants and the structures used by the netmap
  6551. * framework, for the part visible to both kernel and userspace.
  6552. diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h
  6553. index 4fbf38731d3..758084c1dcc 100644
  6554. --- a/sys/net/netmap_user.h
  6555. +++ b/sys/net/netmap_user.h
  6556. @@ -309,16 +309,16 @@ typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);
  6557. * ifname (netmap:foo or vale:foo) is the port name
  6558. * a suffix can indicate the follwing:
  6559. * ^ bind the host (sw) ring pair
  6560. - * * bind host and NIC ring pairs (transparent)
  6561. + * * bind host and NIC ring pairs
  6562. * -NN bind individual NIC ring pair
  6563. * {NN bind master side of pipe NN
  6564. * }NN bind slave side of pipe NN
  6565. * a suffix starting with / and the following flags,
  6566. * in any order:
  6567. * x exclusive access
  6568. - * z zero copy monitor
  6569. - * t monitor tx side
  6570. - * r monitor rx side
  6571. + * z zero copy monitor (both tx and rx)
  6572. + * t monitor tx side (copy monitor)
  6573. + * r monitor rx side (copy monitor)
  6574. * R bind only RX ring(s)
  6575. * T bind only TX ring(s)
  6576. *
  6577. @@ -634,9 +634,10 @@ nm_open(const char *ifname, const struct nmreq *req,
  6578. const char *vpname = NULL;
  6579. #define MAXERRMSG 80
  6580. char errmsg[MAXERRMSG] = "";
  6581. - enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK } p_state;
  6582. + enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state;
  6583. int is_vale;
  6584. long num;
  6585. + uint16_t nr_arg2 = 0;
  6586.  
  6587. if (strncmp(ifname, "netmap:", 7) &&
  6588. strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) {
  6589. @@ -665,7 +666,7 @@ nm_open(const char *ifname, const struct nmreq *req,
  6590. }
  6591.  
  6592. /* scan for a separator */
  6593. - for (; *port && !index("-*^{}/", *port); port++)
  6594. + for (; *port && !index("-*^{}/@", *port); port++)
  6595. ;
  6596.  
  6597. if (is_vale && !nm_is_identifier(vpname, port)) {
  6598. @@ -707,6 +708,9 @@ nm_open(const char *ifname, const struct nmreq *req,
  6599. case '/': /* start of flags */
  6600. p_state = P_FLAGS;
  6601. break;
  6602. + case '@': /* start of memid */
  6603. + p_state = P_MEMID;
  6604. + break;
  6605. default:
  6606. snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port);
  6607. goto fail;
  6608. @@ -718,6 +722,9 @@ nm_open(const char *ifname, const struct nmreq *req,
  6609. case '/':
  6610. p_state = P_FLAGS;
  6611. break;
  6612. + case '@':
  6613. + p_state = P_MEMID;
  6614. + break;
  6615. default:
  6616. snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port);
  6617. goto fail;
  6618. @@ -736,6 +743,11 @@ nm_open(const char *ifname, const struct nmreq *req,
  6619. break;
  6620. case P_FLAGS:
  6621. case P_FLAGSOK:
  6622. + if (*port == '@') {
  6623. + port++;
  6624. + p_state = P_MEMID;
  6625. + break;
  6626. + }
  6627. switch (*port) {
  6628. case 'x':
  6629. nr_flags |= NR_EXCLUSIVE;
  6630. @@ -762,17 +774,25 @@ nm_open(const char *ifname, const struct nmreq *req,
  6631. port++;
  6632. p_state = P_FLAGSOK;
  6633. break;
  6634. + case P_MEMID:
  6635. + if (nr_arg2 != 0) {
  6636. + snprintf(errmsg, MAXERRMSG, "double setting of memid");
  6637. + goto fail;
  6638. + }
  6639. + num = strtol(port, (char **)&port, 10);
  6640. + if (num <= 0) {
  6641. + snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num);
  6642. + goto fail;
  6643. + }
  6644. + nr_arg2 = num;
  6645. + p_state = P_RNGSFXOK;
  6646. + break;
  6647. }
  6648. }
  6649. if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) {
  6650. snprintf(errmsg, MAXERRMSG, "unexpected end of port name");
  6651. goto fail;
  6652. }
  6653. - if ((nr_flags & NR_ZCOPY_MON) &&
  6654. - !(nr_flags & (NR_MONITOR_TX|NR_MONITOR_RX))) {
  6655. - snprintf(errmsg, MAXERRMSG, "'z' used but neither 'r', nor 't' found");
  6656. - goto fail;
  6657. - }
  6658. ND("flags: %s %s %s %s",
  6659. (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "",
  6660. (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "",
  6661. @@ -799,6 +819,8 @@ nm_open(const char *ifname, const struct nmreq *req,
  6662. /* these fields are overridden by ifname and flags processing */
  6663. d->req.nr_ringid |= nr_ringid;
  6664. d->req.nr_flags |= nr_flags;
  6665. + if (nr_arg2)
  6666. + d->req.nr_arg2 = nr_arg2;
  6667. memcpy(d->req.nr_name, ifname, namelen);
  6668. d->req.nr_name[namelen] = '\0';
  6669. /* optionally import info from parent */
  6670. @@ -848,7 +870,7 @@ nm_open(const char *ifname, const struct nmreq *req,
  6671.  
  6672. nr_reg = d->req.nr_flags & NR_REG_MASK;
  6673.  
  6674. - if (nr_reg == NR_REG_SW) { /* host stack */
  6675. + if (nr_reg == NR_REG_SW) { /* host stack */
  6676. d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
  6677. d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
  6678. } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */
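
For reference, the new "@<memid>" suffix parsed above lands in nr_arg2, so a client can request a specific netmap memory region directly from the port name. A minimal usage sketch, not part of the patch; the port "vale0:p1" and memid 2 are made up:

#define NETMAP_WITH_LIBS
#include <net/netmap_user.h>
#include <stdio.h>

int
main(void)
{
	/* Bind all rings of vale0:p1, drawing buffers from memory region 2. */
	struct nm_desc *d = nm_open("vale0:p1@2", NULL, 0, NULL);

	if (d == NULL) {
		perror("nm_open");
		return (1);
	}
	printf("bound %s, memid %d\n", d->req.nr_name, (int)d->req.nr_arg2);
	nm_close(d);
	return (0);
}
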
  6679. diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
  6680. index 8159d7ddbd7..dbd1781cee7 100644
  6681. --- a/usr.sbin/bhyve/Makefile
  6682. +++ b/usr.sbin/bhyve/Makefile
  6683. @@ -29,6 +29,8 @@ SRCS= \
  6684. mem.c \
  6685. mevent.c \
  6686. mptbl.c \
  6687. + net_backends.c \
  6688. + net_utils.c \
  6689. pci_ahci.c \
  6690. pci_e82545.c \
  6691. pci_emul.c \
  6692. @@ -37,6 +39,8 @@ SRCS= \
  6693. pci_irq.c \
  6694. pci_lpc.c \
  6695. pci_passthru.c \
  6696. + pci_ptnetmap_memdev.c \
  6697. + pci_ptnetmap_netif.c \
  6698. pci_virtio_block.c \
  6699. pci_virtio_console.c \
  6700. pci_virtio_net.c \
  6701. @@ -74,6 +78,8 @@ LIBADD+= crypto
  6702. CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000
  6703. CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii
  6704. CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
  6705. +CFLAGS+= -I${BHYVE_SYSDIR}/sys/
  6706. +CFLAGS+= -DWITH_NETMAP
  6707.  
  6708. WARNS?= 2
  6709.  
  6710. diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c
  6711. new file mode 100644
  6712. index 00000000000..0322a7dd0a5
  6713. --- /dev/null
  6714. +++ b/usr.sbin/bhyve/net_backends.c
  6715. @@ -0,0 +1,1082 @@
  6716. +/*-
  6717. + * Copyright (c) 2014-2016 Vincenzo Maffione
  6718. + * All rights reserved.
  6719. + *
  6720. + * Redistribution and use in source and binary forms, with or without
  6721. + * modification, are permitted provided that the following conditions
  6722. + * are met:
  6723. + * 1. Redistributions of source code must retain the above copyright
  6724. + * notice, this list of conditions and the following disclaimer.
  6725. + * 2. Redistributions in binary form must reproduce the above copyright
  6726. + * notice, this list of conditions and the following disclaimer in the
  6727. + * documentation and/or other materials provided with the distribution.
  6728. + *
  6729. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  6730. + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  6731. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  6732. + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
  6733. + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  6734. + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
  6735. + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  6736. + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  6737. + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  6738. + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  6739. + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  6740. + */
  6741. +
  6742. +/*
  6743. + * This file implements multiple network backends (null, tap, netmap, ...),
  6744. + * to be used by network frontends such as virtio-net and ptnet.
  6745. + * The API to access the backend (e.g. send/receive packets, negotiate
  6746. + * features) is exported by net_backends.h.
  6747. + */
  6748. +
  6749. +#include <sys/cdefs.h>
  6750. +#include <sys/uio.h>
  6751. +#include <sys/ioctl.h>
  6752. +#include <sys/mman.h>
  6753. +#include <sys/types.h> /* u_short etc */
  6754. +#include <net/if.h>
  6755. +
  6756. +#include <errno.h>
  6757. +#include <fcntl.h>
  6758. +#include <stdio.h>
  6759. +#include <stdlib.h>
  6760. +#include <stdint.h>
  6761. +#include <string.h>
  6762. +#include <unistd.h>
  6763. +#include <assert.h>
  6764. +#include <pthread.h>
  6765. +#include <pthread_np.h>
  6766. +#include <poll.h>
  6767. +#include <assert.h>
  6768. +
  6769. +#include "mevent.h"
  6770. +#include "net_backends.h"
  6771. +
  6772. +#include <sys/linker_set.h>
  6773. +
  6774. +/*
  6775. + * Each network backend registers a set of function pointers that are
  6776. + * used to implement the net backends API.
  6777. + * This might need to be exposed if we implement backends in separate files.
  6778. + */
  6779. +struct net_backend {
  6780. + const char *name; /* name of the backend */
  6781. + /*
  6782. + * The init and cleanup functions are used internally,
  6783. + * virtio-net should never use them.
  6784. + */
  6785. + int (*init)(struct net_backend *be, const char *devname,
  6786. + net_backend_cb_t cb, void *param);
  6787. + void (*cleanup)(struct net_backend *be);
  6788. +
  6789. +
  6790. + /*
  6791. + * Called to serve a guest transmit request. The scatter-gather
  6792. + * vector provided by the caller has 'iovcnt' elements and contains
  6793. + * the packet to send. 'len' is the length of the whole packet in bytes.
  6794. + */
  6795. + int (*send)(struct net_backend *be, struct iovec *iov,
  6796. + int iovcnt, uint32_t len, int more);
  6797. +
  6798. + /*
  6799. + * Called to serve guest receive request. When the function
  6800. + * returns a positive value, the scatter-gather vector
  6801. + * provided by the caller (having 'iovcnt' elements in it) will
  6802. + * contain a chunk of the received packet. The 'more' flag will
  6803. + * be set if the returned chunk was the last one for the current
  6804. + * packet, and 0 otherwise. The function returns the chunk size
  6805. + * in bytes, or 0 if the backend doesn't have a new packet to
  6806. + * receive.
  6807. + * Note that it may be necessary to call this callback many
  6808. + * times to receive a single packet, depending on how big the
  6809. + * buffers you provide are.
  6810. + */
  6811. + int (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);
  6812. +
  6813. + /*
  6814. + * Ask the backend for the virtio-net features it is able to
  6815. + * support. Possible features are TSO, UFO and checksum offloading
  6816. + * in both rx and tx direction and for both IPv4 and IPv6.
  6817. + */
  6818. + uint64_t (*get_cap)(struct net_backend *be);
  6819. +
  6820. + /*
  6821. + * Tell the backend to enable/disable the specified virtio-net
  6822. + * features (capabilities).
  6823. + */
  6824. + int (*set_cap)(struct net_backend *be, uint64_t features,
  6825. + unsigned int vnet_hdr_len);
  6826. +
  6827. + struct pci_vtnet_softc *sc;
  6828. + int fd;
  6829. + unsigned int be_vnet_hdr_len;
  6830. + unsigned int fe_vnet_hdr_len;
  6831. + void *priv; /* Pointer to backend-specific data. */
  6832. +};
  6833. +
  6834. +SET_DECLARE(net_backend_s, struct net_backend);
  6835. +
  6836. +#define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr)
  6837. +
  6838. +#define WPRINTF(params) printf params
  6839. +
  6840. +/* the null backend */
  6841. +static int
  6842. +netbe_null_init(struct net_backend *be, const char *devname,
  6843. + net_backend_cb_t cb, void *param)
  6844. +{
  6845. + (void)devname; (void)cb; (void)param;
  6846. + be->fd = -1;
  6847. + return 0;
  6848. +}
  6849. +
  6850. +static void
  6851. +netbe_null_cleanup(struct net_backend *be)
  6852. +{
  6853. + (void)be;
  6854. +}
  6855. +
  6856. +static uint64_t
  6857. +netbe_null_get_cap(struct net_backend *be)
  6858. +{
  6859. + (void)be;
  6860. + return 0;
  6861. +}
  6862. +
  6863. +static int
  6864. +netbe_null_set_cap(struct net_backend *be, uint64_t features,
  6865. + unsigned vnet_hdr_len)
  6866. +{
  6867. + (void)be; (void)features; (void)vnet_hdr_len;
  6868. + return 0;
  6869. +}
  6870. +
  6871. +static int
  6872. +netbe_null_send(struct net_backend *be, struct iovec *iov,
  6873. + int iovcnt, uint32_t len, int more)
  6874. +{
  6875. + (void)be; (void)iov; (void)iovcnt; (void)len; (void)more;
  6876. + return 0; /* pretend we send */
  6877. +}
  6878. +
  6879. +static int
  6880. +netbe_null_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
  6881. +{
  6882. + (void)be; (void)iov; (void)iovcnt;
  6883. + fprintf(stderr, "netbe_null_recv called ?\n");
  6884. + return -1; /* never called, I believe */
  6885. +}
  6886. +
  6887. +static struct net_backend n_be = {
  6888. + .name = "null",
  6889. + .init = netbe_null_init,
  6890. + .cleanup = netbe_null_cleanup,
  6891. + .send = netbe_null_send,
  6892. + .recv = netbe_null_recv,
  6893. + .get_cap = netbe_null_get_cap,
  6894. + .set_cap = netbe_null_set_cap,
  6895. +};
  6896. +
  6897. +DATA_SET(net_backend_s, n_be);
  6898. +
  6899. +
  6900. +/* the tap backend */
  6901. +
  6902. +struct tap_priv {
  6903. + struct mevent *mevp;
  6904. +};
  6905. +
  6906. +static void
  6907. +tap_cleanup(struct net_backend *be)
  6908. +{
  6909. + struct tap_priv *priv = be->priv;
  6910. +
  6911. + if (be->priv) {
  6912. + mevent_delete(priv->mevp);
  6913. + free(be->priv);
  6914. + be->priv = NULL;
  6915. + }
  6916. + if (be->fd != -1) {
  6917. + close(be->fd);
  6918. + be->fd = -1;
  6919. + }
  6920. +}
  6921. +
  6922. +static int
  6923. +tap_init(struct net_backend *be, const char *devname,
  6924. + net_backend_cb_t cb, void *param)
  6925. +{
  6926. + char tbuf[80];
  6927. + int fd;
  6928. + int opt = 1;
  6929. + struct tap_priv *priv;
  6930. +
  6931. + if (cb == NULL) {
  6932. + WPRINTF(("TAP backend requires non-NULL callback\n"));
  6933. + return -1;
  6934. + }
  6935. +
  6936. + priv = calloc(1, sizeof(struct tap_priv));
  6937. + if (priv == NULL) {
  6938. + WPRINTF(("tap_priv alloc failed\n"));
  6939. + return -1;
  6940. + }
  6941. +
  6942. + strcpy(tbuf, "/dev/");
  6943. + strlcat(tbuf, devname, sizeof(tbuf));
  6944. +
  6945. + fd = open(tbuf, O_RDWR);
  6946. + if (fd == -1) {
  6947. + WPRINTF(("open of tap device %s failed\n", tbuf));
  6948. + goto error;
  6949. + }
  6950. +
  6951. + /*
  6952. + * Set non-blocking and register for read
  6953. + * notifications with the event loop
  6954. + */
  6955. + if (ioctl(fd, FIONBIO, &opt) < 0) {
  6956. + WPRINTF(("tap device O_NONBLOCK failed\n"));
  6957. + goto error;
  6958. + }
  6959. +
  6960. + priv->mevp = mevent_add(fd, EVF_READ, cb, param);
  6961. + if (priv->mevp == NULL) {
  6962. + WPRINTF(("Could not register event\n"));
  6963. + goto error;
  6964. + }
  6965. +
  6966. + be->fd = fd;
  6967. + be->priv = priv;
  6968. +
  6969. + return 0;
  6970. +
  6971. +error:
  6972. + tap_cleanup(be);
  6973. + return -1;
  6974. +}
  6975. +
  6976. +/*
  6977. + * Called to send a buffer chain out to the tap device
  6978. + */
  6979. +static int
  6980. +tap_send(struct net_backend *be, struct iovec *iov, int iovcnt, uint32_t len,
  6981. + int more)
  6982. +{
  6983. + static char pad[60]; /* all zero bytes */
  6984. +
  6985. + (void)more;
  6986. + /*
  6987. + * If the length is < 60, pad out to that and add the
  6988. + * extra zero'd segment to the iov. It is guaranteed that
  6989. + * there is always an extra iov available by the caller.
  6990. + */
  6991. + if (len < 60) {
  6992. + iov[iovcnt].iov_base = pad;
  6993. + iov[iovcnt].iov_len = (size_t)(60 - len);
  6994. + iovcnt++;
  6995. + }
  6996. +
  6997. + return (int)writev(be->fd, iov, iovcnt);
  6998. +}
  6999. +
  7000. +static int
  7001. +tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
  7002. +{
  7003. + int ret;
  7004. +
  7005. + /* Should never be called without a valid tap fd */
  7006. + assert(be->fd != -1);
  7007. +
  7008. + ret = (int)readv(be->fd, iov, iovcnt);
  7009. +
  7010. + if (ret < 0 && errno == EWOULDBLOCK) {
  7011. + return 0;
  7012. + }
  7013. +
  7014. + return ret;
  7015. +}
  7016. +
  7017. +static uint64_t
  7018. +tap_get_cap(struct net_backend *be)
  7019. +{
  7020. + (void)be;
  7021. + return 0; // nothing extra
  7022. +}
  7023. +
  7024. +static int
  7025. +tap_set_cap(struct net_backend *be, uint64_t features,
  7026. + unsigned vnet_hdr_len)
  7027. +{
  7028. + (void)be;
  7029. + return (features || vnet_hdr_len) ? -1 : 0;
  7030. +}
  7031. +
  7032. +static struct net_backend tap_backend = {
  7033. + .name = "tap|vmmnet",
  7034. + .init = tap_init,
  7035. + .cleanup = tap_cleanup,
  7036. + .send = tap_send,
  7037. + .recv = tap_recv,
  7038. + .get_cap = tap_get_cap,
  7039. + .set_cap = tap_set_cap,
  7040. +};
  7041. +
  7042. +DATA_SET(net_backend_s, tap_backend);
  7043. +
  7044. +#ifdef WITH_NETMAP
  7045. +
  7046. +/*
  7047. + * The netmap backend
  7048. + */
  7049. +
  7050. +/* The virtio-net features supported by netmap. */
  7051. +#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
  7052. + VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
  7053. + VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
  7054. + VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
  7055. +
  7056. +#define NETMAP_POLLMASK (POLLIN | POLLRDNORM | POLLRDBAND)
  7057. +
  7058. +struct netmap_priv {
  7059. + char ifname[IFNAMSIZ];
  7060. + struct nm_desc *nmd;
  7061. + uint16_t memid;
  7062. + struct netmap_ring *rx;
  7063. + struct netmap_ring *tx;
  7064. + pthread_t evloop_tid;
  7065. + net_backend_cb_t cb;
  7066. + void *cb_param;
  7067. +
  7068. + struct ptnetmap_state ptnetmap;
  7069. +};
  7070. +
  7071. +static void *
  7072. +netmap_evloop_thread(void *param)
  7073. +{
  7074. + struct net_backend *be = param;
  7075. + struct netmap_priv *priv = be->priv;
  7076. + struct pollfd pfd;
  7077. + int ret;
  7078. +
  7079. + for (;;) {
  7080. + pfd.fd = be->fd;
  7081. + pfd.events = NETMAP_POLLMASK;
  7082. + ret = poll(&pfd, 1, INFTIM);
  7083. + if (ret == -1 && errno != EINTR) {
  7084. + WPRINTF(("netmap poll failed, %d\n", errno));
  7085. + } else if (ret == 1 && (pfd.revents & NETMAP_POLLMASK)) {
  7086. + priv->cb(pfd.fd, EVF_READ, priv->cb_param);
  7087. + }
  7088. + }
  7089. +
  7090. + return NULL;
  7091. +}
  7092. +
  7093. +static void
  7094. +nmreq_init(struct nmreq *req, char *ifname)
  7095. +{
  7096. + memset(req, 0, sizeof(*req));
  7097. + strncpy(req->nr_name, ifname, sizeof(req->nr_name));
  7098. + req->nr_version = NETMAP_API;
  7099. +}
  7100. +
  7101. +static int
  7102. +netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
  7103. +{
  7104. + int err;
  7105. + struct nmreq req;
  7106. + struct netmap_priv *priv = be->priv;
  7107. +
  7108. + nmreq_init(&req, priv->ifname);
  7109. + req.nr_cmd = NETMAP_BDG_VNET_HDR;
  7110. + req.nr_arg1 = vnet_hdr_len;
  7111. + err = ioctl(be->fd, NIOCREGIF, &req);
  7112. + if (err) {
  7113. + WPRINTF(("Unable to set vnet header length %d\n",
  7114. + vnet_hdr_len));
  7115. + return err;
  7116. + }
  7117. +
  7118. + be->be_vnet_hdr_len = vnet_hdr_len;
  7119. +
  7120. + return 0;
  7121. +}
  7122. +
  7123. +static int
  7124. +netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
  7125. +{
  7126. + int prev_hdr_len = be->be_vnet_hdr_len;
  7127. + int ret;
  7128. +
  7129. + if (vnet_hdr_len == prev_hdr_len) {
  7130. + return 1;
  7131. + }
  7132. +
  7133. + ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
  7134. + if (ret) {
  7135. + return 0;
  7136. + }
  7137. +
  7138. + netmap_set_vnet_hdr_len(be, prev_hdr_len);
  7139. +
  7140. + return 1;
  7141. +}
  7142. +
  7143. +static uint64_t
  7144. +netmap_get_cap(struct net_backend *be)
  7145. +{
  7146. + return netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
  7147. + NETMAP_FEATURES : 0;
  7148. +}
  7149. +
  7150. +static int
  7151. +netmap_set_cap(struct net_backend *be, uint64_t features,
  7152. + unsigned vnet_hdr_len)
  7153. +{
  7154. + return netmap_set_vnet_hdr_len(be, vnet_hdr_len);
  7155. +}
  7156. +
  7157. +/* Store and return the features we agreed upon. */
  7158. +uint32_t
  7159. +ptnetmap_ack_features(struct ptnetmap_state *ptn, uint32_t wanted_features)
  7160. +{
  7161. + ptn->acked_features = ptn->features & wanted_features;
  7162. +
  7163. + return ptn->acked_features;
  7164. +}
  7165. +
  7166. +struct ptnetmap_state *
  7167. +get_ptnetmap(struct net_backend *be)
  7168. +{
  7169. + struct netmap_priv *priv = be ? be->priv : NULL;
  7170. + struct netmap_pools_info pi;
  7171. + struct nmreq req;
  7172. + int err;
  7173. +
  7174. + /* Check that this is a ptnetmap backend. */
  7175. + if (!be || be->set_cap != netmap_set_cap ||
  7176. + !(priv->nmd->req.nr_flags & NR_PTNETMAP_HOST)) {
  7177. + return NULL;
  7178. + }
  7179. +
  7180. + nmreq_init(&req, priv->ifname);
  7181. + req.nr_cmd = NETMAP_POOLS_INFO_GET;
  7182. + nmreq_pointer_put(&req, &pi);
  7183. + err = ioctl(priv->nmd->fd, NIOCREGIF, &req);
  7184. + if (err) {
  7185. + return NULL;
  7186. + }
  7187. +
  7188. + err = ptn_memdev_attach(priv->nmd->mem, &pi);
  7189. + if (err) {
  7190. + return NULL;
  7191. + }
  7192. +
  7193. + return &priv->ptnetmap;
  7194. +}
  7195. +
  7196. +int
  7197. +ptnetmap_get_netmap_if(struct ptnetmap_state *ptn, struct netmap_if_info *nif)
  7198. +{
  7199. + struct netmap_priv *priv = ptn->netmap_priv;
  7200. +
  7201. + memset(nif, 0, sizeof(*nif));
  7202. + if (priv->nmd == NULL) {
  7203. + return EINVAL;
  7204. + }
  7205. +
  7206. + nif->nifp_offset = priv->nmd->req.nr_offset;
  7207. + nif->num_tx_rings = priv->nmd->req.nr_tx_rings;
  7208. + nif->num_rx_rings = priv->nmd->req.nr_rx_rings;
  7209. + nif->num_tx_slots = priv->nmd->req.nr_tx_slots;
  7210. + nif->num_rx_slots = priv->nmd->req.nr_rx_slots;
  7211. +
  7212. + return 0;
  7213. +}
  7214. +
  7215. +int
  7216. +ptnetmap_get_hostmemid(struct ptnetmap_state *ptn)
  7217. +{
  7218. + struct netmap_priv *priv = ptn->netmap_priv;
  7219. +
  7220. + if (priv->nmd == NULL) {
  7221. + return EINVAL;
  7222. + }
  7223. +
  7224. + return priv->memid;
  7225. +}
  7226. +
  7227. +int
  7228. +ptnetmap_create(struct ptnetmap_state *ptn, struct ptnetmap_cfg *cfg)
  7229. +{
  7230. + struct netmap_priv *priv = ptn->netmap_priv;
  7231. + struct nmreq req;
  7232. + int err;
  7233. +
  7234. + if (ptn->running) {
  7235. + return 0;
  7236. + }
  7237. +
  7238. + /* XXX We should stop the netmap evloop here. */
  7239. +
  7240. + /* Ask netmap to create kthreads for this interface. */
  7241. + nmreq_init(&req, priv->ifname);
  7242. + nmreq_pointer_put(&req, cfg);
  7243. + req.nr_cmd = NETMAP_PT_HOST_CREATE;
  7244. + err = ioctl(priv->nmd->fd, NIOCREGIF, &req);
  7245. + if (err) {
  7246. + fprintf(stderr, "%s: Unable to create ptnetmap kthreads on "
  7247. + "%s [errno=%d]", __func__, priv->ifname, errno);
  7248. + return err;
  7249. + }
  7250. +
  7251. + ptn->running = 1;
  7252. +
  7253. + return 0;
  7254. +}
  7255. +
  7256. +int
  7257. +ptnetmap_delete(struct ptnetmap_state *ptn)
  7258. +{
  7259. + struct netmap_priv *priv = ptn->netmap_priv;
  7260. + struct nmreq req;
  7261. + int err;
  7262. +
  7263. + if (!ptn->running) {
  7264. + return 0;
  7265. + }
  7266. +
  7267. + /* Ask netmap to delete kthreads for this interface. */
  7268. + nmreq_init(&req, priv->ifname);
  7269. + req.nr_cmd = NETMAP_PT_HOST_DELETE;
  7270. + err = ioctl(priv->nmd->fd, NIOCREGIF, &req);
  7271. + if (err) {
  7272. + fprintf(stderr, "%s: Unable to delete ptnetmap kthreads on "
  7273. + "%s [errno=%d]", __func__, priv->ifname, errno);
  7274. + return err;
  7275. + }
  7276. +
  7277. + ptn->running = 0;
  7278. +
  7279. + return 0;
  7280. +}
  7281. +
  7282. +static int
  7283. +netmap_init(struct net_backend *be, const char *devname,
  7284. + net_backend_cb_t cb, void *param)
  7285. +{
  7286. + const char *ndname = "/dev/netmap";
  7287. + struct netmap_priv *priv = NULL;
  7288. + struct nmreq req;
  7289. + int ptnetmap = (cb == NULL);
  7290. +
  7291. + priv = calloc(1, sizeof(struct netmap_priv));
  7292. + if (priv == NULL) {
  7293. + WPRINTF(("Unable to alloc netmap private data\n"));
  7294. + return -1;
  7295. + }
  7296. +
  7297. + strncpy(priv->ifname, devname, sizeof(priv->ifname));
  7298. + priv->ifname[sizeof(priv->ifname) - 1] = '\0';
  7299. +
  7300. + memset(&req, 0, sizeof(req));
  7301. + req.nr_flags = ptnetmap ? NR_PTNETMAP_HOST : 0;
  7302. +
  7303. + priv->nmd = nm_open(priv->ifname, &req, NETMAP_NO_TX_POLL, NULL);
  7304. + if (priv->nmd == NULL) {
  7305. + WPRINTF(("Unable to nm_open(): device '%s', "
  7306. + "interface '%s', errno (%s)\n",
  7307. + ndname, devname, strerror(errno)));
  7308. + free(priv);
  7309. + return -1;
  7310. + }
  7311. +
  7312. + priv->memid = priv->nmd->req.nr_arg2;
  7313. + priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
  7314. + priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
  7315. + priv->cb = cb;
  7316. + priv->cb_param = param;
  7317. + be->fd = priv->nmd->fd;
  7318. + be->priv = priv;
  7319. +
  7320. + priv->ptnetmap.netmap_priv = priv;
  7321. + priv->ptnetmap.features = 0;
  7322. + priv->ptnetmap.acked_features = 0;
  7323. + priv->ptnetmap.running = 0;
  7324. + if (ptnetmap) {
  7325. + if (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN)) {
  7326. + priv->ptnetmap.features |= PTNETMAP_F_VNET_HDR;
  7327. + }
  7328. + } else {
  7329. + char tname[40];
  7330. +
  7331. + /* Create a thread for netmap poll. */
  7332. + pthread_create(&priv->evloop_tid, NULL, netmap_evloop_thread, (void *)be);
  7333. + snprintf(tname, sizeof(tname), "netmap-evloop-%p", priv);
  7334. + pthread_set_name_np(priv->evloop_tid, tname);
  7335. + }
  7336. +
  7337. + return 0;
  7338. +}
  7339. +
  7340. +static void
  7341. +netmap_cleanup(struct net_backend *be)
  7342. +{
  7343. + struct netmap_priv *priv = be->priv;
  7344. +
  7345. + if (be->priv) {
  7346. + if (priv->ptnetmap.running) {
  7347. + ptnetmap_delete(&priv->ptnetmap);
  7348. + }
  7349. + nm_close(priv->nmd);
  7350. + free(be->priv);
  7351. + be->priv = NULL;
  7352. + }
  7353. + be->fd = -1;
  7354. +}
  7355. +
  7356. +/* A fast copy routine only for multiples of 64 bytes, non overlapped. */
  7357. +static inline void
  7358. +pkt_copy(const void *_src, void *_dst, int l)
  7359. +{
  7360. + const uint64_t *src = _src;
  7361. + uint64_t *dst = _dst;
  7362. + if (l >= 1024) {
  7363. + bcopy(src, dst, l);
  7364. + return;
  7365. + }
  7366. + for (; l > 0; l -= 64) {
  7367. + *dst++ = *src++;
  7368. + *dst++ = *src++;
  7369. + *dst++ = *src++;
  7370. + *dst++ = *src++;
  7371. + *dst++ = *src++;
  7372. + *dst++ = *src++;
  7373. + *dst++ = *src++;
  7374. + *dst++ = *src++;
  7375. + }
  7376. +}
  7377. +
  7378. +static int
  7379. +netmap_send(struct net_backend *be, struct iovec *iov,
  7380. + int iovcnt, uint32_t size, int more)
  7381. +{
  7382. + struct netmap_priv *priv = be->priv;
  7383. + struct netmap_ring *ring;
  7384. + int nm_buf_size;
  7385. + int nm_buf_len;
  7386. + uint32_t head;
  7387. + void *nm_buf;
  7388. + int j;
  7389. +
  7390. + if (iovcnt <= 0 || size <= 0) {
  7391. + D("Wrong iov: iovcnt %d size %d", iovcnt, size);
  7392. + return 0;
  7393. + }
  7394. +
  7395. + ring = priv->tx;
  7396. + head = ring->head;
  7397. + if (head == ring->tail) {
  7398. + RD(1, "No space, drop %d bytes", size);
  7399. + goto txsync;
  7400. + }
  7401. + nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
  7402. + nm_buf_size = ring->nr_buf_size;
  7403. + nm_buf_len = 0;
  7404. +
  7405. + for (j = 0; j < iovcnt; j++) {
  7406. + int iov_frag_size = iov[j].iov_len;
  7407. + void *iov_frag_buf = iov[j].iov_base;
  7408. +
  7409. + /* Split each iovec fragment over more netmap slots, if
  7410. + necessary. */
  7411. + for (;;) {
  7412. + int copylen;
  7413. +
  7414. + copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
  7415. + pkt_copy(iov_frag_buf, nm_buf, copylen);
  7416. +
  7417. + iov_frag_buf += copylen;
  7418. + iov_frag_size -= copylen;
  7419. + nm_buf += copylen;
  7420. + nm_buf_size -= copylen;
  7421. + nm_buf_len += copylen;
  7422. +
  7423. + if (iov_frag_size == 0) {
  7424. + break;
  7425. + }
  7426. +
  7427. + ring->slot[head].len = nm_buf_len;
  7428. + ring->slot[head].flags = NS_MOREFRAG;
  7429. + head = nm_ring_next(ring, head);
  7430. + if (head == ring->tail) {
  7431. + /* We ran out of netmap slots while
  7432. + * splitting the iovec fragments. */
  7433. + RD(1, "No space, drop %d bytes", size);
  7434. + goto txsync;
  7435. + }
  7436. + nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
  7437. + nm_buf_size = ring->nr_buf_size;
  7438. + nm_buf_len = 0;
  7439. + }
  7440. + }
  7441. +
  7442. + /* Complete the last slot, which must not have NS_MOREFRAG set. */
  7443. + ring->slot[head].len = nm_buf_len;
  7444. + ring->slot[head].flags = 0;
  7445. + head = nm_ring_next(ring, head);
  7446. +
  7447. + /* Now update ring->head and ring->cur. */
  7448. + ring->head = ring->cur = head;
  7449. +
  7450. + if (more) {// && nm_ring_space(ring) > 64
  7451. + return 0;
  7452. + }
  7453. +txsync:
  7454. + ioctl(be->fd, NIOCTXSYNC, NULL);
  7455. +
  7456. + return 0;
  7457. +}
  7458. +
  7459. +static int
  7460. +netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
  7461. +{
  7462. + struct netmap_priv *priv = be->priv;
  7463. + struct netmap_slot *slot = NULL;
  7464. + struct netmap_ring *ring;
  7465. + void *iov_frag_buf;
  7466. + int iov_frag_size;
  7467. + int totlen = 0;
  7468. + uint32_t head;
  7469. +
  7470. + assert(iovcnt);
  7471. +
  7472. + ring = priv->rx;
  7473. + head = ring->head;
  7474. + iov_frag_buf = iov->iov_base;
  7475. + iov_frag_size = iov->iov_len;
  7476. +
  7477. + do {
  7478. + int nm_buf_len;
  7479. + void *nm_buf;
  7480. +
  7481. + if (head == ring->tail) {
  7482. + return 0;
  7483. + }
  7484. +
  7485. + slot = ring->slot + head;
  7486. + nm_buf = NETMAP_BUF(ring, slot->buf_idx);
  7487. + nm_buf_len = slot->len;
  7488. +
  7489. + for (;;) {
  7490. + int copylen = nm_buf_len < iov_frag_size ? nm_buf_len : iov_frag_size;
  7491. +
  7492. + pkt_copy(nm_buf, iov_frag_buf, copylen);
  7493. + nm_buf += copylen;
  7494. + nm_buf_len -= copylen;
  7495. + iov_frag_buf += copylen;
  7496. + iov_frag_size -= copylen;
  7497. + totlen += copylen;
  7498. +
  7499. + if (nm_buf_len == 0) {
  7500. + break;
  7501. + }
  7502. +
  7503. + iov++;
  7504. + iovcnt--;
  7505. + if (iovcnt == 0) {
  7506. + /* No space to receive. */
  7507. + D("Short iov, drop %d bytes", totlen);
  7508. + return -ENOSPC;
  7509. + }
  7510. + iov_frag_buf = iov->iov_base;
  7511. + iov_frag_size = iov->iov_len;
  7512. + }
  7513. +
  7514. + head = nm_ring_next(ring, head);
  7515. +
  7516. + } while (slot->flags & NS_MOREFRAG);
  7517. +
  7518. + /* Release slots to netmap. */
  7519. + ring->head = ring->cur = head;
  7520. +
  7521. + return totlen;
  7522. +}
  7523. +
  7524. +static struct net_backend netmap_backend = {
  7525. + .name = "netmap|vale",
  7526. + .init = netmap_init,
  7527. + .cleanup = netmap_cleanup,
  7528. + .send = netmap_send,
  7529. + .recv = netmap_recv,
  7530. + .get_cap = netmap_get_cap,
  7531. + .set_cap = netmap_set_cap,
  7532. +};
  7533. +
  7534. +DATA_SET(net_backend_s, netmap_backend);
  7535. +
  7536. +#endif /* WITH_NETMAP */
  7537. +
  7538. +/*
  7539. + * make sure a backend is properly initialized
  7540. + */
  7541. +static void
  7542. +netbe_fix(struct net_backend *be)
  7543. +{
  7544. + if (be == NULL)
  7545. + return;
  7546. + if (be->name == NULL) {
  7547. + fprintf(stderr, "missing name for %p\n", be);
  7548. + be->name = "unnamed netbe";
  7549. + }
  7550. + if (be->init == NULL) {
  7551. + fprintf(stderr, "missing init for %p %s\n", be, be->name);
  7552. + be->init = netbe_null_init;
  7553. + }
  7554. + if (be->cleanup == NULL) {
  7555. + fprintf(stderr, "missing cleanup for %p %s\n", be, be->name);
  7556. + be->cleanup = netbe_null_cleanup;
  7557. + }
  7558. + if (be->send == NULL) {
  7559. + fprintf(stderr, "missing send for %p %s\n", be, be->name);
  7560. + be->send = netbe_null_send;
  7561. + }
  7562. + if (be->recv == NULL) {
  7563. + fprintf(stderr, "missing recv for %p %s\n", be, be->name);
  7564. + be->recv = netbe_null_recv;
  7565. + }
  7566. + if (be->get_cap == NULL) {
  7567. + fprintf(stderr, "missing get_cap for %p %s\n",
  7568. + be, be->name);
  7569. + be->get_cap = netbe_null_get_cap;
  7570. + }
  7571. + if (be->set_cap == NULL) {
  7572. + fprintf(stderr, "missing set_cap for %p %s\n",
  7573. + be, be->name);
  7574. + be->set_cap = netbe_null_set_cap;
  7575. + }
  7576. +}
  7577. +
  7578. +/*
  7579. + * keys is a set of prefixes separated by '|',
  7580. + * return a non-NULL pointer if the leftmost part of name matches one prefix.
  7581. + */
  7582. +static const char *
  7583. +netbe_name_match(const char *keys, const char *name)
  7584. +{
  7585. + const char *n = name, *good = keys;
  7586. + char c;
  7587. +
  7588. + if (!keys || !name)
  7589. + return NULL;
  7590. + while ( (c = *keys++) ) {
  7591. + if (c == '|') { /* reached the separator */
  7592. + if (good)
  7593. + break;
  7594. + /* prepare for new round */
  7595. + n = name;
  7596. + good = keys;
  7597. + } else if (good && c != *n++) {
  7598. + good = NULL; /* drop till next keyword */
  7599. + }
  7600. + }
  7601. + return good;
  7602. +}
  7603. +
  7604. +/*
  7605. + * Initialize a backend and attach to the frontend.
  7606. + * This is called during frontend initialization.
  7607. + * devname is the backend-name as supplied on the command line,
  7608. + * e.g. -s 2:0,frontend-name,backend-name[,other-args]
  7609. + * cb is the receive callback supplied by the frontend,
  7610. + * and it is invoked in the event loop when a receive
  7611. + * event is generated in the hypervisor,
  7612. + * param is a pointer to the frontend, and normally used as
  7613. + * the argument for the callback.
  7614. + */
  7615. +struct net_backend *
  7616. +netbe_init(const char *devname, net_backend_cb_t cb, void *param)
  7617. +{
  7618. + struct net_backend **pbe, *be, *tbe = NULL;
  7619. + int err;
  7620. +
  7621. + /*
  7622. + * Find the network backend depending on the user-provided
  7623. + * device name. net_backend_s is built using a linker set.
  7624. + */
  7625. + SET_FOREACH(pbe, net_backend_s) {
  7626. + if (netbe_name_match((*pbe)->name, devname)) {
  7627. + tbe = *pbe;
  7628. + break;
  7629. + }
  7630. + }
  7631. + if (tbe == NULL)
  7632. + return NULL; /* or null backend ? */
  7633. + be = calloc(1, sizeof(*be));
  7634. + *be = *tbe; /* copy the template */
  7635. + netbe_fix(be); /* make sure we have all fields */
  7636. + be->fd = -1;
  7637. + be->priv = NULL;
  7638. + be->sc = param;
  7639. + be->be_vnet_hdr_len = 0;
  7640. + be->fe_vnet_hdr_len = 0;
  7641. +
  7642. + /* initialize the backend */
  7643. + err = be->init(be, devname, cb, param);
  7644. + if (err) {
  7645. + free(be);
  7646. + be = NULL;
  7647. + }
  7648. + return be;
  7649. +}
  7650. +
  7651. +void
  7652. +netbe_cleanup(struct net_backend *be)
  7653. +{
  7654. + if (be == NULL)
  7655. + return;
  7656. + be->cleanup(be);
  7657. + free(be);
  7658. +}
  7659. +
  7660. +uint64_t
  7661. +netbe_get_cap(struct net_backend *be)
  7662. +{
  7663. + if (be == NULL)
  7664. + return 0;
  7665. + return be->get_cap(be);
  7666. +}
  7667. +
  7668. +int
  7669. +netbe_set_cap(struct net_backend *be, uint64_t features,
  7670. + unsigned vnet_hdr_len)
  7671. +{
  7672. + int ret;
  7673. +
  7674. + if (be == NULL)
  7675. + return 0;
  7676. +
  7677. + /* There are only three valid lengths. */
  7678. + if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
  7679. + && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
  7680. + return -1;
  7681. +
  7682. + be->fe_vnet_hdr_len = vnet_hdr_len;
  7683. +
  7684. + ret = be->set_cap(be, features, vnet_hdr_len);
  7685. + assert(be->be_vnet_hdr_len == 0 ||
  7686. + be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
  7687. +
  7688. + return ret;
  7689. +}
  7690. +
  7691. +static __inline struct iovec *
  7692. +iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen)
  7693. +{
  7694. + struct iovec *riov;
  7695. +
  7696. + /* XXX short-cut: assume first segment is >= tlen */
  7697. + assert(iov[0].iov_len >= tlen);
  7698. +
  7699. + iov[0].iov_len -= tlen;
  7700. + if (iov[0].iov_len == 0) {
  7701. + assert(*iovcnt > 1);
  7702. + *iovcnt -= 1;
  7703. + riov = &iov[1];
  7704. + } else {
  7705. + iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
  7706. + riov = &iov[0];
  7707. + }
  7708. +
  7709. + return (riov);
  7710. +}
  7711. +
  7712. +void
  7713. +netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt, uint32_t len,
  7714. + int more)
  7715. +{
  7716. + if (be == NULL)
  7717. + return;
  7718. +#if 0
  7719. + int i;
  7720. + D("sending iovcnt %d len %d iovec %p", iovcnt, len, iov);
  7721. + for (i=0; i < iovcnt; i++)
  7722. + D(" %3d: %4d %p", i, (int)iov[i].iov_len, iov[i].iov_base);
  7723. +#endif
  7724. + if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
  7725. + /* Here we are sure be->be_vnet_hdr_len is 0. */
  7726. + iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len);
  7727. + }
  7728. +
  7729. + be->send(be, iov, iovcnt, len, more);
  7730. +}
  7731. +
  7732. +/*
  7733. + * can return -1 in case of errors
  7734. + */
  7735. +int
  7736. +netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
  7737. +{
  7738. + unsigned int hlen = 0; /* length of prepended virtio-net header */
  7739. + int ret;
  7740. +
  7741. + if (be == NULL)
  7742. + return -1;
  7743. +
  7744. + if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
  7745. + struct virtio_net_rxhdr *vh;
  7746. +
  7747. + /* Here we are sure be->be_vnet_hdr_len is 0. */
  7748. + hlen = be->fe_vnet_hdr_len;
  7749. + /*
  7750. + * Get a pointer to the rx header, and use the
  7751. + * data immediately following it for the packet buffer.
  7752. + */
  7753. + vh = iov[0].iov_base;
  7754. + iov = iov_trim(iov, &iovcnt, hlen);
  7755. +
  7756. + /*
  7757. + * Here we are sure be->be_vnet_hdr_len is 0, so the backend does
  7758. + * not fill in the header. The only valid field in the rx packet
  7759. + * header is the number of buffers, if merged rx bufs were negotiated.
  7760. + */
  7761. + memset(vh, 0, hlen);
  7762. +
  7763. + if (hlen == VNET_HDR_LEN) {
  7764. + vh->vrh_bufs = 1;
  7765. + }
  7766. + }
  7767. +
  7768. + ret = be->recv(be, iov, iovcnt);
  7769. + if (ret > 0) {
  7770. + ret += hlen;
  7771. + }
  7772. +
  7773. + return ret;
  7774. +}
  7775. +
  7776. +/*
  7777. + * Read a packet from the backend and discard it.
  7778. + * Returns the size of the discarded packet or zero if no packet was available.
  7779. + * A negative error code is returned in case of read error.
  7780. + */
  7781. +int
  7782. +netbe_rx_discard(struct net_backend *be)
  7783. +{
  7784. + /*
  7785. + * MP note: the dummybuf is only used to discard frames,
  7786. + * so there is no need for it to be per-vtnet or locked.
  7787. + * We only make it large enough for a TSO-sized segment.
  7788. + */
  7789. + static uint8_t dummybuf[65536+64];
  7790. + struct iovec iov;
  7791. +
  7792. + iov.iov_base = dummybuf;
  7793. + iov.iov_len = sizeof(dummybuf);
  7794. +
  7795. + return netbe_recv(be, &iov, 1);
  7796. +}
  7797. +
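A minimal, hypothetical usage sketch of the netbe_*() API added above (not part of the patch): the callback, the softc pointer and the "tap0" device name are made-up examples, and the header included is the one introduced in the next file.

#include "net_backends.h"

/* Illustrative sketch only. */
static void
example_rx_callback(int fd, enum ev_type type, void *param)
{
	/* A real front-end would drain the backend with netbe_recv() here. */
}

static struct net_backend *
example_attach(void *frontend_softc)
{
	struct net_backend *be;
	uint64_t cap;

	be = netbe_init("tap0", example_rx_callback, frontend_softc);
	if (be == NULL)
		return (NULL);

	/* Negotiate features; 0 means no virtio-net header is prepended. */
	cap = netbe_get_cap(be);
	if (netbe_set_cap(be, cap, 0) != 0) {
		netbe_cleanup(be);
		return (NULL);
	}

	/*
	 * TX path: netbe_send(be, iov, iovcnt, total_len, 0);
	 * RX path: netbe_recv(be, iov, iovcnt), typically called from
	 * example_rx_callback() when the backend reports readable data.
	 */
	return (be);
}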
  7798. diff --git a/usr.sbin/bhyve/net_backends.h b/usr.sbin/bhyve/net_backends.h
  7799. new file mode 100644
  7800. index 00000000000..03c083bdbfd
  7801. --- /dev/null
  7802. +++ b/usr.sbin/bhyve/net_backends.h
  7803. @@ -0,0 +1,144 @@
  7804. +/*-
  7805. + * Copyright (c) 2014 Vincenzo Maffione <v.maffione@gmail.com>
  7806. + * All rights reserved.
  7807. + *
  7808. + * Redistribution and use in source and binary forms, with or without
  7809. + * modification, are permitted provided that the following conditions
  7810. + * are met:
  7811. + * 1. Redistributions of source code must retain the above copyright
  7812. + * notice, this list of conditions and the following disclaimer.
  7813. + * 2. Redistributions in binary form must reproduce the above copyright
  7814. + * notice, this list of conditions and the following disclaimer in the
  7815. + * documentation and/or other materials provided with the distribution.
  7816. + *
  7817. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
  7818. + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  7819. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  7820. + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
  7821. + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  7822. + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
  7823. + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  7824. + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  7825. + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  7826. + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  7827. + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  7828. + */
  7829. +
  7830. +#ifndef __NET_BACKENDS_H__
  7831. +#define __NET_BACKENDS_H__
  7832. +
  7833. +#include <stdint.h>
  7834. +
  7835. +#ifdef WITH_NETMAP
  7836. +#include <net/netmap.h>
  7837. +#include <net/netmap_virt.h>
  7838. +#define NETMAP_WITH_LIBS
  7839. +#include <net/netmap_user.h>
  7840. +#if (NETMAP_API < 11)
  7841. +#error "Netmap API version must be >= 11"
  7842. +#endif
  7843. +#endif /* WITH_NETMAP */
  7844. +
  7845. +#include "mevent.h"
  7846. +
  7847. +extern int netmap_ioctl_counter;
  7848. +
  7849. +typedef void (*net_backend_cb_t)(int, enum ev_type, void *param);
  7850. +
  7851. +/* Interface between virtio-net and the network backend. */
  7852. +struct net_backend;
  7853. +
  7854. +struct net_backend *netbe_init(const char *devname,
  7855. + net_backend_cb_t cb, void *param);
  7856. +void netbe_cleanup(struct net_backend *be);
  7857. +uint64_t netbe_get_cap(struct net_backend *be);
  7858. +int netbe_set_cap(struct net_backend *be, uint64_t cap,
  7859. + unsigned vnet_hdr_len);
  7860. +void netbe_send(struct net_backend *be, struct iovec *iov,
  7861. + int iovcnt, uint32_t len, int more);
  7862. +int netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt);
  7863. +int netbe_rx_discard(struct net_backend *be);
  7864. +
  7865. +
  7866. +/*
  7867. + * Network device capabilities taken from VirtIO standard.
  7868. + * Despite the name, these capabilities can be used by different front-ends
  7869. + * (virtio-net, ptnet) and supported by different backends (netmap, tap, ...).
  7870. + */
  7871. +#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */
  7872. +#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */
  7873. +#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */
  7874. +#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */
  7875. +#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */
  7876. +#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */
  7877. +#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */
  7878. +#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */
  7879. +#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */
  7880. +#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */
  7881. +#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */
  7882. +#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */
  7883. +#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */
  7884. +#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */
  7885. +#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */
  7886. +#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */
  7887. +#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */
  7888. +#define VIRTIO_NET_F_GUEST_ANNOUNCE \
  7889. + (1 << 21) /* guest can send gratuitous pkts */
  7890. +
  7891. +/*
  7892. + * Fixed network header size
  7893. + */
  7894. +struct virtio_net_rxhdr {
  7895. + uint8_t vrh_flags;
  7896. + uint8_t vrh_gso_type;
  7897. + uint16_t vrh_hdr_len;
  7898. + uint16_t vrh_gso_size;
  7899. + uint16_t vrh_csum_start;
  7900. + uint16_t vrh_csum_offset;
  7901. + uint16_t vrh_bufs;
  7902. +} __packed;
  7903. +
  7904. +/*
  7905. + * ptnetmap definitions
  7906. + */
  7907. +struct ptnetmap_state {
  7908. + void *netmap_priv;
  7909. +
  7910. + /* True if ptnetmap kthreads are running. */
  7911. + int running;
  7912. +
  7913. + /* Feature acknowledgement support. */
  7914. + unsigned long features;
  7915. + unsigned long acked_features;
  7916. +
  7917. + /* Info about netmap memory. */
  7918. + uint32_t memsize;
  7919. + void *mem;
  7920. +};
  7921. +
  7922. +#ifdef WITH_NETMAP
  7923. +/* Used to get read-only info. */
  7924. +struct netmap_if_info {
  7925. + uint32_t nifp_offset;
  7926. + uint16_t num_tx_rings;
  7927. + uint16_t num_rx_rings;
  7928. + uint16_t num_tx_slots;
  7929. + uint16_t num_rx_slots;
  7930. +};
  7931. +
  7932. +int ptn_memdev_attach(void *mem_ptr, struct netmap_pools_info *);
  7933. +int ptnetmap_get_netmap_if(struct ptnetmap_state *ptn,
  7934. + struct netmap_if_info *nif);
  7935. +struct ptnetmap_state * get_ptnetmap(struct net_backend *be);
  7936. +uint32_t ptnetmap_ack_features(struct ptnetmap_state *ptn,
  7937. + uint32_t wanted_features);
  7938. +int ptnetmap_get_hostmemid(struct ptnetmap_state *ptn);
  7939. +int ptnetmap_create(struct ptnetmap_state *ptn, struct ptnetmap_cfg *cfg);
  7940. +int ptnetmap_delete(struct ptnetmap_state *ptn);
  7941. +#endif /* WITH_NETMAP */
  7942. +
  7943. +#include "pci_emul.h"
  7944. +int net_parsemac(char *mac_str, uint8_t *mac_addr);
  7945. +void net_genmac(struct pci_devinst *pi, uint8_t *macaddr);
  7946. +
  7947. +#endif /* __NET_BACKENDS_H__ */
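As a note on the "only three valid lengths" check in netbe_set_cap() earlier: those lengths follow from struct virtio_net_rxhdr above. A small, hypothetical illustration; EXAMPLE_VNET_HDR_LEN is an assumption standing in for the VNET_HDR_LEN macro defined in net_backends.c.

#include <assert.h>
#include <stdint.h>
#include "net_backends.h"

#define EXAMPLE_VNET_HDR_LEN	sizeof(struct virtio_net_rxhdr)	/* assumed to match VNET_HDR_LEN */

static void
example_vnet_hdr_lengths(void)
{
	/* Full 12-byte header, used when merged rx buffers are negotiated. */
	assert(EXAMPLE_VNET_HDR_LEN == 12);
	/* 10-byte header: the same layout minus the trailing vrh_bufs field. */
	assert(EXAMPLE_VNET_HDR_LEN - sizeof(uint16_t) == 10);
	/* The third valid value is 0: no virtio-net header at all. */
}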
  7948. diff --git a/usr.sbin/bhyve/net_utils.c b/usr.sbin/bhyve/net_utils.c
  7949. new file mode 100644
  7950. index 00000000000..a5a004a1a78
  7951. --- /dev/null
  7952. +++ b/usr.sbin/bhyve/net_utils.c
  7953. @@ -0,0 +1,86 @@
  7954. +/*-
  7955. + * Copyright (c) 2011 NetApp, Inc.
  7956. + * All rights reserved.
  7957. + *
  7958. + * Redistribution and use in source and binary forms, with or without
  7959. + * modification, are permitted provided that the following conditions
  7960. + * are met:
  7961. + * 1. Redistributions of source code must retain the above copyright
  7962. + * notice, this list of conditions and the following disclaimer.
  7963. + * 2. Redistributions in binary form must reproduce the above copyright
  7964. + * notice, this list of conditions and the following disclaimer in the
  7965. + * documentation and/or other materials provided with the distribution.
  7966. + *
  7967. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
  7968. + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  7969. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  7970. + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
  7971. + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  7972. + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
  7973. + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  7974. + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  7975. + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  7976. + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  7977. + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  7978. + */
  7979. +#include "net_utils.h"
  7980. +#include "bhyverun.h"
  7981. +#include <md5.h>
  7982. +#include <net/ethernet.h>
  7983. +#include <string.h>
  7984. +#include <stdio.h>
  7985. +#include <errno.h>
  7986. +
  7987. +/*
  7988. + * Some utils functions, used by net front-ends. Originally, they were
  7989. + * in pci_virtio_net.c.
  7990. + */
  7991. +
  7992. +int
  7993. +net_parsemac(char *mac_str, uint8_t *mac_addr)
  7994. +{
  7995. + struct ether_addr *ea;
  7996. + char *tmpstr;
  7997. + char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
  7998. +
  7999. + tmpstr = strsep(&mac_str,"=");
  8000. +
  8001. + if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
  8002. + ea = ether_aton(mac_str);
  8003. +
  8004. + if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
  8005. + memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
  8006. + fprintf(stderr, "Invalid MAC %s\n", mac_str);
  8007. + return (EINVAL);
  8008. + } else
  8009. + memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
  8010. + }
  8011. +
  8012. + return (0);
  8013. +}
  8014. +
  8015. +void
  8016. +net_genmac(struct pci_devinst *pi, uint8_t *macaddr)
  8017. +{
  8018. + /*
  8019. + * The default MAC address is the standard NetApp OUI of 00-a0-98,
  8020. + * followed by an MD5 of the PCI slot/func number and dev name
  8021. + */
  8022. + MD5_CTX mdctx;
  8023. + unsigned char digest[16];
  8024. + char nstr[80];
  8025. +
  8026. + snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
  8027. + pi->pi_func, vmname);
  8028. +
  8029. + MD5Init(&mdctx);
  8030. + MD5Update(&mdctx, nstr, (unsigned int)strlen(nstr));
  8031. + MD5Final(digest, &mdctx);
  8032. +
  8033. + macaddr[0] = 0x00;
  8034. + macaddr[1] = 0xa0;
  8035. + macaddr[2] = 0x98;
  8036. + macaddr[3] = digest[0];
  8037. + macaddr[4] = digest[1];
  8038. + macaddr[5] = digest[2];
  8039. +}
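A short, hypothetical sketch of how a front-end consumes the two helpers above; the MAC string in the comment is made up, and the error handling is modelled on what the e82545 and virtio-net front-ends do.

#include <stdint.h>
#include "net_utils.h"

/* Illustrative only: vtopts points at e.g. "mac=58:9c:fc:00:00:01". */
static int
example_setup_mac(struct pci_devinst *pi, char *vtopts, uint8_t mac[6])
{
	if (vtopts != NULL)
		return (net_parsemac(vtopts, mac));	/* EINVAL on a bad MAC */

	/* No MAC supplied: derive one from the PCI slot/func and VM name. */
	net_genmac(pi, mac);
	return (0);
}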
  8040. diff --git a/usr.sbin/bhyve/net_utils.h b/usr.sbin/bhyve/net_utils.h
  8041. new file mode 100644
  8042. index 00000000000..2a0f3a86efb
  8043. --- /dev/null
  8044. +++ b/usr.sbin/bhyve/net_utils.h
  8045. @@ -0,0 +1,34 @@
  8046. +/*-
  8047. + * Copyright (c) 2011 NetApp, Inc.
  8048. + * All rights reserved.
  8049. + *
  8050. + * Redistribution and use in source and binary forms, with or without
  8051. + * modification, are permitted provided that the following conditions
  8052. + * are met:
  8053. + * 1. Redistributions of source code must retain the above copyright
  8054. + * notice, this list of conditions and the following disclaimer.
  8055. + * 2. Redistributions in binary form must reproduce the above copyright
  8056. + * notice, this list of conditions and the following disclaimer in the
  8057. + * documentation and/or other materials provided with the distribution.
  8058. + *
  8059. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
  8060. + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  8061. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  8062. + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
  8063. + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
  8064. + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
  8065. + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  8066. + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  8067. + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  8068. + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  8069. + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  8070. + */
  8071. +#ifndef _NET_UTILS_H_
  8072. +#define _NET_UTILS_H_
  8073. +
  8074. +#include <stdint.h>
  8075. +#include "pci_emul.h"
  8076. +
  8077. +void net_genmac(struct pci_devinst *pi, uint8_t *macaddr);
  8078. +int net_parsemac(char *mac_str, uint8_t *mac_addr);
  8079. +#endif /* _NET_UTILS_H_ */
  8080. diff --git a/usr.sbin/bhyve/pci_e82545.c b/usr.sbin/bhyve/pci_e82545.c
  8081. index 7db7cab4e74..899b273ddca 100644
  8082. --- a/usr.sbin/bhyve/pci_e82545.c
  8083. +++ b/usr.sbin/bhyve/pci_e82545.c
  8084. @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
  8085. #include <net/ethernet.h>
  8086. #include <netinet/in.h>
  8087. #include <netinet/tcp.h>
  8088. +#include <net/if.h> /* IFNAMSIZ */
  8089.  
  8090. #include <err.h>
  8091. #include <errno.h>
  8092. @@ -60,6 +61,8 @@ __FBSDID("$FreeBSD$");
  8093. #include "bhyverun.h"
  8094. #include "pci_emul.h"
  8095. #include "mevent.h"
  8096. +#include "net_utils.h" /* MAC address generation */
  8097. +#include "net_backends.h"
  8098.  
  8099. /* Hardware/register definitions XXX: move some to common code. */
  8100. #define E82545_VENDOR_ID_INTEL 0x8086
  8101. @@ -239,11 +242,10 @@ struct eth_uni {
  8102. struct e82545_softc {
  8103. struct pci_devinst *esc_pi;
  8104. struct vmctx *esc_ctx;
  8105. - struct mevent *esc_mevp;
  8106. struct mevent *esc_mevpitr;
  8107. pthread_mutex_t esc_mtx;
  8108. struct ether_addr esc_mac;
  8109. - int esc_tapfd;
  8110. + struct net_backend *esc_be;
  8111.  
  8112. /* General */
  8113. uint32_t esc_CTRL; /* x0000 device ctl */
  8114. @@ -349,7 +351,7 @@ struct e82545_softc {
  8115. static void e82545_reset(struct e82545_softc *sc, int dev);
  8116. static void e82545_rx_enable(struct e82545_softc *sc);
  8117. static void e82545_rx_disable(struct e82545_softc *sc);
  8118. -static void e82545_tap_callback(int fd, enum ev_type type, void *param);
  8119. +static void e82545_rx_callback(int fd, enum ev_type type, void *param);
  8120. static void e82545_tx_start(struct e82545_softc *sc);
  8121. static void e82545_tx_enable(struct e82545_softc *sc);
  8122. static void e82545_tx_disable(struct e82545_softc *sc);
  8123. @@ -818,11 +820,9 @@ e82545_bufsz(uint32_t rctl)
  8124. return (256); /* Forbidden value. */
  8125. }
  8126.  
  8127. -static uint8_t dummybuf[2048];
  8128. -
  8129. /* XXX one packet at a time until this is debugged */
  8130. static void
  8131. -e82545_tap_callback(int fd, enum ev_type type, void *param)
  8132. +e82545_rx_callback(int fd, enum ev_type type, void *param)
  8133. {
  8134. struct e82545_softc *sc = param;
  8135. struct e1000_rx_desc *rxd;
  8136. @@ -837,7 +837,7 @@ e82545_tap_callback(int fd, enum ev_type type, void *param)
  8137. if (!sc->esc_rx_enabled || sc->esc_rx_loopback) {
  8138. DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped\r\n",
  8139. sc->esc_rx_enabled, sc->esc_rx_loopback);
  8140. - while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
  8141. + while (netbe_rx_discard(sc->esc_be) > 0) {
  8142. }
  8143. goto done1;
  8144. }
  8145. @@ -850,7 +850,7 @@ e82545_tap_callback(int fd, enum ev_type type, void *param)
  8146. if (left < maxpktdesc) {
  8147. DPRINTF("rx overflow (%d < %d) -- packet(s) dropped\r\n",
  8148. left, maxpktdesc);
  8149. - while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
  8150. + while (netbe_rx_discard(sc->esc_be) > 0) {
  8151. }
  8152. goto done1;
  8153. }
  8154. @@ -867,9 +867,9 @@ e82545_tap_callback(int fd, enum ev_type type, void *param)
  8155. rxd->buffer_addr, bufsz);
  8156. vec[i].iov_len = bufsz;
  8157. }
  8158. - len = readv(sc->esc_tapfd, vec, maxpktdesc);
  8159. + len = netbe_recv(sc->esc_be, vec, maxpktdesc);
  8160. if (len <= 0) {
  8161. - DPRINTF("tap: readv() returned %d\n", len);
  8162. + DPRINTF("be: recv() returned %d\n", len);
  8163. goto done;
  8164. }
  8165.  
  8166. @@ -1041,13 +1041,10 @@ e82545_transmit_checksum(struct iovec *iov, int iovcnt, struct ck_info *ck)
  8167. }
  8168.  
  8169. static void
  8170. -e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt)
  8171. +e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt,
  8172. + uint32_t len)
  8173. {
  8174. -
  8175. - if (sc->esc_tapfd == -1)
  8176. - return;
  8177. -
  8178. - (void) writev(sc->esc_tapfd, iov, iovcnt);
  8179. + netbe_send(sc->esc_be, iov, iovcnt, len, 0);
  8180. }
  8181.  
  8182. static void
  8183. @@ -1083,7 +1080,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
  8184.  
  8185. ckinfo[0].ck_valid = ckinfo[1].ck_valid = 0;
  8186. iovcnt = 0;
  8187. - tlen = 0;
  8188. + tlen = 0; /* total length */
  8189. ntype = 0;
  8190. tso = 0;
  8191. ohead = head;
  8192. @@ -1208,6 +1205,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
  8193. hdrlen = ETHER_ADDR_LEN*2;
  8194. vlen = ETHER_VLAN_ENCAP_LEN;
  8195. }
  8196. + tlen += vlen;
  8197. if (!tso) {
  8198. /* Estimate required writable space for checksums. */
  8199. if (ckinfo[0].ck_valid)
  8200. @@ -1273,7 +1271,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
  8201. e82545_transmit_checksum(iov, iovcnt, &ckinfo[0]);
  8202. if (ckinfo[1].ck_valid)
  8203. e82545_transmit_checksum(iov, iovcnt, &ckinfo[1]);
  8204. - e82545_transmit_backend(sc, iov, iovcnt);
  8205. + e82545_transmit_backend(sc, iov, iovcnt, tlen);
  8206. goto done;
  8207. }
  8208.  
  8209. @@ -1297,13 +1295,14 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
  8210. /* Construct IOVs for the segment. */
  8211. /* Include whole original header. */
  8212. tiov[0].iov_base = hdr;
  8213. - tiov[0].iov_len = hdrlen;
  8214. + tiov[0].iov_len = tlen = hdrlen;
  8215. tiovcnt = 1;
  8216. /* Include respective part of payload IOV. */
  8217. for (nleft = now; pv < iovcnt && nleft > 0; nleft -= nnow) {
  8218. nnow = MIN(nleft, iov[pv].iov_len - pvoff);
  8219. tiov[tiovcnt].iov_base = iov[pv].iov_base + pvoff;
  8220. tiov[tiovcnt++].iov_len = nnow;
  8221. + tlen += nnow;
  8222. if (pvoff + nnow == iov[pv].iov_len) {
  8223. pv++;
  8224. pvoff = 0;
  8225. @@ -1356,7 +1355,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
  8226. e82545_carry(tcpsum);
  8227. e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[1]);
  8228. }
  8229. - e82545_transmit_backend(sc, tiov, tiovcnt);
  8230. + e82545_transmit_backend(sc, tiov, tiovcnt, tlen);
  8231. }
  8232.  
  8233. done:
  8234. @@ -2203,89 +2202,17 @@ e82545_reset(struct e82545_softc *sc, int drvr)
  8235. sc->esc_TXDCTL = 0;
  8236. }
  8237.  
  8238. -static void
  8239. -e82545_open_tap(struct e82545_softc *sc, char *opts)
  8240. -{
  8241. - char tbuf[80];
  8242. -#ifndef WITHOUT_CAPSICUM
  8243. - cap_rights_t rights;
  8244. -#endif
  8245. -
  8246. - if (opts == NULL) {
  8247. - sc->esc_tapfd = -1;
  8248. - return;
  8249. - }
  8250. -
  8251. - strcpy(tbuf, "/dev/");
  8252. - strlcat(tbuf, opts, sizeof(tbuf));
  8253. -
  8254. - sc->esc_tapfd = open(tbuf, O_RDWR);
  8255. - if (sc->esc_tapfd == -1) {
  8256. - DPRINTF("unable to open tap device %s\n", opts);
  8257. - exit(1);
  8258. - }
  8259. -
  8260. - /*
  8261. - * Set non-blocking and register for read
  8262. - * notifications with the event loop
  8263. - */
  8264. - int opt = 1;
  8265. - if (ioctl(sc->esc_tapfd, FIONBIO, &opt) < 0) {
  8266. - WPRINTF("tap device O_NONBLOCK failed: %d\n", errno);
  8267. - close(sc->esc_tapfd);
  8268. - sc->esc_tapfd = -1;
  8269. - }
  8270. -
  8271. -#ifndef WITHOUT_CAPSICUM
  8272. - cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
  8273. - if (cap_rights_limit(sc->esc_tapfd, &rights) == -1 && errno != ENOSYS)
  8274. - errx(EX_OSERR, "Unable to apply rights for sandbox");
  8275. -#endif
  8276. -
  8277. - sc->esc_mevp = mevent_add(sc->esc_tapfd,
  8278. - EVF_READ,
  8279. - e82545_tap_callback,
  8280. - sc);
  8281. - if (sc->esc_mevp == NULL) {
  8282. - DPRINTF("Could not register mevent %d\n", EVF_READ);
  8283. - close(sc->esc_tapfd);
  8284. - sc->esc_tapfd = -1;
  8285. - }
  8286. -}
  8287. -
  8288. -static int
  8289. -e82545_parsemac(char *mac_str, uint8_t *mac_addr)
  8290. -{
  8291. - struct ether_addr *ea;
  8292. - char *tmpstr;
  8293. - char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
  8294. -
  8295. - tmpstr = strsep(&mac_str,"=");
  8296. - if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
  8297. - ea = ether_aton(mac_str);
  8298. - if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
  8299. - memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
  8300. - fprintf(stderr, "Invalid MAC %s\n", mac_str);
  8301. - return (1);
  8302. - } else
  8303. - memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
  8304. - }
  8305. - return (0);
  8306. -}
  8307. -
  8308. static int
  8309. e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  8310. {
  8311. - DPRINTF("Loading with options: %s\r\n", opts);
  8312. -
  8313. - MD5_CTX mdctx;
  8314. - unsigned char digest[16];
  8315. char nstr[80];
  8316. struct e82545_softc *sc;
  8317. char *devname;
  8318. char *vtopts;
  8319. int mac_provided;
  8320.  
  8321. + DPRINTF("Loading with options: %s\r\n", opts);
  8322. +
  8323. /* Setup our softc */
  8324. sc = calloc(1, sizeof(*sc));
  8325.  
  8326. @@ -2323,11 +2250,10 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  8327. E82545_BAR_IO_LEN);
  8328.  
  8329. /*
  8330. - * Attempt to open the tap device and read the MAC address
  8331. - * if specified. Copied from virtio-net, slightly modified.
  8332. + * Attempt to open the backend device and read the MAC address
  8333. + * if specified. Copied from virtio-net, slightly modified.
  8334. */
  8335. mac_provided = 0;
  8336. - sc->esc_tapfd = -1;
  8337. if (opts != NULL) {
  8338. int err;
  8339.  
  8340. @@ -2335,7 +2261,7 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  8341. (void) strsep(&vtopts, ",");
  8342.  
  8343. if (vtopts != NULL) {
  8344. - err = e82545_parsemac(vtopts, sc->esc_mac.octet);
  8345. + err = net_parsemac(vtopts, sc->esc_mac.octet);
  8346. if (err != 0) {
  8347. free(devname);
  8348. return (err);
  8349. @@ -2343,9 +2269,11 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  8350. mac_provided = 1;
  8351. }
  8352.  
  8353. - if (strncmp(devname, "tap", 3) == 0 ||
  8354. - strncmp(devname, "vmnet", 5) == 0)
  8355. - e82545_open_tap(sc, devname);
  8356. + sc->esc_be = netbe_init(devname, e82545_rx_callback, sc);
  8357. + if (!sc->esc_be) {
  8358. + WPRINTF("net backend '%s' initialization failed\n",
  8359. + devname);
  8360. + }
  8361.  
  8362. free(devname);
  8363. }
  8364. @@ -2355,19 +2283,7 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  8365. * followed by an MD5 of the PCI slot/func number and dev name
  8366. */
  8367. if (!mac_provided) {
  8368. - snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
  8369. - pi->pi_func, vmname);
  8370. -
  8371. - MD5Init(&mdctx);
  8372. - MD5Update(&mdctx, nstr, strlen(nstr));
  8373. - MD5Final(digest, &mdctx);
  8374. -
  8375. - sc->esc_mac.octet[0] = 0x00;
  8376. - sc->esc_mac.octet[1] = 0xa0;
  8377. - sc->esc_mac.octet[2] = 0x98;
  8378. - sc->esc_mac.octet[3] = digest[0];
  8379. - sc->esc_mac.octet[4] = digest[1];
  8380. - sc->esc_mac.octet[5] = digest[2];
  8381. + net_genmac(pi, sc->esc_mac.octet);
  8382. }
  8383.  
  8384. /* H/w initiated reset */
  8385. diff --git a/usr.sbin/bhyve/pci_ptnetmap_memdev.c b/usr.sbin/bhyve/pci_ptnetmap_memdev.c
  8386. new file mode 100644
  8387. index 00000000000..a1e95a1ed0f
  8388. --- /dev/null
  8389. +++ b/usr.sbin/bhyve/pci_ptnetmap_memdev.c
  8390. @@ -0,0 +1,341 @@
  8391. +/*
  8392. + * Copyright (C) 2015 Stefano Garzarella (stefano.garzarella@gmail.com)
  8393. + * All rights reserved.
  8394. + *
  8395. + * Redistribution and use in source and binary forms, with or without
  8396. + * modification, are permitted provided that the following conditions
  8397. + * are met:
  8398. + * 1. Redistributions of source code must retain the above copyright
  8399. + * notice, this list of conditions and the following disclaimer.
  8400. + * 2. Redistributions in binary form must reproduce the above copyright
  8401. + * notice, this list of conditions and the following disclaimer in the
  8402. + * documentation and/or other materials provided with the distribution.
  8403. + *
  8404. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  8405. + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  8406. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  8407. + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  8408. + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  8409. + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  8410. + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  8411. + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  8412. + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  8413. + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  8414. + * SUCH DAMAGE.
  8415. + */
  8416. +
  8417. +#ifdef WITH_NETMAP
  8418. +
  8419. +#include <sys/cdefs.h>
  8420. +__FBSDID("$FreeBSD$");
  8421. +
  8422. +#include <errno.h>
  8423. +#include <stdio.h>
  8424. +#include <stdlib.h>
  8425. +#include <stdint.h>
  8426. +
  8427. +#include <net/if.h> /* IFNAMSIZ */
  8428. +#include <net/netmap.h>
  8429. +#include <net/netmap_virt.h>
  8430. +
  8431. +#include <machine/vmm.h>
  8432. +#include <vmmapi.h>
  8433. +
  8434. +#include "bhyverun.h"
  8435. +#include "pci_emul.h"
  8436. +
  8437. +/*
  8438. + * ptnetmap memdev PCI device
  8439. + *
  8440. + * This device is used to map a netmap memory allocator on the guest VM
  8441. + * through PCI_BAR. The same allocator can be shared between multiple ptnetmap
  8442. + * ports in the guest.
  8443. + *
  8444. + * Each netmap allocator has a unique ID assigned by the netmap host module.
  8445. + *
  8446. + * The implementation here is based on the QEMU/KVM one.
  8447. + */
  8448. +struct ptn_memdev_softc {
  8449. + struct pci_devinst *pi; /* PCI device instance */
  8450. +
  8451. + void *mem_ptr; /* netmap shared memory */
  8452. + struct netmap_pools_info info;
  8453. +
  8454. + TAILQ_ENTRY(ptn_memdev_softc) next;
  8455. +};
  8456. +static TAILQ_HEAD(, ptn_memdev_softc) ptn_memdevs = TAILQ_HEAD_INITIALIZER(ptn_memdevs);
  8457. +
  8458. +/*
  8459. + * A ptn_memdev_softc can be created either by pe_init or by the ptnetmap
  8460. + * backend, depending on the order of initialization.
  8461. + */
  8462. +static struct ptn_memdev_softc *
  8463. +ptn_memdev_create()
  8464. +{
  8465. + struct ptn_memdev_softc *sc;
  8466. +
  8467. + sc = calloc(1, sizeof(struct ptn_memdev_softc));
  8468. + if (sc != NULL) {
  8469. + TAILQ_INSERT_TAIL(&ptn_memdevs, sc, next);
  8470. + }
  8471. +
  8472. + return sc;
  8473. +}
  8474. +
  8475. +static void
  8476. +ptn_memdev_delete(struct ptn_memdev_softc *sc)
  8477. +{
  8478. + TAILQ_REMOVE(&ptn_memdevs, sc, next);
  8479. +
  8480. + free(sc);
  8481. +}
  8482. +
  8483. +/*
  8484. + * Find a ptn_memdev by its mem_id (netmap memory allocator ID).
  8485. + */
  8486. +static struct ptn_memdev_softc *
  8487. +ptn_memdev_find_memid(uint32_t mem_id)
  8488. +{
  8489. + struct ptn_memdev_softc *sc;
  8490. +
  8491. + TAILQ_FOREACH(sc, &ptn_memdevs, next) {
  8492. + if (sc->mem_ptr != NULL && mem_id == sc->info.memid) {
  8493. + return sc;
  8494. + }
  8495. + }
  8496. +
  8497. + return NULL;
  8498. +}
  8499. +
  8500. +/*
  8501. + * Find a ptn_memdev with no netmap memory attached yet (the memory is attached by the ptnetmap backend).
  8502. + */
  8503. +static struct ptn_memdev_softc *
  8504. +ptn_memdev_find_empty_mem()
  8505. +{
  8506. + struct ptn_memdev_softc *sc;
  8507. +
  8508. + TAILQ_FOREACH(sc, &ptn_memdevs, next) {
  8509. + if (sc->mem_ptr == NULL) {
  8510. + return sc;
  8511. + }
  8512. + }
  8513. +
  8514. + return NULL;
  8515. +}
  8516. +
  8517. +/*
  8518. + * Find a ptn_memdev with no PCI device instance yet (the instance is created by pe_init).
  8519. + */
  8520. +static struct ptn_memdev_softc *
  8521. +ptn_memdev_find_empty_pi()
  8522. +{
  8523. + struct ptn_memdev_softc *sc;
  8524. +
  8525. + TAILQ_FOREACH(sc, &ptn_memdevs, next) {
  8526. + if (sc->pi == NULL) {
  8527. + return sc;
  8528. + }
  8529. + }
  8530. +
  8531. + return NULL;
  8532. +}
  8533. +
  8534. +/*
  8535. + * Handle read on ptnetmap-memdev register
  8536. + */
  8537. +static uint64_t
  8538. +ptn_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
  8539. + int baridx, uint64_t offset, int size)
  8540. +{
  8541. + struct ptn_memdev_softc *sc = pi->pi_arg;
  8542. +
  8543. + if (sc == NULL)
  8544. + return 0;
  8545. +
  8546. + if (baridx == PTNETMAP_IO_PCI_BAR) {
  8547. + switch (offset) {
  8548. + case PTNET_MDEV_IO_MEMSIZE_LO:
  8549. + return sc->info.memsize & 0xffffffff;
  8550. + case PTNET_MDEV_IO_MEMSIZE_HI:
  8551. + return sc->info.memsize >> 32;
  8552. + case PTNET_MDEV_IO_MEMID:
  8553. + return sc->info.memid;
  8554. + case PTNET_MDEV_IO_IF_POOL_OFS:
  8555. + return sc->info.if_pool_offset;
  8556. + case PTNET_MDEV_IO_IF_POOL_OBJNUM:
  8557. + return sc->info.if_pool_objtotal;
  8558. + case PTNET_MDEV_IO_IF_POOL_OBJSZ:
  8559. + return sc->info.if_pool_objsize;
  8560. + case PTNET_MDEV_IO_RING_POOL_OFS:
  8561. + return sc->info.ring_pool_offset;
  8562. + case PTNET_MDEV_IO_RING_POOL_OBJNUM:
  8563. + return sc->info.ring_pool_objtotal;
  8564. + case PTNET_MDEV_IO_RING_POOL_OBJSZ:
  8565. + return sc->info.ring_pool_objsize;
  8566. + case PTNET_MDEV_IO_BUF_POOL_OFS:
  8567. + return sc->info.buf_pool_offset;
  8568. + case PTNET_MDEV_IO_BUF_POOL_OBJNUM:
  8569. + return sc->info.buf_pool_objtotal;
  8570. + case PTNET_MDEV_IO_BUF_POOL_OBJSZ:
  8571. + return sc->info.buf_pool_objsize;
  8572. + }
  8573. + }
  8574. +
  8575. + printf("%s: Unexpected register read [bar %u, offset %lx size %d]\n",
  8576. + __func__, baridx, offset, size);
  8577. +
  8578. + return 0;
  8579. +}
  8580. +
  8581. +/*
  8582. + * Handle write on ptnetmap-memdev register (unused for now)
  8583. + */
  8584. +static void
  8585. +ptn_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
  8586. + int baridx, uint64_t offset, int size, uint64_t value)
  8587. +{
  8588. + struct ptn_memdev_softc *sc = pi->pi_arg;
  8589. +
  8590. + if (sc == NULL)
  8591. + return;
  8592. +
  8593. + printf("%s: Unexpected register write [bar %u, offset %lx size %d "
  8594. + "value %lx]\n", __func__, baridx, offset, size, value);
  8595. +}
  8596. +
  8597. +/*
  8598. + * Configure the ptnetmap-memdev PCI BARs. PCI BARs can only be created
  8599. + * when the PCI device is created and the netmap memory is attached.
  8600. + */
  8601. +static int
  8602. +ptn_memdev_configure_bars(struct ptn_memdev_softc *sc)
  8603. +{
  8604. + int ret;
  8605. +
  8606. + if (sc->pi == NULL || sc->mem_ptr == NULL)
  8607. + return 0;
  8608. +
  8609. + /* Allocate a BAR for an I/O region. */
  8610. + ret = pci_emul_alloc_bar(sc->pi, PTNETMAP_IO_PCI_BAR, PCIBAR_IO,
  8611. + PTNET_MDEV_IO_END);
  8612. + if (ret) {
  8613. + printf("ptnetmap_memdev: iobar allocation error %d\n", ret);
  8614. + return ret;
  8615. + }
  8616. +
  8617. + /* Allocate a BAR for a memory region. */
  8618. + ret = pci_emul_alloc_bar(sc->pi, PTNETMAP_MEM_PCI_BAR, PCIBAR_MEM32,
  8619. + sc->info.memsize);
  8620. + if (ret) {
  8621. + printf("ptnetmap_memdev: membar allocation error %d\n", ret);
  8622. + return ret;
  8623. + }
  8624. +
  8625. + /* Map netmap memory on the memory BAR. */
  8626. + ret = vm_map_user_buf(sc->pi->pi_vmctx,
  8627. + sc->pi->pi_bar[PTNETMAP_MEM_PCI_BAR].addr,
  8628. + sc->info.memsize, sc->mem_ptr, 1);
  8629. + if (ret) {
  8630. + printf("ptnetmap_memdev: membar map error %d\n", ret);
  8631. + return ret;
  8632. + }
  8633. +
  8634. + return 0;
  8635. +}
  8636. +
  8637. +/*
  8638. + * PCI device initialization
  8639. + */
  8640. +static int
  8641. +ptn_memdev_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  8642. +{
  8643. + struct ptn_memdev_softc *sc;
  8644. + int ret;
  8645. +
  8646. + sc = ptn_memdev_find_empty_pi();
  8647. + if (sc == NULL) {
  8648. + sc = ptn_memdev_create();
  8649. + if (sc == NULL) {
  8650. + printf("ptnetmap_memdev: calloc error\n");
  8651. + return (ENOMEM);
  8652. + }
  8653. + }
  8654. +
  8655. + /* Link our softc in the pci_devinst. */
  8656. + pi->pi_arg = sc;
  8657. + sc->pi = pi;
  8658. +
  8659. + /* Initialize PCI configuration space. */
  8660. + pci_set_cfgdata16(pi, PCIR_VENDOR, PTNETMAP_PCI_VENDOR_ID);
  8661. + pci_set_cfgdata16(pi, PCIR_DEVICE, PTNETMAP_PCI_DEVICE_ID);
  8662. + pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
  8663. + pci_set_cfgdata16(pi, PCIR_SUBDEV_0, 1);
  8664. + pci_set_cfgdata16(pi, PCIR_SUBVEND_0, PTNETMAP_PCI_VENDOR_ID);
  8665. +
  8666. + /* Configure PCI-BARs. */
  8667. + ret = ptn_memdev_configure_bars(sc);
  8668. + if (ret) {
  8669. + printf("ptnetmap_memdev: configure error\n");
  8670. + goto err;
  8671. + }
  8672. +
  8673. + return 0;
  8674. +err:
  8675. + ptn_memdev_delete(sc);
  8676. + pi->pi_arg = NULL;
  8677. + return ret;
  8678. +}
  8679. +
  8680. +/*
  8681. + * Used by the ptnetmap backend to attach the netmap memory allocator to the
  8682. + * ptnetmap-memdev (shared with the guest VM through a PCI BAR).
  8683. + */
  8684. +int
  8685. +ptn_memdev_attach(void *mem_ptr, struct netmap_pools_info *info)
  8686. +{
  8687. + struct ptn_memdev_softc *sc;
  8688. + int ret;
  8689. +
  8690. + /* if a device with the same mem_id is already attached, we are done */
  8691. + if (ptn_memdev_find_memid(info->memid)) {
  8692. + printf("ptnetmap_memdev: already attached\n");
  8693. + return 0;
  8694. + }
  8695. +
  8696. + sc = ptn_memdev_find_empty_mem();
  8697. + if (sc == NULL) {
  8698. + sc = ptn_memdev_create();
  8699. + if (sc == NULL) {
  8700. + printf("ptnetmap_memdev: calloc error\n");
  8701. + return (ENOMEM);
  8702. + }
  8703. + }
  8704. +
  8705. + sc->mem_ptr = mem_ptr;
  8706. + sc->info = *info;
  8707. +
  8708. + /* configure device PCI-BARs */
  8709. + ret = ptn_memdev_configure_bars(sc);
  8710. + if (ret) {
  8711. + printf("ptnetmap_memdev: configure error\n");
  8712. + goto err;
  8713. + }
  8714. +
  8715. +
  8716. + return 0;
  8717. +err:
  8718. + sc->pi->pi_arg = NULL;
  8719. + ptn_memdev_delete(sc);
  8720. + return ret;
  8721. +}
  8722. +
  8723. +struct pci_devemu pci_de_ptnetmap = {
  8724. + .pe_emu = PTNETMAP_MEMDEV_NAME,
  8725. + .pe_init = ptn_memdev_init,
  8726. + .pe_barwrite = ptn_pci_write,
  8727. + .pe_barread = ptn_pci_read
  8728. +};
  8729. +PCI_EMUL_SET(pci_de_ptnetmap);
  8730. +
  8731. +#endif /* WITH_NETMAP */
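For context on the register map served by ptn_pci_read() above: the netmap memory size is exported through two 32-bit I/O registers, so a guest driver is expected to reassemble the 64-bit value along these lines. This is a sketch only; read_io32() is a placeholder for whatever port-read primitive the guest actually uses, and the register names come from net/netmap_virt.h.

#include <stdint.h>
#include <net/netmap.h>
#include <net/netmap_virt.h>

/* Placeholder for the guest's I/O-port read primitive. */
extern uint32_t read_io32(unsigned int reg);

static uint64_t
example_memdev_memsize(void)
{
	uint64_t lo = read_io32(PTNET_MDEV_IO_MEMSIZE_LO);
	uint64_t hi = read_io32(PTNET_MDEV_IO_MEMSIZE_HI);

	/* Mirrors the split performed in ptn_pci_read() above. */
	return ((hi << 32) | lo);
}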
  8732. diff --git a/usr.sbin/bhyve/pci_ptnetmap_netif.c b/usr.sbin/bhyve/pci_ptnetmap_netif.c
  8733. new file mode 100644
  8734. index 00000000000..060062f2dfd
  8735. --- /dev/null
  8736. +++ b/usr.sbin/bhyve/pci_ptnetmap_netif.c
  8737. @@ -0,0 +1,411 @@
  8738. +/*
  8739. + * Copyright (C) 2016 Vincenzo Maffione
  8740. + * All rights reserved.
  8741. + *
  8742. + * Redistribution and use in source and binary forms, with or without
  8743. + * modification, are permitted provided that the following conditions
  8744. + * are met:
  8745. + * 1. Redistributions of source code must retain the above copyright
  8746. + * notice, this list of conditions and the following disclaimer.
  8747. + * 2. Redistributions in binary form must reproduce the above copyright
  8748. + * notice, this list of conditions and the following disclaimer in the
  8749. + * documentation and/or other materials provided with the distribution.
  8750. + *
  8751. + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  8752. + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  8753. + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  8754. + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  8755. + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  8756. + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  8757. + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  8758. + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  8759. + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  8760. + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  8761. + * SUCH DAMAGE.
  8762. + */
  8763. +
  8764. +/*
  8765. + * This file contains the emulation of the ptnet network frontend, to be used
  8766. + * with netmap backend.
  8767. + */
  8768. +
  8769. +#ifdef WITH_NETMAP
  8770. +
  8771. +#include <sys/cdefs.h>
  8772. +__FBSDID("$FreeBSD$");
  8773. +
  8774. +#include <errno.h>
  8775. +#include <stdio.h>
  8776. +#include <stdlib.h>
  8777. +#include <stdint.h>
  8778. +#include <string.h>
  8779. +
  8780. +#include <net/if.h> /* IFNAMSIZ */
  8781. +#include <net/netmap.h>
  8782. +#include <net/netmap_virt.h>
  8783. +
  8784. +#include <sys/ioctl.h>
  8785. +#include <sys/param.h>
  8786. +#include <sys/_cpuset.h>
  8787. +#include <machine/vmm.h>
  8788. +#include <machine/vmm_dev.h> /* VM_LAPIC_MSI */
  8789. +#include <vmmapi.h>
  8790. +
  8791. +#include "bhyverun.h"
  8792. +#include "pci_emul.h"
  8793. +#include "net_utils.h"
  8794. +#include "net_backends.h"
  8795. +
  8796. +#ifndef PTNET_CSB_ALLOC
  8797. +#error "Hypervisor-allocated CSB not supported"
  8798. +#endif
  8799. +
  8800. +
  8801. +struct ptnet_softc {
  8802. + struct pci_devinst *pi;
  8803. +
  8804. + struct net_backend *be;
  8805. + struct ptnetmap_state *ptbe;
  8806. +
  8807. + unsigned int num_rings;
  8808. + uint32_t ioregs[PTNET_IO_END >> 2];
  8809. + void *csb;
  8810. +};
  8811. +
  8812. +static int
  8813. +ptnet_get_netmap_if(struct ptnet_softc *sc)
  8814. +{
  8815. + unsigned int num_rings;
  8816. + struct netmap_if_info nif;
  8817. + int ret;
  8818. +
  8819. + ret = ptnetmap_get_netmap_if(sc->ptbe, &nif);
  8820. + if (ret) {
  8821. + return ret;
  8822. + }
  8823. +
  8824. + sc->ioregs[PTNET_IO_NIFP_OFS >> 2] = nif.nifp_offset;
  8825. + sc->ioregs[PTNET_IO_NUM_TX_RINGS >> 2] = nif.num_tx_rings;
  8826. + sc->ioregs[PTNET_IO_NUM_RX_RINGS >> 2] = nif.num_rx_rings;
  8827. + sc->ioregs[PTNET_IO_NUM_TX_SLOTS >> 2] = nif.num_tx_slots;
  8828. + sc->ioregs[PTNET_IO_NUM_RX_SLOTS >> 2] = nif.num_rx_slots;
  8829. +
  8830. + num_rings = sc->ioregs[PTNET_IO_NUM_TX_RINGS >> 2] +
  8831. + sc->ioregs[PTNET_IO_NUM_RX_RINGS >> 2];
  8832. + if (sc->num_rings && num_rings && sc->num_rings != num_rings) {
  8833. + fprintf(stderr, "Number of rings changed: not supported\n");
  8834. + return EINVAL;
  8835. + }
  8836. + sc->num_rings = num_rings;
  8837. +
  8838. + return 0;
  8839. +}
  8840. +
  8841. +static int
  8842. +ptnet_ptctl_create(struct ptnet_softc *sc)
  8843. +{
  8844. + struct ptnetmap_cfgentry_bhyve *cfgentry;
  8845. + struct pci_devinst *pi = sc->pi;
  8846. + struct vmctx *vmctx = pi->pi_vmctx;
  8847. + struct ptnetmap_cfg *cfg;
  8848. + unsigned int kick_addr;
  8849. + int ret;
  8850. + int i;
  8851. +
  8852. + if (sc->csb == NULL) {
  8853. + fprintf(stderr, "%s: Unexpected NULL CSB", __func__);
  8854. + return -1;
  8855. + }
  8856. +
  8857. + cfg = calloc(1, sizeof(*cfg) + sc->num_rings * sizeof(*cfgentry));
  8858. +
  8859. + cfg->cfgtype = PTNETMAP_CFGTYPE_BHYVE;
  8860. + cfg->entry_size = sizeof(*cfgentry);
  8861. + cfg->num_rings = sc->num_rings;
  8862. + cfg->ptrings = sc->csb;
  8863. +
  8864. + kick_addr = pi->pi_bar[PTNETMAP_IO_PCI_BAR].addr + PTNET_IO_KICK_BASE;
  8865. + cfgentry = (struct ptnetmap_cfgentry_bhyve *)(cfg + 1);
  8866. +
  8867. + for (i = 0; i < sc->num_rings; i++, kick_addr += 4, cfgentry++) {
  8868. + struct msix_table_entry *mte;
  8869. + uint64_t cookie = sc->ioregs[PTNET_IO_MAC_LO >> 2] + 4*i;
  8870. +
  8871. + cfgentry->ioctl_fd = vm_get_fd(vmctx);
  8872. + cfgentry->ioctl_cmd = VM_LAPIC_MSI;
  8873. + mte = &pi->pi_msix.table[i];
  8874. + cfgentry->ioctl_data.addr = mte->addr;
  8875. + cfgentry->ioctl_data.msg_data = mte->msg_data;
  8876. +
  8877. + fprintf(stderr, "%s: vector %u, addr %lu, data %u, "
  8878. + "kick_addr %u, cookie: %p\n",
  8879. + __func__, i, mte->addr, mte->msg_data, kick_addr,
  8880. + (void*)cookie);
  8881. +
  8882. + ret = vm_io_reg_handler(vmctx, kick_addr /* ioaddr */,
  8883. + 0 /* in */, 0 /* mask_data */,
  8884. + 0 /* data */, VM_IO_REGH_KWEVENTS,
  8885. + (void*)cookie /* cookie */);
  8886. + if (ret) {
  8887. + fprintf(stderr, "%s: vm_io_reg_handler %d\n",
  8888. + __func__, ret);
  8889. + }
  8890. + cfgentry->wchan = (uint64_t) cookie;
  8891. + }
  8892. +
  8893. + ret = ptnetmap_create(sc->ptbe, cfg);
  8894. + free(cfg);
  8895. +
  8896. + return ret;
  8897. +}
  8898. +
  8899. +static int
  8900. +ptnet_ptctl_delete(struct ptnet_softc *sc)
  8901. +{
  8902. + struct pci_devinst *pi = sc->pi;
  8903. + struct vmctx *vmctx = pi->pi_vmctx;
  8904. + unsigned int kick_addr;
  8905. + int i;
  8906. +
  8907. + kick_addr = pi->pi_bar[PTNETMAP_IO_PCI_BAR].addr + PTNET_IO_KICK_BASE;
  8908. +
  8909. + for (i = 0; i < sc->num_rings; i++, kick_addr += 4) {
  8910. + vm_io_reg_handler(vmctx, kick_addr, 0, 0, 0,
  8911. + VM_IO_REGH_DELETE, 0);
  8912. + }
  8913. +
  8914. + return ptnetmap_delete(sc->ptbe);
  8915. +}
  8916. +
  8917. +static void
  8918. +ptnet_ptctl(struct ptnet_softc *sc, uint64_t cmd)
  8919. +{
  8920. + int ret = EINVAL;
  8921. +
  8922. + switch (cmd) {
  8923. + case PTNETMAP_PTCTL_CREATE:
  8924. + /* React to a REGIF in the guest. */
  8925. + ret = ptnet_ptctl_create(sc);
  8926. + break;
  8927. +
  8928. + case PTNETMAP_PTCTL_DELETE:
  8929. + /* React to an UNREGIF in the guest. */
  8930. + ret = ptnet_ptctl_delete(sc);
  8931. + break;
  8932. + }
  8933. +
  8934. + sc->ioregs[PTNET_IO_PTCTL >> 2] = ret;
  8935. +}
  8936. +
  8937. +static void
  8938. +ptnet_csb_mapping(struct ptnet_softc *sc)
  8939. +{
  8940. + uint64_t base = ((uint64_t)sc->ioregs[PTNET_IO_CSBBAH >> 2] << 32) |
  8941. + sc->ioregs[PTNET_IO_CSBBAL >> 2];
  8942. + uint64_t len = 4096;
  8943. +
  8944. + sc->csb = NULL;
  8945. + if (base) {
  8946. + sc->csb = paddr_guest2host(sc->pi->pi_vmctx, base, len);
  8947. + }
  8948. +}
  8949. +
  8950. +static void
  8951. +ptnet_bar_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
  8952. + int baridx, uint64_t offset, int size, uint64_t value)
  8953. +{
  8954. + struct ptnet_softc *sc = pi->pi_arg;
  8955. + unsigned int index;
  8956. +
  8957. + /* Redirect to MSI-X emulation code. */
  8958. + if (baridx == pci_msix_table_bar(pi) ||
  8959. + baridx == pci_msix_pba_bar(pi)) {
  8960. + pci_emul_msix_twrite(pi, offset, size, value);
  8961. + return;
  8962. + }
  8963. +
  8964. + if (sc == NULL)
  8965. + return;
  8966. +
  8967. + offset &= PTNET_IO_MASK;
  8968. + index = offset >> 2;
  8969. +
  8970. + if (baridx != PTNETMAP_IO_PCI_BAR || offset >= PTNET_IO_END) {
  8971. + fprintf(stderr, "%s: Unexpected register write [bar %u, "
  8972. + "offset %lx size %d value %lx]\n", __func__, baridx,
  8973. + offset, size, value);
  8974. + return;
  8975. + }
  8976. +
  8977. + switch (offset) {
  8978. + case PTNET_IO_PTFEAT:
  8979. + value = ptnetmap_ack_features(sc->ptbe, value);
  8980. + sc->ioregs[index] = value;
  8981. + break;
  8982. +
  8983. + case PTNET_IO_PTCTL:
  8984. + ptnet_ptctl(sc, value);
  8985. + break;
  8986. +
  8987. + case PTNET_IO_CSBBAH:
  8988. + sc->ioregs[index] = value;
  8989. + break;
  8990. +
  8991. + case PTNET_IO_CSBBAL:
  8992. + sc->ioregs[index] = value;
  8993. + ptnet_csb_mapping(sc);
  8994. + break;
  8995. +
  8996. + case PTNET_IO_VNET_HDR_LEN:
  8997. + if (netbe_set_cap(sc->be, netbe_get_cap(sc->be),
  8998. + value) == 0) {
  8999. + sc->ioregs[index] = value;
  9000. + }
  9001. + break;
  9002. + }
  9003. +}
  9004. +
  9005. +static uint64_t
  9006. +ptnet_bar_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
  9007. + int baridx, uint64_t offset, int size)
  9008. +{
  9009. + struct ptnet_softc *sc = pi->pi_arg;
  9010. + uint64_t index = offset >> 2;
  9011. +
  9012. + if (baridx == pci_msix_table_bar(pi) ||
  9013. + baridx == pci_msix_pba_bar(pi)) {
  9014. + return pci_emul_msix_tread(pi, offset, size);
  9015. + }
  9016. +
  9017. + if (sc == NULL)
  9018. + return 0;
  9019. +
  9020. + offset &= PTNET_IO_MASK;
  9021. +
  9022. + if (baridx != PTNETMAP_IO_PCI_BAR || offset >= PTNET_IO_END) {
  9023. + fprintf(stderr, "%s: Unexpected register read [bar %u, "
  9024. + "offset %lx size %d]\n", __func__, baridx, offset,
  9025. + size);
  9026. + return 0;
  9027. + }
  9028. +
  9029. + switch (offset) {
  9030. + case PTNET_IO_NIFP_OFS:
  9031. + case PTNET_IO_NUM_TX_RINGS:
  9032. + case PTNET_IO_NUM_RX_RINGS:
  9033. + case PTNET_IO_NUM_TX_SLOTS:
  9034. + case PTNET_IO_NUM_RX_SLOTS:
  9035. + /* Fill in device registers with information about
  9036. + * nifp_offset, num_*x_rings, and num_*x_slots. */
  9037. + ptnet_get_netmap_if(sc);
  9038. + break;
  9039. +
  9040. + case PTNET_IO_HOSTMEMID:
  9041. + sc->ioregs[index] = ptnetmap_get_hostmemid(sc->ptbe);
  9042. + break;
  9043. + }
  9044. +
  9045. + return sc->ioregs[index];
  9046. +}
  9047. +
  9048. +/* PCI device initialization. */
  9049. +static int
  9050. +ptnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  9051. +{
  9052. + struct ptnet_softc *sc;
  9053. + char *ptopts, *devname;
  9054. + uint8_t macaddr[6];
  9055. + int mac_provided = 0;
  9056. + int ret;
  9057. +
  9058. + sc = calloc(1, sizeof(*sc));
  9059. + if (sc == NULL) {
  9060. + fprintf(stderr, "%s: out of memory\n", __func__);
  9061. + return -1;
  9062. + }
  9063. +
  9064. + /* Link our softc in the pci_devinst. */
  9065. + pi->pi_arg = sc;
  9066. + sc->pi = pi;
  9067. +
  9068. + /* Parse command line options. */
  9069. + if (opts == NULL) {
  9070. + fprintf(stderr, "%s: No backend specified\n", __func__);
  9071. + return -1;
  9072. + }
  9073. +
  9074. + devname = ptopts = strdup(opts);
  9075. + (void) strsep(&ptopts, ",");
  9076. +
  9077. + if (ptopts != NULL) {
  9078. + ret = net_parsemac(ptopts, macaddr);
  9079. + if (ret != 0) {
  9080. + free(devname);
  9081. + return ret;
  9082. + }
  9083. + mac_provided = 1;
  9084. + }
  9085. +
  9086. + if (!mac_provided) {
  9087. + net_genmac(pi, macaddr);
  9088. + }
  9089. +
  9090. + /* Initialize backend. A NULL callback is used here to ask
  9091. + * the netmap backend to use ptnetmap. */
  9092. + sc->be = netbe_init(devname, NULL, sc);
  9093. + if (!sc->be) {
  9094. + fprintf(stderr, "net backend initialization failed\n");
  9095. + return -1;
  9096. + }
  9097. +
  9098. + free(devname);
  9099. +
  9100. + sc->ptbe = get_ptnetmap(sc->be);
  9101. + if (!sc->ptbe) {
  9102. + fprintf(stderr, "%s: failed to get ptnetmap\n", __func__);
  9103. + return -1;
  9104. + }
  9105. +
  9106. + /* Initialize PCI configuration space. */
  9107. + pci_set_cfgdata16(pi, PCIR_VENDOR, PTNETMAP_PCI_VENDOR_ID);
  9108. + pci_set_cfgdata16(pi, PCIR_DEVICE, PTNETMAP_PCI_NETIF_ID);
  9109. + pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
  9110. + pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_NETWORK_ETHERNET);
  9111. + pci_set_cfgdata16(pi, PCIR_SUBDEV_0, 1);
  9112. + pci_set_cfgdata16(pi, PCIR_SUBVEND_0, PTNETMAP_PCI_VENDOR_ID);
  9113. +
  9114. + /* Allocate a BAR for an I/O region. */
  9115. + ret = pci_emul_alloc_bar(pi, PTNETMAP_IO_PCI_BAR, PCIBAR_IO,
  9116. + PTNET_IO_MASK + 1);
  9117. + if (ret) {
  9118. + fprintf(stderr, "%s: failed to allocate BAR [%d]\n",
  9119. + __func__, ret);
  9120. + return ret;
  9121. + }
  9122. +
  9123. + /* Initialize registers and data structures. */
  9124. + memset(sc->ioregs, 0, sizeof(sc->ioregs));
  9125. + sc->csb = NULL;
  9126. + sc->ioregs[PTNET_IO_MAC_HI >> 2] = (macaddr[0] << 8) | macaddr[1];
  9127. + sc->ioregs[PTNET_IO_MAC_LO >> 2] = (macaddr[2] << 24) |
  9128. + (macaddr[3] << 16) |
  9129. + (macaddr[4] << 8) | macaddr[5];
  9130. +
  9131. + sc->num_rings = 0;
  9132. + ptnet_get_netmap_if(sc);
  9133. +
  9134. + /* Allocate a BAR for MSI-X vectors. */
  9135. + pci_emul_add_msixcap(pi, sc->num_rings, PTNETMAP_MSIX_PCI_BAR);
  9136. +
  9137. + return 0;
  9138. +}
  9139. +
  9140. +struct pci_devemu pci_de_ptnet = {
  9141. + .pe_emu = "ptnet",
  9142. + .pe_init = ptnet_init,
  9143. + .pe_barwrite = ptnet_bar_write,
  9144. + .pe_barread = ptnet_bar_read,
  9145. +};
  9146. +PCI_EMUL_SET(pci_de_ptnet);
  9147. +
  9148. +#endif /* WITH_NETMAP */
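A brief note on the per-ring kick layout used by ptnet_ptctl_create() and ptnet_ptctl_delete() above: every TX/RX ring owns a distinct 4-byte register starting at PTNET_IO_KICK_BASE inside the I/O BAR, and that address is what gets registered with vm_io_reg_handler(). A hypothetical helper restating the computation:

#include <stdint.h>
#include <net/netmap.h>
#include <net/netmap_virt.h>

/* Illustrative only: kick register address for ring i, as computed above. */
static uint64_t
example_kick_addr(uint64_t io_bar_base, unsigned int ring)
{
	return (io_bar_base + PTNET_IO_KICK_BASE + 4 * ring);
}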
  9149. diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
  9150. index c6104a646a5..91a6d9c59d1 100644
  9151. --- a/usr.sbin/bhyve/pci_virtio_net.c
  9152. +++ b/usr.sbin/bhyve/pci_virtio_net.c
  9153. @@ -26,6 +26,22 @@
  9154. * $FreeBSD$
  9155. */
  9156.  
  9157. +/*
  9158. + * This file contains the emulation of the virtio-net network frontend. Network
  9159. + * backends are in net_backends.c.
  9160. + *
  9161. + * The frontend is selected using the pe_emu field of the descriptor.
  9162. + * Upon a match, the pe_init function is invoked, which initializes
  9163. + * the emulated PCI device, attaches to the backend, and calls virtio
  9164. + * initialization functions.
  9165. + *
  9166. + * PCI register reads/writes are handled through generic PCI methods.
  9167. + *
  9168. + * virtio TX is handled by a dedicated thread, pci_vtnet_tx_thread().
  9169. + * virtio RX is handled by the backend (often with some helper thread),
  9170. + * which in turn calls a frontend callback, pci_vtnet_rx_callback().
  9171. + */
  9172. +
  9173. #include <sys/cdefs.h>
  9174. __FBSDID("$FreeBSD$");
  9175.  
  9176. @@ -39,10 +55,7 @@ __FBSDID("$FreeBSD$");
  9177. #include <sys/ioctl.h>
  9178. #include <machine/atomic.h>
  9179. #include <net/ethernet.h>
  9180. -#ifndef NETMAP_WITH_LIBS
  9181. -#define NETMAP_WITH_LIBS
  9182. -#endif
  9183. -#include <net/netmap_user.h>
  9184. +#include <net/if.h> /* IFNAMSIZ */
  9185.  
  9186. #include <err.h>
  9187. #include <errno.h>
  9188. @@ -54,7 +67,6 @@ __FBSDID("$FreeBSD$");
  9189. #include <strings.h>
  9190. #include <unistd.h>
  9191. #include <assert.h>
  9192. -#include <md5.h>
  9193. #include <pthread.h>
  9194. #include <pthread_np.h>
  9195. #include <sysexits.h>
  9196. @@ -63,36 +75,16 @@ __FBSDID("$FreeBSD$");
  9197. #include "pci_emul.h"
  9198. #include "mevent.h"
  9199. #include "virtio.h"
  9200. +#include "net_utils.h" /* MAC address generation */
  9201. +#include "net_backends.h" /* VirtIO capabilities */
  9202.  
  9203. #define VTNET_RINGSZ 1024
  9204.  
  9205. #define VTNET_MAXSEGS 256
  9206.  
  9207. -/*
  9208. - * Host capabilities. Note that we only offer a few of these.
  9209. - */
  9210. -#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */
  9211. -#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */
  9212. -#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */
  9213. -#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */
  9214. -#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */
  9215. -#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */
  9216. -#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */
  9217. -#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */
  9218. -#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */
  9219. -#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */
  9220. -#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */
  9221. -#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */
  9222. -#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */
  9223. -#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */
  9224. -#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */
  9225. -#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */
  9226. -#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */
  9227. -#define VIRTIO_NET_F_GUEST_ANNOUNCE \
  9228. - (1 << 21) /* guest can send gratuitous pkts */
  9229. -
  9230. +/* Our capabilities: we don't support VIRTIO_NET_F_MRG_RXBUF at the moment. */
  9231. #define VTNET_S_HOSTCAPS \
  9232. - ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
  9233. + ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
  9234. VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
  9235.  
  9236. /*
  9237. @@ -101,6 +93,7 @@ __FBSDID("$FreeBSD$");
  9238. struct virtio_net_config {
  9239. uint8_t mac[6];
  9240. uint16_t status;
  9241. + uint16_t max_virtqueue_pairs;
  9242. } __packed;
  9243.  
  9244. /*
  9245. @@ -112,19 +105,6 @@ struct virtio_net_config {
  9246.  
  9247. #define VTNET_MAXQ 3
  9248.  
  9249. -/*
  9250. - * Fixed network header size
  9251. - */
  9252. -struct virtio_net_rxhdr {
  9253. - uint8_t vrh_flags;
  9254. - uint8_t vrh_gso_type;
  9255. - uint16_t vrh_hdr_len;
  9256. - uint16_t vrh_gso_size;
  9257. - uint16_t vrh_csum_start;
  9258. - uint16_t vrh_csum_offset;
  9259. - uint16_t vrh_bufs;
  9260. -} __packed;
  9261. -
  9262. /*
  9263. * Debug printf
  9264. */
  9265. @@ -139,31 +119,24 @@ struct pci_vtnet_softc {
  9266. struct virtio_softc vsc_vs;
  9267. struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
  9268. pthread_mutex_t vsc_mtx;
  9269. - struct mevent *vsc_mevp;
  9270.  
  9271. - int vsc_tapfd;
  9272. - struct nm_desc *vsc_nmd;
  9273. + struct net_backend *vsc_be;
  9274.  
  9275. int vsc_rx_ready;
  9276. volatile int resetting; /* set and checked outside lock */
  9277.  
  9278. uint64_t vsc_features; /* negotiated features */
  9279.  
  9280. - struct virtio_net_config vsc_config;
  9281. -
  9282. pthread_mutex_t rx_mtx;
  9283. - int rx_in_progress;
  9284. - int rx_vhdrlen;
  9285. + unsigned int rx_vhdrlen;
  9286. int rx_merge; /* merged rx bufs in use */
  9287.  
  9288. pthread_t tx_tid;
  9289. pthread_mutex_t tx_mtx;
  9290. pthread_cond_t tx_cond;
  9291. int tx_in_progress;
  9292. + struct virtio_net_config vsc_config;
  9293.  
  9294. - void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc);
  9295. - void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov,
  9296. - int iovcnt, int len);
  9297. };
  9298.  
  9299. static void pci_vtnet_reset(void *);
  9300. @@ -186,6 +159,7 @@ static struct virtio_consts vtnet_vi_consts = {
  9301.  
  9302. /*
  9303. * If the transmit thread is active then stall until it is done.
   9304. + * Only used once, in pci_vtnet_reset().
  9305. */
  9306. static void
  9307. pci_vtnet_txwait(struct pci_vtnet_softc *sc)
  9308. @@ -202,20 +176,18 @@ pci_vtnet_txwait(struct pci_vtnet_softc *sc)
  9309.  
  9310. /*
  9311. * If the receive thread is active then stall until it is done.
  9312. + * It is enough to lock and unlock the RX mutex.
   9313. + * Only used once, in pci_vtnet_reset().
  9314. */
  9315. static void
  9316. pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
  9317. {
  9318.  
  9319. pthread_mutex_lock(&sc->rx_mtx);
  9320. - while (sc->rx_in_progress) {
  9321. - pthread_mutex_unlock(&sc->rx_mtx);
  9322. - usleep(10000);
  9323. - pthread_mutex_lock(&sc->rx_mtx);
  9324. - }
  9325. pthread_mutex_unlock(&sc->rx_mtx);
  9326. }
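
Not part of the patch: the lock/unlock pair above only works as a barrier because the rx callback holds rx_mtx for the whole receive pass. A standalone sketch of the idiom (hypothetical name):

    #include <pthread.h>

    /*
     * Wait for any in-flight critical section protected by mtx to finish:
     * taking the lock blocks until the worker drops it, and nothing more
     * needs to be done once it has been acquired.
     */
    static void
    wait_for_critical_section(pthread_mutex_t *mtx)
    {
            pthread_mutex_lock(mtx);
            pthread_mutex_unlock(mtx);
    }
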
  9327.  
  9328. +/* handler for virtio_reset */
  9329. static void
  9330. pci_vtnet_reset(void *vsc)
  9331. {
  9332. @@ -242,360 +214,80 @@ pci_vtnet_reset(void *vsc)
  9333. sc->resetting = 0;
  9334. }
  9335.  
  9336. -/*
  9337. - * Called to send a buffer chain out to the tap device
  9338. - */
  9339. static void
  9340. -pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
  9341. - int len)
  9342. +pci_vtnet_rx(struct pci_vtnet_softc *sc)
  9343. {
  9344. - static char pad[60]; /* all zero bytes */
  9345. -
  9346. - if (sc->vsc_tapfd == -1)
  9347. - return;
  9348. -
  9349. - /*
  9350. - * If the length is < 60, pad out to that and add the
  9351. - * extra zero'd segment to the iov. It is guaranteed that
  9352. - * there is always an extra iov available by the caller.
  9353. - */
  9354. - if (len < 60) {
  9355. - iov[iovcnt].iov_base = pad;
  9356. - iov[iovcnt].iov_len = 60 - len;
  9357. - iovcnt++;
  9358. - }
  9359. - (void) writev(sc->vsc_tapfd, iov, iovcnt);
  9360. -}
  9361. -
  9362. -/*
  9363. - * Called when there is read activity on the tap file descriptor.
  9364. - * Each buffer posted by the guest is assumed to be able to contain
  9365. - * an entire ethernet frame + rx header.
  9366. - * MP note: the dummybuf is only used for discarding frames, so there
  9367. - * is no need for it to be per-vtnet or locked.
  9368. - */
  9369. -static uint8_t dummybuf[2048];
  9370. -
  9371. -static __inline struct iovec *
  9372. -rx_iov_trim(struct iovec *iov, int *niov, int tlen)
  9373. -{
  9374. - struct iovec *riov;
  9375. -
  9376. - /* XXX short-cut: assume first segment is >= tlen */
  9377. - assert(iov[0].iov_len >= tlen);
  9378. -
  9379. - iov[0].iov_len -= tlen;
  9380. - if (iov[0].iov_len == 0) {
  9381. - assert(*niov > 1);
  9382. - *niov -= 1;
  9383. - riov = &iov[1];
  9384. - } else {
  9385. - iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
  9386. - riov = &iov[0];
  9387. - }
  9388. -
  9389. - return (riov);
  9390. -}
  9391. -
  9392. -static void
  9393. -pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
  9394. -{
  9395. - struct iovec iov[VTNET_MAXSEGS], *riov;
  9396. + struct iovec iov[VTNET_MAXSEGS + 1];
  9397. struct vqueue_info *vq;
  9398. - void *vrx;
  9399. int len, n;
  9400. uint16_t idx;
  9401.  
  9402. - /*
  9403. - * Should never be called without a valid tap fd
  9404. - */
  9405. - assert(sc->vsc_tapfd != -1);
  9406. -
  9407. - /*
  9408. - * But, will be called when the rx ring hasn't yet
  9409. - * been set up or the guest is resetting the device.
  9410. - */
  9411. if (!sc->vsc_rx_ready || sc->resetting) {
  9412. /*
  9413. - * Drop the packet and try later.
  9414. + * The rx ring has not yet been set up or the guest is
  9415. + * resetting the device. Drop the packet and try later.
  9416. */
  9417. - (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
  9418. + netbe_rx_discard(sc->vsc_be);
  9419. return;
  9420. }
  9421.  
  9422. - /*
  9423. - * Check for available rx buffers
  9424. - */
  9425. vq = &sc->vsc_queues[VTNET_RXQ];
  9426. if (!vq_has_descs(vq)) {
  9427. /*
  9428. - * Drop the packet and try later. Interrupt on
  9429. - * empty, if that's negotiated.
  9430. + * No available rx buffers. Drop the packet and try later.
  9431. + * Interrupt on empty, if that's negotiated.
  9432. */
  9433. - (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
  9434. + netbe_rx_discard(sc->vsc_be);
  9435. vq_endchains(vq, 1);
  9436. return;
  9437. }
  9438.  
  9439. do {
  9440. - /*
  9441. - * Get descriptor chain.
  9442. - */
  9443. + /* Get descriptor chain into iov */
  9444. n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
  9445. assert(n >= 1 && n <= VTNET_MAXSEGS);
  9446.  
  9447. - /*
  9448. - * Get a pointer to the rx header, and use the
  9449. - * data immediately following it for the packet buffer.
  9450. - */
  9451. - vrx = iov[0].iov_base;
  9452. - riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
  9453. -
  9454. - len = readv(sc->vsc_tapfd, riov, n);
  9455. -
  9456. - if (len < 0 && errno == EWOULDBLOCK) {
  9457. - /*
  9458. - * No more packets, but still some avail ring
  9459. - * entries. Interrupt if needed/appropriate.
  9460. - */
  9461. - vq_retchain(vq);
  9462. - vq_endchains(vq, 0);
  9463. - return;
  9464. - }
  9465. -
  9466. - /*
  9467. - * The only valid field in the rx packet header is the
  9468. - * number of buffers if merged rx bufs were negotiated.
  9469. - */
  9470. - memset(vrx, 0, sc->rx_vhdrlen);
  9471. -
  9472. - if (sc->rx_merge) {
  9473. - struct virtio_net_rxhdr *vrxh;
  9474. -
  9475. - vrxh = vrx;
  9476. - vrxh->vrh_bufs = 1;
  9477. - }
  9478. -
  9479. - /*
  9480. - * Release this chain and handle more chains.
  9481. - */
  9482. - vq_relchain(vq, idx, len + sc->rx_vhdrlen);
  9483. - } while (vq_has_descs(vq));
  9484. -
  9485. - /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
  9486. - vq_endchains(vq, 1);
  9487. -}
  9488. + len = netbe_recv(sc->vsc_be, iov, n);
  9489.  
  9490. -static __inline int
  9491. -pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
  9492. -{
  9493. - int r, i;
  9494. - int len = 0;
  9495. -
  9496. - for (r = nmd->cur_tx_ring; ; ) {
  9497. - struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
  9498. - uint32_t cur, idx;
  9499. - char *buf;
  9500. -
  9501. - if (nm_ring_empty(ring)) {
  9502. - r++;
  9503. - if (r > nmd->last_tx_ring)
  9504. - r = nmd->first_tx_ring;
  9505. - if (r == nmd->cur_tx_ring)
  9506. - break;
  9507. - continue;
  9508. + if (len < 0) {
  9509. + break;
  9510. }
  9511. - cur = ring->cur;
  9512. - idx = ring->slot[cur].buf_idx;
  9513. - buf = NETMAP_BUF(ring, idx);
  9514. -
  9515. - for (i = 0; i < iovcnt; i++) {
  9516. - if (len + iov[i].iov_len > 2048)
  9517. - break;
  9518. - memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
  9519. - len += iov[i].iov_len;
  9520. - }
  9521. - ring->slot[cur].len = len;
  9522. - ring->head = ring->cur = nm_ring_next(ring, cur);
  9523. - nmd->cur_tx_ring = r;
  9524. - ioctl(nmd->fd, NIOCTXSYNC, NULL);
  9525. - break;
  9526. - }
  9527. -
  9528. - return (len);
  9529. -}
  9530. -
  9531. -static __inline int
  9532. -pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
  9533. -{
  9534. - int len = 0;
  9535. - int i = 0;
  9536. - int r;
  9537. -
  9538. - for (r = nmd->cur_rx_ring; ; ) {
  9539. - struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
  9540. - uint32_t cur, idx;
  9541. - char *buf;
  9542. - size_t left;
  9543. -
  9544. - if (nm_ring_empty(ring)) {
  9545. - r++;
  9546. - if (r > nmd->last_rx_ring)
  9547. - r = nmd->first_rx_ring;
  9548. - if (r == nmd->cur_rx_ring)
  9549. - break;
  9550. - continue;
  9551. - }
  9552. - cur = ring->cur;
  9553. - idx = ring->slot[cur].buf_idx;
  9554. - buf = NETMAP_BUF(ring, idx);
  9555. - left = ring->slot[cur].len;
  9556. -
  9557. - for (i = 0; i < iovcnt && left > 0; i++) {
  9558. - if (iov[i].iov_len > left)
  9559. - iov[i].iov_len = left;
  9560. - memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len);
  9561. - len += iov[i].iov_len;
  9562. - left -= iov[i].iov_len;
  9563. - }
  9564. - ring->head = ring->cur = nm_ring_next(ring, cur);
  9565. - nmd->cur_rx_ring = r;
  9566. - ioctl(nmd->fd, NIOCRXSYNC, NULL);
  9567. - break;
  9568. - }
  9569. - for (; i < iovcnt; i++)
  9570. - iov[i].iov_len = 0;
  9571. -
  9572. - return (len);
  9573. -}
  9574. -
  9575. -/*
  9576. - * Called to send a buffer chain out to the vale port
  9577. - */
  9578. -static void
  9579. -pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
  9580. - int len)
  9581. -{
  9582. - static char pad[60]; /* all zero bytes */
  9583. -
  9584. - if (sc->vsc_nmd == NULL)
  9585. - return;
  9586. -
  9587. - /*
  9588. - * If the length is < 60, pad out to that and add the
  9589. - * extra zero'd segment to the iov. It is guaranteed that
  9590. - * there is always an extra iov available by the caller.
  9591. - */
  9592. - if (len < 60) {
  9593. - iov[iovcnt].iov_base = pad;
  9594. - iov[iovcnt].iov_len = 60 - len;
  9595. - iovcnt++;
  9596. - }
  9597. - (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
  9598. -}
  9599. -
  9600. -static void
  9601. -pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
  9602. -{
  9603. - struct iovec iov[VTNET_MAXSEGS], *riov;
  9604. - struct vqueue_info *vq;
  9605. - void *vrx;
  9606. - int len, n;
  9607. - uint16_t idx;
  9608. -
  9609. - /*
  9610. - * Should never be called without a valid netmap descriptor
  9611. - */
  9612. - assert(sc->vsc_nmd != NULL);
  9613. -
  9614. - /*
  9615. - * But, will be called when the rx ring hasn't yet
  9616. - * been set up or the guest is resetting the device.
  9617. - */
  9618. - if (!sc->vsc_rx_ready || sc->resetting) {
  9619. - /*
  9620. - * Drop the packet and try later.
  9621. - */
  9622. - (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
  9623. - return;
  9624. - }
  9625. -
  9626. - /*
  9627. - * Check for available rx buffers
  9628. - */
  9629. - vq = &sc->vsc_queues[VTNET_RXQ];
  9630. - if (!vq_has_descs(vq)) {
  9631. - /*
  9632. - * Drop the packet and try later. Interrupt on
  9633. - * empty, if that's negotiated.
  9634. - */
  9635. - (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
  9636. - vq_endchains(vq, 1);
  9637. - return;
  9638. - }
  9639. -
  9640. - do {
  9641. - /*
  9642. - * Get descriptor chain.
  9643. - */
  9644. - n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
  9645. - assert(n >= 1 && n <= VTNET_MAXSEGS);
  9646. -
  9647. - /*
  9648. - * Get a pointer to the rx header, and use the
  9649. - * data immediately following it for the packet buffer.
  9650. - */
  9651. - vrx = iov[0].iov_base;
  9652. - riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
  9653. -
  9654. - len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
  9655.  
  9656. if (len == 0) {
  9657. /*
  9658. * No more packets, but still some avail ring
  9659. * entries. Interrupt if needed/appropriate.
  9660. */
  9661. - vq_retchain(vq);
  9662. + vq_retchain(vq); /* return the slot to the vq */
  9663. vq_endchains(vq, 0);
  9664. return;
  9665. }
  9666.  
  9667. - /*
  9668. - * The only valid field in the rx packet header is the
  9669. - * number of buffers if merged rx bufs were negotiated.
  9670. - */
  9671. - memset(vrx, 0, sc->rx_vhdrlen);
  9672. -
  9673. - if (sc->rx_merge) {
  9674. - struct virtio_net_rxhdr *vrxh;
  9675. -
  9676. - vrxh = vrx;
  9677. - vrxh->vrh_bufs = 1;
  9678. - }
  9679. -
  9680. - /*
  9681. - * Release this chain and handle more chains.
  9682. - */
  9683. - vq_relchain(vq, idx, len + sc->rx_vhdrlen);
  9684. + /* Publish the info to the guest */
  9685. + vq_relchain(vq, idx, (uint32_t)len);
  9686. } while (vq_has_descs(vq));
  9687.  
  9688. /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
  9689. vq_endchains(vq, 1);
  9690. }
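
Not part of the patch: the loop above assumes the backend's receive routine returns the frame length, 0 when nothing is pending, and a negative value on error. A sketch of how a readv()-based backend could honour that contract, assuming a non-blocking descriptor (example_recv is a made-up name, not the real net_backends code):

    #include <sys/types.h>
    #include <sys/uio.h>
    #include <errno.h>

    static ssize_t
    example_recv(int fd, struct iovec *iov, int iovcnt)
    {
            ssize_t len;

            len = readv(fd, iov, iovcnt);
            if (len < 0 && errno == EWOULDBLOCK)
                    return (0);     /* no packet pending right now */
            return (len);           /* frame length, or < 0 on a real error */
    }
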
  9691.  
  9692. +/*
   9693. + * Called when there is read activity on the backend file descriptor.
   9694. + * Each buffer posted by the guest is assumed to be able to contain
   9695. + * an entire Ethernet frame + rx header.
  9696. + */
  9697. static void
  9698. pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
  9699. {
  9700. struct pci_vtnet_softc *sc = param;
  9701.  
  9702. + (void)fd; (void)type;
  9703. pthread_mutex_lock(&sc->rx_mtx);
  9704. - sc->rx_in_progress = 1;
  9705. - sc->pci_vtnet_rx(sc);
  9706. - sc->rx_in_progress = 0;
  9707. + pci_vtnet_rx(sc);
  9708. pthread_mutex_unlock(&sc->rx_mtx);
  9709. -
  9710. }
  9711.  
   9712. +/* Callback invoked when the guest notifies (kicks) the receive queue. */
  9713. static void
  9714. pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
  9715. {
  9716. @@ -610,35 +302,33 @@ pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
  9717. }
  9718. }
  9719.  
  9720. +/* TX processing (guest to host), called in the tx thread */
  9721. static void
  9722. pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
  9723. {
  9724. struct iovec iov[VTNET_MAXSEGS + 1];
  9725. int i, n;
  9726. - int plen, tlen;
  9727. + uint32_t len;
  9728. uint16_t idx;
  9729.  
  9730. /*
  9731. - * Obtain chain of descriptors. The first one is
  9732. - * really the header descriptor, so we need to sum
  9733. - * up two lengths: packet length and transfer length.
  9734. + * Obtain chain of descriptors. The first descriptor also
  9735. + * contains the virtio-net header.
  9736. */
  9737. n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
  9738. assert(n >= 1 && n <= VTNET_MAXSEGS);
  9739. - plen = 0;
  9740. - tlen = iov[0].iov_len;
  9741. - for (i = 1; i < n; i++) {
  9742. - plen += iov[i].iov_len;
  9743. - tlen += iov[i].iov_len;
  9744. + len = 0;
  9745. + for (i = 0; i < n; i++) {
  9746. + len += iov[i].iov_len;
  9747. }
  9748.  
  9749. - DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
  9750. - sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen);
  9751. + netbe_send(sc->vsc_be, iov, n, len, 0 /* more */);
  9752.  
  9753. - /* chain is processed, release it and set tlen */
  9754. - vq_relchain(vq, idx, tlen);
  9755. + /* chain is processed, release it and set len */
  9756. + vq_relchain(vq, idx, len);
  9757. }
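
Not part of the patch: with this change the whole chain, virtio-net header included, goes to netbe_send(), so padding short frames up to the 60-byte Ethernet minimum (which the old pci_vtnet_tap_tx() did) is assumed to become the backend's job. A sketch of that step, relying on the spare iov slot the caller already provides (names are illustrative):

    #include <sys/uio.h>

    #define MIN_FRAME_LEN   60      /* minimum Ethernet frame length, no FCS */

    /* Append a zero-filled segment so the frame reaches the minimum length. */
    static int
    pad_short_frame(struct iovec *iov, int iovcnt, int len)
    {
            static char pad[MIN_FRAME_LEN];         /* all zero bytes */

            if (len < MIN_FRAME_LEN) {
                    iov[iovcnt].iov_base = pad;
                    iov[iovcnt].iov_len = MIN_FRAME_LEN - len;
                    iovcnt++;
            }
            return (iovcnt);
    }
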
  9758.  
   9759. +/* Callback invoked when the guest notifies (kicks) the transmit queue. */
  9760. static void
  9761. pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
  9762. {
  9763. @@ -668,6 +358,14 @@ pci_vtnet_tx_thread(void *param)
  9764. struct vqueue_info *vq;
  9765. int error;
  9766.  
  9767. + {
  9768. + struct pci_devinst *pi = sc->vsc_vs.vs_pi;
  9769. + char tname[MAXCOMLEN + 1];
  9770. + snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
  9771. + pi->pi_func);
  9772. + pthread_set_name_np(pthread_self(), tname);
  9773. + }
  9774. +
  9775. vq = &sc->vsc_queues[VTNET_TXQ];
  9776.  
  9777. /*
  9778. @@ -721,119 +419,28 @@ pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
  9779. }
  9780. #endif
  9781.  
  9782. -static int
  9783. -pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
  9784. -{
  9785. - struct ether_addr *ea;
  9786. - char *tmpstr;
  9787. - char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
  9788. -
  9789. - tmpstr = strsep(&mac_str,"=");
  9790. -
  9791. - if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
  9792. - ea = ether_aton(mac_str);
  9793. -
  9794. - if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
  9795. - memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
  9796. - fprintf(stderr, "Invalid MAC %s\n", mac_str);
  9797. - return (EINVAL);
  9798. - } else
  9799. - memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
  9800. - }
  9801. -
  9802. - return (0);
  9803. -}
  9804. -
  9805. -static void
  9806. -pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname)
  9807. -{
  9808. - char tbuf[80];
  9809. -#ifndef WITHOUT_CAPSICUM
  9810. - cap_rights_t rights;
  9811. -#endif
  9812. -
  9813. - strcpy(tbuf, "/dev/");
  9814. - strlcat(tbuf, devname, sizeof(tbuf));
  9815. -
  9816. - sc->pci_vtnet_rx = pci_vtnet_tap_rx;
  9817. - sc->pci_vtnet_tx = pci_vtnet_tap_tx;
  9818. -
  9819. - sc->vsc_tapfd = open(tbuf, O_RDWR);
  9820. - if (sc->vsc_tapfd == -1) {
  9821. - WPRINTF(("open of tap device %s failed\n", tbuf));
  9822. - return;
  9823. - }
  9824. -
  9825. - /*
  9826. - * Set non-blocking and register for read
  9827. - * notifications with the event loop
  9828. - */
  9829. - int opt = 1;
  9830. - if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
  9831. - WPRINTF(("tap device O_NONBLOCK failed\n"));
  9832. - close(sc->vsc_tapfd);
  9833. - sc->vsc_tapfd = -1;
  9834. - }
  9835. -
  9836. -#ifndef WITHOUT_CAPSICUM
  9837. - cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
  9838. - if (cap_rights_limit(sc->vsc_tapfd, &rights) == -1 && errno != ENOSYS)
  9839. - errx(EX_OSERR, "Unable to apply rights for sandbox");
  9840. -#endif
  9841. -
  9842. - sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
  9843. - EVF_READ,
  9844. - pci_vtnet_rx_callback,
  9845. - sc);
  9846. - if (sc->vsc_mevp == NULL) {
  9847. - WPRINTF(("Could not register event\n"));
  9848. - close(sc->vsc_tapfd);
  9849. - sc->vsc_tapfd = -1;
  9850. - }
  9851. -}
  9852. -
  9853. -static void
  9854. -pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname)
  9855. -{
  9856. - sc->pci_vtnet_rx = pci_vtnet_netmap_rx;
  9857. - sc->pci_vtnet_tx = pci_vtnet_netmap_tx;
  9858. -
  9859. - sc->vsc_nmd = nm_open(ifname, NULL, 0, 0);
  9860. - if (sc->vsc_nmd == NULL) {
  9861. - WPRINTF(("open of netmap device %s failed\n", ifname));
  9862. - return;
  9863. - }
  9864. -
  9865. - sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd,
  9866. - EVF_READ,
  9867. - pci_vtnet_rx_callback,
  9868. - sc);
  9869. - if (sc->vsc_mevp == NULL) {
  9870. - WPRINTF(("Could not register event\n"));
  9871. - nm_close(sc->vsc_nmd);
  9872. - sc->vsc_nmd = NULL;
  9873. - }
  9874. -}
  9875. -
  9876. static int
  9877. pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  9878. {
  9879. - MD5_CTX mdctx;
  9880. - unsigned char digest[16];
  9881. - char nstr[80];
  9882. - char tname[MAXCOMLEN + 1];
  9883. struct pci_vtnet_softc *sc;
  9884. char *devname;
  9885. char *vtopts;
  9886. int mac_provided;
  9887. + struct virtio_consts *vc;
  9888.  
  9889. - sc = calloc(1, sizeof(struct pci_vtnet_softc));
  9890. + /*
  9891. + * Allocate data structures for further virtio initializations.
  9892. + * sc also contains a copy of the vtnet_vi_consts,
  9893. + * because the capabilities change depending on
  9894. + * the backend.
  9895. + */
  9896. + sc = calloc(1, sizeof(struct pci_vtnet_softc) +
  9897. + sizeof(struct virtio_consts));
  9898. + vc = (struct virtio_consts *)(sc + 1);
  9899. + memcpy(vc, &vtnet_vi_consts, sizeof(*vc));
  9900.  
  9901. pthread_mutex_init(&sc->vsc_mtx, NULL);
  9902.  
  9903. - vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
  9904. - sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
  9905. -
  9906. sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
  9907. sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
  9908. sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
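
Not part of the patch: the calloc() above carves out the softc and a private copy of the virtio constants in one allocation, so every device instance can advertise different backend-dependent capabilities. The idiom in isolation (type names are made up for the example):

    #include <stdint.h>
    #include <stdlib.h>
    #include <string.h>

    struct example_consts { uint64_t hv_caps; };
    struct example_softc  { int state; };

    static struct example_softc *
    softc_alloc(const struct example_consts *template, struct example_consts **vcp)
    {
            struct example_softc *sc;

            /* One allocation: the softc first, its private constants right after. */
            sc = calloc(1, sizeof(*sc) + sizeof(**vcp));
            if (sc == NULL)
                    return (NULL);
            *vcp = (struct example_consts *)(sc + 1);
            memcpy(*vcp, template, sizeof(**vcp));
            return (sc);
    }
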
  9909. @@ -844,12 +451,10 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  9910. #endif
  9911.  
  9912. /*
  9913. - * Attempt to open the tap device and read the MAC address
  9914. + * Attempt to open the backend device and read the MAC address
  9915. * if specified
  9916. */
  9917. mac_provided = 0;
  9918. - sc->vsc_tapfd = -1;
  9919. - sc->vsc_nmd = NULL;
  9920. if (opts != NULL) {
  9921. int err;
  9922.  
  9923. @@ -857,7 +462,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  9924. (void) strsep(&vtopts, ",");
  9925.  
  9926. if (vtopts != NULL) {
  9927. - err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
  9928. + err = net_parsemac(vtopts, sc->vsc_config.mac);
  9929. if (err != 0) {
  9930. free(devname);
  9931. return (err);
  9932. @@ -865,33 +470,18 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  9933. mac_provided = 1;
  9934. }
  9935.  
  9936. - if (strncmp(devname, "vale", 4) == 0)
  9937. - pci_vtnet_netmap_setup(sc, devname);
  9938. - if (strncmp(devname, "tap", 3) == 0 ||
  9939. - strncmp(devname, "vmnet", 5) == 0)
  9940. - pci_vtnet_tap_setup(sc, devname);
  9941. + sc->vsc_be = netbe_init(devname, pci_vtnet_rx_callback, sc);
  9942. + if (!sc->vsc_be) {
  9943. + WPRINTF(("net backend initialization failed\n"));
  9944. + } else {
  9945. + vc->vc_hv_caps |= netbe_get_cap(sc->vsc_be);
  9946. + }
  9947.  
  9948. free(devname);
  9949. }
  9950.  
  9951. - /*
  9952. - * The default MAC address is the standard NetApp OUI of 00-a0-98,
  9953. - * followed by an MD5 of the PCI slot/func number and dev name
  9954. - */
  9955. if (!mac_provided) {
  9956. - snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
  9957. - pi->pi_func, vmname);
  9958. -
  9959. - MD5Init(&mdctx);
  9960. - MD5Update(&mdctx, nstr, strlen(nstr));
  9961. - MD5Final(digest, &mdctx);
  9962. -
  9963. - sc->vsc_config.mac[0] = 0x00;
  9964. - sc->vsc_config.mac[1] = 0xa0;
  9965. - sc->vsc_config.mac[2] = 0x98;
  9966. - sc->vsc_config.mac[3] = digest[0];
  9967. - sc->vsc_config.mac[4] = digest[1];
  9968. - sc->vsc_config.mac[5] = digest[2];
  9969. + net_genmac(pi, sc->vsc_config.mac);
  9970. }
  9971.  
  9972. /* initialize config space */
  9973. @@ -901,22 +491,23 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  9974. pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
  9975. pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
  9976.  
  9977. - /* Link is up if we managed to open tap device or vale port. */
  9978. - sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0 ||
  9979. - sc->vsc_nmd != NULL);
  9980. + /* Link is up if we managed to open backend device. */
  9981. + sc->vsc_config.status = (opts == NULL || sc->vsc_be);
  9982.  
  9983. + vi_softc_linkup(&sc->vsc_vs, vc, sc, pi, sc->vsc_queues);
  9984. + sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
  9985. +
  9986. /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
  9987. if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
  9988. return (1);
  9989.  
  9990. /* use BAR 0 to map config regs in IO space */
  9991. - vi_set_io_bar(&sc->vsc_vs, 0);
  9992. + vi_set_io_bar(&sc->vsc_vs, 0); /* calls into virtio */
  9993.  
  9994. sc->resetting = 0;
  9995.  
  9996. sc->rx_merge = 1;
  9997. sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
  9998. - sc->rx_in_progress = 0;
  9999. pthread_mutex_init(&sc->rx_mtx, NULL);
  10000.  
  10001. /*
  10002. @@ -928,9 +519,6 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
  10003. pthread_mutex_init(&sc->tx_mtx, NULL);
  10004. pthread_cond_init(&sc->tx_cond, NULL);
  10005. pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
  10006. - snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
  10007. - pi->pi_func);
  10008. - pthread_set_name_np(sc->tx_tid, tname);
  10009.  
  10010. return (0);
  10011. }
  10012. @@ -941,8 +529,8 @@ pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
  10013. struct pci_vtnet_softc *sc = vsc;
  10014. void *ptr;
  10015.  
  10016. - if (offset < 6) {
  10017. - assert(offset + size <= 6);
  10018. + if (offset < (int)sizeof(sc->vsc_config.mac)) {
  10019. + assert(offset + size <= (int)sizeof(sc->vsc_config.mac));
  10020. /*
  10021. * The driver is allowed to change the MAC address
  10022. */
  10023. @@ -974,14 +562,17 @@ pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
  10024.  
  10025. sc->vsc_features = negotiated_features;
  10026.  
  10027. - if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) {
  10028. + if (!(negotiated_features & VIRTIO_NET_F_MRG_RXBUF)) {
  10029. sc->rx_merge = 0;
  10030. /* non-merge rx header is 2 bytes shorter */
  10031. sc->rx_vhdrlen -= 2;
  10032. }
  10033. +
  10034. + /* Tell the backend to enable some capabilities it has advertised. */
  10035. + netbe_set_cap(sc->vsc_be, negotiated_features, sc->rx_vhdrlen);
  10036. }
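
Not part of the patch: the "2 bytes shorter" adjustment above matches the two legacy virtio-net header layouts (struct names below follow the standard virtio headers, not identifiers from this patch):

    #include <stdint.h>

    struct virtio_net_hdr {                 /* 10 bytes, without MRG_RXBUF */
            uint8_t         flags;
            uint8_t         gso_type;
            uint16_t        hdr_len;
            uint16_t        gso_size;
            uint16_t        csum_start;
            uint16_t        csum_offset;
    } __attribute__((packed));

    struct virtio_net_hdr_mrg_rxbuf {       /* 12 bytes, with MRG_RXBUF */
            struct virtio_net_hdr   hdr;
            uint16_t                num_buffers;
    } __attribute__((packed));
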
  10037.  
  10038. -struct pci_devemu pci_de_vnet = {
  10039. +static struct pci_devemu pci_de_vnet = {
  10040. .pe_emu = "virtio-net",
  10041. .pe_init = pci_vtnet_init,
  10042. .pe_barwrite = vi_pci_write,