Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c
- index 9ba07d7e0b0..7ff1276f64a 100644
- --- a/lib/libvmmapi/vmmapi.c
- +++ b/lib/libvmmapi/vmmapi.c
- @@ -887,6 +887,57 @@ vm_map_pptdev_mmio(struct vmctx *ctx, int bus, int slot, int func,
- return (ioctl(ctx->fd, VM_MAP_PPTDEV_MMIO, &pptmmio));
- }
- +/*
- + * Export the file descriptor associated with this VM, useful for external
- + * programs (e.g. to issue ioctl()).
- + */
- +int
- +vm_get_fd(struct vmctx *ctx)
- +{
- + return (ctx->fd);
- +}
- +
- +/*
- + * Map a user-space buffer into the VM at a given physical address.
- + * To be used for devices that expose internal memory.
- + */
- +int
- +vm_map_user_buf(struct vmctx *ctx, vm_paddr_t gpa, size_t len,
- + void *host_buf, int map)
- +{
- + struct vm_user_buf user_buf;
- +
- + bzero(&user_buf, sizeof(user_buf));
- + user_buf.gpa = gpa;
- + user_buf.len = len;
- + user_buf.addr = host_buf;
- + user_buf.map = map;
- +
- + return (ioctl(ctx->fd, VM_MAP_USER_BUF, &user_buf));
- +}
- +
- +/*
- + * Register handler for guest I/O accesses on a given I/O port, optionally
- + * filtering on the data. QEMU/KVM implement a similar functionality.
- + */
- +int
- +vm_io_reg_handler(struct vmctx *ctx, uint16_t port, uint16_t in,
- + uint32_t mask_data, uint32_t data,
- + enum vm_io_regh_type type, void *arg)
- +{
- + struct vm_io_reg_handler ioregh;
- +
- + bzero(&ioregh, sizeof(ioregh));
- + ioregh.port = port;
- + ioregh.in = in;
- + ioregh.mask_data = mask_data;
- + ioregh.data = data;
- + ioregh.type = type;
- + ioregh.arg = arg;
- +
- + return (ioctl(ctx->fd, VM_IO_REG_HANDLER, &ioregh));
- +}
- +
- int
- vm_setup_pptdev_msi(struct vmctx *ctx, int vcpu, int bus, int slot, int func,
- uint64_t addr, uint64_t msg, int numvec)
- @@ -1444,7 +1495,7 @@ vm_get_ioctls(size_t *len)
- VM_GET_HPET_CAPABILITIES, VM_GET_GPA_PMAP, VM_GLA2GPA,
- VM_ACTIVATE_CPU, VM_GET_CPUS, VM_SET_INTINFO, VM_GET_INTINFO,
- VM_RTC_WRITE, VM_RTC_READ, VM_RTC_SETTIME, VM_RTC_GETTIME,
- - VM_RESTART_INSTRUCTION };
- + VM_RESTART_INSTRUCTION, VM_MAP_USER_BUF, VM_IO_REG_HANDLER };
- if (len == NULL) {
- cmds = malloc(sizeof(vm_ioctl_cmds));
- diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h
- index df3a81b5aad..05dccd320fc 100644
- --- a/lib/libvmmapi/vmmapi.h
- +++ b/lib/libvmmapi/vmmapi.h
- @@ -163,6 +163,12 @@ int vm_setup_pptdev_msix(struct vmctx *ctx, int vcpu, int bus, int slot,
- int vm_get_intinfo(struct vmctx *ctx, int vcpu, uint64_t *i1, uint64_t *i2);
- int vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t exit_intinfo);
- +/* The next three functions are documented in vmmapi.c */
- +int vm_get_fd(struct vmctx *ctx);
- +int vm_map_user_buf(struct vmctx *ctx, vm_paddr_t gpa, size_t len,
- + void *host_buf, int map);
- +int vm_io_reg_handler(struct vmctx *ctx, uint16_t port, uint16_t in,
- + uint32_t mask_data, uint32_t data, enum vm_io_regh_type type, void *arg);
- const cap_ioctl_t *vm_get_ioctls(size_t *len);
- /*
- diff --git a/share/man/man4/netmap.4 b/share/man/man4/netmap.4
- index e86d3d17a04..7cf58a20399 100644
- --- a/share/man/man4/netmap.4
- +++ b/share/man/man4/netmap.4
- @@ -103,13 +103,12 @@ virtual machines, NICs and the host stack.
- .Pp
- .Nm
- supports both non-blocking I/O through
- -.Xr ioctl 2 ,
- +.Xr ioctl 2 ,
- synchronization and blocking I/O through a file descriptor
- and standard OS mechanisms such as
- .Xr select 2 ,
- .Xr poll 2 ,
- .Xr epoll 2 ,
- -and
- .Xr kqueue 2 .
- All types of
- .Nm netmap ports
- @@ -156,7 +155,7 @@ All NICs operating in
- .Nm
- mode use the same memory region,
- accessible to all processes who own
- -.Pa /dev/netmap
- +.Nm /dev/netmap
- file descriptors bound to NICs.
- Independent
- .Nm VALE
- @@ -212,7 +211,7 @@ and the number, size and location of all the
- data structures, which can be accessed by mmapping the memory
- .Dl char *mem = mmap(0, arg.nr_memsize, fd);
- .Pp
- -Non-blocking I/O is done with special
- +Non-blocking I/O is done with special
- .Xr ioctl 2
- .Xr select 2
- and
- @@ -238,11 +237,10 @@ and returns the NIC to normal mode (reconnecting the data path
- to the host stack), or destroys the virtual port.
- .Sh DATA STRUCTURES
- The data structures in the mmapped memory region are detailed in
- -.In sys/net/netmap.h ,
- +.Xr sys/net/netmap.h ,
- which is the ultimate reference for the
- .Nm
- -API.
- -The main structures and fields are indicated below:
- +API. The main structures and fields are indicated below:
- .Bl -tag -width XXX
- .It Dv struct netmap_if (one per interface)
- .Bd -literal
- @@ -271,9 +269,7 @@ to be used as temporary storage for packets.
- contains the index of the first of these free rings,
- which are connected in a list (the first uint32_t of each
- buffer being the index of the next buffer in the list).
- -A
- -.Dv 0
- -indicates the end of the list.
- +A 0 indicates the end of the list.
- .It Dv struct netmap_ring (one per ring)
- .Bd -literal
- struct netmap_ring {
- @@ -293,8 +289,8 @@ struct netmap_ring {
- .Ed
- .Pp
- Implements transmit and receive rings, with read/write
- -pointers, metadata and an array of
- -.Em slots
- +pointers, metadata and an array of
- +.Pa slots
- describing the buffers.
- .It Dv struct netmap_slot (one per buffer)
- .Bd -literal
- @@ -317,11 +313,10 @@ The offset of the
- in the mmapped region is indicated by the
- .Pa nr_offset
- field in the structure returned by
- -.Dv NIOCREGIF .
- +.Pa NIOCREGIF .
- From there, all other objects are reachable through
- relative references (offsets or indexes).
- -Macros and functions in
- -.In net/netmap_user.h
- +Macros and functions in <net/netmap_user.h>
- help converting them into actual pointers:
- .Pp
- .Dl struct netmap_if *nifp = NETMAP_IF(mem, arg.nr_offset);
- @@ -351,9 +346,7 @@ passes
- .Va tail
- is the first slot reserved to the kernel.
- .Pp
- -Slot indexes
- -.Em must
- -only move forward;
- +Slot indexes MUST only move forward;
- for convenience, the function
- .Dl nm_ring_next(ring, index)
- returns the next index modulo the ring size.
- @@ -482,10 +475,7 @@ One packet is fully contained in a single buffer.
- The following flags affect slot and buffer processing:
- .Bl -tag -width XXX
- .It NS_BUF_CHANGED
- -.Em must
- -be used when the
- -.Va buf_idx
- -in the slot is changed.
- +it MUST be used when the buf_idx in the slot is changed.
- This can be used to implement
- zero-copy forwarding, see
- .Sx ZERO-COPY FORWARDING .
- @@ -494,20 +484,19 @@ reports when this buffer has been transmitted.
- Normally,
- .Nm
- notifies transmit completions in batches, hence signals
- -can be delayed indefinitely.
- -This flag helps detect
- +can be delayed indefinitely. This flag helps detecting
- when packets have been sent and a file descriptor can be closed.
- .It NS_FORWARD
- When a ring is in 'transparent' mode (see
- .Sx TRANSPARENT MODE ) ,
- -packets marked with this flag are forwarded to the other endpoint
- +packets marked with this flag are forwarded to the other endpoint
- at the next system call, thus restoring (in a selective way)
- the connection between a NIC and the host stack.
- .It NS_NO_LEARN
- -tells the forwarding code that the source MAC address for this
- +tells the forwarding code that the SRC MAC address for this
- packet must not be used in the learning bridge code.
- .It NS_INDIRECT
- -indicates that the packet's payload is in a user-supplied buffer
- +indicates that the packet's payload is in a user-supplied buffer,
- whose user virtual address is in the 'ptr' field of the slot.
- The size can reach 65535 bytes.
- .Pp
- @@ -540,8 +529,7 @@ Slots with a value greater than 1 also have NS_MOREFRAG set.
- .Sh IOCTLS
- .Nm
- uses two ioctls (NIOCTXSYNC, NIOCRXSYNC)
- -for non-blocking I/O.
- -They take no argument.
- +for non-blocking I/O. They take no argument.
- Two more ioctls (NIOCGINFO, NIOCREGIF) are used
- to query and configure ports, with the following argument:
- .Bd -literal
- @@ -553,7 +541,7 @@ struct nmreq {
- uint32_t nr_tx_slots; /* (i/o) slots in tx rings */
- uint32_t nr_rx_slots; /* (i/o) slots in rx rings */
- uint16_t nr_tx_rings; /* (i/o) number of tx rings */
- - uint16_t nr_rx_rings; /* (i/o) number of rx rings */
- + uint16_t nr_rx_rings; /* (i/o) number of rx rings */
- uint16_t nr_ringid; /* (i/o) ring(s) we care about */
- uint16_t nr_cmd; /* (i) special command */
- uint16_t nr_arg1; /* (i/o) extra arguments */
- @@ -579,8 +567,7 @@ interface is actually put in netmap mode.
- .It Pa nr_memsize
- indicates the size of the
- .Nm
- -memory region.
- -NICs in
- +memory region. NICs in
- .Nm
- mode all share the same memory region,
- whereas
- @@ -599,8 +586,7 @@ using interface-specific functions (e.g.,
- .It Dv NIOCREGIF
- binds the port named in
- .Va nr_name
- -to the file descriptor.
- -For a physical device this also switches it into
- +to the file descriptor. For a physical device this also switches it into
- .Nm
- mode, disconnecting
- it from the host stack.
- @@ -652,7 +638,7 @@ In the example below, "netmap:foo" is any valid netmap port name.
- (default) all hardware ring pairs
- .It NR_REG_SW "netmap:foo^"
- the ``host rings'', connecting to the host stack.
- -.It NR_REG_NIC_SW "netmap:foo+"
- +.It NR_REG_NIC_SW "netmap:foo+"
- all hardware rings and the host rings
- .It NR_REG_ONE_NIC "netmap:foo-i"
- only the i-th hardware ring pair, where the number is in
- @@ -665,11 +651,9 @@ the slave side of the netmap pipe whose identifier (i) is in
- .Pa nr_ringid .
- .Pp
- The identifier of a pipe must be thought as part of the pipe name,
- -and does not need to be sequential.
- -On return the pipe
- +and does not need to be sequential. On return the pipe
- will only have a single ring pair with index 0,
- -irrespective of the value of
- -.Va i.
- +irrespective of the value of i.
- .El
- .Pp
- By default, a
- @@ -719,22 +703,13 @@ are supported too.
- .Pp
- Packets in transmit rings are normally pushed out
- (and buffers reclaimed) even without
- -requesting write events.
- -Passing the
- -.Dv NETMAP_NO_TX_POLL
- -flag to
- +requesting write events. Passing the NETMAP_NO_TX_POLL flag to
- .Em NIOCREGIF
- disables this feature.
- By default, receive rings are processed only if read
- -events are requested.
- -Passing the
- -.Dv NETMAP_DO_RX_POLL
- -flag to
- +events are requested. Passing the NETMAP_DO_RX_POLL flag to
- .Em NIOCREGIF updates receive rings even without read events.
- -Note that on epoll and kqueue,
- -.Dv NETMAP_NO_TX_POLL
- -and
- -.Dv NETMAP_DO_RX_POLL
- +Note that on epoll and kqueue, NETMAP_NO_TX_POLL and NETMAP_DO_RX_POLL
- only have an effect when some event is posted for the file descriptor.
- .Sh LIBRARIES
- The
- @@ -742,13 +717,12 @@ The
- API is supposed to be used directly, both because of its simplicity and
- for efficient integration with applications.
- .Pp
- -For convenience, the
- -.In net/netmap_user.h
- +For convenience, the
- +.Va <net/netmap_user.h>
- header provides a few macros and functions to ease creating
- a file descriptor and doing I/O with a
- .Nm
- -port.
- -These are loosely modeled after the
- +port. These are loosely modeled after the
- .Xr pcap 3
- API, to ease porting of libpcap-based applications to
- .Nm .
- @@ -885,8 +859,7 @@ Verbose kernel messages
- .It Va dev.netmap.if_num: 100
- .It Va dev.netmap.if_size: 1024
- Sizes and number of objects (netmap_if, netmap_ring, buffers)
- -for the global memory region.
- -The only parameter worth modifying is
- +for the global memory region. The only parameter worth modifying is
- .Va dev.netmap.buf_num
- as it impacts the total amount of memory used by netmap.
- .It Va dev.netmap.buf_curr_num: 0
- @@ -899,8 +872,7 @@ Actual values in use.
- .It Va dev.netmap.bridge_batch: 1024
- Batch size used when moving packets across a
- .Nm VALE
- -switch.
- -Values above 64 generally guarantee good
- +switch. Values above 64 generally guarantee good
- performance.
- .El
- .Sh SYSTEM CALLS
- @@ -931,14 +903,12 @@ may be of use.
- comes with a few programs that can be used for testing or
- simple applications.
- See the
- -.Pa examples/
- +.Va examples/
- directory in
- .Nm
- distributions, or
- -.Pa tools/tools/netmap/
- -directory in
- -.Fx
- -distributions.
- +.Va tools/tools/netmap/
- +directory in FreeBSD distributions.
- .Pp
- .Xr pkt-gen 8
- is a general purpose traffic source/sink.
- @@ -958,8 +928,7 @@ rates, and use multiple send/receive threads and cores.
- .Xr bridge 4
- is another test program which interconnects two
- .Nm
- -ports.
- -It can be used for transparent forwarding between
- +ports. It can be used for transparent forwarding between
- interfaces, as in
- .Dl bridge -i ix0 -i ix1
- or even connect the NIC to the host stack using netmap
- @@ -1026,8 +995,7 @@ void receiver(void)
- .Ss ZERO-COPY FORWARDING
- Since physical interfaces share the same memory region,
- it is possible to do packet forwarding between ports
- -swapping buffers.
- -The buffer from the transmit ring is used
- +swapping buffers. The buffer from the transmit ring is used
- to replenish the receive ring:
- .Bd -literal -compact
- uint32_t tmp;
- @@ -1099,7 +1067,6 @@ and further extended with help from
- .An Matteo Landi ,
- .An Gaetano Catalli ,
- .An Giuseppe Lettieri ,
- -and
- .An Vincenzo Maffione .
- .Pp
- .Nm
- @@ -1112,8 +1079,7 @@ No matter how fast the CPU and OS are,
- achieving line rate on 10G and faster interfaces
- requires hardware with sufficient performance.
- Several NICs are unable to sustain line rate with
- -small packet sizes.
- -Insufficient PCIe or memory bandwidth
- +small packet sizes. Insufficient PCIe or memory bandwidth
- can also cause reduced performance.
- .Pp
- Another frequent reason for low performance is the use
- @@ -1121,6 +1087,7 @@ of flow control on the link: a slow receiver can limit
- the transmit speed.
- Be sure to disable flow control when running high
- speed experiments.
- +.Pp
- .Ss SPECIAL NIC FEATURES
- .Nm
- is orthogonal to some NIC features such as
- @@ -1140,6 +1107,6 @@ and filtering of incoming traffic.
- features such as
- .Em checksum offloading , TCP segmentation offloading ,
- .Em encryption , VLAN encapsulation/decapsulation ,
- -etc.
- +etc.
- When using netmap to exchange packets with the host stack,
- make sure to disable these features.
- diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h
- index bdfff1f8408..46536912b36 100644
- --- a/sys/amd64/include/vmm.h
- +++ b/sys/amd64/include/vmm.h
- @@ -183,6 +183,8 @@ int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
- int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
- void vm_free_memseg(struct vm *vm, int ident);
- int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
- +int vm_map_usermem(struct vm *vm, vm_paddr_t gpa, size_t len, void *buf,
- + int map, struct thread *td);
- int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
- int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
- int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
- @@ -321,6 +323,7 @@ struct vatpic *vm_atpic(struct vm *vm);
- struct vatpit *vm_atpit(struct vm *vm);
- struct vpmtmr *vm_pmtmr(struct vm *vm);
- struct vrtc *vm_rtc(struct vm *vm);
- +struct ioregh *vm_ioregh(struct vm *vm);
- /*
- * Inject exception 'vector' into the guest vcpu. This function returns 0 on
- @@ -417,7 +420,14 @@ enum vm_intr_trigger {
- EDGE_TRIGGER,
- LEVEL_TRIGGER
- };
- -
- +
- +/* Operations supported on VM_IO_REG_HANDLER ioctl. */
- +enum vm_io_regh_type {
- + VM_IO_REGH_DELETE,
- + VM_IO_REGH_KWEVENTS, /* kernel wait events */
- + VM_IO_REGH_MAX
- +};
- +
- /*
- * The 'access' field has the format specified in Table 21-2 of the Intel
- * Architecture Manual vol 3b.
- diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h
- index 1af75a3c065..7dd4c72ed6e 100644
- --- a/sys/amd64/include/vmm_dev.h
- +++ b/sys/amd64/include/vmm_dev.h
- @@ -123,6 +123,24 @@ struct vm_pptdev_mmio {
- size_t len;
- };
- +/* Argument for VM_MAP_USER_BUF ioctl in vmmapi.c */
- +struct vm_user_buf {
- + vm_paddr_t gpa;
- + void *addr;
- + size_t len;
- + int map; /* boolean */
- +};
- +
- +/* Argument for VM_IO_REG_HANDLER ioctl in vmmapi.c */
- +struct vm_io_reg_handler {
- + uint16_t port; /* I/O address */
- + uint16_t in; /* 0 out, 1 in */
- + uint32_t mask_data; /* 0 means match anything */
- + uint32_t data; /* data to match */
- + enum vm_io_regh_type type; /* handler type */
- + void *arg; /* handler argument */
- +};
- +
- struct vm_pptdev_msi {
- int vcpu;
- int bus;
- @@ -286,6 +304,10 @@ enum {
- IOCNUM_RTC_WRITE = 101,
- IOCNUM_RTC_SETTIME = 102,
- IOCNUM_RTC_GETTIME = 103,
- +
- + /* host mmap and IO handler */
- + IOCNUM_MAP_USER_BUF = 104,
- + IOCNUM_IO_REG_HANDLER = 105,
- };
- #define VM_RUN \
- @@ -344,6 +366,10 @@ enum {
- _IOW('v', IOCNUM_UNBIND_PPTDEV, struct vm_pptdev)
- #define VM_MAP_PPTDEV_MMIO \
- _IOW('v', IOCNUM_MAP_PPTDEV_MMIO, struct vm_pptdev_mmio)
- +#define VM_MAP_USER_BUF \
- + _IOW('v', IOCNUM_MAP_USER_BUF, struct vm_user_buf)
- +#define VM_IO_REG_HANDLER \
- + _IOW('v', IOCNUM_IO_REG_HANDLER, struct vm_io_reg_handler)
- #define VM_PPTDEV_MSI \
- _IOW('v', IOCNUM_PPTDEV_MSI, struct vm_pptdev_msi)
- #define VM_PPTDEV_MSIX \
- diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c
- index 537454a48e7..dda2e3b0660 100644
- --- a/sys/amd64/vmm/vmm.c
- +++ b/sys/amd64/vmm/vmm.c
- @@ -66,6 +66,7 @@ __FBSDID("$FreeBSD$");
- #include "vmm_ktr.h"
- #include "vmm_host.h"
- #include "vmm_mem.h"
- +#include "vmm_usermem.h"
- #include "vmm_util.h"
- #include "vatpic.h"
- #include "vatpit.h"
- @@ -148,6 +149,7 @@ struct vm {
- struct vatpit *vatpit; /* (i) virtual atpit */
- struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */
- struct vrtc *vrtc; /* (o) virtual RTC */
- + struct ioregh *ioregh; /* () I/O reg handler */
- volatile cpuset_t active_cpus; /* (i) active vcpus */
- int suspend; /* (i) stop VM execution */
- volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
- @@ -409,6 +411,7 @@ vm_init(struct vm *vm, bool create)
- vm->vpmtmr = vpmtmr_init(vm);
- if (create)
- vm->vrtc = vrtc_init(vm);
- + vm->ioregh = ioregh_init(vm);
- CPU_ZERO(&vm->active_cpus);
- @@ -465,11 +468,13 @@ vm_cleanup(struct vm *vm, bool destroy)
- vrtc_cleanup(vm->vrtc);
- else
- vrtc_reset(vm->vrtc);
- + ioregh_cleanup(vm->ioregh);
- vpmtmr_cleanup(vm->vpmtmr);
- vatpit_cleanup(vm->vatpit);
- vhpet_cleanup(vm->vhpet);
- vatpic_cleanup(vm->vatpic);
- vioapic_cleanup(vm->vioapic);
- + vmm_usermem_cleanup(vm->vmspace);
- for (i = 0; i < VM_MAXCPU; i++)
- vcpu_cleanup(vm, i, destroy);
- @@ -542,6 +547,18 @@ vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
- return (0);
- }
- +/* Handler function for VM_MAP_USER_BUF ioctl. */
- +int
- +vm_map_usermem(struct vm *vm, vm_paddr_t gpa, size_t len, void *buf,
- + int map, struct thread *td)
- +{
- + if (!map) /* this is an unmapping request */
- + return vmm_usermem_free(vm->vmspace, gpa, len);
- +
- + /* this is a mapping request */
- + return vmm_usermem_alloc(vm->vmspace, gpa, len, buf, td);
- +}
- +
- int
- vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
- {
- @@ -578,6 +595,9 @@ vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
- if (ppt_is_mmio(vm, gpa))
- return (true); /* 'gpa' is pci passthru mmio */
- + if (usermem_mapped(vm->vmspace, gpa))
- + return (true); /* 'gpa' is user-space buffer mapped */
- +
- return (false);
- }
- @@ -2449,6 +2469,12 @@ vm_rtc(struct vm *vm)
- return (vm->vrtc);
- }
- +struct ioregh *
- +vm_ioregh(struct vm *vm)
- +{
- + return (vm->ioregh);
- +}
- +
- enum vm_reg_name
- vm_segment_name(int seg)
- {
- diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c
- index 53a8bdc660a..cd333cd9cf8 100644
- --- a/sys/amd64/vmm/vmm_dev.c
- +++ b/sys/amd64/vmm/vmm_dev.c
- @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
- #include "vmm_lapic.h"
- #include "vmm_stat.h"
- #include "vmm_mem.h"
- +#include "vmm_ioport.h"
- #include "io/ppt.h"
- #include "io/vatpic.h"
- #include "io/vioapic.h"
- @@ -300,6 +301,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
- struct vm_pptdev_mmio *pptmmio;
- struct vm_pptdev_msi *pptmsi;
- struct vm_pptdev_msix *pptmsix;
- + struct vm_user_buf *usermem;
- + struct vm_io_reg_handler *ioregh;
- struct vm_nmi *vmnmi;
- struct vm_stats *vmstats;
- struct vm_stat_desc *statdesc;
- @@ -358,6 +361,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
- case VM_UNBIND_PPTDEV:
- case VM_ALLOC_MEMSEG:
- case VM_MMAP_MEMSEG:
- + case VM_MAP_USER_BUF:
- case VM_REINIT:
- /*
- * ioctls that operate on the entire virtual machine must
- @@ -433,6 +437,16 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
- pptmmio->func, pptmmio->gpa, pptmmio->len,
- pptmmio->hpa);
- break;
- + case VM_MAP_USER_BUF:
- + usermem = (struct vm_user_buf *)data;
- + error = vm_map_usermem(sc->vm, usermem->gpa, usermem->len,
- + usermem->addr, usermem->map, td);
- + break;
- + case VM_IO_REG_HANDLER:
- + ioregh = (struct vm_io_reg_handler *)data;
- + error = vmm_ioport_reg_handler(sc->vm, ioregh->port, ioregh->in, ioregh->mask_data,
- + ioregh->data, ioregh->type, ioregh->arg);
- + break;
- case VM_BIND_PPTDEV:
- pptdev = (struct vm_pptdev *)data;
- error = vm_assign_pptdev(sc->vm, pptdev->bus, pptdev->slot,
- diff --git a/sys/amd64/vmm/vmm_ioport.c b/sys/amd64/vmm/vmm_ioport.c
- index 63044e81402..5d37561dcf9 100644
- --- a/sys/amd64/vmm/vmm_ioport.c
- +++ b/sys/amd64/vmm/vmm_ioport.c
- @@ -97,31 +97,267 @@ inout_instruction(struct vm_exit *vmexit)
- }
- #endif /* KTR */
- +#ifdef VMM_IOPORT_REG_HANDLER
- +#include <sys/kernel.h>
- +#include <sys/param.h>
- +#include <sys/lock.h>
- +#include <sys/sx.h>
- +#include <sys/malloc.h>
- +#include <sys/systm.h>
- +
- +static MALLOC_DEFINE(M_IOREGH, "ioregh", "bhyve ioport reg handlers");
- +
- +#define IOPORT_MAX_REG_HANDLER 16
- +
- +/*
- + * ioport_reg_handler functions allow us to catch VM write/read
- + * on specific I/O address and send notification.
- + *
- + * When the VM writes or reads a specific value on I/O address, if the address
- + * and the value matches with the info stored durign the handler registration,
- + * then we send a notification (we can have multiple type of notification,
- + * but for now is implemented only the VM_IO_REGH_KWEVENTS handler.
- + */
- +
- +typedef int (*ioport_reg_handler_func_t)(struct vm *vm,
- + struct ioport_reg_handler *regh, uint32_t *val);
- +
- +struct ioport_reg_handler {
- + uint16_t port; /* I/O address */
- + uint16_t in; /* 0 out, 1 in */
- + uint32_t mask_data; /* 0 means match anything */
- + uint32_t data; /* data to match */
- + ioport_reg_handler_func_t handler; /* handler pointer */
- + void *handler_arg; /* handler argument */
- +};
- +
- +struct ioregh {
- + struct sx lock;
- + /* TODO: use hash table */
- + struct ioport_reg_handler handlers[IOPORT_MAX_REG_HANDLER];
- +};
- +
- +/* ----- I/O reg handlers ----- */
- +
- +/*
- + * VM_IO_REGH_KWEVENTS handler
- + *
- + * wakeup() on specified address that uniquely identifies the event
- + *
- + */
- +static int
- +vmm_ioport_reg_wakeup(struct vm *vm, struct ioport_reg_handler *regh, uint32_t *val)
- +{
- + wakeup(regh->handler_arg);
- + return (0);
- +}
- +
- +/* call with ioregh->lock held */
- +static struct ioport_reg_handler *
- +vmm_ioport_find_handler(struct ioregh *ioregh, uint16_t port, uint16_t in,
- + uint32_t mask_data, uint32_t data)
- +{
- + struct ioport_reg_handler *regh;
- + uint32_t mask;
- + int i;
- +
- + regh = ioregh->handlers;
- + for (i = 0; i < IOPORT_MAX_REG_HANDLER; i++) {
- + if (regh[i].handler != NULL) {
- + mask = regh[i].mask_data & mask_data;
- + if ((regh[i].port == port) && (regh[i].in == in)
- + && ((mask & regh[i].data) == (mask & data))) {
- + return ®h[i];
- + }
- + }
- + }
- +
- + return (NULL);
- +}
- +
- +/* call with ioregh->lock held */
- +static struct ioport_reg_handler *
- +vmm_ioport_empty_handler(struct ioregh *ioregh)
- +{
- + struct ioport_reg_handler *regh;
- + int i;
- +
- + regh = ioregh->handlers;
- + for (i = 0; i < IOPORT_MAX_REG_HANDLER; i++) {
- + if (regh[i].handler == NULL) {
- + return ®h[i];
- + }
- + }
- +
- + return (NULL);
- +}
- +
- +
- +static int
- +vmm_ioport_add_handler(struct vm *vm, uint16_t port, uint16_t in, uint32_t mask_data,
- + uint32_t data, ioport_reg_handler_func_t handler, void *handler_arg)
- +{
- + struct ioport_reg_handler *regh;
- + struct ioregh *ioregh;
- + int ret = 0;
- +
- + ioregh = vm_ioregh(vm);
- +
- + sx_xlock(&ioregh->lock);
- +
- + regh = vmm_ioport_find_handler(ioregh, port, in, mask_data, data);
- + if (regh != NULL) {
- + printf("%s: handler for port %d in %d mask_data %d data %d "
- + "already registered\n",
- + __FUNCTION__, port, in, mask_data, data);
- + ret = EEXIST;
- + goto err;
- + }
- +
- + regh = vmm_ioport_empty_handler(ioregh);
- + if (regh == NULL) {
- + printf("%s: empty reg_handler slot not found\n", __FUNCTION__);
- + ret = ENOMEM;
- + goto err;
- + }
- +
- + regh->port = port;
- + regh->in = in;
- + regh->mask_data = mask_data;
- + regh->data = data;
- + regh->handler = handler;
- + regh->handler_arg = handler_arg;
- +
- +err:
- + sx_xunlock(&ioregh->lock);
- + return (ret);
- +}
- +
- +static int
- +vmm_ioport_del_handler(struct vm *vm, uint16_t port, uint16_t in,
- + uint32_t mask_data, uint32_t data)
- +{
- + struct ioport_reg_handler *regh;
- + struct ioregh *ioregh;
- + int ret = 0;
- +
- + ioregh = vm_ioregh(vm);
- +
- + sx_xlock(&ioregh->lock);
- +
- + regh = vmm_ioport_find_handler(ioregh, port, in, mask_data, data);
- +
- + if (regh == NULL) {
- + ret = EINVAL;
- + goto err;
- + }
- +
- + bzero(regh, sizeof(struct ioport_reg_handler));
- +err:
- + sx_xunlock(&ioregh->lock);
- + return (ret);
- +}
- +
- +/*
- + * register or delete a new I/O event handler.
- + */
- +int
- +vmm_ioport_reg_handler(struct vm *vm, uint16_t port, uint16_t in,
- + uint32_t mask_data, uint32_t data, enum vm_io_regh_type type, void *arg)
- +{
- + int ret = 0;
- +
- + switch (type) {
- + case VM_IO_REGH_DELETE:
- + ret = vmm_ioport_del_handler(vm, port, in, mask_data, data);
- + break;
- + case VM_IO_REGH_KWEVENTS:
- + ret = vmm_ioport_add_handler(vm, port, in, mask_data, data,
- + vmm_ioport_reg_wakeup, arg);
- + break;
- + default:
- + printf("%s: unknown reg_handler type\n", __FUNCTION__);
- + ret = EINVAL;
- + break;
- + }
- +
- + return (ret);
- +}
- +
- +/*
- + * Invoke a handler, if the data matches.
- + */
- +static int
- +invoke_reg_handler(struct vm *vm, int vcpuid, struct vm_exit *vmexit,
- + uint32_t *val, int *error)
- +{
- + struct ioport_reg_handler *regh;
- + struct ioregh *ioregh;
- + uint32_t mask_data;
- +
- + mask_data = vie_size2mask(vmexit->u.inout.bytes);
- + ioregh = vm_ioregh(vm);
- +
- + sx_slock(&ioregh->lock);
- + regh = vmm_ioport_find_handler(ioregh, vmexit->u.inout.port,
- + vmexit->u.inout.in, mask_data, vmexit->u.inout.eax);
- + if (regh != NULL) {
- + *error = (*(regh->handler))(vm, regh, val);
- + }
- + sx_sunlock(&ioregh->lock);
- + return (regh != NULL);
- +}
- +
- +struct ioregh *
- +ioregh_init(struct vm *vm)
- +{
- + struct ioregh *ioregh;
- +
- + ioregh = malloc(sizeof(struct ioregh), M_IOREGH, M_WAITOK | M_ZERO);
- + sx_init(&ioregh->lock, "ioregh lock");
- +
- + return (ioregh);
- +}
- +
- +void
- +ioregh_cleanup(struct ioregh *ioregh)
- +{
- + sx_destroy(&ioregh->lock);
- + free(ioregh, M_IOREGH);
- +}
- +#else /* !VMM_IOPORT_REG_HANDLER */
- +#define invoke_reg_handler(_1, _2, _3, _4, _5) (0)
- +#endif /* VMM_IOPORT_REG_HANDLER */
- +
- static int
- emulate_inout_port(struct vm *vm, int vcpuid, struct vm_exit *vmexit,
- bool *retu)
- {
- ioport_handler_func_t handler;
- uint32_t mask, val;
- - int error;
- + int regh = 0, error = 0;
- /*
- * If there is no handler for the I/O port then punt to userspace.
- */
- - if (vmexit->u.inout.port >= MAX_IOPORTS ||
- - (handler = ioport_handler[vmexit->u.inout.port]) == NULL) {
- + if ((vmexit->u.inout.port >= MAX_IOPORTS ||
- + (handler = ioport_handler[vmexit->u.inout.port]) == NULL) &&
- + (regh = invoke_reg_handler(vm, vcpuid, vmexit, &val, &error)) == 0) {
- *retu = true;
- return (0);
- }
- - mask = vie_size2mask(vmexit->u.inout.bytes);
- + if (!regh) {
- + mask = vie_size2mask(vmexit->u.inout.bytes);
- +
- + if (!vmexit->u.inout.in) {
- + val = vmexit->u.inout.eax & mask;
- + }
- - if (!vmexit->u.inout.in) {
- - val = vmexit->u.inout.eax & mask;
- + error = (*handler)(vm, vcpuid, vmexit->u.inout.in,
- + vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
- }
- - error = (*handler)(vm, vcpuid, vmexit->u.inout.in,
- - vmexit->u.inout.port, vmexit->u.inout.bytes, &val);
- if (error) {
- /*
- * The value returned by this function is also the return value
- diff --git a/sys/amd64/vmm/vmm_ioport.h b/sys/amd64/vmm/vmm_ioport.h
- index ba51989b1af..5ef0d16d17b 100644
- --- a/sys/amd64/vmm/vmm_ioport.h
- +++ b/sys/amd64/vmm/vmm_ioport.h
- @@ -29,6 +29,22 @@
- #ifndef _VMM_IOPORT_H_
- #define _VMM_IOPORT_H_
- +#define VMM_IOPORT_REG_HANDLER
- +#ifdef VMM_IOPORT_REG_HANDLER
- +struct ioport_reg_handler;
- +struct ioregh;
- +
- +struct ioregh *ioregh_init(struct vm *vm);
- +void ioregh_cleanup(struct ioregh *ioregh);
- +
- +int vmm_ioport_reg_handler(struct vm *vm, uint16_t port, uint16_t in,
- + uint32_t mask_data, uint32_t data, enum vm_io_regh_type type, void *arg);
- +#else /* !VMM_IOPORT_REG_HANDLER */
- +#define ioregh_init(_1) (NULL)
- +#define ioregh_cleanup(_1)
- +#define vmm_ioport_reg_handler(_1, _2, _3, _4,_5, _6, _7) (EINVAL)
- +#endif /* VMM_IOPORT_REG_HANDLER */
- +
- typedef int (*ioport_handler_func_t)(struct vm *vm, int vcpuid,
- bool in, int port, int bytes, uint32_t *val);
- diff --git a/sys/amd64/vmm/vmm_usermem.c b/sys/amd64/vmm/vmm_usermem.c
- new file mode 100644
- index 00000000000..1449e18ef58
- --- /dev/null
- +++ b/sys/amd64/vmm/vmm_usermem.c
- @@ -0,0 +1,188 @@
- +/*
- + * Copyright (C) 2015 Stefano Garzarella (stefano.garzarella@gmail.com)
- + * All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + * 2. Redistributions in binary form must reproduce the above copyright
- + * notice, this list of conditions and the following disclaimer in the
- + * documentation and/or other materials provided with the distribution.
- + *
- + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- + * SUCH DAMAGE.
- + *
- + * $FreeBSD$
- + */
- +
- +#include <sys/cdefs.h>
- +__FBSDID("$FreeBSD$");
- +
- +#include <sys/param.h>
- +#include <sys/systm.h>
- +#include <sys/malloc.h>
- +#include <sys/sglist.h>
- +#include <sys/lock.h>
- +#include <sys/rwlock.h>
- +#include <sys/proc.h>
- +
- +#include <vm/vm.h>
- +#include <vm/vm_param.h>
- +#include <vm/pmap.h>
- +#include <vm/vm_map.h>
- +#include <vm/vm_object.h>
- +#include <vm/vm_page.h>
- +#include <vm/vm_pager.h>
- +
- +#include <machine/md_var.h>
- +
- +#include "vmm_mem.h"
- +#include "vmm_usermem.h"
- +
- +/*
- + * usermem functions allow us to map a host userspace buffer (e.g. from bhyve)
- + * in the guest VM.
- + *
- + * This feature is used to implement ptnetmap on bhyve, mapping the netmap memory
- + * (returned by the mmap() in the bhyve userspace application) in the guest VM.
- + */
- +
- +/* TODO: we can create a dynamic list of usermems */
- +#define MAX_USERMEMS 64
- +
- +static struct usermem {
- + struct vmspace *vmspace; /* guest address space */
- + vm_paddr_t gpa; /* guest physical address */
- + size_t len;
- +} usermems[MAX_USERMEMS];
- +
- +static int
- +vmm_usermem_add(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
- +{
- + int i;
- +
- + for (i = 0; i < MAX_USERMEMS; i++) {
- + if (usermems[i].len == 0) {
- + usermems[i].vmspace = vmspace;
- + usermems[i].gpa = gpa;
- + usermems[i].len = len;
- + break;
- + }
- + }
- +
- + if (i == MAX_USERMEMS) {
- + printf("vmm_usermem_add: empty usermem slot not found\n");
- + return (ENOMEM);
- + }
- +
- + return 0;
- +}
- +
- +static int
- +vmm_usermem_del(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
- +{
- + int i;
- +
- + for (i = 0; i < MAX_USERMEMS; i++) {
- + if (usermems[i].vmspace == vmspace && usermems[i].gpa == gpa
- + && usermems[i].len == len) {
- + bzero(&usermems[i], sizeof(struct usermem));
- + return 1;
- + }
- + }
- +
- + return 0;
- +}
- +
- +boolean_t
- +usermem_mapped(struct vmspace *vmspace, vm_paddr_t gpa)
- +{
- + int i;
- +
- + for (i = 0; i < MAX_USERMEMS; i++) {
- + if (usermems[i].vmspace != vmspace || usermems[i].len == 0)
- + continue;
- + if (gpa >= usermems[i].gpa &&
- + gpa < usermems[i].gpa + usermems[i].len)
- + return (TRUE);
- + }
- + return (FALSE);
- +}
- +
- +int
- +vmm_usermem_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len,
- + void *buf, struct thread *td)
- +{
- + vm_object_t obj = NULL;
- + vm_map_t map;
- + vm_map_entry_t entry;
- + vm_pindex_t index;
- + vm_prot_t prot;
- + boolean_t wired;
- + int error;
- +
- + map = &td->td_proc->p_vmspace->vm_map;
- +
- + /* lookup the vm_object that describes the user address */
- + error = vm_map_lookup(&map, (unsigned long)buf, VM_PROT_RW, &entry,
- + &obj, &index, &prot, &wired);
- + if (error != KERN_SUCCESS)
- + return EINVAL;
- +
- + /* map the vm_object into the vmspace */
- + error = vm_map_find(&vmspace->vm_map, obj, index, &gpa, len, 0,
- + VMFS_NO_SPACE, VM_PROT_RW, VM_PROT_RW, 0);
- + if (error != KERN_SUCCESS) {
- + vm_object_deallocate(obj);
- + obj = NULL;
- + }
- + vm_map_lookup_done(map, entry);
- +
- + if (error)
- + return EINVAL;
- +
- + /* acquire the reference to the vm_object */
- + vm_object_reference(obj);
- + vmm_usermem_add(vmspace, gpa, len);
- +
- + return 0;
- +}
- +
- +int
- +vmm_usermem_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
- +{
- + int found;
- +
- + found = vmm_usermem_del(vmspace, gpa, len);
- + if (!found)
- + return EINVAL;
- +
- + //TODO should we call vm_object_deallocate ?
- + return vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
- +}
- +
- +void
- +vmm_usermem_cleanup(struct vmspace *vmspace)
- +{
- + int i;
- +
- + for (i = 0; i < MAX_USERMEMS; i++) {
- + if (usermems[i].vmspace == vmspace) {
- + //TODO same as above
- + vm_map_remove(&vmspace->vm_map, usermems[i].gpa,
- + usermems[i].gpa + usermems[i].len);
- + bzero(&usermems[i], sizeof(struct usermem));
- + }
- + }
- +}
- diff --git a/sys/amd64/vmm/vmm_usermem.h b/sys/amd64/vmm/vmm_usermem.h
- new file mode 100644
- index 00000000000..c55d9006dc6
- --- /dev/null
- +++ b/sys/amd64/vmm/vmm_usermem.h
- @@ -0,0 +1,40 @@
- +/*
- + * Copyright (C) 2015 Stefano Garzarella (stefano.garzarella@gmail.com)
- + * All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + * 2. Redistributions in binary form must reproduce the above copyright
- + * notice, this list of conditions and the following disclaimer in the
- + * documentation and/or other materials provided with the distribution.
- + *
- + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- + * SUCH DAMAGE.
- + *
- + * $FreeBSD$
- + */
- +
- +#ifndef _VMM_USERMEM_H_
- +#define _VMM_USERMEM_H_
- +
- +struct vm;
- +
- +int vmm_usermem_alloc(struct vmspace *, vm_paddr_t gpa,
- + size_t len, void *buf, struct thread *td);
- +int vmm_usermem_free(struct vmspace *, vm_paddr_t gpa, size_t len);
- +void vmm_usermem_cleanup(struct vmspace *);
- +boolean_t usermem_mapped(struct vmspace *, vm_paddr_t gpa);
- +
- +#endif
- diff --git a/sys/dev/netmap/if_em_netmap.h b/sys/dev/netmap/if_em_netmap.h
- index 1fe7563348c..5a66f0e0499 100644
- --- a/sys/dev/netmap/if_em_netmap.h
- +++ b/sys/dev/netmap/if_em_netmap.h
- @@ -24,7 +24,7 @@
- */
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/dev/netmap/if_em_netmap.h 238985 2012-08-02 11:59:43Z luigi $
- *
- * netmap support for: em.
- *
- diff --git a/sys/dev/netmap/if_igb_netmap.h b/sys/dev/netmap/if_igb_netmap.h
- index 33b7b3b6654..884785f719a 100644
- --- a/sys/dev/netmap/if_igb_netmap.h
- +++ b/sys/dev/netmap/if_igb_netmap.h
- @@ -24,7 +24,7 @@
- */
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/dev/netmap/if_igb_netmap.h 256200 2013-10-09 17:32:52Z jfv $
- *
- * Netmap support for igb, partly contributed by Ahmed Kooli
- * For details on netmap support please see ixgbe_netmap.h
- diff --git a/sys/dev/netmap/if_ixl_netmap.h b/sys/dev/netmap/if_ixl_netmap.h
- index 223dc06e36a..14f21e93853 100644
- --- a/sys/dev/netmap/if_ixl_netmap.h
- +++ b/sys/dev/netmap/if_ixl_netmap.h
- @@ -24,7 +24,7 @@
- */
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/dev/netmap/if_ixl_netmap.h 279232 2015-02-24 06:20:50Z luigi $
- *
- * netmap support for: ixl
- *
- @@ -129,7 +129,7 @@ ixl_netmap_attach(struct ixl_vsi *vsi)
- na.ifp = vsi->ifp;
- na.na_flags = NAF_BDG_MAYSLEEP;
- // XXX check that queues is set.
- - printf("queues is %p\n", vsi->queues);
- + nm_prinf("queues is %p\n", vsi->queues);
- if (vsi->queues) {
- na.num_tx_desc = vsi->queues[0].num_desc;
- na.num_rx_desc = vsi->queues[0].num_desc;
- diff --git a/sys/dev/netmap/if_lem_netmap.h b/sys/dev/netmap/if_lem_netmap.h
- index 91c637a8b3f..d8c59014512 100644
- --- a/sys/dev/netmap/if_lem_netmap.h
- +++ b/sys/dev/netmap/if_lem_netmap.h
- @@ -25,7 +25,7 @@
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/dev/netmap/if_lem_netmap.h 271849 2014-09-19 03:51:26Z glebius $
- *
- * netmap support for: lem
- *
- diff --git a/sys/dev/netmap/if_nfe_netmap.h b/sys/dev/netmap/if_nfe_netmap.h
- new file mode 100644
- index 00000000000..c5db32f1323
- --- /dev/null
- +++ b/sys/dev/netmap/if_nfe_netmap.h
- @@ -0,0 +1,384 @@
- +/*
- + * Copyright (C) 2011-2014 Luigi Rizzo. All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + * 2. Redistributions in binary form must reproduce the above copyright
- + * notice, this list of conditions and the following disclaimer in the
- + * documentation and/or other materials provided with the distribution.
- + *
- + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- + * SUCH DAMAGE.
- + */
- +
- +/*
- + * $FreeBSD: head/sys/dev/netmap/if_em_netmap.h 231881 2012-02-17 14:09:04Z luigi $
- + *
- + * netmap support for: nfe XXX not yet tested.
- + *
- + * For more details on netmap support please see ixgbe_netmap.h
- + */
- +
- +
- +#include <net/netmap.h>
- +#include <sys/selinfo.h>
- +#include <vm/vm.h>
- +#include <vm/pmap.h>
- +
- +#include <dev/netmap/netmap_kern.h>
- +
- +
- +static int
- +nfe_netmap_init_buffers(struct nfe_softc *sc)
- +{
- + struct netmap_adapter *na = NA(sc->nfe_ifp);
- + struct netmap_slot *slot;
- + int i, l, n, max_avail;
- + struct nfe_desc32 *desc32 = NULL;
- + struct nfe_desc64 *desc64 = NULL;
- + void *addr;
- + uint64_t paddr;
- +
- + slot = netmap_reset(na, NR_TX, 0, 0);
- + if (!slot)
- + return 0; // not in native mode
- + // XXX init the tx ring
- + n = NFE_TX_RING_COUNT;
- + for (i = 0; i < n; i++) {
- + l = netmap_idx_n2k(&na->tx_rings[0], i);
- + addr = PNMB(na, slot + l, &paddr);
- + netmap_reload_map(sc->txq.tx_data_tag,
- + sc->txq.data[l].tx_data_map, addr);
- + slot[l].flags = 0;
- + if (sc->nfe_flags & NFE_40BIT_ADDR) {
- + desc64 = &sc->txq.desc64[l];
- + desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
- + desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
- + desc64->vtag = 0;
- + desc64->length = htole16(0);
- + desc64->flags = htole16(0);
- + } else {
- + desc32 = &sc->txq.desc32[l];
- + desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
- + desc32->length = htole16(0);
- + desc32->flags = htole16(0);
- + }
- + }
- +
- + slot = netmap_reset(na, NR_RX, 0, 0);
- + // XXX init the rx ring
- + /*
- + * preserve buffers still owned by the driver (and keep one empty).
- + */
- + n = NFE_RX_RING_COUNT;
- + max_avail = n - 1 - nm_kr_rxspace(&na->rx_rings[0]);
- + for (i = 0; i < n; i++) {
- + uint16_t flags;
- + l = netmap_idx_n2k(&na->rx_rings[0], i);
- + addr = PNMB(na, slot + l, &paddr);
- + flags = (i < max_avail) ? NFE_RX_READY : 0;
- + if (sc->nfe_flags & NFE_40BIT_ADDR) {
- + desc64 = &sc->rxq.desc64[l];
- + desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
- + desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
- + desc64->vtag = 0;
- + desc64->length = htole16(NETMAP_BUF_SIZE);
- + desc64->flags = htole16(NFE_RX_READY);
- + } else {
- + desc32 = &sc->rxq.desc32[l];
- + desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
- + desc32->length = htole16(NETMAP_BUF_SIZE);
- + desc32->flags = htole16(NFE_RX_READY);
- + }
- +
- + netmap_reload_map(sc->rxq.rx_data_tag,
- + sc->rxq.data[l].rx_data_map, addr);
- + bus_dmamap_sync(sc->rxq.rx_data_tag,
- + sc->rxq.data[l].rx_data_map, BUS_DMASYNC_PREREAD);
- + }
- +
- + return 1;
- +}
- +
- +
- +/*
- + * Register/unregister. We are already under netmap lock.
- + */
- +static int
- +nfe_netmap_reg(struct netmap_adapter *na, int onoff)
- +{
- + struct ifnet *ifp = na->ifp;
- + struct nfe_softc *sc = ifp->if_softc;
- +
- + NFE_LOCK(sc);
- + nfe_stop(ifp); /* also clear IFF_DRV_RUNNING */
- + if (onoff) {
- + nm_set_native_flags(na);
- + } else {
- + nm_clear_native_flags(na);
- + }
- + nfe_init_locked(sc); /* also enable intr */
- + NFE_UNLOCK(sc);
- + return (0);
- +}
- +
- +
- +/*
- + * Reconcile kernel and user view of the transmit ring.
- + */
- +static int
- +nfe_netmap_txsync(struct netmap_kring *kring, int flags)
- +{
- + struct netmap_adapter *na = kring->na;
- + struct ifnet *ifp = na->ifp;
- + struct netmap_ring *ring = kring->ring;
- + u_int nm_i; /* index into the netmap ring */
- + u_int nic_i; /* index into the NIC ring */
- + u_int n;
- + u_int const lim = kring->nkr_num_slots - 1;
- + u_int const head = kring->rhead;
- + /* generate an interrupt approximately every half ring */
- + u_int report_frequency = kring->nkr_num_slots >> 1;
- +
- + /* device-specific */
- + struct nfe_softc *sc = ifp->if_softc;
- + struct nfe_desc32 *desc32 = NULL;
- + struct nfe_desc64 *desc64 = NULL;
- +
- + bus_dmamap_sync(sc->txq.tx_desc_tag, sc->txq.tx_desc_map,
- + BUS_DMASYNC_POSTREAD);
- +
- + /*
- + * First part: process new packets to send.
- + */
- +
- + nm_i = kring->nr_hwcur;
- + if (nm_i != head) { /* we have new packets to send */
- + nic_i = netmap_idx_k2n(kring, nm_i);
- + for (n = 0; nm_i != head; n++) {
- + /* slot is the current slot in the netmap ring */
- + struct netmap_slot *slot = &ring->slot[nm_i];
- + u_int len = slot->len;
- + uint64_t paddr;
- + void *addr = PNMB(na, slot, &paddr);
- +
- + NM_CHECK_ADDR_LEN(addr, len);
- +
- + if (slot->flags & NS_BUF_CHANGED) {
- + /* buffer has changed, reload map */
- + netmap_reload_map(sc->txq.tx_data_tag,
- + sc->txq.data[l].tx_data_map, addr);
- + }
- + slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
- +
- + if (sc->nfe_flags & NFE_40BIT_ADDR) {
- + desc64 = &sc->txq.desc64[l];
- + desc64->physaddr[0] = htole32(NFE_ADDR_HI(paddr));
- + desc64->physaddr[1] = htole32(NFE_ADDR_LO(paddr));
- + desc64->vtag = 0;
- + desc64->length = htole16(len - 1);
- + desc64->flags =
- + htole16(NFE_TX_VALID | NFE_TX_LASTFRAG_V2);
- + } else {
- + desc32 = &sc->txq.desc32[l];
- + desc32->physaddr = htole32(NFE_ADDR_LO(paddr));
- + desc32->length = htole16(len - 1);
- + desc32->flags =
- + htole16(NFE_TX_VALID | NFE_TX_LASTFRAG_V1);
- + }
- +
- + bus_dmamap_sync(sc->txq.tx_data_tag,
- + sc->txq.data[l].tx_data_map, BUS_DMASYNC_PREWRITE);
- + nm_i = nm_next(nm_i, lim);
- + nic_i = nm_next(nic_i, lim);
- + }
- + kring->nr_hwcur = head;
- + sc->txq.cur = nic_i;
- +
- + bus_dmamap_sync(sc->txq.tx_desc_tag, sc->txq.tx_desc_map,
- + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- +
- + /* XXX something missing ? where is the last pkt marker ? */
- + NFE_WRITE(sc, NFE_RXTX_CTL, NFE_RXTX_KICKTX | sc->rxtxctl);
- + }
- +
- + /*
- + * Second part: reclaim buffers for completed transmissions.
- + */
- + if (flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
- + u_int nic_cur = sc->txq.cur;
- + nic_i = sc->txq.next;
- + for (n = 0; nic_i != nic_cur; n++, NFE_INC(nic_i, NFE_TX_RING_COUNT)) {
- + uint16_t flags;
- + if (sc->nfe_flags & NFE_40BIT_ADDR) {
- + desc64 = &sc->txq.desc64[l];
- + flags = le16toh(desc64->flags);
- + } else {
- + desc32 = &sc->txq.desc32[l];
- + flags = le16toh(desc32->flags);
- + }
- + if (flags & NFE_TX_VALID)
- + break;
- + }
- + if (n > 0) {
- + sc->txq.next = nic_i;
- + kring->nr_hwtail = nm_prev(netmap_idx_n2k(kring, nic_i), lim);
- + }
- + }
- +
- +
- + return 0;
- +}
- +
- +
- +/*
- + * Reconcile kernel and user view of the receive ring.
- + */
- +static int
- +nfe_netmap_rxsync(struct netmap_kring *kring, int flags)
- +{
- + struct netmap_adapter *na = kring->na;
- + struct ifnet *ifp = na->ifp;
- + struct netmap_ring *ring = kring->ring;
- + u_int nm_i; /* index into the netmap ring */
- + u_int nic_i; /* index into the NIC ring */
- + u_int n;
- + u_int const lim = kring->nkr_num_slots - 1;
- + u_int const head = kring->rhead;
- + int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
- +
- + /* device-specific */
- + struct nfe_softc *sc = ifp->if_softc;
- + struct nfe_desc32 *desc32;
- + struct nfe_desc64 *desc64;
- +
- + if (head > lim)
- + return netmap_ring_reinit(kring);
- +
- + bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map,
- + BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
- +
- + /*
- + * First part: import newly received packets.
- + */
- + if (netmap_no_pendintr || force_update) {
- + uint16_t flags, len;
- + uint16_t slot_flags = kring->nkr_slot_flags;
- +
- + nic_i = sc->rxq.cur;
- + nm_i = netmap_idx_n2k(kring, nic_i);
- + for (n = 0; ; n++) {
- + if (sc->nfe_flags & NFE_40BIT_ADDR) {
- + desc64 = &sc->rxq.desc64[sc->rxq.cur];
- + flags = le16toh(desc64->flags);
- + len = le16toh(desc64->length) & NFE_RX_LEN_MASK;
- + } else {
- + desc32 = &sc->rxq.desc32[sc->rxq.cur];
- + flags = le16toh(desc32->flags);
- + len = le16toh(desc32->length) & NFE_RX_LEN_MASK;
- + }
- +
- + if (flags & NFE_RX_READY)
- + break;
- +
- + ring->slot[nm_i].len = len;
- + ring->slot[nm_i].flags = slot_flags;
- + bus_dmamap_sync(sc->rxq.rx_data_tag,
- + sc->rxq.data[nic_i].rx_data_map,
- + BUS_DMASYNC_POSTREAD);
- + nm_i = nm_next(nm_i, lim);
- + nic_i = nm_next(nic_i, lim);
- + }
- + if (n) { /* update the state variables */
- + sc->rxq.cur = nic_i;
- + kring->nr_hwtail = nm_i;
- + }
- + kring->nr_kflags &= ~NKR_PENDINTR;
- + }
- +
- + /*
- + * Second part: skip past packets that userspace has released.
- + */
- + nm_i = kring->nr_hwcur;
- + if (nm_i != head) {
- + nic_i = netmap_idx_k2n(kring, nm_i);
- + for (n = 0; nm_i != head; n++) {
- + struct netmap_slot *slot = &ring->slot[nm_i];
- + uint64_t paddr;
- + void *addr = PNMB(na, slot, &paddr);
- +
- + if (addr == netmap_buffer_base) /* bad buf */
- + goto ring_reset;
- +
- + if (slot->flags & NS_BUF_CHANGED) {
- + /* buffer has changed, reload map */
- + netmap_reload_map(sc->rxq.rx_data_tag,
- + sc->rxq.data[l].rx_data_map, addr);
- + slot->flags &= ~NS_BUF_CHANGED;
- + }
- + if (sc->nfe_flags & NFE_40BIT_ADDR) {
- + desc64 = &sc->rxq.desc64[nic_i];
- + desc64->physaddr[0] =
- + htole32(NFE_ADDR_HI(paddr));
- + desc64->physaddr[1] =
- + htole32(NFE_ADDR_LO(paddr));
- + desc64->length = htole16(NETMAP_BUF_SIZE);
- + desc64->flags = htole16(NFE_RX_READY);
- + } else {
- + desc32 = &sc->rxq.desc32[nic_i];
- + desc32->physaddr =
- + htole32(NFE_ADDR_LO(paddr));
- + desc32->length = htole16(NETMAP_BUF_SIZE);
- + desc32->flags = htole16(NFE_RX_READY);
- + }
- +
- + bus_dmamap_sync(sc->rxq.rx_data_tag,
- + sc->rxq.data[nic_i].rx_data_map,
- + BUS_DMASYNC_PREREAD);
- + nm_i = nm_next(nm_i, lim);
- + nic_i = nm_next(nic_i, lim);
- + }
- + kring->nr_hwcur = head;
- + bus_dmamap_sync(sc->rxq.rx_desc_tag, sc->rxq.rx_desc_map,
- + BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
- + }
- +
- +
- + return 0;
- +
- +ring_reset:
- + return netmap_ring_reinit(kring);
- +}
- +
- +
- +static void
- +nfe_netmap_attach(struct nfe_softc *sc)
- +{
- + struct netmap_adapter na;
- +
- + bzero(&na, sizeof(na));
- +
- + na.ifp = sc->nfe_ifp;
- + na.na_flags = NAF_BDG_MAYSLEEP;
- + na.num_tx_desc = NFE_TX_RING_COUNT;
- + na.num_rx_desc = NFE_RX_RING_COUNT;
- + na.nm_txsync = nfe_netmap_txsync;
- + na.nm_rxsync = nfe_netmap_rxsync;
- + na.nm_register = nfe_netmap_reg;
- + na.num_tx_rings = na.num_rx_rings = 1;
- + netmap_attach(&na, 1);
- +}
- +
- +/* end of file */
- diff --git a/sys/dev/netmap/if_re_netmap.h b/sys/dev/netmap/if_re_netmap.h
- index ac08aedd796..28971cb7d93 100644
- --- a/sys/dev/netmap/if_re_netmap.h
- +++ b/sys/dev/netmap/if_re_netmap.h
- @@ -24,7 +24,7 @@
- */
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/dev/netmap/if_re_netmap.h 234225 2012-04-13 15:33:12Z luigi $
- *
- * netmap support for: re
- *
- diff --git a/sys/dev/netmap/if_vtnet_netmap.h b/sys/dev/netmap/if_vtnet_netmap.h
- index 4bed0e718dd..4d8d9e36749 100644
- --- a/sys/dev/netmap/if_vtnet_netmap.h
- +++ b/sys/dev/netmap/if_vtnet_netmap.h
- @@ -24,7 +24,7 @@
- */
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/dev/netmap/if_vtnet_netmap.h 270097 2014-08-17 10:25:27Z luigi $
- */
- #include <net/netmap.h>
- diff --git a/sys/dev/netmap/ixgbe_netmap.h b/sys/dev/netmap/ixgbe_netmap.h
- index 7986c996517..ddfed4a44a5 100644
- --- a/sys/dev/netmap/ixgbe_netmap.h
- +++ b/sys/dev/netmap/ixgbe_netmap.h
- @@ -24,7 +24,7 @@
- */
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/dev/netmap/ixgbe_netmap.h 244514 2012-12-20 22:26:03Z luigi $
- *
- * netmap support for: ixgbe (both ix and ixv)
- *
- diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c
- index 15e44815acc..3a3ae0ee0ff 100644
- --- a/sys/dev/netmap/netmap.c
- +++ b/sys/dev/netmap/netmap.c
- @@ -388,7 +388,7 @@ ports attached to the switch)
- *
- * - VALE ports:
- * concurrently:
- - * 1) ioctlNIOCRXSYNC)/netmap_poll() in process context
- + * 1) ioctl(NIOCRXSYNC)/netmap_poll() in process context
- * kring->nm_sync() == netmap_vp_rxsync()
- * 2) from nm_bdg_flush()
- * na->nm_notify() == netmap_notify()
- @@ -484,7 +484,7 @@ int netmap_mitigate = 1;
- int netmap_no_pendintr = 1;
- int netmap_txsync_retry = 2;
- int netmap_flags = 0; /* debug flags */
- -static int netmap_fwd = 0; /* force transparent mode */
- +static int netmap_fwd = 0; /* force transparent forwarding */
- /*
- * netmap_admode selects the netmap mode to use.
- @@ -522,6 +522,9 @@ int netmap_generic_rings = 1;
- /* Non-zero if ptnet devices are allowed to use virtio-net headers. */
- int ptnet_vnet_hdr = 1;
- +/* 0 if ptnetmap should not use worker threads for TX processing */
- +int ptnetmap_tx_workers = 1;
- +
- /*
- * SYSCTL calls are grouped between SYSBEGIN and SYSEND to be emulated
- * in some other operating systems
- @@ -548,6 +551,7 @@ SYSCTL_INT(_dev_netmap, OID_AUTO, generic_ringsize, CTLFLAG_RW, &netmap_generic_
- SYSCTL_INT(_dev_netmap, OID_AUTO, generic_rings, CTLFLAG_RW, &netmap_generic_rings, 0 , "");
- SYSCTL_INT(_dev_netmap, OID_AUTO, generic_txqdisc, CTLFLAG_RW, &netmap_generic_txqdisc, 0 , "");
- SYSCTL_INT(_dev_netmap, OID_AUTO, ptnet_vnet_hdr, CTLFLAG_RW, &ptnet_vnet_hdr, 0 , "");
- +SYSCTL_INT(_dev_netmap, OID_AUTO, ptnetmap_tx_workers, CTLFLAG_RW, &ptnetmap_tx_workers, 0 , "");
- SYSEND;
- @@ -669,7 +673,7 @@ nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg)
- op = "Clamp";
- }
- if (op && msg)
- - printf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
- + nm_prinf("%s %s to %d (was %d)\n", op, msg, *v, oldv);
- return *v;
- }
- @@ -801,13 +805,18 @@ netmap_krings_create(struct netmap_adapter *na, u_int tailroom)
- u_int n[NR_TXRX];
- enum txrx t;
- + if (na->tx_rings != NULL) {
- + D("warning: krings were already created");
- + return 0;
- + }
- +
- /* account for the (possibly fake) host rings */
- n[NR_TX] = na->num_tx_rings + 1;
- n[NR_RX] = na->num_rx_rings + 1;
- len = (n[NR_TX] + n[NR_RX]) * sizeof(struct netmap_kring) + tailroom;
- - na->tx_rings = malloc((size_t)len, M_DEVBUF, M_NOWAIT | M_ZERO);
- + na->tx_rings = nm_os_malloc((size_t)len);
- if (na->tx_rings == NULL) {
- D("Cannot allocate krings");
- return ENOMEM;
- @@ -866,6 +875,11 @@ netmap_krings_delete(struct netmap_adapter *na)
- struct netmap_kring *kring = na->tx_rings;
- enum txrx t;
- + if (na->tx_rings == NULL) {
- + D("warning: krings were already deleted");
- + return;
- + }
- +
- for_rx_tx(t)
- nm_os_selinfo_uninit(&na->si[t]);
- @@ -874,7 +888,7 @@ netmap_krings_delete(struct netmap_adapter *na)
- mtx_destroy(&kring->q_lock);
- nm_os_selinfo_uninit(&kring->si);
- }
- - free(na->tx_rings, M_DEVBUF);
- + nm_os_free(na->tx_rings);
- na->tx_rings = na->rx_rings = na->tailroom = NULL;
- }
- @@ -983,8 +997,7 @@ netmap_priv_new(void)
- {
- struct netmap_priv_d *priv;
- - priv = malloc(sizeof(struct netmap_priv_d), M_DEVBUF,
- - M_NOWAIT | M_ZERO);
- + priv = nm_os_malloc(sizeof(struct netmap_priv_d));
- if (priv == NULL)
- return NULL;
- priv->np_refs = 1;
- @@ -1016,7 +1029,7 @@ netmap_priv_delete(struct netmap_priv_d *priv)
- }
- netmap_unget_na(na, priv->np_ifp);
- bzero(priv, sizeof(*priv)); /* for safety */
- - free(priv, M_DEVBUF);
- + nm_os_free(priv);
- }
- @@ -1032,20 +1045,27 @@ netmap_dtor(void *data)
- }
- -
- -
- /*
- - * Handlers for synchronization of the queues from/to the host.
- - * Netmap has two operating modes:
- - * - in the default mode, the rings connected to the host stack are
- - * just another ring pair managed by userspace;
- - * - in transparent mode (XXX to be defined) incoming packets
- - * (from the host or the NIC) are marked as NS_FORWARD upon
- - * arrival, and the user application has a chance to reset the
- - * flag for packets that should be dropped.
- - * On the RXSYNC or poll(), packets in RX rings between
- - * kring->nr_kcur and ring->cur with NS_FORWARD still set are moved
- - * to the other side.
- + * Handlers for synchronization of the rings from/to the host stack.
- + * These are associated to a network interface and are just another
- + * ring pair managed by userspace.
- + *
- + * Netmap also supports transparent forwarding (NS_FORWARD and NR_FORWARD
- + * flags):
- + *
- + * - Before releasing buffers on hw RX rings, the application can mark
- + * them with the NS_FORWARD flag. During the next RXSYNC or poll(), they
- + * will be forwarded to the host stack, similarly to what happened if
- + * the application moved them to the host TX ring.
- + *
- + * - Before releasing buffers on the host RX ring, the application can
- + * mark them with the NS_FORWARD flag. During the next RXSYNC or poll(),
- + * they will be forwarded to the hw TX rings, saving the application
- + * from doing the same task in user-space.
- + *
- + * Transparent forwarding can be enabled per-ring, by setting the NR_FORWARD
- + * flag, or globally with the netmap_fwd sysctl.
- + *
- * The transfer NIC --> host is relatively easy, just encapsulate
- * into mbufs and we are done. The host --> NIC side is slightly
- * harder because there might not be room in the tx ring so it
- @@ -1054,8 +1074,9 @@ netmap_dtor(void *data)
- /*
- - * pass a chain of buffers to the host stack as coming from 'dst'
- + * Pass a whole queue of mbufs to the host stack as coming from 'dst'
- * We do not need to lock because the queue is private.
- + * After this call the queue is empty.
- */
- static void
- netmap_send_up(struct ifnet *dst, struct mbq *q)
- @@ -1063,7 +1084,8 @@ netmap_send_up(struct ifnet *dst, struct mbq *q)
- struct mbuf *m;
- struct mbuf *head = NULL, *prev = NULL;
- - /* send packets up, outside the lock */
- + /* Send packets up, outside the lock; head/prev machinery
- + * is only useful for Windows. */
- while ((m = mbq_dequeue(q)) != NULL) {
- if (netmap_verbose & NM_VERB_HOST)
- D("sending up pkt %p size %d", m, MBUF_LEN(m));
- @@ -1078,9 +1100,9 @@ netmap_send_up(struct ifnet *dst, struct mbq *q)
- /*
- - * put a copy of the buffers marked NS_FORWARD into an mbuf chain.
- - * Take packets from hwcur to ring->head marked NS_FORWARD (or forced)
- - * and pass them up. Drop remaining packets in the unlikely event
- + * Scan the buffers from hwcur to ring->head, and put a copy of those
- + * marked NS_FORWARD (or all of them if forced) into a queue of mbufs.
- + * Drop remaining packets in the unlikely event
- * of an mbuf shortage.
- */
- static void
- @@ -1127,16 +1149,24 @@ nm_may_forward_up(struct netmap_kring *kring)
- }
- static inline int
- -nm_may_forward_down(struct netmap_kring *kring)
- +nm_may_forward_down(struct netmap_kring *kring, int sync_flags)
- {
- return _nm_may_forward(kring) &&
- + (sync_flags & NAF_CAN_FORWARD_DOWN) &&
- kring->ring_id == kring->na->num_rx_rings;
- }
- /*
- * Send to the NIC rings packets marked NS_FORWARD between
- - * kring->nr_hwcur and kring->rhead
- - * Called under kring->rx_queue.lock on the sw rx ring,
- + * kring->nr_hwcur and kring->rhead.
- + * Called under kring->rx_queue.lock on the sw rx ring.
- + *
- + * It can only be called if the user opened all the TX hw rings,
- + * see NAF_CAN_FORWARD_DOWN flag.
- + * We can touch the TX netmap rings (slots, head and cur) since
- + * we are in poll/ioctl system call context, and the application
- + * is not supposed to touch the ring (using a different thread)
- + * during the execution of the system call.
- */
- static u_int
- netmap_sw_to_nic(struct netmap_adapter *na)
- @@ -1179,7 +1209,7 @@ netmap_sw_to_nic(struct netmap_adapter *na)
- rdst->head = rdst->cur = nm_next(dst_head, dst_lim);
- }
- - /* if (sent) XXX txsync ? */
- + /* if (sent) XXX txsync ? it would be just an optimization */
- }
- return sent;
- }
- @@ -1200,9 +1230,7 @@ netmap_txsync_to_host(struct netmap_kring *kring, int flags)
- struct mbq q;
- /* Take packets from hwcur to head and pass them up.
- - * force head = cur since netmap_grab_packets() stops at head
- - * In case of no buffers we give up. At the end of the loop,
- - * the queue is drained in all cases.
- + * Force hwcur = head since netmap_grab_packets() stops at head
- */
- mbq_init(&q);
- netmap_grab_packets(kring, &q, 1 /* force */);
- @@ -1222,11 +1250,9 @@ netmap_txsync_to_host(struct netmap_kring *kring, int flags)
- * They have been put in kring->rx_queue by netmap_transmit().
- * We protect access to the kring using kring->rx_queue.lock
- *
- - * This routine also does the selrecord if called from the poll handler
- - * (we know because sr != NULL).
- - *
- - * returns the number of packets delivered to tx queues in
- - * transparent mode, or a negative value if error
- + * also moves to the nic hw rings any packet the user has marked
- + * for transparent-mode forwarding, then sets the NR_FORWARD
- + * flag in the kring to let the caller push them out
- */
- static int
- netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
- @@ -1250,7 +1276,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
- uint32_t stop_i;
- nm_i = kring->nr_hwtail;
- - stop_i = nm_prev(nm_i, lim);
- + stop_i = nm_prev(kring->nr_hwcur, lim);
- while ( nm_i != stop_i && (m = mbq_dequeue(q)) != NULL ) {
- int len = MBUF_LEN(m);
- struct netmap_slot *slot = &ring->slot[nm_i];
- @@ -1273,7 +1299,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
- */
- nm_i = kring->nr_hwcur;
- if (nm_i != head) { /* something was released */
- - if (nm_may_forward_down(kring)) {
- + if (nm_may_forward_down(kring, flags)) {
- ret = netmap_sw_to_nic(na);
- if (ret > 0) {
- kring->nr_kflags |= NR_FORWARD;
- @@ -1317,7 +1343,7 @@ netmap_rxsync_from_host(struct netmap_kring *kring, int flags)
- */
- static void netmap_hw_dtor(struct netmap_adapter *); /* needed by NM_IS_NATIVE() */
- int
- -netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
- +netmap_get_hw_na(struct ifnet *ifp, struct netmap_mem_d *nmd, struct netmap_adapter **na)
- {
- /* generic support */
- int i = netmap_admode; /* Take a snapshot. */
- @@ -1348,7 +1374,7 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
- #endif
- ) {
- *na = prev_na;
- - return 0;
- + goto assign_mem;
- }
- }
- @@ -1377,10 +1403,17 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
- return error;
- *na = NA(ifp);
- +
- +assign_mem:
- + if (nmd != NULL && !((*na)->na_flags & NAF_MEM_OWNER) &&
- + (*na)->active_fds == 0 && ((*na)->nm_mem != nmd)) {
- + netmap_mem_put((*na)->nm_mem);
- + (*na)->nm_mem = netmap_mem_get(nmd);
- + }
- +
- return 0;
- }
- -
- /*
- * MUST BE CALLED UNDER NMG_LOCK()
- *
- @@ -1400,16 +1433,28 @@ netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na)
- */
- int
- netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
- - struct ifnet **ifp, int create)
- + struct ifnet **ifp, struct netmap_mem_d *nmd, int create)
- {
- int error = 0;
- struct netmap_adapter *ret = NULL;
- + int nmd_ref = 0;
- *na = NULL; /* default return value */
- *ifp = NULL;
- NMG_LOCK_ASSERT();
- + /* if the request contain a memid, try to find the
- + * corresponding memory region
- + */
- + if (nmd == NULL && nmr->nr_arg2) {
- + nmd = netmap_mem_find(nmr->nr_arg2);
- + if (nmd == NULL)
- + return EINVAL;
- + /* keep the reference */
- + nmd_ref = 1;
- + }
- +
- /* We cascade through all possible types of netmap adapter.
- * All netmap_get_*_na() functions return an error and an na,
- * with the following combinations:
- @@ -1422,24 +1467,24 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
- */
- /* try to see if this is a ptnetmap port */
- - error = netmap_get_pt_host_na(nmr, na, create);
- + error = netmap_get_pt_host_na(nmr, na, nmd, create);
- if (error || *na != NULL)
- - return error;
- + goto out;
- /* try to see if this is a monitor port */
- - error = netmap_get_monitor_na(nmr, na, create);
- + error = netmap_get_monitor_na(nmr, na, nmd, create);
- if (error || *na != NULL)
- - return error;
- + goto out;
- /* try to see if this is a pipe port */
- - error = netmap_get_pipe_na(nmr, na, create);
- + error = netmap_get_pipe_na(nmr, na, nmd, create);
- if (error || *na != NULL)
- - return error;
- + goto out;
- /* try to see if this is a bridge port */
- - error = netmap_get_bdg_na(nmr, na, create);
- + error = netmap_get_bdg_na(nmr, na, nmd, create);
- if (error)
- - return error;
- + goto out;
- if (*na != NULL) /* valid match in netmap_get_bdg_na() */
- goto out;
- @@ -1452,10 +1497,11 @@ netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
- */
- *ifp = ifunit_ref(nmr->nr_name);
- if (*ifp == NULL) {
- - return ENXIO;
- + error = ENXIO;
- + goto out;
- }
- - error = netmap_get_hw_na(*ifp, &ret);
- + error = netmap_get_hw_na(*ifp, nmd, &ret);
- if (error)
- goto out;
- @@ -1471,6 +1517,8 @@ out:
- *ifp = NULL;
- }
- }
- + if (nmd_ref)
- + netmap_mem_put(nmd);
- return error;
- }
- @@ -1712,7 +1760,8 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
- D("deprecated API, old ringid 0x%x -> ringid %x reg %d", ringid, i, reg);
- }
- - if ((flags & NR_PTNETMAP_HOST) && (reg != NR_REG_ALL_NIC ||
- + if ((flags & NR_PTNETMAP_HOST) && ((reg != NR_REG_ALL_NIC &&
- + reg != NR_REG_PIPE_MASTER && reg != NR_REG_PIPE_SLAVE) ||
- flags & (NR_RX_RINGS_ONLY|NR_TX_RINGS_ONLY))) {
- D("Error: only NR_REG_ALL_NIC supported with netmap passthrough");
- return EINVAL;
- @@ -1766,6 +1815,13 @@ netmap_interp_ringid(struct netmap_priv_d *priv, uint16_t ringid, uint32_t flags
- }
- priv->np_flags = (flags & ~NR_REG_MASK) | reg;
- + /* Allow transparent forwarding mode in the host --> nic
- + * direction only if all the TX hw rings have been opened. */
- + if (priv->np_qfirst[NR_TX] == 0 &&
- + priv->np_qlast[NR_TX] >= na->num_tx_rings) {
- + priv->np_sync_flags |= NAF_CAN_FORWARD_DOWN;
- + }
- +
- if (netmap_verbose) {
- D("%s: tx [%d,%d) rx [%d,%d) id %d",
- na->name,
- @@ -2029,7 +2085,7 @@ netmap_do_regif(struct netmap_priv_d *priv, struct netmap_adapter *na,
- goto err_rel_excl;
- /* in all cases, create a new netmap if */
- - nifp = netmap_mem_if_new(na);
- + nifp = netmap_mem_if_new(na, priv);
- if (nifp == NULL) {
- error = ENOMEM;
- goto err_del_rings;
- @@ -2103,6 +2159,16 @@ nm_sync_finalize(struct netmap_kring *kring)
- kring->rhead, kring->rcur, kring->rtail);
- }
- +/* set ring timestamp */
- +static inline void
- +ring_timestamp_set(struct netmap_ring *ring)
- +{
- + if (netmap_no_timestamp == 0 || ring->flags & NR_TIMESTAMP) {
- + microtime(&ring->ts);
- + }
- +}
- +
- +
- /*
- * ioctl(2) support for the "netmap" device.
- *
- @@ -2118,13 +2184,16 @@ nm_sync_finalize(struct netmap_kring *kring)
- int
- netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread *td)
- {
- + struct mbq q; /* packets from RX hw queues to host stack */
- struct nmreq *nmr = (struct nmreq *) data;
- struct netmap_adapter *na = NULL;
- + struct netmap_mem_d *nmd = NULL;
- struct ifnet *ifp = NULL;
- int error = 0;
- u_int i, qfirst, qlast;
- struct netmap_if *nifp;
- struct netmap_kring *krings;
- + int sync_flags;
- enum txrx t;
- if (cmd == NIOCGINFO || cmd == NIOCREGIF) {
- @@ -2152,19 +2221,24 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
- NMG_LOCK();
- do {
- /* memsize is always valid */
- - struct netmap_mem_d *nmd = &nm_mem;
- u_int memflags;
- if (nmr->nr_name[0] != '\0') {
- /* get a refcount */
- - error = netmap_get_na(nmr, &na, &ifp, 1 /* create */);
- + error = netmap_get_na(nmr, &na, &ifp, NULL, 1 /* create */);
- if (error) {
- na = NULL;
- ifp = NULL;
- break;
- }
- nmd = na->nm_mem; /* get memory allocator */
- + } else {
- + nmd = netmap_mem_find(nmr->nr_arg2 ? nmr->nr_arg2 : 1);
- + if (nmd == NULL) {
- + error = EINVAL;
- + break;
- + }
- }
- error = netmap_mem_get_info(nmd, &nmr->nr_memsize, &memflags,
- @@ -2210,7 +2284,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
- struct ifnet *ifp;
- NMG_LOCK();
- - error = netmap_get_na(nmr, &na, &ifp, 0);
- + error = netmap_get_na(nmr, &na, &ifp, NULL, 0);
- if (na && !error) {
- nmr->nr_arg1 = na->virt_hdr_len;
- }
- @@ -2219,7 +2293,14 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
- break;
- } else if (i == NETMAP_POOLS_INFO_GET) {
- /* get information from the memory allocator */
- - error = netmap_mem_pools_info_get(nmr, priv->np_na);
- + NMG_LOCK();
- + if (priv->np_na && priv->np_na->nm_mem) {
- + struct netmap_mem_d *nmd = priv->np_na->nm_mem;
- + error = netmap_mem_pools_info_get(nmr, nmd);
- + } else {
- + error = EINVAL;
- + }
- + NMG_UNLOCK();
- break;
- } else if (i != 0) {
- D("nr_cmd must be 0 not %d", i);
- @@ -2237,26 +2318,32 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
- error = EBUSY;
- break;
- }
- +
- + if (nmr->nr_arg2) {
- + /* find the allocator and get a reference */
- + nmd = netmap_mem_find(nmr->nr_arg2);
- + if (nmd == NULL) {
- + error = EINVAL;
- + break;
- + }
- + }
- /* find the interface and a reference */
- - error = netmap_get_na(nmr, &na, &ifp,
- + error = netmap_get_na(nmr, &na, &ifp, nmd,
- 1 /* create */); /* keep reference */
- if (error)
- break;
- if (NETMAP_OWNED_BY_KERN(na)) {
- - netmap_unget_na(na, ifp);
- error = EBUSY;
- break;
- }
- if (na->virt_hdr_len && !(nmr->nr_flags & NR_ACCEPT_VNET_HDR)) {
- - netmap_unget_na(na, ifp);
- error = EIO;
- break;
- }
- error = netmap_do_regif(priv, na, nmr->nr_ringid, nmr->nr_flags);
- if (error) { /* reg. failed, release priv and ref */
- - netmap_unget_na(na, ifp);
- break;
- }
- nifp = priv->np_nifp;
- @@ -2271,7 +2358,6 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
- &nmr->nr_arg2);
- if (error) {
- netmap_do_unregif(priv);
- - netmap_unget_na(na, ifp);
- break;
- }
- if (memflags & NETMAP_MEM_PRIVATE) {
- @@ -2295,6 +2381,14 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
- /* store ifp reference so that priv destructor may release it */
- priv->np_ifp = ifp;
- } while (0);
- + if (error) {
- + netmap_unget_na(na, ifp);
- + }
- + /* release the reference from netmap_mem_find() or
- + * netmap_mem_ext_create()
- + */
- + if (nmd)
- + netmap_mem_put(nmd);
- NMG_UNLOCK();
- break;
- @@ -2316,10 +2410,12 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
- break;
- }
- + mbq_init(&q);
- t = (cmd == NIOCTXSYNC ? NR_TX : NR_RX);
- krings = NMR(na, t);
- qfirst = priv->np_qfirst[t];
- qlast = priv->np_qlast[t];
- + sync_flags = priv->np_sync_flags;
- for (i = qfirst; i < qlast; i++) {
- struct netmap_kring *kring = krings + i;
- @@ -2337,7 +2433,7 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
- kring->nr_hwcur);
- if (nm_txsync_prologue(kring, ring) >= kring->nkr_num_slots) {
- netmap_ring_reinit(kring);
- - } else if (kring->nm_sync(kring, NAF_FORCE_RECLAIM) == 0) {
- + } else if (kring->nm_sync(kring, sync_flags | NAF_FORCE_RECLAIM) == 0) {
- nm_sync_finalize(kring);
- }
- if (netmap_verbose & NM_VERB_TXSYNC)
- @@ -2347,14 +2443,23 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread
- } else {
- if (nm_rxsync_prologue(kring, ring) >= kring->nkr_num_slots) {
- netmap_ring_reinit(kring);
- - } else if (kring->nm_sync(kring, NAF_FORCE_READ) == 0) {
- + }
- + if (nm_may_forward_up(kring)) {
- + /* transparent forwarding, see netmap_poll() */
- + netmap_grab_packets(kring, &q, netmap_fwd);
- + }
- + if (kring->nm_sync(kring, sync_flags | NAF_FORCE_READ) == 0) {
- nm_sync_finalize(kring);
- }
- - microtime(&ring->ts);
- + ring_timestamp_set(ring);
- }
- nm_kr_put(kring);
- }
- + if (mbq_peek(&q)) {
- + netmap_send_up(na->ifp, &q);
- + }
- +
- break;
- #ifdef WITH_VALE
- @@ -2425,7 +2530,7 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
- u_int i, check_all_tx, check_all_rx, want[NR_TXRX], revents = 0;
- #define want_tx want[NR_TX]
- #define want_rx want[NR_RX]
- - struct mbq q; /* packets from hw queues to host stack */
- + struct mbq q; /* packets from RX hw queues to host stack */
- enum txrx t;
- /*
- @@ -2435,11 +2540,14 @@ netmap_poll(struct netmap_priv_d *priv, int events, NM_SELRECORD_T *sr)
- */
- int retry_tx = 1, retry_rx = 1;
- - /* transparent mode: send_down is 1 if we have found some
- - * packets to forward during the rx scan and we have not
- - * sent them down to the nic yet
- + /* Transparent mode: send_down is 1 if we have found some
- + * packets to forward (host RX ring --> NIC) during the rx
- + * scan and we have not sent them down to the NIC yet.
- + * Transparent mode requires to bind all rings to a single
- + * file descriptor.
- */
- int send_down = 0;
- + int sync_flags = priv->np_sync_flags;
- mbq_init(&q);
- @@ -2549,7 +2657,7 @@ flush_tx:
- netmap_ring_reinit(kring);
- revents |= POLLERR;
- } else {
- - if (kring->nm_sync(kring, 0))
- + if (kring->nm_sync(kring, sync_flags))
- revents |= POLLERR;
- else
- nm_sync_finalize(kring);
- @@ -2602,25 +2710,23 @@ do_retry_rx:
- /* now we can use kring->rcur, rtail */
- /*
- - * transparent mode support: collect packets
- - * from the rxring(s).
- + * transparent mode support: collect packets from
- + * hw rxring(s) that have been released by the user
- */
- if (nm_may_forward_up(kring)) {
- - ND(10, "forwarding some buffers up %d to %d",
- - kring->nr_hwcur, ring->cur);
- netmap_grab_packets(kring, &q, netmap_fwd);
- }
- + /* Clear the NR_FORWARD flag anyway, it may be set by
- + * the nm_sync() below only for the host RX ring (see
- + * netmap_rxsync_from_host()). */
- kring->nr_kflags &= ~NR_FORWARD;
- - if (kring->nm_sync(kring, 0))
- + if (kring->nm_sync(kring, sync_flags))
- revents |= POLLERR;
- else
- nm_sync_finalize(kring);
- - send_down |= (kring->nr_kflags & NR_FORWARD); /* host ring only */
- - if (netmap_no_timestamp == 0 ||
- - ring->flags & NR_TIMESTAMP) {
- - microtime(&ring->ts);
- - }
- + send_down |= (kring->nr_kflags & NR_FORWARD);
- + ring_timestamp_set(ring);
- found = kring->rcur != kring->rtail;
- nm_kr_put(kring);
- if (found) {
- @@ -2634,7 +2740,7 @@ do_retry_rx:
- nm_os_selrecord(sr, check_all_rx ?
- &na->si[NR_RX] : &na->rx_rings[priv->np_qfirst[NR_RX]].si);
- }
- - if (send_down > 0 || retry_rx) {
- + if (send_down || retry_rx) {
- retry_rx = 0;
- if (send_down)
- goto flush_tx; /* and retry_rx */
- @@ -2644,17 +2750,13 @@ do_retry_rx:
- }
- /*
- - * Transparent mode: marked bufs on rx rings between
- - * kring->nr_hwcur and ring->head
- - * are passed to the other endpoint.
- - *
- - * Transparent mode requires to bind all
- - * rings to a single file descriptor.
- + * Transparent mode: released bufs (i.e. between kring->nr_hwcur and
- + * ring->head) marked with NS_FORWARD on hw rx rings are passed up
- + * to the host stack.
- */
- - if (q.head && !nm_kr_tryget(&na->tx_rings[na->num_tx_rings], 1, &revents)) {
- + if (mbq_peek(&q)) {
- netmap_send_up(na->ifp, &q);
- - nm_kr_put(&na->tx_rings[na->num_tx_rings]);
- }
- return (revents);
- @@ -2683,22 +2785,6 @@ netmap_notify(struct netmap_kring *kring, int flags)
- return NM_IRQ_COMPLETED;
- }
- -#if 0
- -static int
- -netmap_notify(struct netmap_adapter *na, u_int n_ring,
- -enum txrx tx, int flags)
- -{
- - if (tx == NR_TX) {
- - KeSetEvent(notes->TX_EVENT, 0, FALSE);
- - }
- - else
- - {
- - KeSetEvent(notes->RX_EVENT, 0, FALSE);
- - }
- - return 0;
- -}
- -#endif
- -
- /* called by all routines that create netmap_adapters.
- * provide some defaults and get a reference to the
- * memory allocator
- @@ -2729,10 +2815,10 @@ netmap_attach_common(struct netmap_adapter *na)
- na->nm_notify = netmap_notify;
- na->active_fds = 0;
- - if (na->nm_mem == NULL)
- + if (na->nm_mem == NULL) {
- /* use the global allocator */
- - na->nm_mem = &nm_mem;
- - netmap_mem_get(na->nm_mem);
- + na->nm_mem = netmap_mem_get(&nm_mem);
- + }
- #ifdef WITH_VALE
- if (na->nm_bdg_attach == NULL)
- /* no special nm_bdg_attach callback. On VALE
- @@ -2757,7 +2843,7 @@ netmap_detach_common(struct netmap_adapter *na)
- if (na->nm_mem)
- netmap_mem_put(na->nm_mem);
- bzero(na, sizeof(*na));
- - free(na, M_DEVBUF);
- + nm_os_free(na);
- }
- /* Wrapper for the register callback provided netmap-enabled
- @@ -2804,26 +2890,28 @@ netmap_hw_dtor(struct netmap_adapter *na)
- /*
- - * Allocate a ``netmap_adapter`` object, and initialize it from the
- + * Allocate a netmap_adapter object, and initialize it from the
- * 'arg' passed by the driver on attach.
- - * We allocate a block of memory with room for a struct netmap_adapter
- - * plus two sets of N+2 struct netmap_kring (where N is the number
- - * of hardware rings):
- - * krings 0..N-1 are for the hardware queues.
- - * kring N is for the host stack queue
- - * kring N+1 is only used for the selinfo for all queues. // XXX still true ?
- + * We allocate a block of memory of 'size' bytes, which has room
- + * for struct netmap_adapter plus additional room private to
- + * the caller.
- * Return 0 on success, ENOMEM otherwise.
- */
- -static int
- -_netmap_attach(struct netmap_adapter *arg, size_t size)
- +int
- +netmap_attach_ext(struct netmap_adapter *arg, size_t size)
- {
- struct netmap_hw_adapter *hwna = NULL;
- struct ifnet *ifp = NULL;
- + if (size < sizeof(struct netmap_hw_adapter)) {
- + D("Invalid netmap adapter size %d", (int)size);
- + return EINVAL;
- + }
- +
- if (arg == NULL || arg->ifp == NULL)
- goto fail;
- ifp = arg->ifp;
- - hwna = malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
- + hwna = nm_os_malloc(size);
- if (hwna == NULL)
- goto fail;
- hwna->up = *arg;
- @@ -2832,7 +2920,7 @@ _netmap_attach(struct netmap_adapter *arg, size_t size)
- hwna->nm_hw_register = hwna->up.nm_register;
- hwna->up.nm_register = netmap_hw_reg;
- if (netmap_attach_common(&hwna->up)) {
- - free(hwna, M_DEVBUF);
- + nm_os_free(hwna);
- goto fail;
- }
- netmap_adapter_get(&hwna->up);
- @@ -2878,46 +2966,8 @@ fail:
- int
- netmap_attach(struct netmap_adapter *arg)
- {
- - return _netmap_attach(arg, sizeof(struct netmap_hw_adapter));
- -}
- -
- -
- -#ifdef WITH_PTNETMAP_GUEST
- -int
- -netmap_pt_guest_attach(struct netmap_adapter *arg, void *csb,
- - unsigned int nifp_offset, unsigned int memid)
- -{
- - struct netmap_pt_guest_adapter *ptna;
- - struct ifnet *ifp = arg ? arg->ifp : NULL;
- - int error;
- -
- - /* get allocator */
- - arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
- - if (arg->nm_mem == NULL)
- - return ENOMEM;
- - arg->na_flags |= NAF_MEM_OWNER;
- - error = _netmap_attach(arg, sizeof(struct netmap_pt_guest_adapter));
- - if (error)
- - return error;
- -
- - /* get the netmap_pt_guest_adapter */
- - ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
- - ptna->csb = csb;
- -
- - /* Initialize a separate pass-through netmap adapter that is going to
- - * be used by the ptnet driver only, and so never exposed to netmap
- - * applications. We only need a subset of the available fields. */
- - memset(&ptna->dr, 0, sizeof(ptna->dr));
- - ptna->dr.up.ifp = ifp;
- - ptna->dr.up.nm_mem = ptna->hwup.up.nm_mem;
- - netmap_mem_get(ptna->dr.up.nm_mem);
- - ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
- -
- - ptna->backend_regifs = 0;
- -
- - return 0;
- + return netmap_attach_ext(arg, sizeof(struct netmap_hw_adapter));
- }
- -#endif /* WITH_PTNETMAP_GUEST */
- void
- @@ -3019,7 +3069,7 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
- u_int error = ENOBUFS;
- unsigned int txr;
- struct mbq *q;
- - int space;
- + int busy;
- kring = &na->rx_rings[na->num_rx_rings];
- // XXX [Linux] we do not need this lock
- @@ -3052,28 +3102,27 @@ netmap_transmit(struct ifnet *ifp, struct mbuf *m)
- }
- if (nm_os_mbuf_has_offld(m)) {
- - RD(1, "%s drop mbuf requiring offloadings", na->name);
- + RD(1, "%s drop mbuf that needs offloadings", na->name);
- goto done;
- }
- - /* protect against rxsync_from_host(), netmap_sw_to_nic()
- + /* protect against netmap_rxsync_from_host(), netmap_sw_to_nic()
- * and maybe other instances of netmap_transmit (the latter
- * not possible on Linux).
- - * Also avoid overflowing the queue.
- + * We enqueue the mbuf only if we are sure there is going to be
- + * enough room in the host RX ring, otherwise we drop it.
- */
- mbq_lock(q);
- - space = kring->nr_hwtail - kring->nr_hwcur;
- - if (space < 0)
- - space += kring->nkr_num_slots;
- - if (space + mbq_len(q) >= kring->nkr_num_slots - 1) { // XXX
- - RD(10, "%s full hwcur %d hwtail %d qlen %d len %d m %p",
- - na->name, kring->nr_hwcur, kring->nr_hwtail, mbq_len(q),
- - len, m);
- + busy = kring->nr_hwtail - kring->nr_hwcur;
- + if (busy < 0)
- + busy += kring->nkr_num_slots;
- + if (busy + mbq_len(q) >= kring->nkr_num_slots - 1) {
- + RD(2, "%s full hwcur %d hwtail %d qlen %d", na->name,
- + kring->nr_hwcur, kring->nr_hwtail, mbq_len(q));
- } else {
- mbq_enqueue(q, m);
- - ND(10, "%s %d bufs in queue len %d m %p",
- - na->name, mbq_len(q), len, m);
- + ND(2, "%s %d bufs in queue", na->name, mbq_len(q));
- /* notify outside the lock */
- m = NULL;
- error = 0;
- @@ -3293,7 +3342,7 @@ netmap_fini(void)
- netmap_uninit_bridges();
- netmap_mem_fini();
- NMG_LOCK_DESTROY();
- - printf("netmap: unloaded module.\n");
- + nm_prinf("netmap: unloaded module.\n");
- }
- @@ -3330,7 +3379,7 @@ netmap_init(void)
- if (error)
- goto fail;
- - printf("netmap: loaded module\n");
- + nm_prinf("netmap: loaded module\n");
- return (0);
- fail:
- netmap_fini();
- diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c
- index fbbd9b35734..6d0453d3b82 100644
- --- a/sys/dev/netmap/netmap_freebsd.c
- +++ b/sys/dev/netmap/netmap_freebsd.c
- @@ -23,7 +23,7 @@
- * SUCH DAMAGE.
- */
- -/* $FreeBSD$ */
- +/* $FreeBSD: head/sys/dev/netmap/netmap_freebsd.c 307706 2016-10-21 06:32:45Z sephe $ */
- #include "opt_inet.h"
- #include "opt_inet6.h"
- @@ -89,6 +89,24 @@ nm_os_selinfo_uninit(NM_SELINFO_T *si)
- mtx_destroy(&si->m);
- }
- +void *
- +nm_os_malloc(size_t size)
- +{
- + return malloc(size, M_DEVBUF, M_NOWAIT | M_ZERO);
- +}
- +
- +void *
- +nm_os_realloc(void *addr, size_t new_size, size_t old_size __unused)
- +{
- + return realloc(addr, new_size, M_DEVBUF, M_NOWAIT | M_ZERO);
- +}
- +
- +void
- +nm_os_free(void *addr)
- +{
- + free(addr, M_DEVBUF);
- +}
- +
- void
- nm_os_ifnet_lock(void)
- {
- @@ -235,7 +253,6 @@ nm_os_csum_tcpudp_ipv6(struct nm_ipv6hdr *ip6h, void *data,
- void *
- nm_os_send_up(struct ifnet *ifp, struct mbuf *m, struct mbuf *prev)
- {
- -
- NA(ifp)->if_input(ifp, m);
- return NULL;
- }
- @@ -251,11 +268,17 @@ nm_os_mbuf_has_offld(struct mbuf *m)
- static void
- freebsd_generic_rx_handler(struct ifnet *ifp, struct mbuf *m)
- {
- - struct netmap_generic_adapter *gna =
- - (struct netmap_generic_adapter *)NA(ifp);
- - int stolen = generic_rx_handler(ifp, m);
- + int stolen;
- + if (!NM_NA_VALID(ifp)) {
- + RD(1, "Warning: got RX packet for invalid emulated adapter");
- + return;
- + }
- +
- + stolen = generic_rx_handler(ifp, m);
- if (!stolen) {
- + struct netmap_generic_adapter *gna =
- + (struct netmap_generic_adapter *)NA(ifp);
- gna->save_if_input(ifp, m);
- }
- }
- @@ -386,7 +409,6 @@ netmap_getna(if_t ifp)
- int
- nm_os_generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *rx)
- {
- - D("called, in tx %d rx %d", *tx, *rx);
- return 0;
- }
- @@ -394,9 +416,10 @@ nm_os_generic_find_num_desc(struct ifnet *ifp, unsigned int *tx, unsigned int *r
- void
- nm_os_generic_find_num_queues(struct ifnet *ifp, u_int *txq, u_int *rxq)
- {
- - D("called, in txq %d rxq %d", *txq, *rxq);
- - *txq = netmap_generic_rings;
- - *rxq = netmap_generic_rings;
- + unsigned num_rings = netmap_generic_rings ? netmap_generic_rings : 1;
- +
- + *txq = num_rings;
- + *rxq = num_rings;
- }
- void
- @@ -648,7 +671,7 @@ nm_os_pt_memdev_iomap(struct ptnetmap_memdev *ptn_dev, vm_paddr_t *nm_paddr,
- &rid, 0, ~0, *mem_size, RF_ACTIVE);
- if (ptn_dev->pci_mem == NULL) {
- *nm_paddr = 0;
- - *nm_addr = NULL;
- + *nm_addr = 0;
- return ENOMEM;
- }
- @@ -985,32 +1008,32 @@ nm_os_ncpus(void)
- return mp_maxid + 1;
- }
- -struct nm_kthread_ctx {
- +struct nm_kctx_ctx {
- struct thread *user_td; /* thread user-space (kthread creator) to send ioctl */
- struct ptnetmap_cfgentry_bhyve cfg;
- /* worker function and parameter */
- - nm_kthread_worker_fn_t worker_fn;
- + nm_kctx_worker_fn_t worker_fn;
- void *worker_private;
- - struct nm_kthread *nmk;
- + struct nm_kctx *nmk;
- /* integer to manage multiple worker contexts (e.g., RX or TX on ptnetmap) */
- long type;
- };
- -struct nm_kthread {
- +struct nm_kctx {
- struct thread *worker;
- struct mtx worker_lock;
- uint64_t scheduled; /* pending wake_up request */
- - struct nm_kthread_ctx worker_ctx;
- + struct nm_kctx_ctx worker_ctx;
- int run; /* used to stop kthread */
- int attach_user; /* kthread attached to user_process */
- int affinity;
- };
- void inline
- -nm_os_kthread_wakeup_worker(struct nm_kthread *nmk)
- +nm_os_kctx_worker_wakeup(struct nm_kctx *nmk)
- {
- /*
- * There may be a race between FE and BE,
- @@ -1030,9 +1053,9 @@ nm_os_kthread_wakeup_worker(struct nm_kthread *nmk)
- }
- void inline
- -nm_os_kthread_send_irq(struct nm_kthread *nmk)
- +nm_os_kctx_send_irq(struct nm_kctx *nmk)
- {
- - struct nm_kthread_ctx *ctx = &nmk->worker_ctx;
- + struct nm_kctx_ctx *ctx = &nmk->worker_ctx;
- int err;
- if (ctx->user_td && ctx->cfg.ioctl_fd > 0) {
- @@ -1047,10 +1070,10 @@ nm_os_kthread_send_irq(struct nm_kthread *nmk)
- }
- static void
- -nm_kthread_worker(void *data)
- +nm_kctx_worker(void *data)
- {
- - struct nm_kthread *nmk = data;
- - struct nm_kthread_ctx *ctx = &nmk->worker_ctx;
- + struct nm_kctx *nmk = data;
- + struct nm_kctx_ctx *ctx = &nmk->worker_ctx;
- uint64_t old_scheduled = nmk->scheduled;
- if (nmk->affinity >= 0) {
- @@ -1077,7 +1100,7 @@ nm_kthread_worker(void *data)
- * mechanism and we continually execute worker_fn()
- */
- if (!ctx->cfg.wchan) {
- - ctx->worker_fn(ctx->worker_private); /* worker body */
- + ctx->worker_fn(ctx->worker_private, 1); /* worker body */
- } else {
- /* checks if there is a pending notification */
- mtx_lock(&nmk->worker_lock);
- @@ -1085,13 +1108,13 @@ nm_kthread_worker(void *data)
- old_scheduled = nmk->scheduled;
- mtx_unlock(&nmk->worker_lock);
- - ctx->worker_fn(ctx->worker_private); /* worker body */
- + ctx->worker_fn(ctx->worker_private, 1); /* worker body */
- continue;
- } else if (nmk->run) {
- /* wait on event with one second timeout */
- - msleep((void *)(uintptr_t)ctx->cfg.wchan,
- - &nmk->worker_lock, 0, "nmk_ev", hz);
- + msleep((void *)(uintptr_t)ctx->cfg.wchan, &nmk->worker_lock,
- + 0, "nmk_ev", hz);
- nmk->scheduled++;
- }
- mtx_unlock(&nmk->worker_lock);
- @@ -1102,16 +1125,16 @@ nm_kthread_worker(void *data)
- }
- void
- -nm_os_kthread_set_affinity(struct nm_kthread *nmk, int affinity)
- +nm_os_kctx_worker_setaff(struct nm_kctx *nmk, int affinity)
- {
- nmk->affinity = affinity;
- }
- -struct nm_kthread *
- -nm_os_kthread_create(struct nm_kthread_cfg *cfg, unsigned int cfgtype,
- +struct nm_kctx *
- +nm_os_kctx_create(struct nm_kctx_cfg *cfg, unsigned int cfgtype,
- void *opaque)
- {
- - struct nm_kthread *nmk = NULL;
- + struct nm_kctx *nmk = NULL;
- if (cfgtype != PTNETMAP_CFGTYPE_BHYVE) {
- D("Unsupported cfgtype %u", cfgtype);
- @@ -1140,7 +1163,7 @@ nm_os_kthread_create(struct nm_kthread_cfg *cfg, unsigned int cfgtype,
- }
- int
- -nm_os_kthread_start(struct nm_kthread *nmk)
- +nm_os_kctx_worker_start(struct nm_kctx *nmk)
- {
- struct proc *p = NULL;
- int error = 0;
- @@ -1158,7 +1181,7 @@ nm_os_kthread_start(struct nm_kthread *nmk)
- /* enable kthread main loop */
- nmk->run = 1;
- /* create kthread */
- - if((error = kthread_add(nm_kthread_worker, nmk, p,
- + if((error = kthread_add(nm_kctx_worker, nmk, p,
- &nmk->worker, RFNOWAIT /* to be checked */, 0, "nm-kthread-%ld",
- nmk->worker_ctx.type))) {
- goto err;
- @@ -1174,7 +1197,7 @@ err:
- }
- void
- -nm_os_kthread_stop(struct nm_kthread *nmk)
- +nm_os_kctx_worker_stop(struct nm_kctx *nmk)
- {
- if (!nmk->worker) {
- return;
- @@ -1184,18 +1207,18 @@ nm_os_kthread_stop(struct nm_kthread *nmk)
- /* wake up kthread if it sleeps */
- kthread_resume(nmk->worker);
- - nm_os_kthread_wakeup_worker(nmk);
- + nm_os_kctx_worker_wakeup(nmk);
- nmk->worker = NULL;
- }
- void
- -nm_os_kthread_delete(struct nm_kthread *nmk)
- +nm_os_kctx_destroy(struct nm_kctx *nmk)
- {
- if (!nmk)
- return;
- if (nmk->worker) {
- - nm_os_kthread_stop(nmk);
- + nm_os_kctx_worker_stop(nmk);
- }
- memset(&nmk->worker_ctx.cfg, 0, sizeof(nmk->worker_ctx.cfg));
- diff --git a/sys/dev/netmap/netmap_generic.c b/sys/dev/netmap/netmap_generic.c
- index 45dc9918498..f148b228115 100644
- --- a/sys/dev/netmap/netmap_generic.c
- +++ b/sys/dev/netmap/netmap_generic.c
- @@ -65,7 +65,7 @@
- #ifdef __FreeBSD__
- #include <sys/cdefs.h> /* prerequisite */
- -__FBSDID("$FreeBSD$");
- +__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap_generic.c 274353 2014-11-10 20:19:58Z luigi $");
- #include <sys/types.h>
- #include <sys/errno.h>
- @@ -109,13 +109,10 @@ __FBSDID("$FreeBSD$");
- * chain into uma_zfree(zone_pack, mf)
- * (or reinstall the buffer ?)
- */
- -static inline void
- -set_mbuf_destructor(struct mbuf *m, void *fn)
- -{
- -
- - m->m_ext.ext_free = fn;
- - m->m_ext.ext_type = EXT_EXTREF;
- -}
- +#define SET_MBUF_DESTRUCTOR(m, fn) do { \
- + (m)->m_ext.ext_free = (void *)fn; \
- + (m)->m_ext.ext_type = EXT_EXTREF; \
- +} while (0)
- static int
- void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2)
- @@ -170,12 +167,9 @@ nm_os_get_mbuf(struct ifnet *ifp, int len)
- static void void_mbuf_dtor(struct mbuf *m, void *arg1, void *arg2) { }
- -static inline void
- -set_mbuf_destructor(struct mbuf *m, void *fn)
- -{
- -
- - m->m_ext.ext_free = (fn != NULL) ? fn : (void *)void_mbuf_dtor;
- -}
- +#define SET_MBUF_DESTRUCTOR(m, fn) do { \
- + (m)->m_ext.ext_free = fn ? (void *)fn : (void *)void_mbuf_dtor; \
- +} while (0)
- static inline struct mbuf *
- nm_os_get_mbuf(struct ifnet *ifp, int len)
- @@ -311,7 +305,7 @@ void generic_rate(int txp, int txs, int txi, int rxp, int rxs, int rxi)
- #endif /* !RATE */
- -/* =============== GENERIC NETMAP ADAPTER SUPPORT ================= */
- +/* ========== GENERIC (EMULATED) NETMAP ADAPTER SUPPORT ============= */
- /*
- * Wrapper used by the generic adapter layer to notify
- @@ -341,7 +335,6 @@ generic_netmap_unregister(struct netmap_adapter *na)
- int i, r;
- if (na->active_fds == 0) {
- - D("Generic adapter %p goes off", na);
- rtnl_lock();
- na->na_flags &= ~NAF_NETMAP_ON;
- @@ -357,14 +350,14 @@ generic_netmap_unregister(struct netmap_adapter *na)
- for_each_rx_kring_h(r, kring, na) {
- if (nm_kring_pending_off(kring)) {
- - D("RX ring %d of generic adapter %p goes off", r, na);
- + D("Emulated adapter: ring '%s' deactivated", kring->name);
- kring->nr_mode = NKR_NETMAP_OFF;
- }
- }
- for_each_tx_kring_h(r, kring, na) {
- if (nm_kring_pending_off(kring)) {
- kring->nr_mode = NKR_NETMAP_OFF;
- - D("TX ring %d of generic adapter %p goes off", r, na);
- + D("Emulated adapter: ring '%s' deactivated", kring->name);
- }
- }
- @@ -387,14 +380,14 @@ generic_netmap_unregister(struct netmap_adapter *na)
- * TX event is consumed. */
- mtx_lock_spin(&kring->tx_event_lock);
- if (kring->tx_event) {
- - set_mbuf_destructor(kring->tx_event, NULL);
- + SET_MBUF_DESTRUCTOR(kring->tx_event, NULL);
- }
- kring->tx_event = NULL;
- mtx_unlock_spin(&kring->tx_event_lock);
- }
- if (na->active_fds == 0) {
- - free(gna->mit, M_DEVBUF);
- + nm_os_free(gna->mit);
- for_each_rx_kring(r, kring, na) {
- mbq_safe_fini(&kring->rx_queue);
- @@ -411,7 +404,7 @@ generic_netmap_unregister(struct netmap_adapter *na)
- m_freem(kring->tx_pool[i]);
- }
- }
- - free(kring->tx_pool, M_DEVBUF);
- + nm_os_free(kring->tx_pool);
- kring->tx_pool = NULL;
- }
- @@ -421,6 +414,7 @@ generic_netmap_unregister(struct netmap_adapter *na)
- del_timer(&rate_ctx.timer);
- }
- #endif
- + D("Emulated adapter for %s deactivated", na->name);
- }
- return 0;
- @@ -445,13 +439,12 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
- }
- if (na->active_fds == 0) {
- - D("Generic adapter %p goes on", na);
- + D("Emulated adapter for %s activated", na->name);
- /* Do all memory allocations when (na->active_fds == 0), to
- * simplify error management. */
- /* Allocate memory for mitigation support on all the rx queues. */
- - gna->mit = malloc(na->num_rx_rings * sizeof(struct nm_generic_mit),
- - M_DEVBUF, M_NOWAIT | M_ZERO);
- + gna->mit = nm_os_malloc(na->num_rx_rings * sizeof(struct nm_generic_mit));
- if (!gna->mit) {
- D("mitigation allocation failed");
- error = ENOMEM;
- @@ -478,8 +471,7 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
- }
- for_each_tx_kring(r, kring, na) {
- kring->tx_pool =
- - malloc(na->num_tx_desc * sizeof(struct mbuf *),
- - M_DEVBUF, M_NOWAIT | M_ZERO);
- + nm_os_malloc(na->num_tx_desc * sizeof(struct mbuf *));
- if (!kring->tx_pool) {
- D("tx_pool allocation failed");
- error = ENOMEM;
- @@ -492,14 +484,14 @@ generic_netmap_register(struct netmap_adapter *na, int enable)
- for_each_rx_kring_h(r, kring, na) {
- if (nm_kring_pending_on(kring)) {
- - D("RX ring %d of generic adapter %p goes on", r, na);
- + D("Emulated adapter: ring '%s' activated", kring->name);
- kring->nr_mode = NKR_NETMAP_ON;
- }
- }
- for_each_tx_kring_h(r, kring, na) {
- if (nm_kring_pending_on(kring)) {
- - D("TX ring %d of generic adapter %p goes on", r, na);
- + D("Emulated adapter: ring '%s' activated", kring->name);
- kring->nr_mode = NKR_NETMAP_ON;
- }
- }
- @@ -560,13 +552,13 @@ free_tx_pools:
- if (kring->tx_pool == NULL) {
- continue;
- }
- - free(kring->tx_pool, M_DEVBUF);
- + nm_os_free(kring->tx_pool);
- kring->tx_pool = NULL;
- }
- for_each_rx_kring(r, kring, na) {
- mbq_safe_fini(&kring->rx_queue);
- }
- - free(gna->mit, M_DEVBUF);
- + nm_os_free(gna->mit);
- out:
- return error;
- @@ -768,7 +760,7 @@ generic_set_tx_event(struct netmap_kring *kring, u_int hwcur)
- return;
- }
- - set_mbuf_destructor(m, generic_mbuf_destructor);
- + SET_MBUF_DESTRUCTOR(m, generic_mbuf_destructor);
- kring->tx_event = m;
- mtx_unlock_spin(&kring->tx_event_lock);
- @@ -1161,7 +1153,6 @@ generic_netmap_dtor(struct netmap_adapter *na)
- struct netmap_adapter *prev_na = gna->prev;
- if (prev_na != NULL) {
- - D("Released generic NA %p", gna);
- netmap_adapter_put(prev_na);
- if (nm_iszombie(na)) {
- /*
- @@ -1170,6 +1161,7 @@ generic_netmap_dtor(struct netmap_adapter *na)
- */
- netmap_adapter_put(prev_na);
- }
- + D("Native netmap adapter %p restored", prev_na);
- }
- NM_ATTACH_NA(ifp, prev_na);
- /*
- @@ -1177,7 +1169,13 @@ generic_netmap_dtor(struct netmap_adapter *na)
- * overrides WNA(ifp) if na->ifp is not NULL.
- */
- na->ifp = NULL;
- - D("Restored native NA %p", prev_na);
- + D("Emulated netmap adapter for %s destroyed", na->name);
- +}
- +
- +int
- +na_is_generic(struct netmap_adapter *na)
- +{
- + return na->nm_register == generic_netmap_register;
- }
- /*
- @@ -1208,7 +1206,7 @@ generic_netmap_attach(struct ifnet *ifp)
- return EINVAL;
- }
- - gna = malloc(sizeof(*gna), M_DEVBUF, M_NOWAIT | M_ZERO);
- + gna = nm_os_malloc(sizeof(*gna));
- if (gna == NULL) {
- D("no memory on attach, give up");
- return ENOMEM;
- @@ -1237,7 +1235,7 @@ generic_netmap_attach(struct ifnet *ifp)
- retval = netmap_attach_common(na);
- if (retval) {
- - free(gna, M_DEVBUF);
- + nm_os_free(gna);
- return retval;
- }
- @@ -1249,7 +1247,7 @@ generic_netmap_attach(struct ifnet *ifp)
- nm_os_generic_set_features(gna);
- - D("Created generic NA %p (prev %p)", gna, gna->prev);
- + D("Emulated adapter for %s created (prev was %p)", na->name, gna->prev);
- return retval;
- }
- diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h
- index f904476721b..3972f82d6fe 100644
- --- a/sys/dev/netmap/netmap_kern.h
- +++ b/sys/dev/netmap/netmap_kern.h
- @@ -26,7 +26,7 @@
- */
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/dev/netmap/netmap_kern.h 238985 2012-08-02 11:59:43Z luigi $
- *
- * The header contains the definitions of constants and function
- * prototypes used only in kernelspace.
- @@ -55,6 +55,9 @@
- #if defined(CONFIG_NETMAP_PTNETMAP_HOST)
- #define WITH_PTNETMAP_HOST
- #endif
- +#if defined(CONFIG_NETMAP_SINK)
- +#define WITH_SINK
- +#endif
- #elif defined (_WIN32)
- #define WITH_VALE // comment out to disable VALE support
- @@ -240,12 +243,23 @@ typedef struct hrtimer{
- #define NMG_UNLOCK() NM_MTX_UNLOCK(netmap_global_lock)
- #define NMG_LOCK_ASSERT() NM_MTX_ASSERT(netmap_global_lock)
- +#if defined(__FreeBSD__)
- +#define nm_prerr printf
- +#define nm_prinf printf
- +#elif defined (_WIN32)
- +#define nm_prerr DbgPrint
- +#define nm_prinf DbgPrint
- +#elif defined(linux)
- +#define nm_prerr(fmt, arg...) printk(KERN_ERR fmt, ##arg)
- +#define nm_prinf(fmt, arg...) printk(KERN_INFO fmt, ##arg)
- +#endif
- +
- #define ND(format, ...)
- #define D(format, ...) \
- do { \
- struct timeval __xxts; \
- microtime(&__xxts); \
- - printf("%03d.%06d [%4d] %-25s " format "\n", \
- + nm_prerr("%03d.%06d [%4d] %-25s " format "\n", \
- (int)__xxts.tv_sec % 1000, (int)__xxts.tv_usec, \
- __LINE__, __FUNCTION__, ##__VA_ARGS__); \
- } while (0)
- @@ -287,6 +301,11 @@ void nm_os_put_module(void);
- void netmap_make_zombie(struct ifnet *);
- void netmap_undo_zombie(struct ifnet *);
- +/* os independent alloc/realloc/free */
- +void *nm_os_malloc(size_t);
- +void *nm_os_realloc(void *, size_t new_size, size_t old_size);
- +void nm_os_free(void *);
- +
- /* passes a packet up to the host stack.
- * If the packet is sent (or dropped) immediately it returns NULL,
- * otherwise it links the packet to prev and returns m.
- @@ -317,6 +336,12 @@ nm_txrx_swap(enum txrx t)
- #define for_rx_tx(t) for ((t) = 0; (t) < NR_TXRX; (t)++)
- +#ifdef WITH_MONITOR
- +struct netmap_zmon_list {
- + struct netmap_kring *next;
- + struct netmap_kring *prev;
- +};
- +#endif /* WITH_MONITOR */
- /*
- * private, kernel view of a ring. Keeps track of the status of
- @@ -491,6 +516,12 @@ struct netmap_kring {
- struct netmap_kring **monitors;
- uint32_t max_monitors; /* current size of the monitors array */
- uint32_t n_monitors; /* next unused entry in the monitor array */
- + uint32_t mon_pos[NR_TXRX]; /* index of this ring in the monitored ring array */
- + uint32_t mon_tail; /* last seen slot on rx */
- +
- + /* circular list of zero-copy monitors */
- + struct netmap_zmon_list zmon_list[NR_TXRX];
- +
- /*
- * Monitors work by intercepting the sync and notify callbacks of the
- * monitored krings. This is implemented by replacing the pointers
- @@ -499,8 +530,6 @@ struct netmap_kring {
- int (*mon_sync)(struct netmap_kring *kring, int flags);
- int (*mon_notify)(struct netmap_kring *kring, int flags);
- - uint32_t mon_tail; /* last seen slot on rx */
- - uint32_t mon_pos; /* index of this ring in the monitored ring array */
- #endif
- }
- #ifdef _WIN32
- @@ -731,8 +760,9 @@ struct netmap_adapter {
- int (*nm_txsync)(struct netmap_kring *kring, int flags);
- int (*nm_rxsync)(struct netmap_kring *kring, int flags);
- int (*nm_notify)(struct netmap_kring *kring, int flags);
- -#define NAF_FORCE_READ 1
- -#define NAF_FORCE_RECLAIM 2
- +#define NAF_FORCE_READ 1
- +#define NAF_FORCE_RECLAIM 2
- +#define NAF_CAN_FORWARD_DOWN 4
- /* return configuration information */
- int (*nm_config)(struct netmap_adapter *,
- u_int *txr, u_int *txd, u_int *rxr, u_int *rxd);
- @@ -854,6 +884,7 @@ struct netmap_vp_adapter { /* VALE software port */
- int bdg_port;
- struct nm_bridge *na_bdg;
- int retry;
- + int autodelete; /* remove the ifp on last reference */
- /* Maximum Frame Size, used in bdg_mismatch_datapath() */
- u_int mfs;
- @@ -977,7 +1008,10 @@ struct netmap_bwrap_adapter {
- struct nm_bdg_polling_state *na_polling_state;
- };
- int netmap_bwrap_attach(const char *name, struct netmap_adapter *);
- +int netmap_vi_create(struct nmreq *, int);
- +#else /* !WITH_VALE */
- +#define netmap_vi_create(nmr, a) (EOPNOTSUPP)
- #endif /* WITH_VALE */
- #ifdef WITH_PIPES
- @@ -993,6 +1027,7 @@ struct netmap_pipe_adapter {
- struct netmap_adapter *parent; /* adapter that owns the memory */
- struct netmap_pipe_adapter *peer; /* the other end of the pipe */
- int peer_ref; /* 1 iff we are holding a ref to the peer */
- + struct ifnet *parent_ifp; /* maybe null */
- u_int parent_slot; /* index in the parent pipe array */
- };
- @@ -1149,6 +1184,7 @@ static __inline void nm_kr_start(struct netmap_kring *kr)
- * virtual ports (vale, pipes, monitor)
- */
- int netmap_attach(struct netmap_adapter *);
- +int netmap_attach_ext(struct netmap_adapter *, size_t size);
- void netmap_detach(struct ifnet *);
- int netmap_transmit(struct ifnet *, struct mbuf *);
- struct netmap_slot *netmap_reset(struct netmap_adapter *na,
- @@ -1380,9 +1416,10 @@ void netmap_do_unregif(struct netmap_priv_d *priv);
- u_int nm_bound_var(u_int *v, u_int dflt, u_int lo, u_int hi, const char *msg);
- int netmap_get_na(struct nmreq *nmr, struct netmap_adapter **na,
- - struct ifnet **ifp, int create);
- + struct ifnet **ifp, struct netmap_mem_d *nmd, int create);
- void netmap_unget_na(struct netmap_adapter *na, struct ifnet *ifp);
- -int netmap_get_hw_na(struct ifnet *ifp, struct netmap_adapter **na);
- +int netmap_get_hw_na(struct ifnet *ifp,
- + struct netmap_mem_d *nmd, struct netmap_adapter **na);
- #ifdef WITH_VALE
- @@ -1414,7 +1451,8 @@ u_int netmap_bdg_learning(struct nm_bdg_fwd *ft, uint8_t *dst_ring,
- #define NM_BDG_NOPORT (NM_BDG_MAXPORTS+1)
- /* these are redefined in case of no VALE support */
- -int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
- +int netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
- + struct netmap_mem_d *nmd, int create);
- struct nm_bridge *netmap_init_bridges2(u_int);
- void netmap_uninit_bridges2(struct nm_bridge *, u_int);
- int netmap_init_bridges(void);
- @@ -1423,7 +1461,7 @@ int netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops);
- int netmap_bdg_config(struct nmreq *nmr);
- #else /* !WITH_VALE */
- -#define netmap_get_bdg_na(_1, _2, _3) 0
- +#define netmap_get_bdg_na(_1, _2, _3, _4) 0
- #define netmap_init_bridges(_1) 0
- #define netmap_uninit_bridges()
- #define netmap_bdg_ctl(_1, _2) EINVAL
- @@ -1433,22 +1471,24 @@ int netmap_bdg_config(struct nmreq *nmr);
- /* max number of pipes per device */
- #define NM_MAXPIPES 64 /* XXX how many? */
- void netmap_pipe_dealloc(struct netmap_adapter *);
- -int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
- +int netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
- + struct netmap_mem_d *nmd, int create);
- #else /* !WITH_PIPES */
- #define NM_MAXPIPES 0
- #define netmap_pipe_alloc(_1, _2) 0
- #define netmap_pipe_dealloc(_1)
- -#define netmap_get_pipe_na(nmr, _2, _3) \
- +#define netmap_get_pipe_na(nmr, _2, _3, _4) \
- ({ int role__ = (nmr)->nr_flags & NR_REG_MASK; \
- (role__ == NR_REG_PIPE_MASTER || \
- role__ == NR_REG_PIPE_SLAVE) ? EOPNOTSUPP : 0; })
- #endif
- #ifdef WITH_MONITOR
- -int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
- +int netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
- + struct netmap_mem_d *nmd, int create);
- void netmap_monitor_stop(struct netmap_adapter *na);
- #else
- -#define netmap_get_monitor_na(nmr, _2, _3) \
- +#define netmap_get_monitor_na(nmr, _2, _3, _4) \
- ((nmr)->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX) ? EOPNOTSUPP : 0)
- #endif
- @@ -1532,6 +1572,7 @@ extern int netmap_generic_mit;
- extern int netmap_generic_ringsize;
- extern int netmap_generic_rings;
- extern int netmap_generic_txqdisc;
- +extern int ptnetmap_tx_workers;
- /*
- * NA returns a pointer to the struct netmap adapter from the ifp,
- @@ -1781,6 +1822,7 @@ struct netmap_priv_d {
- u_int np_qfirst[NR_TXRX],
- np_qlast[NR_TXRX]; /* range of tx/rx rings to scan */
- uint16_t np_txpoll; /* XXX and also np_rxpoll ? */
- + int np_sync_flags; /* to be passed to nm_sync */
- int np_refs; /* use with NMG_LOCK held */
- @@ -1812,6 +1854,11 @@ static inline int nm_kring_pending(struct netmap_priv_d *np)
- return 0;
- }
- +#ifdef WITH_PIPES
- +int netmap_pipe_txsync(struct netmap_kring *txkring, int flags);
- +int netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags);
- +#endif /* WITH_PIPES */
- +
- #ifdef WITH_MONITOR
- struct netmap_monitor_adapter {
- @@ -1835,6 +1882,8 @@ int generic_rx_handler(struct ifnet *ifp, struct mbuf *m);;
- int nm_os_catch_rx(struct netmap_generic_adapter *gna, int intercept);
- int nm_os_catch_tx(struct netmap_generic_adapter *gna, int intercept);
- +int na_is_generic(struct netmap_adapter *na);
- +
- /*
- * the generic transmit routine is passed a structure to optionally
- * build a queue of descriptors, in an OS-specific way.
- @@ -1891,6 +1940,7 @@ int nm_os_mitigation_active(struct nm_generic_mit *mit);
- void nm_os_mitigation_cleanup(struct nm_generic_mit *mit);
- #else /* !WITH_GENERIC */
- #define generic_netmap_attach(ifp) (EOPNOTSUPP)
- +#define na_is_generic(na) (0)
- #endif /* WITH_GENERIC */
- /* Shared declarations for the VALE switch. */
- @@ -2003,26 +2053,29 @@ void nm_os_vi_init_index(void);
- /*
- * kernel thread routines
- */
- -struct nm_kthread; /* OS-specific kthread - opaque */
- -typedef void (*nm_kthread_worker_fn_t)(void *data);
- +struct nm_kctx; /* OS-specific kernel context - opaque */
- +typedef void (*nm_kctx_worker_fn_t)(void *data, int is_kthread);
- +typedef void (*nm_kctx_notify_fn_t)(void *data);
- /* kthread configuration */
- -struct nm_kthread_cfg {
- - long type; /* kthread type/identifier */
- - nm_kthread_worker_fn_t worker_fn; /* worker function */
- - void *worker_private;/* worker parameter */
- - int attach_user; /* attach kthread to user process */
- +struct nm_kctx_cfg {
- + long type; /* kthread type/identifier */
- + nm_kctx_worker_fn_t worker_fn; /* worker function */
- + void *worker_private;/* worker parameter */
- + nm_kctx_notify_fn_t notify_fn; /* notify function */
- + int attach_user; /* attach kthread to user process */
- + int use_kthread; /* use a kthread for the context */
- };
- /* kthread configuration */
- -struct nm_kthread *nm_os_kthread_create(struct nm_kthread_cfg *cfg,
- +struct nm_kctx *nm_os_kctx_create(struct nm_kctx_cfg *cfg,
- unsigned int cfgtype,
- void *opaque);
- -int nm_os_kthread_start(struct nm_kthread *);
- -void nm_os_kthread_stop(struct nm_kthread *);
- -void nm_os_kthread_delete(struct nm_kthread *);
- -void nm_os_kthread_wakeup_worker(struct nm_kthread *nmk);
- -void nm_os_kthread_send_irq(struct nm_kthread *);
- -void nm_os_kthread_set_affinity(struct nm_kthread *, int);
- +int nm_os_kctx_worker_start(struct nm_kctx *);
- +void nm_os_kctx_worker_stop(struct nm_kctx *);
- +void nm_os_kctx_destroy(struct nm_kctx *);
- +void nm_os_kctx_worker_wakeup(struct nm_kctx *nmk);
- +void nm_os_kctx_send_irq(struct nm_kctx *);
- +void nm_os_kctx_worker_setaff(struct nm_kctx *, int);
- u_int nm_os_ncpus(void);
- #ifdef WITH_PTNETMAP_HOST
- @@ -2032,12 +2085,18 @@ u_int nm_os_ncpus(void);
- struct netmap_pt_host_adapter {
- struct netmap_adapter up;
- + /* the passed-through adapter */
- struct netmap_adapter *parent;
- + /* parent->na_flags, saved at NETMAP_PT_HOST_CREATE time,
- + * and restored at NETMAP_PT_HOST_DELETE time */
- + uint32_t parent_na_flags;
- +
- int (*parent_nm_notify)(struct netmap_kring *kring, int flags);
- void *ptns;
- };
- /* ptnetmap HOST routines */
- -int netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create);
- +int netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na,
- + struct netmap_mem_d * nmd, int create);
- int ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na);
- static inline int
- nm_ptnetmap_host_on(struct netmap_adapter *na)
- @@ -2045,7 +2104,7 @@ nm_ptnetmap_host_on(struct netmap_adapter *na)
- return na && na->na_flags & NAF_PTNETMAP_HOST;
- }
- #else /* !WITH_PTNETMAP_HOST */
- -#define netmap_get_pt_host_na(nmr, _2, _3) \
- +#define netmap_get_pt_host_na(nmr, _2, _3, _4) \
- ((nmr)->nr_flags & (NR_PTNETMAP_HOST) ? EOPNOTSUPP : 0)
- #define ptnetmap_ctl(_1, _2) EINVAL
- #define nm_ptnetmap_host_on(_1) EINVAL
- diff --git a/sys/dev/netmap/netmap_mbq.h b/sys/dev/netmap/netmap_mbq.h
- index 9dafa8b1149..8ba0947b570 100644
- --- a/sys/dev/netmap/netmap_mbq.h
- +++ b/sys/dev/netmap/netmap_mbq.h
- @@ -29,8 +29,8 @@
- */
- -#ifndef __NETMAP_MBQ_H__
- -#define __NETMAP_MBQ_H__
- +#ifndef _NET_NETMAP_MBQ_H__
- +#define _NET_NETMAP_MBQ_H__
- /*
- * These function implement an mbuf tailq with an optional lock.
- @@ -67,7 +67,7 @@ void mbq_purge(struct mbq *q);
- static inline struct mbuf *
- mbq_peek(struct mbq *q)
- {
- - return q->head ? q->head : NULL;
- + return q->head;
- }
- static inline void
- @@ -94,4 +94,4 @@ static inline unsigned int mbq_len(struct mbq *q)
- return q->count;
- }
- -#endif /* __NETMAP_MBQ_H_ */
- +#endif /* _NET_NETMAP_MBQ_H_ */
- diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c
- index 922e5f32ff0..ad990f0618c 100644
- --- a/sys/dev/netmap/netmap_mem2.c
- +++ b/sys/dev/netmap/netmap_mem2.c
- @@ -36,7 +36,7 @@
- #ifdef __FreeBSD__
- #include <sys/cdefs.h> /* prerequisite */
- -__FBSDID("$FreeBSD$");
- +__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 241723 2012-10-19 09:41:45Z glebius $");
- #include <sys/types.h>
- #include <sys/malloc.h>
- @@ -87,6 +87,9 @@ enum {
- struct netmap_obj_params {
- u_int size;
- u_int num;
- +
- + u_int last_size;
- + u_int last_num;
- };
- struct netmap_obj_pool {
- @@ -139,20 +142,20 @@ struct netmap_mem_ops {
- ssize_t (*nmd_if_offset)(struct netmap_mem_d *, const void *vaddr);
- void (*nmd_delete)(struct netmap_mem_d *);
- - struct netmap_if * (*nmd_if_new)(struct netmap_adapter *);
- + struct netmap_if * (*nmd_if_new)(struct netmap_adapter *,
- + struct netmap_priv_d *);
- void (*nmd_if_delete)(struct netmap_adapter *, struct netmap_if *);
- int (*nmd_rings_create)(struct netmap_adapter *);
- void (*nmd_rings_delete)(struct netmap_adapter *);
- };
- -typedef uint16_t nm_memid_t;
- -
- struct netmap_mem_d {
- NMA_LOCK_T nm_mtx; /* protect the allocator */
- u_int nm_totalsize; /* shorthand */
- u_int flags;
- #define NETMAP_MEM_FINALIZED 0x1 /* preallocation done */
- +#define NETMAP_MEM_HIDDEN 0x8 /* beeing prepared */
- int lasterr; /* last error for curr config */
- int active; /* active users */
- int refcount;
- @@ -166,6 +169,11 @@ struct netmap_mem_d {
- struct netmap_mem_d *prev, *next;
- struct netmap_mem_ops *ops;
- +
- + struct netmap_obj_params params[NETMAP_POOLS_NR];
- +
- +#define NM_MEM_NAMESZ 16
- + char name[NM_MEM_NAMESZ];
- };
- /*
- @@ -214,7 +222,7 @@ NMD_DEFCB(int, config);
- NMD_DEFCB1(ssize_t, if_offset, const void *);
- NMD_DEFCB(void, delete);
- -NMD_DEFNACB(struct netmap_if *, if_new);
- +NMD_DEFNACB1(struct netmap_if *, if_new, struct netmap_priv_d *);
- NMD_DEFNACB1(void, if_delete, struct netmap_if *);
- NMD_DEFNACB(int, rings_create);
- NMD_DEFNACB(void, rings_delete);
- @@ -222,6 +230,13 @@ NMD_DEFNACB(void, rings_delete);
- static int netmap_mem_map(struct netmap_obj_pool *, struct netmap_adapter *);
- static int netmap_mem_unmap(struct netmap_obj_pool *, struct netmap_adapter *);
- static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
- +static void nm_mem_release_id(struct netmap_mem_d *);
- +
- +nm_memid_t
- +netmap_mem_get_id(struct netmap_mem_d *nmd)
- +{
- + return nmd->nm_id;
- +}
- #define NMA_LOCK_INIT(n) NM_MTX_INIT((n)->nm_mtx)
- #define NMA_LOCK_DESTROY(n) NM_MTX_DESTROY((n)->nm_mtx)
- @@ -230,34 +245,35 @@ static int nm_mem_assign_group(struct netmap_mem_d *, struct device *);
- #ifdef NM_DEBUG_MEM_PUTGET
- #define NM_DBG_REFC(nmd, func, line) \
- - printf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
- + nm_prinf("%s:%d mem[%d] -> %d\n", func, line, (nmd)->nm_id, (nmd)->refcount);
- #else
- #define NM_DBG_REFC(nmd, func, line)
- #endif
- -#ifdef NM_DEBUG_MEM_PUTGET
- -void __netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
- -#else
- -void netmap_mem_get(struct netmap_mem_d *nmd)
- -#endif
- +/* circular list of all existing allocators */
- +static struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
- +NM_MTX_T nm_mem_list_lock;
- +
- +struct netmap_mem_d *
- +__netmap_mem_get(struct netmap_mem_d *nmd, const char *func, int line)
- {
- - NMA_LOCK(nmd);
- + NM_MTX_LOCK(nm_mem_list_lock);
- nmd->refcount++;
- NM_DBG_REFC(nmd, func, line);
- - NMA_UNLOCK(nmd);
- + NM_MTX_UNLOCK(nm_mem_list_lock);
- + return nmd;
- }
- -#ifdef NM_DEBUG_MEM_PUTGET
- -void __netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line)
- -#else
- -void netmap_mem_put(struct netmap_mem_d *nmd)
- -#endif
- +void
- +__netmap_mem_put(struct netmap_mem_d *nmd, const char *func, int line)
- {
- int last;
- - NMA_LOCK(nmd);
- + NM_MTX_LOCK(nm_mem_list_lock);
- last = (--nmd->refcount == 0);
- + if (last)
- + nm_mem_release_id(nmd);
- NM_DBG_REFC(nmd, func, line);
- - NMA_UNLOCK(nmd);
- + NM_MTX_UNLOCK(nm_mem_list_lock);
- if (last)
- netmap_mem_delete(nmd);
- }
- @@ -349,21 +365,6 @@ netmap_mem2_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut)
- return 0;
- }
- -static struct netmap_obj_params netmap_params[NETMAP_POOLS_NR] = {
- - [NETMAP_IF_POOL] = {
- - .size = 1024,
- - .num = 100,
- - },
- - [NETMAP_RING_POOL] = {
- - .size = 9*PAGE_SIZE,
- - .num = 200,
- - },
- - [NETMAP_BUF_POOL] = {
- - .size = 2048,
- - .num = NETMAP_BUF_MAX_NUM,
- - },
- -};
- -
- static struct netmap_obj_params netmap_min_priv_params[NETMAP_POOLS_NR] = {
- [NETMAP_IF_POOL] = {
- .size = 1024,
- @@ -411,17 +412,32 @@ struct netmap_mem_d nm_mem = { /* Our memory allocator. */
- },
- },
- + .params = {
- + [NETMAP_IF_POOL] = {
- + .size = 1024,
- + .num = 100,
- + },
- + [NETMAP_RING_POOL] = {
- + .size = 9*PAGE_SIZE,
- + .num = 200,
- + },
- + [NETMAP_BUF_POOL] = {
- + .size = 2048,
- + .num = NETMAP_BUF_MAX_NUM,
- + },
- + },
- +
- .nm_id = 1,
- .nm_grp = -1,
- .prev = &nm_mem,
- .next = &nm_mem,
- - .ops = &netmap_mem_global_ops
- -};
- + .ops = &netmap_mem_global_ops,
- + .name = "1"
- +};
- -static struct netmap_mem_d *netmap_last_mem_d = &nm_mem;
- /* blueprint for the private memory allocators */
- extern struct netmap_mem_ops netmap_mem_private_ops; /* forward */
- @@ -451,9 +467,11 @@ static const struct netmap_mem_d nm_blueprint = {
- },
- },
- + .nm_grp = -1,
- +
- .flags = NETMAP_MEM_PRIVATE,
- - .ops = &netmap_mem_private_ops
- + .ops = &netmap_mem_global_ops,
- };
- /* memory allocator related sysctls */
- @@ -464,11 +482,11 @@ static const struct netmap_mem_d nm_blueprint = {
- #define DECLARE_SYSCTLS(id, name) \
- SYSBEGIN(mem2_ ## name); \
- SYSCTL_INT(_dev_netmap, OID_AUTO, name##_size, \
- - CTLFLAG_RW, &netmap_params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
- + CTLFLAG_RW, &nm_mem.params[id].size, 0, "Requested size of netmap " STRINGIFY(name) "s"); \
- SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_size, \
- CTLFLAG_RD, &nm_mem.pools[id]._objsize, 0, "Current size of netmap " STRINGIFY(name) "s"); \
- SYSCTL_INT(_dev_netmap, OID_AUTO, name##_num, \
- - CTLFLAG_RW, &netmap_params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
- + CTLFLAG_RW, &nm_mem.params[id].num, 0, "Requested number of netmap " STRINGIFY(name) "s"); \
- SYSCTL_INT(_dev_netmap, OID_AUTO, name##_curr_num, \
- CTLFLAG_RD, &nm_mem.pools[id].objtotal, 0, "Current number of netmap " STRINGIFY(name) "s"); \
- SYSCTL_INT(_dev_netmap, OID_AUTO, priv_##name##_size, \
- @@ -484,7 +502,7 @@ DECLARE_SYSCTLS(NETMAP_IF_POOL, if);
- DECLARE_SYSCTLS(NETMAP_RING_POOL, ring);
- DECLARE_SYSCTLS(NETMAP_BUF_POOL, buf);
- -/* call with NMA_LOCK(&nm_mem) held */
- +/* call with nm_mem_list_lock held */
- static int
- nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
- {
- @@ -505,6 +523,8 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
- scan->prev->next = nmd;
- scan->prev = nmd;
- netmap_last_mem_d = nmd;
- + nmd->refcount = 1;
- + NM_DBG_REFC(nmd, __FUNCTION__, __LINE__);
- error = 0;
- break;
- }
- @@ -513,24 +533,23 @@ nm_mem_assign_id_locked(struct netmap_mem_d *nmd)
- return error;
- }
- -/* call with NMA_LOCK(&nm_mem) *not* held */
- +/* call with nm_mem_list_lock *not* held */
- static int
- nm_mem_assign_id(struct netmap_mem_d *nmd)
- {
- int ret;
- - NMA_LOCK(&nm_mem);
- + NM_MTX_LOCK(nm_mem_list_lock);
- ret = nm_mem_assign_id_locked(nmd);
- - NMA_UNLOCK(&nm_mem);
- + NM_MTX_UNLOCK(nm_mem_list_lock);
- return ret;
- }
- +/* call with nm_mem_list_lock held */
- static void
- nm_mem_release_id(struct netmap_mem_d *nmd)
- {
- - NMA_LOCK(&nm_mem);
- -
- nmd->prev->next = nmd->next;
- nmd->next->prev = nmd->prev;
- @@ -538,8 +557,26 @@ nm_mem_release_id(struct netmap_mem_d *nmd)
- netmap_last_mem_d = nmd->prev;
- nmd->prev = nmd->next = NULL;
- +}
- - NMA_UNLOCK(&nm_mem);
- +struct netmap_mem_d *
- +netmap_mem_find(nm_memid_t id)
- +{
- + struct netmap_mem_d *nmd;
- +
- + NM_MTX_LOCK(nm_mem_list_lock);
- + nmd = netmap_last_mem_d;
- + do {
- + if (!(nmd->flags & NETMAP_MEM_HIDDEN) && nmd->nm_id == id) {
- + nmd->refcount++;
- + NM_DBG_REFC(nmd, __FUNCTION__, __LINE__);
- + NM_MTX_UNLOCK(nm_mem_list_lock);
- + return nmd;
- + }
- + nmd = nmd->next;
- + } while (nmd != netmap_last_mem_d);
- + NM_MTX_UNLOCK(nm_mem_list_lock);
- + return NULL;
- }
- static int
- @@ -1032,7 +1069,7 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p)
- if (p == NULL)
- return;
- if (p->bitmap)
- - free(p->bitmap, M_NETMAP);
- + nm_os_free(p->bitmap);
- p->bitmap = NULL;
- if (p->lut) {
- u_int i;
- @@ -1051,7 +1088,7 @@ netmap_reset_obj_allocator(struct netmap_obj_pool *p)
- #ifdef linux
- vfree(p->lut);
- #else
- - free(p->lut, M_NETMAP);
- + nm_os_free(p->lut);
- #endif
- }
- p->lut = NULL;
- @@ -1170,7 +1207,7 @@ nm_alloc_lut(u_int nobj)
- #ifdef linux
- lut = vmalloc(n);
- #else
- - lut = malloc(n, M_NETMAP, M_NOWAIT | M_ZERO);
- + lut = nm_os_malloc(n);
- #endif
- return lut;
- }
- @@ -1194,7 +1231,7 @@ netmap_finalize_obj_allocator(struct netmap_obj_pool *p)
- /* Allocate the bitmap */
- n = (p->objtotal + 31) / 32;
- - p->bitmap = malloc(sizeof(uint32_t) * n, M_NETMAP, M_NOWAIT | M_ZERO);
- + p->bitmap = nm_os_malloc(sizeof(uint32_t) * n);
- if (p->bitmap == NULL) {
- D("Unable to create bitmap (%d entries) for allocator '%s'", (int)n,
- p->name);
- @@ -1278,16 +1315,18 @@ clean:
- /* call with lock held */
- static int
- -netmap_memory_config_changed(struct netmap_mem_d *nmd)
- +netmap_mem_params_changed(struct netmap_obj_params* p)
- {
- - int i;
- + int i, rv = 0;
- for (i = 0; i < NETMAP_POOLS_NR; i++) {
- - if (nmd->pools[i].r_objsize != netmap_params[i].size ||
- - nmd->pools[i].r_objtotal != netmap_params[i].num)
- - return 1;
- + if (p[i].last_size != p[i].size || p[i].last_num != p[i].num) {
- + p[i].last_size = p[i].size;
- + p[i].last_num = p[i].num;
- + rv = 1;
- + }
- }
- - return 0;
- + return rv;
- }
- static void
- @@ -1308,7 +1347,7 @@ netmap_mem_unmap(struct netmap_obj_pool *p, struct netmap_adapter *na)
- {
- int i, lim = p->_objtotal;
- - if (na->pdev == NULL)
- + if (na == NULL || na->pdev == NULL)
- return 0;
- #if defined(__FreeBSD__)
- @@ -1386,66 +1425,16 @@ error:
- return nmd->lasterr;
- }
- -
- -
- -static void
- -netmap_mem_private_delete(struct netmap_mem_d *nmd)
- -{
- - if (nmd == NULL)
- - return;
- - if (netmap_verbose)
- - D("deleting %p", nmd);
- - if (nmd->active > 0)
- - D("bug: deleting mem allocator with active=%d!", nmd->active);
- - nm_mem_release_id(nmd);
- - if (netmap_verbose)
- - D("done deleting %p", nmd);
- - NMA_LOCK_DESTROY(nmd);
- - free(nmd, M_DEVBUF);
- -}
- -
- -static int
- -netmap_mem_private_config(struct netmap_mem_d *nmd)
- -{
- - /* nothing to do, we are configured on creation
- - * and configuration never changes thereafter
- - */
- - return 0;
- -}
- -
- -static int
- -netmap_mem_private_finalize(struct netmap_mem_d *nmd)
- -{
- - int err;
- - err = netmap_mem_finalize_all(nmd);
- - if (!err)
- - nmd->active++;
- - return err;
- -
- -}
- -
- -static void
- -netmap_mem_private_deref(struct netmap_mem_d *nmd)
- -{
- - if (--nmd->active <= 0)
- - netmap_mem_reset_all(nmd);
- -}
- -
- -
- /*
- * allocator for private memory
- */
- -struct netmap_mem_d *
- -netmap_mem_private_new(const char *name, u_int txr, u_int txd,
- - u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes, int *perr)
- +static struct netmap_mem_d *
- +_netmap_mem_private_new(struct netmap_obj_params *p, int *perr)
- {
- struct netmap_mem_d *d = NULL;
- - struct netmap_obj_params p[NETMAP_POOLS_NR];
- - int i, err;
- - u_int v, maxd;
- + int i, err = 0;
- - d = malloc(sizeof(struct netmap_mem_d),
- - M_DEVBUF, M_NOWAIT | M_ZERO);
- + d = nm_os_malloc(sizeof(struct netmap_mem_d));
- if (d == NULL) {
- err = ENOMEM;
- goto error;
- @@ -1456,7 +1445,41 @@ netmap_mem_private_new(const char *name, u_int txr, u_int txd,
- err = nm_mem_assign_id(d);
- if (err)
- goto error;
- + snprintf(d->name, NM_MEM_NAMESZ, "%d", d->nm_id);
- +
- + for (i = 0; i < NETMAP_POOLS_NR; i++) {
- + snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
- + nm_blueprint.pools[i].name,
- + d->name);
- + d->params[i].num = p[i].num;
- + d->params[i].size = p[i].size;
- + }
- + NMA_LOCK_INIT(d);
- +
- + err = netmap_mem_config(d);
- + if (err)
- + goto error;
- +
- + d->flags &= ~NETMAP_MEM_FINALIZED;
- +
- + return d;
- +
- +error:
- + netmap_mem_delete(d);
- + if (perr)
- + *perr = err;
- + return NULL;
- +}
- +
- +struct netmap_mem_d *
- +netmap_mem_private_new(u_int txr, u_int txd, u_int rxr, u_int rxd,
- + u_int extra_bufs, u_int npipes, int *perr)
- +{
- + struct netmap_mem_d *d = NULL;
- + struct netmap_obj_params p[NETMAP_POOLS_NR];
- + int i, err = 0;
- + u_int v, maxd;
- /* account for the fake host rings */
- txr++;
- rxr++;
- @@ -1502,23 +1525,13 @@ netmap_mem_private_new(const char *name, u_int txr, u_int txd,
- p[NETMAP_BUF_POOL].num,
- p[NETMAP_BUF_POOL].size);
- - for (i = 0; i < NETMAP_POOLS_NR; i++) {
- - snprintf(d->pools[i].name, NETMAP_POOL_MAX_NAMSZ,
- - nm_blueprint.pools[i].name,
- - name);
- - err = netmap_config_obj_allocator(&d->pools[i],
- - p[i].num, p[i].size);
- - if (err)
- - goto error;
- - }
- -
- - d->flags &= ~NETMAP_MEM_FINALIZED;
- -
- - NMA_LOCK_INIT(d);
- + d = _netmap_mem_private_new(p, perr);
- + if (d == NULL)
- + goto error;
- return d;
- error:
- - netmap_mem_private_delete(d);
- + netmap_mem_delete(d);
- if (perr)
- *perr = err;
- return NULL;
- @@ -1527,7 +1540,7 @@ error:
- /* call with lock held */
- static int
- -netmap_mem_global_config(struct netmap_mem_d *nmd)
- +netmap_mem2_config(struct netmap_mem_d *nmd)
- {
- int i;
- @@ -1535,7 +1548,7 @@ netmap_mem_global_config(struct netmap_mem_d *nmd)
- /* already in use, we cannot change the configuration */
- goto out;
- - if (!netmap_memory_config_changed(nmd))
- + if (!netmap_mem_params_changed(nmd->params))
- goto out;
- ND("reconfiguring");
- @@ -1550,7 +1563,7 @@ netmap_mem_global_config(struct netmap_mem_d *nmd)
- for (i = 0; i < NETMAP_POOLS_NR; i++) {
- nmd->lasterr = netmap_config_obj_allocator(&nmd->pools[i],
- - netmap_params[i].num, netmap_params[i].size);
- + nmd->params[i].num, nmd->params[i].size);
- if (nmd->lasterr)
- goto out;
- }
- @@ -1561,13 +1574,13 @@ out:
- }
- static int
- -netmap_mem_global_finalize(struct netmap_mem_d *nmd)
- +netmap_mem2_finalize(struct netmap_mem_d *nmd)
- {
- int err;
- /* update configuration if changed */
- - if (netmap_mem_global_config(nmd))
- - return nmd->lasterr;
- + if (netmap_mem2_config(nmd))
- + goto out1;
- nmd->active++;
- @@ -1585,6 +1598,7 @@ netmap_mem_global_finalize(struct netmap_mem_d *nmd)
- out:
- if (nmd->lasterr)
- nmd->active--;
- +out1:
- err = nmd->lasterr;
- return err;
- @@ -1592,20 +1606,23 @@ out:
- }
- static void
- -netmap_mem_global_delete(struct netmap_mem_d *nmd)
- +netmap_mem2_delete(struct netmap_mem_d *nmd)
- {
- int i;
- for (i = 0; i < NETMAP_POOLS_NR; i++) {
- - netmap_destroy_obj_allocator(&nm_mem.pools[i]);
- + netmap_destroy_obj_allocator(&nmd->pools[i]);
- }
- - NMA_LOCK_DESTROY(&nm_mem);
- + NMA_LOCK_DESTROY(nmd);
- + if (nmd != &nm_mem)
- + nm_os_free(nmd);
- }
- int
- netmap_mem_init(void)
- {
- + NM_MTX_INIT(nm_mem_list_lock);
- NMA_LOCK_INIT(&nm_mem);
- netmap_mem_get(&nm_mem);
- return (0);
- @@ -1742,7 +1759,7 @@ netmap_mem2_rings_delete(struct netmap_adapter *na)
- * the interface is in netmap mode.
- */
- static struct netmap_if *
- -netmap_mem2_if_new(struct netmap_adapter *na)
- +netmap_mem2_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
- {
- struct netmap_if *nifp;
- ssize_t base; /* handy for relative offsets between rings and nifp */
- @@ -1781,24 +1798,28 @@ netmap_mem2_if_new(struct netmap_adapter *na)
- */
- base = netmap_if_offset(na->nm_mem, nifp);
- for (i = 0; i < n[NR_TX]; i++) {
- - if (na->tx_rings[i].ring == NULL) {
- - // XXX maybe use the offset of an error ring,
- - // like we do for buffers?
- - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = 0;
- - continue;
- + /* XXX instead of ofs == 0 maybe use the offset of an error
- + * ring, like we do for buffers? */
- + ssize_t ofs = 0;
- +
- + if (na->tx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_TX]
- + && i < priv->np_qlast[NR_TX]) {
- + ofs = netmap_ring_offset(na->nm_mem,
- + na->tx_rings[i].ring) - base;
- }
- - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] =
- - netmap_ring_offset(na->nm_mem, na->tx_rings[i].ring) - base;
- + *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i] = ofs;
- }
- for (i = 0; i < n[NR_RX]; i++) {
- - if (na->rx_rings[i].ring == NULL) {
- - // XXX maybe use the offset of an error ring,
- - // like we do for buffers?
- - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = 0;
- - continue;
- + /* XXX instead of ofs == 0 maybe use the offset of an error
- + * ring, like we do for buffers? */
- + ssize_t ofs = 0;
- +
- + if (na->rx_rings[i].ring != NULL && i >= priv->np_qfirst[NR_RX]
- + && i < priv->np_qlast[NR_RX]) {
- + ofs = netmap_ring_offset(na->nm_mem,
- + na->rx_rings[i].ring) - base;
- }
- - *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] =
- - netmap_ring_offset(na->nm_mem, na->rx_rings[i].ring) - base;
- + *(ssize_t *)(uintptr_t)&nifp->ring_ofs[i+n[NR_TX]] = ofs;
- }
- NMA_UNLOCK(na->nm_mem);
- @@ -1821,7 +1842,7 @@ netmap_mem2_if_delete(struct netmap_adapter *na, struct netmap_if *nifp)
- }
- static void
- -netmap_mem_global_deref(struct netmap_mem_d *nmd)
- +netmap_mem2_deref(struct netmap_mem_d *nmd)
- {
- nmd->active--;
- @@ -1836,25 +1857,11 @@ struct netmap_mem_ops netmap_mem_global_ops = {
- .nmd_get_lut = netmap_mem2_get_lut,
- .nmd_get_info = netmap_mem2_get_info,
- .nmd_ofstophys = netmap_mem2_ofstophys,
- - .nmd_config = netmap_mem_global_config,
- - .nmd_finalize = netmap_mem_global_finalize,
- - .nmd_deref = netmap_mem_global_deref,
- - .nmd_delete = netmap_mem_global_delete,
- - .nmd_if_offset = netmap_mem2_if_offset,
- - .nmd_if_new = netmap_mem2_if_new,
- - .nmd_if_delete = netmap_mem2_if_delete,
- - .nmd_rings_create = netmap_mem2_rings_create,
- - .nmd_rings_delete = netmap_mem2_rings_delete
- -};
- -struct netmap_mem_ops netmap_mem_private_ops = {
- - .nmd_get_lut = netmap_mem2_get_lut,
- - .nmd_get_info = netmap_mem2_get_info,
- - .nmd_ofstophys = netmap_mem2_ofstophys,
- - .nmd_config = netmap_mem_private_config,
- - .nmd_finalize = netmap_mem_private_finalize,
- - .nmd_deref = netmap_mem_private_deref,
- + .nmd_config = netmap_mem2_config,
- + .nmd_finalize = netmap_mem2_finalize,
- + .nmd_deref = netmap_mem2_deref,
- + .nmd_delete = netmap_mem2_delete,
- .nmd_if_offset = netmap_mem2_if_offset,
- - .nmd_delete = netmap_mem_private_delete,
- .nmd_if_new = netmap_mem2_if_new,
- .nmd_if_delete = netmap_mem2_if_delete,
- .nmd_rings_create = netmap_mem2_rings_create,
- @@ -1862,20 +1869,15 @@ struct netmap_mem_ops netmap_mem_private_ops = {
- };
- int
- -netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_adapter *na)
- +netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_mem_d *nmd)
- {
- uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1;
- struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp);
- - struct netmap_mem_d *nmd = na->nm_mem;
- struct netmap_pools_info pi;
- unsigned int memsize;
- uint16_t memid;
- int ret;
- - if (!nmd) {
- - return -1;
- - }
- -
- ret = netmap_mem_get_info(nmd, &memsize, NULL, &memid);
- if (ret) {
- return ret;
- @@ -1883,6 +1885,7 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_adapter *na)
- pi.memsize = memsize;
- pi.memid = memid;
- + NMA_LOCK(nmd);
- pi.if_pool_offset = 0;
- pi.if_pool_objtotal = nmd->pools[NETMAP_IF_POOL].objtotal;
- pi.if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize;
- @@ -1895,6 +1898,7 @@ netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_adapter *na)
- nmd->pools[NETMAP_RING_POOL].memtotal;
- pi.buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal;
- pi.buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize;
- + NMA_UNLOCK(nmd);
- ret = copyout(&pi, upi, sizeof(pi));
- if (ret) {
- @@ -1929,8 +1933,7 @@ netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp,
- unsigned int nifp_offset)
- {
- struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd;
- - struct mem_pt_if *ptif = malloc(sizeof(*ptif), M_NETMAP,
- - M_NOWAIT | M_ZERO);
- + struct mem_pt_if *ptif = nm_os_malloc(sizeof(*ptif));
- if (!ptif) {
- return ENOMEM;
- @@ -1989,7 +1992,7 @@ netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *nmd, struct ifnet *ifp)
- }
- D("removed (ifp=%p,nifp_offset=%u)",
- curr->ifp, curr->nifp_offset);
- - free(curr, M_NETMAP);
- + nm_os_free(curr);
- ret = 0;
- break;
- }
- @@ -2143,7 +2146,7 @@ netmap_mem_pt_guest_deref(struct netmap_mem_d *nmd)
- if (ptnmd->ptn_dev) {
- nm_os_pt_memdev_iounmap(ptnmd->ptn_dev);
- }
- - ptnmd->nm_addr = NULL;
- + ptnmd->nm_addr = 0;
- ptnmd->nm_paddr = 0;
- }
- }
- @@ -2165,15 +2168,14 @@ netmap_mem_pt_guest_delete(struct netmap_mem_d *nmd)
- D("deleting %p", nmd);
- if (nmd->active > 0)
- D("bug: deleting mem allocator with active=%d!", nmd->active);
- - nm_mem_release_id(nmd);
- if (netmap_verbose)
- D("done deleting %p", nmd);
- NMA_LOCK_DESTROY(nmd);
- - free(nmd, M_DEVBUF);
- + nm_os_free(nmd);
- }
- static struct netmap_if *
- -netmap_mem_pt_guest_if_new(struct netmap_adapter *na)
- +netmap_mem_pt_guest_if_new(struct netmap_adapter *na, struct netmap_priv_d *priv)
- {
- struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)na->nm_mem;
- struct mem_pt_if *ptif;
- @@ -2275,7 +2277,7 @@ static struct netmap_mem_ops netmap_mem_pt_guest_ops = {
- .nmd_rings_delete = netmap_mem_pt_guest_rings_delete
- };
- -/* Called with NMA_LOCK(&nm_mem) held. */
- +/* Called with nm_mem_list_lock held. */
- static struct netmap_mem_d *
- netmap_mem_pt_guest_find_memid(nm_memid_t mem_id)
- {
- @@ -2287,6 +2289,8 @@ netmap_mem_pt_guest_find_memid(nm_memid_t mem_id)
- if (scan->ops->nmd_deref == netmap_mem_pt_guest_deref &&
- ((struct netmap_mem_ptg *)(scan))->host_mem_id == mem_id) {
- mem = scan;
- + mem->refcount++;
- + NM_DBG_REFC(mem, __FUNCTION__, __LINE__);
- break;
- }
- scan = scan->next;
- @@ -2295,15 +2299,14 @@ netmap_mem_pt_guest_find_memid(nm_memid_t mem_id)
- return mem;
- }
- -/* Called with NMA_LOCK(&nm_mem) held. */
- +/* Called with nm_mem_list_lock held. */
- static struct netmap_mem_d *
- netmap_mem_pt_guest_create(nm_memid_t mem_id)
- {
- struct netmap_mem_ptg *ptnmd;
- int err = 0;
- - ptnmd = malloc(sizeof(struct netmap_mem_ptg),
- - M_DEVBUF, M_NOWAIT | M_ZERO);
- + ptnmd = nm_os_malloc(sizeof(struct netmap_mem_ptg));
- if (ptnmd == NULL) {
- err = ENOMEM;
- goto error;
- @@ -2323,6 +2326,9 @@ netmap_mem_pt_guest_create(nm_memid_t mem_id)
- NMA_LOCK_INIT(&ptnmd->up);
- + snprintf(ptnmd->up.name, NM_MEM_NAMESZ, "%d", ptnmd->up.nm_id);
- +
- +
- return &ptnmd->up;
- error:
- netmap_mem_pt_guest_delete(&ptnmd->up);
- @@ -2338,12 +2344,12 @@ netmap_mem_pt_guest_get(nm_memid_t mem_id)
- {
- struct netmap_mem_d *nmd;
- - NMA_LOCK(&nm_mem);
- + NM_MTX_LOCK(nm_mem_list_lock);
- nmd = netmap_mem_pt_guest_find_memid(mem_id);
- if (nmd == NULL) {
- nmd = netmap_mem_pt_guest_create(mem_id);
- }
- - NMA_UNLOCK(&nm_mem);
- + NM_MTX_UNLOCK(nm_mem_list_lock);
- return nmd;
- }
- diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h
- index f170df9d549..66e688afd39 100644
- --- a/sys/dev/netmap/netmap_mem2.h
- +++ b/sys/dev/netmap/netmap_mem2.h
- @@ -27,7 +27,7 @@
- */
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/dev/netmap/netmap_mem2.c 234290 2012-04-14 16:44:18Z luigi $
- *
- * (New) memory allocator for netmap
- */
- @@ -119,8 +119,10 @@
- */
- extern struct netmap_mem_d nm_mem;
- +typedef uint16_t nm_memid_t;
- int netmap_mem_get_lut(struct netmap_mem_d *, struct netmap_lut *);
- +nm_memid_t netmap_mem_get_id(struct netmap_mem_d *);
- vm_paddr_t netmap_mem_ofstophys(struct netmap_mem_d *, vm_ooffset_t);
- #ifdef _WIN32
- PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd);
- @@ -128,7 +130,7 @@ PMDL win32_build_user_vm_map(struct netmap_mem_d* nmd);
- int netmap_mem_finalize(struct netmap_mem_d *, struct netmap_adapter *);
- int netmap_mem_init(void);
- void netmap_mem_fini(void);
- -struct netmap_if * netmap_mem_if_new(struct netmap_adapter *);
- +struct netmap_if * netmap_mem_if_new(struct netmap_adapter *, struct netmap_priv_d *);
- void netmap_mem_if_delete(struct netmap_adapter *, struct netmap_if *);
- int netmap_mem_rings_create(struct netmap_adapter *);
- void netmap_mem_rings_delete(struct netmap_adapter *);
- @@ -136,33 +138,15 @@ void netmap_mem_deref(struct netmap_mem_d *, struct netmap_adapter *);
- int netmap_mem2_get_pool_info(struct netmap_mem_d *, u_int, u_int *, u_int *);
- int netmap_mem_get_info(struct netmap_mem_d *, u_int *size, u_int *memflags, uint16_t *id);
- ssize_t netmap_mem_if_offset(struct netmap_mem_d *, const void *vaddr);
- -struct netmap_mem_d* netmap_mem_private_new(const char *name,
- - u_int txr, u_int txd, u_int rxr, u_int rxd, u_int extra_bufs, u_int npipes,
- - int* error);
- +struct netmap_mem_d* netmap_mem_private_new( u_int txr, u_int txd, u_int rxr, u_int rxd,
- + u_int extra_bufs, u_int npipes, int* error);
- void netmap_mem_delete(struct netmap_mem_d *);
- -//#define NM_DEBUG_MEM_PUTGET 1
- -
- -#ifdef NM_DEBUG_MEM_PUTGET
- -
- -#define netmap_mem_get(nmd) \
- - do { \
- - __netmap_mem_get(nmd, __FUNCTION__, __LINE__); \
- - } while (0)
- -
- -#define netmap_mem_put(nmd) \
- - do { \
- - __netmap_mem_put(nmd, __FUNCTION__, __LINE__); \
- - } while (0)
- -
- -void __netmap_mem_get(struct netmap_mem_d *, const char *, int);
- +#define netmap_mem_get(d) __netmap_mem_get(d, __FUNCTION__, __LINE__)
- +#define netmap_mem_put(d) __netmap_mem_put(d, __FUNCTION__, __LINE__)
- +struct netmap_mem_d* __netmap_mem_get(struct netmap_mem_d *, const char *, int);
- void __netmap_mem_put(struct netmap_mem_d *, const char *, int);
- -#else /* !NM_DEBUG_MEM_PUTGET */
- -
- -void netmap_mem_get(struct netmap_mem_d *);
- -void netmap_mem_put(struct netmap_mem_d *);
- -
- -#endif /* !NM_DEBUG_PUTGET */
- +struct netmap_mem_d* netmap_mem_find(nm_memid_t);
- #ifdef WITH_PTNETMAP_GUEST
- struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *,
- @@ -173,7 +157,7 @@ struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16
- int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *);
- #endif /* WITH_PTNETMAP_GUEST */
- -int netmap_mem_pools_info_get(struct nmreq *, struct netmap_adapter *);
- +int netmap_mem_pools_info_get(struct nmreq *, struct netmap_mem_d *);
- #define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */
- #define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */
- diff --git a/sys/dev/netmap/netmap_monitor.c b/sys/dev/netmap/netmap_monitor.c
- index bf6e23f5546..174f35e5c6c 100644
- --- a/sys/dev/netmap/netmap_monitor.c
- +++ b/sys/dev/netmap/netmap_monitor.c
- @@ -25,7 +25,7 @@
- */
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/dev/netmap/netmap_zmon.c 270063 2014-08-16 15:00:01Z luigi $
- *
- * Monitors
- *
- @@ -128,6 +128,13 @@
- ********************************************************************
- */
- +static int netmap_zmon_reg(struct netmap_adapter *, int);
- +static int
- +nm_is_zmon(struct netmap_adapter *na)
- +{
- + return na->nm_register == netmap_zmon_reg;
- +}
- +
- /* nm_sync callback for the monitor's own tx rings.
- * This makes no sense and always returns error
- */
- @@ -148,7 +155,7 @@ static int
- netmap_monitor_rxsync(struct netmap_kring *kring, int flags)
- {
- ND("%s %x", kring->name, flags);
- - kring->nr_hwcur = kring->rcur;
- + kring->nr_hwcur = kring->rhead;
- mb();
- return 0;
- }
- @@ -185,19 +192,16 @@ nm_txrx2flag(enum txrx t)
- static int
- nm_monitor_alloc(struct netmap_kring *kring, u_int n)
- {
- - size_t len;
- + size_t old_len, len;
- struct netmap_kring **nm;
- if (n <= kring->max_monitors)
- /* we already have more entries that requested */
- return 0;
- + old_len = sizeof(struct netmap_kring *)*kring->max_monitors;
- len = sizeof(struct netmap_kring *) * n;
- -#ifndef _WIN32
- - nm = realloc(kring->monitors, len, M_DEVBUF, M_NOWAIT | M_ZERO);
- -#else
- - nm = realloc(kring->monitors, len, sizeof(struct netmap_kring *)*kring->max_monitors);
- -#endif
- + nm = nm_os_realloc(kring->monitors, len, old_len);
- if (nm == NULL)
- return ENOMEM;
- @@ -216,13 +220,22 @@ nm_monitor_dealloc(struct netmap_kring *kring)
- D("freeing not empty monitor array for %s (%d dangling monitors)!", kring->name,
- kring->n_monitors);
- }
- - free(kring->monitors, M_DEVBUF);
- + nm_os_free(kring->monitors);
- kring->monitors = NULL;
- kring->max_monitors = 0;
- kring->n_monitors = 0;
- }
- }
- +/* returns 1 iff kring has no monitors */
- +static inline int
- +nm_monitor_none(struct netmap_kring *kring)
- +{
- + return kring->n_monitors == 0 &&
- + kring->zmon_list[NR_TX].next == NULL &&
- + kring->zmon_list[NR_RX].next == NULL;
- +}
- +
- /*
- * monitors work by replacing the nm_sync() and possibly the
- * nm_notify() callbacks in the monitored rings.
- @@ -233,71 +246,122 @@ static int netmap_monitor_parent_txsync(struct netmap_kring *, int);
- static int netmap_monitor_parent_rxsync(struct netmap_kring *, int);
- static int netmap_monitor_parent_notify(struct netmap_kring *, int);
- -
- /* add the monitor mkring to the list of monitors of kring.
- * If this is the first monitor, intercept the callbacks
- */
- static int
- -netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zcopy)
- +netmap_monitor_add(struct netmap_kring *mkring, struct netmap_kring *kring, int zmon)
- {
- int error = NM_IRQ_COMPLETED;
- + enum txrx t = kring->tx;
- + struct netmap_zmon_list *z = &kring->zmon_list[t];
- + struct netmap_zmon_list *mz = &mkring->zmon_list[t];
- +
- + /* a zero-copy monitor which is not the first in the list
- + * must monitor the previous monitor
- + */
- + if (zmon && z->prev != NULL)
- + kring = z->prev;
- /* sinchronize with concurrently running nm_sync()s */
- nm_kr_stop(kring, NM_KR_LOCKED);
- - /* make sure the monitor array exists and is big enough */
- - error = nm_monitor_alloc(kring, kring->n_monitors + 1);
- - if (error)
- - goto out;
- - kring->monitors[kring->n_monitors] = mkring;
- - mkring->mon_pos = kring->n_monitors;
- - kring->n_monitors++;
- - if (kring->n_monitors == 1) {
- +
- + if (nm_monitor_none(kring)) {
- /* this is the first monitor, intercept callbacks */
- - ND("%s: intercept callbacks on %s", mkring->name, kring->name);
- + ND("intercept callbacks on %s", kring->name);
- kring->mon_sync = kring->nm_sync;
- - /* zcopy monitors do not override nm_notify(), but
- - * we save the original one regardless, so that
- - * netmap_monitor_del() does not need to know the
- - * monitor type
- - */
- kring->mon_notify = kring->nm_notify;
- if (kring->tx == NR_TX) {
- - kring->nm_sync = (zcopy ? netmap_zmon_parent_txsync :
- - netmap_monitor_parent_txsync);
- + kring->nm_sync = netmap_monitor_parent_txsync;
- } else {
- - kring->nm_sync = (zcopy ? netmap_zmon_parent_rxsync :
- - netmap_monitor_parent_rxsync);
- - if (!zcopy) {
- - /* also intercept notify */
- - kring->nm_notify = netmap_monitor_parent_notify;
- - kring->mon_tail = kring->nr_hwtail;
- - }
- + kring->nm_sync = netmap_monitor_parent_rxsync;
- + kring->nm_notify = netmap_monitor_parent_notify;
- + kring->mon_tail = kring->nr_hwtail;
- }
- }
- + if (zmon) {
- + /* append the zmon to the list */
- + struct netmap_monitor_adapter *mna =
- + (struct netmap_monitor_adapter *)mkring->na;
- + struct netmap_adapter *pna;
- +
- + if (z->prev != NULL)
- + z->prev->zmon_list[t].next = mkring;
- + mz->prev = z->prev;
- + z->prev = mkring;
- + if (z->next == NULL)
- + z->next = mkring;
- +
- + /* grap a reference to the previous netmap adapter
- + * in the chain (this may be the monitored port
- + * or another zero-copy monitor)
- + */
- + pna = kring->na;
- + netmap_adapter_get(pna);
- + netmap_adapter_put(mna->priv.np_na);
- + mna->priv.np_na = pna;
- + } else {
- + /* make sure the monitor array exists and is big enough */
- + error = nm_monitor_alloc(kring, kring->n_monitors + 1);
- + if (error)
- + goto out;
- + kring->monitors[kring->n_monitors] = mkring;
- + mkring->mon_pos[kring->tx] = kring->n_monitors;
- + kring->n_monitors++;
- + }
- +
- out:
- nm_kr_start(kring);
- return error;
- }
- -
- /* remove the monitor mkring from the list of monitors of kring.
- * If this is the last monitor, restore the original callbacks
- */
- static void
- netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
- {
- + struct netmap_zmon_list *mz = &mkring->zmon_list[kring->tx];
- + int zmon = nm_is_zmon(mkring->na);
- +
- +
- + if (zmon && mz->prev != NULL)
- + kring = mz->prev;
- +
- /* sinchronize with concurrently running nm_sync()s */
- nm_kr_stop(kring, NM_KR_LOCKED);
- - kring->n_monitors--;
- - if (mkring->mon_pos != kring->n_monitors) {
- - kring->monitors[mkring->mon_pos] = kring->monitors[kring->n_monitors];
- - kring->monitors[mkring->mon_pos]->mon_pos = mkring->mon_pos;
- +
- + if (zmon) {
- + /* remove the monitor from the list */
- + if (mz->prev != NULL)
- + mz->prev->zmon_list[kring->tx].next = mz->next;
- + else
- + kring->zmon_list[kring->tx].next = mz->next;
- + if (mz->next != NULL) {
- + mz->next->zmon_list[kring->tx].prev = mz->prev;
- + } else {
- + kring->zmon_list[kring->tx].prev = mz->prev;
- + }
- + } else {
- + /* this is a copy monitor */
- + uint32_t mon_pos = mkring->mon_pos[kring->tx];
- + kring->n_monitors--;
- + if (mon_pos != kring->n_monitors) {
- + kring->monitors[mon_pos] =
- + kring->monitors[kring->n_monitors];
- + kring->monitors[mon_pos]->mon_pos[kring->tx] = mon_pos;
- + }
- + kring->monitors[kring->n_monitors] = NULL;
- + if (kring->n_monitors == 0) {
- + nm_monitor_dealloc(kring);
- + }
- }
- - kring->monitors[kring->n_monitors] = NULL;
- - if (kring->n_monitors == 0) {
- - /* this was the last monitor, restore callbacks and delete monitor array */
- - ND("%s: restoring sync on %s: %p", mkring->name, kring->name, kring->mon_sync);
- +
- + if (nm_monitor_none(kring)) {
- + /* this was the last monitor, restore the callbacks */
- + ND("%s: restoring sync on %s: %p", mkring->name, kring->name,
- + kring->mon_sync);
- kring->nm_sync = kring->mon_sync;
- kring->mon_sync = NULL;
- if (kring->tx == NR_RX) {
- @@ -306,8 +370,8 @@ netmap_monitor_del(struct netmap_kring *mkring, struct netmap_kring *kring)
- kring->nm_notify = kring->mon_notify;
- kring->mon_notify = NULL;
- }
- - nm_monitor_dealloc(kring);
- }
- +
- nm_kr_start(kring);
- }
- @@ -329,6 +393,7 @@ netmap_monitor_stop(struct netmap_adapter *na)
- for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
- + struct netmap_kring *zkring;
- u_int j;
- for (j = 0; j < kring->n_monitors; j++) {
- @@ -337,8 +402,33 @@ netmap_monitor_stop(struct netmap_adapter *na)
- struct netmap_monitor_adapter *mna =
- (struct netmap_monitor_adapter *)mkring->na;
- /* forget about this adapter */
- - netmap_adapter_put(mna->priv.np_na);
- - mna->priv.np_na = NULL;
- + if (mna->priv.np_na != NULL) {
- + netmap_adapter_put(mna->priv.np_na);
- + mna->priv.np_na = NULL;
- + }
- + }
- +
- + zkring = kring->zmon_list[kring->tx].next;
- + if (zkring != NULL) {
- + struct netmap_monitor_adapter *next =
- + (struct netmap_monitor_adapter *)zkring->na;
- + struct netmap_monitor_adapter *this =
- + (struct netmap_monitor_adapter *)na;
- + struct netmap_adapter *pna = this->priv.np_na;
- + /* let the next monitor forget about us */
- + if (next->priv.np_na != NULL) {
- + netmap_adapter_put(next->priv.np_na);
- + }
- + if (pna != NULL && nm_is_zmon(na)) {
- + /* we are a monitor ourselves and we may
- + * need to pass down the reference to
- + * the previous adapter in the chain
- + */
- + netmap_adapter_get(pna);
- + next->priv.np_na = pna;
- + continue;
- + }
- + next->priv.np_na = NULL;
- }
- }
- }
- @@ -357,7 +447,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
- struct netmap_adapter *pna = priv->np_na;
- struct netmap_kring *kring, *mkring;
- int i;
- - enum txrx t;
- + enum txrx t, s;
- ND("%p: onoff %d", na, onoff);
- if (onoff) {
- @@ -367,13 +457,19 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
- return ENXIO;
- }
- for_rx_tx(t) {
- - if (mna->flags & nm_txrx2flag(t)) {
- - for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
- - kring = &NMR(pna, t)[i];
- - mkring = &na->rx_rings[i];
- - if (nm_kring_pending_on(mkring)) {
- + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- + mkring = &NMR(na, t)[i];
- + if (!nm_kring_pending_on(mkring))
- + continue;
- + mkring->nr_mode = NKR_NETMAP_ON;
- + if (t == NR_TX)
- + continue;
- + for_rx_tx(s) {
- + if (i > nma_get_nrings(pna, s))
- + continue;
- + if (mna->flags & nm_txrx2flag(s)) {
- + kring = &NMR(pna, s)[i];
- netmap_monitor_add(mkring, kring, zmon);
- - mkring->nr_mode = NKR_NETMAP_ON;
- }
- }
- }
- @@ -383,19 +479,25 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
- if (na->active_fds == 0)
- na->na_flags &= ~NAF_NETMAP_ON;
- for_rx_tx(t) {
- - if (mna->flags & nm_txrx2flag(t)) {
- - for (i = priv->np_qfirst[t]; i < priv->np_qlast[t]; i++) {
- - mkring = &na->rx_rings[i];
- - if (nm_kring_pending_off(mkring)) {
- - mkring->nr_mode = NKR_NETMAP_OFF;
- - /* we cannot access the parent krings if the parent
- - * has left netmap mode. This is signaled by a NULL
- - * pna pointer
- - */
- - if (pna) {
- - kring = &NMR(pna, t)[i];
- - netmap_monitor_del(mkring, kring);
- - }
- + for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- + mkring = &NMR(na, t)[i];
- + if (!nm_kring_pending_off(mkring))
- + continue;
- + mkring->nr_mode = NKR_NETMAP_OFF;
- + if (t == NR_TX)
- + continue;
- + /* we cannot access the parent krings if the parent
- + * has left netmap mode. This is signaled by a NULL
- + * pna pointer
- + */
- + if (pna == NULL)
- + continue;
- + for_rx_tx(s) {
- + if (i > nma_get_nrings(pna, s))
- + continue;
- + if (mna->flags & nm_txrx2flag(s)) {
- + kring = &NMR(pna, s)[i];
- + netmap_monitor_del(mkring, kring);
- }
- }
- }
- @@ -417,7 +519,7 @@ netmap_monitor_reg_common(struct netmap_adapter *na, int onoff, int zmon)
- static int
- netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
- {
- - struct netmap_kring *mkring = kring->monitors[0];
- + struct netmap_kring *mkring = kring->zmon_list[tx].next;
- struct netmap_ring *ring = kring->ring, *mring;
- int error = 0;
- int rel_slots, free_slots, busy, sent = 0;
- @@ -434,11 +536,11 @@ netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
- /* get the relased slots (rel_slots) */
- if (tx == NR_TX) {
- - beg = kring->nr_hwtail;
- + beg = kring->nr_hwtail + 1;
- error = kring->mon_sync(kring, flags);
- if (error)
- return error;
- - end = kring->nr_hwtail;
- + end = kring->nr_hwtail + 1;
- } else { /* NR_RX */
- beg = kring->nr_hwcur;
- end = kring->rhead;
- @@ -473,10 +575,10 @@ netmap_zmon_parent_sync(struct netmap_kring *kring, int flags, enum txrx tx)
- /* swap min(free_slots, rel_slots) slots */
- if (free_slots < rel_slots) {
- beg += (rel_slots - free_slots);
- - if (beg >= kring->nkr_num_slots)
- - beg -= kring->nkr_num_slots;
- rel_slots = free_slots;
- }
- + if (unlikely(beg >= kring->nkr_num_slots))
- + beg -= kring->nkr_num_slots;
- sent = rel_slots;
- for ( ; rel_slots; rel_slots--) {
- @@ -521,7 +623,6 @@ out_rxsync:
- static int
- netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
- {
- - ND("%s %x", kring->name, flags);
- return netmap_zmon_parent_sync(kring, flags, NR_TX);
- }
- @@ -529,11 +630,9 @@ netmap_zmon_parent_txsync(struct netmap_kring *kring, int flags)
- static int
- netmap_zmon_parent_rxsync(struct netmap_kring *kring, int flags)
- {
- - ND("%s %x", kring->name, flags);
- return netmap_zmon_parent_sync(kring, flags, NR_RX);
- }
- -
- static int
- netmap_zmon_reg(struct netmap_adapter *na, int onoff)
- {
- @@ -638,12 +737,17 @@ netmap_monitor_parent_txsync(struct netmap_kring *kring, int flags)
- int new_slots;
- /* get the new slots */
- - first_new = kring->nr_hwcur;
- - new_slots = kring->rhead - first_new;
- - if (new_slots < 0)
- - new_slots += kring->nkr_num_slots;
- - if (new_slots)
- - netmap_monitor_parent_sync(kring, first_new, new_slots);
- + if (kring->n_monitors > 0) {
- + first_new = kring->nr_hwcur;
- + new_slots = kring->rhead - first_new;
- + if (new_slots < 0)
- + new_slots += kring->nkr_num_slots;
- + if (new_slots)
- + netmap_monitor_parent_sync(kring, first_new, new_slots);
- + }
- + if (kring->zmon_list[NR_TX].next != NULL) {
- + return netmap_zmon_parent_txsync(kring, flags);
- + }
- return kring->mon_sync(kring, flags);
- }
- @@ -655,16 +759,22 @@ netmap_monitor_parent_rxsync(struct netmap_kring *kring, int flags)
- int new_slots, error;
- /* get the new slots */
- - error = kring->mon_sync(kring, flags);
- + if (kring->zmon_list[NR_RX].next != NULL) {
- + error = netmap_zmon_parent_rxsync(kring, flags);
- + } else {
- + error = kring->mon_sync(kring, flags);
- + }
- if (error)
- return error;
- - first_new = kring->mon_tail;
- - new_slots = kring->nr_hwtail - first_new;
- - if (new_slots < 0)
- - new_slots += kring->nkr_num_slots;
- - if (new_slots)
- - netmap_monitor_parent_sync(kring, first_new, new_slots);
- - kring->mon_tail = kring->nr_hwtail;
- + if (kring->n_monitors > 0) {
- + first_new = kring->mon_tail;
- + new_slots = kring->nr_hwtail - first_new;
- + if (new_slots < 0)
- + new_slots += kring->nkr_num_slots;
- + if (new_slots)
- + netmap_monitor_parent_sync(kring, first_new, new_slots);
- + kring->mon_tail = kring->nr_hwtail;
- + }
- return 0;
- }
- @@ -684,12 +794,14 @@ netmap_monitor_parent_notify(struct netmap_kring *kring, int flags)
- }
- if (kring->n_monitors > 0) {
- netmap_monitor_parent_rxsync(kring, NAF_FORCE_READ);
- - notify = kring->mon_notify;
- - } else {
- + }
- + if (nm_monitor_none(kring)) {
- /* we are no longer monitoring this ring, so both
- * mon_sync and mon_notify are NULL
- */
- notify = kring->nm_notify;
- + } else {
- + notify = kring->mon_notify;
- }
- nm_kr_put(kring);
- return notify(kring, flags);
- @@ -716,24 +828,21 @@ netmap_monitor_dtor(struct netmap_adapter *na)
- /* check if nmr is a request for a monitor adapter that we can satisfy */
- int
- -netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- +netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na,
- + struct netmap_mem_d *nmd, int create)
- {
- struct nmreq pnmr;
- struct netmap_adapter *pna; /* parent adapter */
- struct netmap_monitor_adapter *mna;
- struct ifnet *ifp = NULL;
- - int i, error;
- - enum txrx t;
- + int error;
- int zcopy = (nmr->nr_flags & NR_ZCOPY_MON);
- char monsuff[10] = "";
- + if (zcopy) {
- + nmr->nr_flags |= (NR_MONITOR_TX | NR_MONITOR_RX);
- + }
- if ((nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX)) == 0) {
- - if (nmr->nr_flags & NR_ZCOPY_MON) {
- - /* the flag makes no sense unless you are
- - * creating a monitor
- - */
- - return EINVAL;
- - }
- ND("not a monitor");
- return 0;
- }
- @@ -741,12 +850,6 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- ND("flags %x", nmr->nr_flags);
- - mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
- - if (mna == NULL) {
- - D("memory error");
- - return ENOMEM;
- - }
- -
- /* first, try to find the adapter that we want to monitor
- * We use the same nmr, after we have turned off the monitor flags.
- * In this way we can potentially monitor everything netmap understands,
- @@ -754,10 +857,9 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- */
- memcpy(&pnmr, nmr, sizeof(pnmr));
- pnmr.nr_flags &= ~(NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON);
- - error = netmap_get_na(&pnmr, &pna, &ifp, create);
- + error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create);
- if (error) {
- D("parent lookup failed: %d", error);
- - free(mna, M_DEVBUF);
- return error;
- }
- ND("found parent: %s", pna->name);
- @@ -772,12 +874,19 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- goto put_out;
- }
- - /* grab all the rings we need in the parent */
- + mna = nm_os_malloc(sizeof(*mna));
- + if (mna == NULL) {
- + D("memory error");
- + error = ENOMEM;
- + goto put_out;
- + }
- mna->priv.np_na = pna;
- +
- + /* grab all the rings we need in the parent */
- error = netmap_interp_ringid(&mna->priv, nmr->nr_ringid, nmr->nr_flags);
- if (error) {
- D("ringid error");
- - goto put_out;
- + goto free_out;
- }
- if (mna->priv.np_qlast[NR_TX] - mna->priv.np_qfirst[NR_TX] == 1) {
- snprintf(monsuff, 10, "-%d", mna->priv.np_qfirst[NR_TX]);
- @@ -788,57 +897,14 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- (nmr->nr_flags & NR_MONITOR_RX) ? "r" : "",
- (nmr->nr_flags & NR_MONITOR_TX) ? "t" : "");
- - if (zcopy) {
- - /* zero copy monitors need exclusive access to the monitored rings */
- - for_rx_tx(t) {
- - if (! (nmr->nr_flags & nm_txrx2flag(t)))
- - continue;
- - for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
- - struct netmap_kring *kring = &NMR(pna, t)[i];
- - if (kring->n_monitors > 0) {
- - error = EBUSY;
- - D("ring %s already monitored by %s", kring->name,
- - kring->monitors[0]->name);
- - goto put_out;
- - }
- - }
- - }
- - mna->up.nm_register = netmap_zmon_reg;
- - mna->up.nm_dtor = netmap_zmon_dtor;
- - /* to have zero copy, we need to use the same memory allocator
- - * as the monitored port
- - */
- - mna->up.nm_mem = pna->nm_mem;
- - mna->up.na_lut = pna->na_lut;
- - } else {
- - /* normal monitors are incompatible with zero copy ones */
- - for_rx_tx(t) {
- - if (! (nmr->nr_flags & nm_txrx2flag(t)))
- - continue;
- - for (i = mna->priv.np_qfirst[t]; i < mna->priv.np_qlast[t]; i++) {
- - struct netmap_kring *kring = &NMR(pna, t)[i];
- - if (kring->n_monitors > 0 &&
- - kring->monitors[0]->na->nm_register == netmap_zmon_reg)
- - {
- - error = EBUSY;
- - D("ring busy");
- - goto put_out;
- - }
- - }
- - }
- - mna->up.nm_rxsync = netmap_monitor_rxsync;
- - mna->up.nm_register = netmap_monitor_reg;
- - mna->up.nm_dtor = netmap_monitor_dtor;
- - }
- -
- /* the monitor supports the host rings iff the parent does */
- - mna->up.na_flags = (pna->na_flags & NAF_HOST_RINGS);
- + mna->up.na_flags |= (pna->na_flags & NAF_HOST_RINGS);
- /* a do-nothing txsync: monitors cannot be used to inject packets */
- mna->up.nm_txsync = netmap_monitor_txsync;
- mna->up.nm_rxsync = netmap_monitor_rxsync;
- mna->up.nm_krings_create = netmap_monitor_krings_create;
- mna->up.nm_krings_delete = netmap_monitor_krings_delete;
- - mna->up.num_tx_rings = 1; // XXX we don't need it, but field can't be zero
- + mna->up.num_tx_rings = 1; // XXX what should we do here with chained zmons?
- /* we set the number of our rx_rings to be max(num_rx_rings, num_rx_rings)
- * in the parent
- */
- @@ -855,14 +921,38 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- mna->up.num_rx_desc = nmr->nr_rx_slots;
- nm_bound_var(&mna->up.num_rx_desc, pna->num_rx_desc,
- 1, NM_MONITOR_MAXSLOTS, NULL);
- + if (zcopy) {
- + mna->up.nm_register = netmap_zmon_reg;
- + mna->up.nm_dtor = netmap_zmon_dtor;
- + /* to have zero copy, we need to use the same memory allocator
- + * as the monitored port
- + */
- + mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
- + /* and the allocator cannot be changed */
- + mna->up.na_flags |= NAF_MEM_OWNER;
- + } else {
- + mna->up.nm_register = netmap_monitor_reg;
- + mna->up.nm_dtor = netmap_monitor_dtor;
- + mna->up.nm_mem = netmap_mem_private_new(
- + mna->up.num_tx_rings,
- + mna->up.num_tx_desc,
- + mna->up.num_rx_rings,
- + mna->up.num_rx_desc,
- + 0, /* extra bufs */
- + 0, /* pipes */
- + &error);
- + if (mna->up.nm_mem == NULL)
- + goto put_out;
- + }
- +
- error = netmap_attach_common(&mna->up);
- if (error) {
- D("attach_common error");
- - goto put_out;
- + goto mem_put_out;
- }
- /* remember the traffic directions we have to monitor */
- - mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX));
- + mna->flags = (nmr->nr_flags & (NR_MONITOR_TX | NR_MONITOR_RX | NR_ZCOPY_MON));
- *na = &mna->up;
- netmap_adapter_get(*na);
- @@ -876,9 +966,12 @@ netmap_get_monitor_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- return 0;
- +mem_put_out:
- + netmap_mem_put(mna->up.nm_mem);
- +free_out:
- + nm_os_free(mna);
- put_out:
- netmap_unget_na(pna, ifp);
- - free(mna, M_DEVBUF);
- return error;
- }
- diff --git a/sys/dev/netmap/netmap_offloadings.c b/sys/dev/netmap/netmap_offloadings.c
- index f8da672ffa5..8e5de7f7a9f 100644
- --- a/sys/dev/netmap/netmap_offloadings.c
- +++ b/sys/dev/netmap/netmap_offloadings.c
- @@ -24,7 +24,7 @@
- * SUCH DAMAGE.
- */
- -/* $FreeBSD$ */
- +/* $FreeBSD: head/sys/dev/netmap/netmap_offloadings.c 261909 2014-02-15 04:53:04Z luigi $ */
- #if defined(__FreeBSD__)
- #include <sys/cdefs.h> /* prerequisite */
- diff --git a/sys/dev/netmap/netmap_pipe.c b/sys/dev/netmap/netmap_pipe.c
- index f00f73f8b9b..36f5a3c9d9b 100644
- --- a/sys/dev/netmap/netmap_pipe.c
- +++ b/sys/dev/netmap/netmap_pipe.c
- @@ -24,7 +24,7 @@
- * SUCH DAMAGE.
- */
- -/* $FreeBSD$ */
- +/* $FreeBSD: head/sys/dev/netmap/netmap_pipe.c 261909 2014-02-15 04:53:04Z luigi $ */
- #if defined(__FreeBSD__)
- #include <sys/cdefs.h> /* prerequisite */
- @@ -86,7 +86,7 @@ SYSEND;
- static int
- nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
- {
- - size_t len;
- + size_t old_len, len;
- struct netmap_pipe_adapter **npa;
- if (npipes <= na->na_max_pipes)
- @@ -96,12 +96,9 @@ nm_pipe_alloc(struct netmap_adapter *na, u_int npipes)
- if (npipes < na->na_next_pipe || npipes > NM_MAXPIPES)
- return EINVAL;
- + old_len = sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes;
- len = sizeof(struct netmap_pipe_adapter *) * npipes;
- -#ifndef _WIN32
- - npa = realloc(na->na_pipes, len, M_DEVBUF, M_NOWAIT | M_ZERO);
- -#else
- - npa = realloc(na->na_pipes, len, sizeof(struct netmap_pipe_adapter *)*na->na_max_pipes);
- -#endif
- + npa = nm_os_realloc(na->na_pipes, len, old_len);
- if (npa == NULL)
- return ENOMEM;
- @@ -120,7 +117,7 @@ netmap_pipe_dealloc(struct netmap_adapter *na)
- D("freeing not empty pipe array for %s (%d dangling pipes)!", na->name,
- na->na_next_pipe);
- }
- - free(na->na_pipes, M_DEVBUF);
- + nm_os_free(na->na_pipes);
- na->na_pipes = NULL;
- na->na_max_pipes = 0;
- na->na_next_pipe = 0;
- @@ -175,7 +172,7 @@ netmap_pipe_remove(struct netmap_adapter *parent, struct netmap_pipe_adapter *na
- parent->na_pipes[n] = NULL;
- }
- -static int
- +int
- netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
- {
- struct netmap_kring *rxkring = txkring->pipe;
- @@ -240,7 +237,7 @@ netmap_pipe_txsync(struct netmap_kring *txkring, int flags)
- return 0;
- }
- -static int
- +int
- netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
- {
- struct netmap_kring *txkring = rxkring->pipe;
- @@ -289,7 +286,7 @@ netmap_pipe_rxsync(struct netmap_kring *rxkring, int flags)
- */
- -/* netmap_pipe_krings_delete.
- +/* netmap_pipe_krings_create.
- *
- * There are two cases:
- *
- @@ -320,7 +317,7 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
- int i;
- /* case 1) above */
- - D("%p: case 1, create both ends", na);
- + ND("%p: case 1, create both ends", na);
- error = netmap_krings_create(na, 0);
- if (error)
- goto err;
- @@ -334,8 +331,8 @@ netmap_pipe_krings_create(struct netmap_adapter *na)
- for_rx_tx(t) {
- enum txrx r = nm_txrx_swap(t); /* swap NR_TX <-> NR_RX */
- for (i = 0; i < nma_get_nrings(na, t); i++) {
- - NMR(na, t)[i].pipe = NMR(&pna->peer->up, r) + i;
- - NMR(&pna->peer->up, r)[i].pipe = NMR(na, t) + i;
- + NMR(na, t)[i].pipe = NMR(ona, r) + i;
- + NMR(ona, r)[i].pipe = NMR(na, t) + i;
- }
- }
- @@ -393,11 +390,11 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
- ND("%p: onoff %d", na, onoff);
- if (onoff) {
- for_rx_tx(t) {
- - for (i = 0; i < nma_get_nrings(na, t) + 1; i++) {
- + for (i = 0; i < nma_get_nrings(na, t); i++) {
- struct netmap_kring *kring = &NMR(na, t)[i];
- if (nm_kring_pending_on(kring)) {
- - /* mark the partner ring as needed */
- + /* mark the peer ring as needed */
- kring->pipe->nr_kflags |= NKR_NEEDRING;
- }
- }
- @@ -432,7 +429,9 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
- /* mark the peer ring as no longer needed by us
- * (it may still be kept if sombody else is using it)
- */
- - kring->pipe->nr_kflags &= ~NKR_NEEDRING;
- + if (kring->pipe) {
- + kring->pipe->nr_kflags &= ~NKR_NEEDRING;
- + }
- }
- }
- }
- @@ -441,7 +440,7 @@ netmap_pipe_reg(struct netmap_adapter *na, int onoff)
- }
- if (na->active_fds) {
- - D("active_fds %d", na->active_fds);
- + ND("active_fds %d", na->active_fds);
- return 0;
- }
- @@ -494,7 +493,7 @@ netmap_pipe_krings_delete(struct netmap_adapter *na)
- return;
- }
- /* case 1) above */
- - ND("%p: case 1, deleting everyhing", na);
- + ND("%p: case 1, deleting everything", na);
- netmap_krings_delete(na); /* also zeroes tx_rings etc. */
- ona = &pna->peer->up;
- if (ona->tx_rings == NULL) {
- @@ -511,7 +510,7 @@ netmap_pipe_dtor(struct netmap_adapter *na)
- {
- struct netmap_pipe_adapter *pna =
- (struct netmap_pipe_adapter *)na;
- - ND("%p", na);
- + ND("%p %p", na, pna->parent_ifp);
- if (pna->peer_ref) {
- ND("%p: clean up peer", na);
- pna->peer_ref = 0;
- @@ -519,12 +518,15 @@ netmap_pipe_dtor(struct netmap_adapter *na)
- }
- if (pna->role == NR_REG_PIPE_MASTER)
- netmap_pipe_remove(pna->parent, pna);
- + if (pna->parent_ifp)
- + if_rele(pna->parent_ifp);
- netmap_adapter_put(pna->parent);
- pna->parent = NULL;
- }
- int
- -netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- +netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na,
- + struct netmap_mem_d *nmd, int create)
- {
- struct nmreq pnmr;
- struct netmap_adapter *pna; /* parent adapter */
- @@ -532,7 +534,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- struct ifnet *ifp = NULL;
- u_int pipe_id;
- int role = nmr->nr_flags & NR_REG_MASK;
- - int error;
- + int error, retries = 0;
- ND("flags %x", nmr->nr_flags);
- @@ -547,12 +549,28 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- memcpy(&pnmr.nr_name, nmr->nr_name, IFNAMSIZ);
- /* pass to parent the requested number of pipes */
- pnmr.nr_arg1 = nmr->nr_arg1;
- - error = netmap_get_na(&pnmr, &pna, &ifp, create);
- - if (error) {
- - ND("parent lookup failed: %d", error);
- - return error;
- + for (;;) {
- + int create_error;
- +
- + error = netmap_get_na(&pnmr, &pna, &ifp, nmd, create);
- + if (!error)
- + break;
- + if (error != ENXIO || retries++) {
- + ND("parent lookup failed: %d", error);
- + return error;
- + }
- + ND("try to create a persistent vale port");
- + /* create a persistent vale port and try again */
- + NMG_UNLOCK();
- + create_error = netmap_vi_create(&pnmr, 1 /* autodelete */);
- + NMG_LOCK();
- + if (create_error && create_error != EEXIST) {
- + if (create_error != EOPNOTSUPP) {
- + D("failed to create a persistent vale port: %d", create_error);
- + }
- + return error;
- + }
- }
- - ND("found parent: %s", na->name);
- if (NETMAP_OWNED_BY_KERN(pna)) {
- ND("parent busy");
- @@ -575,7 +593,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- /* the pipe we have found already holds a ref to the parent,
- * so we need to drop the one we got from netmap_get_na()
- */
- - netmap_adapter_put(pna);
- + netmap_unget_na(pna, ifp);
- goto found;
- }
- ND("pipe %d not found, create %d", pipe_id, create);
- @@ -587,7 +605,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- * The endpoint we were asked for holds a reference to
- * the other one.
- */
- - mna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
- + mna = nm_os_malloc(sizeof(*mna));
- if (mna == NULL) {
- error = ENOMEM;
- goto put_out;
- @@ -597,6 +615,7 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- mna->id = pipe_id;
- mna->role = NR_REG_PIPE_MASTER;
- mna->parent = pna;
- + mna->parent_ifp = ifp;
- mna->up.nm_txsync = netmap_pipe_txsync;
- mna->up.nm_rxsync = netmap_pipe_rxsync;
- @@ -604,7 +623,8 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- mna->up.nm_dtor = netmap_pipe_dtor;
- mna->up.nm_krings_create = netmap_pipe_krings_create;
- mna->up.nm_krings_delete = netmap_pipe_krings_delete;
- - mna->up.nm_mem = pna->nm_mem;
- + mna->up.nm_mem = netmap_mem_get(pna->nm_mem);
- + mna->up.na_flags |= NAF_MEM_OWNER;
- mna->up.na_lut = pna->na_lut;
- mna->up.num_tx_rings = 1;
- @@ -624,13 +644,14 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- goto free_mna;
- /* create the slave */
- - sna = malloc(sizeof(*mna), M_DEVBUF, M_NOWAIT | M_ZERO);
- + sna = nm_os_malloc(sizeof(*mna));
- if (sna == NULL) {
- error = ENOMEM;
- goto unregister_mna;
- }
- /* most fields are the same, copy from master and then fix */
- *sna = *mna;
- + sna->up.nm_mem = netmap_mem_get(mna->up.nm_mem);
- snprintf(sna->up.name, sizeof(sna->up.name), "%s}%d", pna->name, pipe_id);
- sna->role = NR_REG_PIPE_SLAVE;
- error = netmap_attach_common(&sna->up);
- @@ -645,6 +666,9 @@ netmap_get_pipe_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- * need another one for the other endpoint we created
- */
- netmap_adapter_get(pna);
- + /* likewise for the ifp, if any */
- + if (ifp)
- + if_ref(ifp);
- if (role == NR_REG_PIPE_MASTER) {
- req = mna;
- @@ -667,19 +691,14 @@ found:
- * It will be released by the req destructor
- */
- - /* drop the ifp reference, if any */
- - if (ifp) {
- - if_rele(ifp);
- - }
- -
- return 0;
- free_sna:
- - free(sna, M_DEVBUF);
- + nm_os_free(sna);
- unregister_mna:
- netmap_pipe_remove(pna, mna);
- free_mna:
- - free(mna, M_DEVBUF);
- + nm_os_free(mna);
- put_out:
- netmap_unget_na(pna, ifp);
- return error;
- diff --git a/sys/dev/netmap/netmap_pt.c b/sys/dev/netmap/netmap_pt.c
- index 3913f4b957f..27eaa0232ae 100644
- --- a/sys/dev/netmap/netmap_pt.c
- +++ b/sys/dev/netmap/netmap_pt.c
- @@ -170,7 +170,7 @@ rate_batch_stats_update(struct rate_batch_stats *bf, uint32_t pre_tail,
- struct ptnetmap_state {
- /* Kthreads. */
- - struct nm_kthread **kthreads;
- + struct nm_kctx **kctxs;
- /* Shared memory with the guest (TX/RX) */
- struct ptnet_ring __user *ptrings;
- @@ -186,11 +186,11 @@ struct ptnetmap_state {
- static inline void
- ptnetmap_kring_dump(const char *title, const struct netmap_kring *kring)
- {
- - RD(1, "%s - name: %s hwcur: %d hwtail: %d rhead: %d rcur: %d \
- - rtail: %d head: %d cur: %d tail: %d",
- - title, kring->name, kring->nr_hwcur,
- - kring->nr_hwtail, kring->rhead, kring->rcur, kring->rtail,
- - kring->ring->head, kring->ring->cur, kring->ring->tail);
- + D("%s - name: %s hwcur: %d hwtail: %d rhead: %d rcur: %d"
- + " rtail: %d head: %d cur: %d tail: %d",
- + title, kring->name, kring->nr_hwcur,
- + kring->nr_hwtail, kring->rhead, kring->rcur, kring->rtail,
- + kring->ring->head, kring->ring->cur, kring->ring->tail);
- }
- /*
- @@ -225,7 +225,7 @@ ptring_intr_enable(struct ptnet_ring __user *ptring, uint32_t val)
- /* Handle TX events: from the guest or from the backend */
- static void
- -ptnetmap_tx_handler(void *data)
- +ptnetmap_tx_handler(void *data, int is_kthread)
- {
- struct netmap_kring *kring = data;
- struct netmap_pt_host_adapter *pth_na =
- @@ -234,7 +234,7 @@ ptnetmap_tx_handler(void *data)
- struct ptnet_ring __user *ptring;
- struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
- bool more_txspace = false;
- - struct nm_kthread *kth;
- + struct nm_kctx *kth;
- uint32_t num_slots;
- int batch;
- IFRATE(uint32_t pre_tail);
- @@ -259,7 +259,7 @@ ptnetmap_tx_handler(void *data)
- /* Get TX ptring pointer from the CSB. */
- ptring = ptns->ptrings + kring->ring_id;
- - kth = ptns->kthreads[kring->ring_id];
- + kth = ptns->kctxs[kring->ring_id];
- num_slots = kring->nkr_num_slots;
- shadow_ring.head = kring->rhead;
- @@ -337,10 +337,10 @@ ptnetmap_tx_handler(void *data)
- #ifndef BUSY_WAIT
- /* Interrupt the guest if needed. */
- - if (more_txspace && ptring_intr_enabled(ptring)) {
- + if (more_txspace && ptring_intr_enabled(ptring) && is_kthread) {
- /* Disable guest kick to avoid sending unnecessary kicks */
- ptring_intr_enable(ptring, 0);
- - nm_os_kthread_send_irq(kth);
- + nm_os_kctx_send_irq(kth);
- IFRATE(ptns->rate_ctx.new.htxk++);
- more_txspace = false;
- }
- @@ -354,7 +354,9 @@ ptnetmap_tx_handler(void *data)
- * go to sleep, waiting for a kick from the guest when new
- * new slots are ready for transmission.
- */
- - usleep_range(1,1);
- + if (is_kthread) {
- + usleep_range(1,1);
- + }
- /* Reenable notifications. */
- ptring_kick_enable(ptring, 1);
- /* Doublecheck. */
- @@ -383,13 +385,40 @@ ptnetmap_tx_handler(void *data)
- nm_kr_put(kring);
- - if (more_txspace && ptring_intr_enabled(ptring)) {
- + if (more_txspace && ptring_intr_enabled(ptring) && is_kthread) {
- ptring_intr_enable(ptring, 0);
- - nm_os_kthread_send_irq(kth);
- + nm_os_kctx_send_irq(kth);
- IFRATE(ptns->rate_ctx.new.htxk++);
- }
- }
- +/* Called on backend nm_notify when there is no worker thread. */
- +static void
- +ptnetmap_tx_nothread_notify(void *data)
- +{
- + struct netmap_kring *kring = data;
- + struct netmap_pt_host_adapter *pth_na =
- + (struct netmap_pt_host_adapter *)kring->na->na_private;
- + struct ptnetmap_state *ptns = pth_na->ptns;
- +
- + if (unlikely(!ptns)) {
- + D("ERROR ptnetmap state is NULL");
- + return;
- + }
- +
- + if (unlikely(ptns->stopped)) {
- + D("backend netmap is being stopped");
- + return;
- + }
- +
- + /* We cannot access the CSB here (to check ptring->guest_need_kick),
- + * unless we switch address space to the one of the guest. For now
- + * we unconditionally inject an interrupt. */
- + nm_os_kctx_send_irq(ptns->kctxs[kring->ring_id]);
- + IFRATE(ptns->rate_ctx.new.htxk++);
- + ND(1, "%s interrupt", kring->name);
- +}
- +
- /*
- * We need RX kicks from the guest when (tail == head-1), where we wait
- * for the guest to refill.
- @@ -405,7 +434,7 @@ ptnetmap_norxslots(struct netmap_kring *kring, uint32_t g_head)
- /* Handle RX events: from the guest or from the backend */
- static void
- -ptnetmap_rx_handler(void *data)
- +ptnetmap_rx_handler(void *data, int is_kthread)
- {
- struct netmap_kring *kring = data;
- struct netmap_pt_host_adapter *pth_na =
- @@ -413,7 +442,7 @@ ptnetmap_rx_handler(void *data)
- struct ptnetmap_state *ptns = pth_na->ptns;
- struct ptnet_ring __user *ptring;
- struct netmap_ring shadow_ring; /* shadow copy of the netmap_ring */
- - struct nm_kthread *kth;
- + struct nm_kctx *kth;
- uint32_t num_slots;
- int dry_cycles = 0;
- bool some_recvd = false;
- @@ -440,7 +469,7 @@ ptnetmap_rx_handler(void *data)
- /* Get RX ptring pointer from the CSB. */
- ptring = ptns->ptrings + (pth_na->up.num_tx_rings + kring->ring_id);
- - kth = ptns->kthreads[pth_na->up.num_tx_rings + kring->ring_id];
- + kth = ptns->kctxs[pth_na->up.num_tx_rings + kring->ring_id];
- num_slots = kring->nkr_num_slots;
- shadow_ring.head = kring->rhead;
- @@ -500,7 +529,7 @@ ptnetmap_rx_handler(void *data)
- if (some_recvd && ptring_intr_enabled(ptring)) {
- /* Disable guest kick to avoid sending unnecessary kicks */
- ptring_intr_enable(ptring, 0);
- - nm_os_kthread_send_irq(kth);
- + nm_os_kctx_send_irq(kth);
- IFRATE(ptns->rate_ctx.new.hrxk++);
- some_recvd = false;
- }
- @@ -549,7 +578,7 @@ ptnetmap_rx_handler(void *data)
- /* Interrupt the guest if needed. */
- if (some_recvd && ptring_intr_enabled(ptring)) {
- ptring_intr_enable(ptring, 0);
- - nm_os_kthread_send_irq(kth);
- + nm_os_kctx_send_irq(kth);
- IFRATE(ptns->rate_ctx.new.hrxk++);
- }
- }
- @@ -597,14 +626,14 @@ ptnetmap_print_configuration(struct ptnetmap_cfg *cfg)
- static int
- ptnetmap_kring_snapshot(struct netmap_kring *kring, struct ptnet_ring __user *ptring)
- {
- - if(CSB_WRITE(ptring, head, kring->rhead))
- + if (CSB_WRITE(ptring, head, kring->rhead))
- goto err;
- - if(CSB_WRITE(ptring, cur, kring->rcur))
- + if (CSB_WRITE(ptring, cur, kring->rcur))
- goto err;
- - if(CSB_WRITE(ptring, hwcur, kring->nr_hwcur))
- + if (CSB_WRITE(ptring, hwcur, kring->nr_hwcur))
- goto err;
- - if(CSB_WRITE(ptring, hwtail, NM_ACCESS_ONCE(kring->nr_hwtail)))
- + if (CSB_WRITE(ptring, hwtail, NM_ACCESS_ONCE(kring->nr_hwtail)))
- goto err;
- DBG(ptnetmap_kring_dump("ptnetmap_kring_snapshot", kring);)
- @@ -643,15 +672,15 @@ ptnetmap_krings_snapshot(struct netmap_pt_host_adapter *pth_na)
- }
- /*
- - * Functions to create, start and stop the kthreads
- + * Functions to create kernel contexts, and start/stop the workers.
- */
- static int
- -ptnetmap_create_kthreads(struct netmap_pt_host_adapter *pth_na,
- - struct ptnetmap_cfg *cfg)
- +ptnetmap_create_kctxs(struct netmap_pt_host_adapter *pth_na,
- + struct ptnetmap_cfg *cfg, int use_tx_kthreads)
- {
- struct ptnetmap_state *ptns = pth_na->ptns;
- - struct nm_kthread_cfg nmk_cfg;
- + struct nm_kctx_cfg nmk_cfg;
- unsigned int num_rings;
- uint8_t *cfg_entries = (uint8_t *)(cfg + 1);
- int k;
- @@ -665,13 +694,16 @@ ptnetmap_create_kthreads(struct netmap_pt_host_adapter *pth_na,
- nmk_cfg.type = k;
- if (k < pth_na->up.num_tx_rings) {
- nmk_cfg.worker_fn = ptnetmap_tx_handler;
- + nmk_cfg.use_kthread = use_tx_kthreads;
- + nmk_cfg.notify_fn = ptnetmap_tx_nothread_notify;
- } else {
- nmk_cfg.worker_fn = ptnetmap_rx_handler;
- + nmk_cfg.use_kthread = 1;
- }
- - ptns->kthreads[k] = nm_os_kthread_create(&nmk_cfg,
- + ptns->kctxs[k] = nm_os_kctx_create(&nmk_cfg,
- cfg->cfgtype, cfg_entries + k * cfg->entry_size);
- - if (ptns->kthreads[k] == NULL) {
- + if (ptns->kctxs[k] == NULL) {
- goto err;
- }
- }
- @@ -679,16 +711,16 @@ ptnetmap_create_kthreads(struct netmap_pt_host_adapter *pth_na,
- return 0;
- err:
- for (k = 0; k < num_rings; k++) {
- - if (ptns->kthreads[k]) {
- - nm_os_kthread_delete(ptns->kthreads[k]);
- - ptns->kthreads[k] = NULL;
- + if (ptns->kctxs[k]) {
- + nm_os_kctx_destroy(ptns->kctxs[k]);
- + ptns->kctxs[k] = NULL;
- }
- }
- return EFAULT;
- }
- static int
- -ptnetmap_start_kthreads(struct netmap_pt_host_adapter *pth_na)
- +ptnetmap_start_kctx_workers(struct netmap_pt_host_adapter *pth_na)
- {
- struct ptnetmap_state *ptns = pth_na->ptns;
- int num_rings;
- @@ -705,8 +737,8 @@ ptnetmap_start_kthreads(struct netmap_pt_host_adapter *pth_na)
- num_rings = ptns->pth_na->up.num_tx_rings +
- ptns->pth_na->up.num_rx_rings;
- for (k = 0; k < num_rings; k++) {
- - //nm_os_kthread_set_affinity(ptns->kthreads[k], xxx);
- - error = nm_os_kthread_start(ptns->kthreads[k]);
- + //nm_os_kctx_worker_setaff(ptns->kctxs[k], xxx);
- + error = nm_os_kctx_worker_start(ptns->kctxs[k]);
- if (error) {
- return error;
- }
- @@ -716,7 +748,7 @@ ptnetmap_start_kthreads(struct netmap_pt_host_adapter *pth_na)
- }
- static void
- -ptnetmap_stop_kthreads(struct netmap_pt_host_adapter *pth_na)
- +ptnetmap_stop_kctx_workers(struct netmap_pt_host_adapter *pth_na)
- {
- struct ptnetmap_state *ptns = pth_na->ptns;
- int num_rings;
- @@ -732,7 +764,7 @@ ptnetmap_stop_kthreads(struct netmap_pt_host_adapter *pth_na)
- num_rings = ptns->pth_na->up.num_tx_rings +
- ptns->pth_na->up.num_rx_rings;
- for (k = 0; k < num_rings; k++) {
- - nm_os_kthread_stop(ptns->kthreads[k]);
- + nm_os_kctx_worker_stop(ptns->kctxs[k]);
- }
- }
- @@ -750,14 +782,14 @@ ptnetmap_read_cfg(struct nmreq *nmr)
- }
- cfglen = sizeof(tmp) + tmp.num_rings * tmp.entry_size;
- - cfg = malloc(cfglen, M_DEVBUF, M_NOWAIT | M_ZERO);
- + cfg = nm_os_malloc(cfglen);
- if (!cfg) {
- return NULL;
- }
- if (copyin((const void *)*nmr_ptncfg, cfg, cfglen)) {
- D("Full copyin() failed");
- - free(cfg, M_DEVBUF);
- + nm_os_free(cfg);
- return NULL;
- }
- @@ -772,6 +804,7 @@ static int
- ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
- struct ptnetmap_cfg *cfg)
- {
- + int use_tx_kthreads = ptnetmap_tx_workers; /* snapshot */
- struct ptnetmap_state *ptns;
- unsigned int num_rings;
- int ret, i;
- @@ -790,13 +823,18 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
- return EINVAL;
- }
- - ptns = malloc(sizeof(*ptns) + num_rings * sizeof(*ptns->kthreads),
- - M_DEVBUF, M_NOWAIT | M_ZERO);
- + if (!use_tx_kthreads && na_is_generic(pth_na->parent)) {
- + D("ERROR ptnetmap direct transmission not supported with "
- + "passed-through emulated adapters");
- + return EOPNOTSUPP;
- + }
- +
- + ptns = nm_os_malloc(sizeof(*ptns) + num_rings * sizeof(*ptns->kctxs));
- if (!ptns) {
- return ENOMEM;
- }
- - ptns->kthreads = (struct nm_kthread **)(ptns + 1);
- + ptns->kctxs = (struct nm_kctx **)(ptns + 1);
- ptns->stopped = true;
- /* Cross-link data structures. */
- @@ -808,9 +846,9 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
- DBG(ptnetmap_print_configuration(cfg));
- - /* Create kthreads */
- - if ((ret = ptnetmap_create_kthreads(pth_na, cfg))) {
- - D("ERROR ptnetmap_create_kthreads()");
- + /* Create kernel contexts. */
- + if ((ret = ptnetmap_create_kctxs(pth_na, cfg, use_tx_kthreads))) {
- + D("ERROR ptnetmap_create_kctxs()");
- goto err;
- }
- /* Copy krings state into the CSB for the guest initialization */
- @@ -819,10 +857,17 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
- goto err;
- }
- - /* Overwrite parent nm_notify krings callback. */
- + /* Overwrite parent nm_notify krings callback, and
- + * clear NAF_BDG_MAYSLEEP if needed. */
- pth_na->parent->na_private = pth_na;
- pth_na->parent_nm_notify = pth_na->parent->nm_notify;
- pth_na->parent->nm_notify = nm_unused_notify;
- + pth_na->parent_na_flags = pth_na->parent->na_flags;
- + if (!use_tx_kthreads) {
- + /* VALE port txsync is executed under spinlock on Linux, so
- + * we need to make sure the bridge cannot sleep. */
- + pth_na->parent->na_flags &= ~NAF_BDG_MAYSLEEP;
- + }
- for (i = 0; i < pth_na->parent->num_rx_rings; i++) {
- pth_na->up.rx_rings[i].save_notify =
- @@ -849,7 +894,7 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na,
- err:
- pth_na->ptns = NULL;
- - free(ptns, M_DEVBUF);
- + nm_os_free(ptns);
- return ret;
- }
- @@ -870,6 +915,7 @@ ptnetmap_delete(struct netmap_pt_host_adapter *pth_na)
- /* Restore parent adapter callbacks. */
- pth_na->parent->nm_notify = pth_na->parent_nm_notify;
- pth_na->parent->na_private = NULL;
- + pth_na->parent->na_flags = pth_na->parent_na_flags;
- for (i = 0; i < pth_na->parent->num_rx_rings; i++) {
- pth_na->up.rx_rings[i].nm_notify =
- @@ -882,17 +928,17 @@ ptnetmap_delete(struct netmap_pt_host_adapter *pth_na)
- pth_na->up.tx_rings[i].save_notify = NULL;
- }
- - /* Delete kthreads. */
- + /* Destroy kernel contexts. */
- num_rings = ptns->pth_na->up.num_tx_rings +
- ptns->pth_na->up.num_rx_rings;
- for (i = 0; i < num_rings; i++) {
- - nm_os_kthread_delete(ptns->kthreads[i]);
- - ptns->kthreads[i] = NULL;
- + nm_os_kctx_destroy(ptns->kctxs[i]);
- + ptns->kctxs[i] = NULL;
- }
- IFRATE(del_timer(&ptns->rate_ctx.timer));
- - free(ptns, M_DEVBUF);
- + nm_os_free(ptns);
- pth_na->ptns = NULL;
- @@ -932,21 +978,21 @@ ptnetmap_ctl(struct nmreq *nmr, struct netmap_adapter *na)
- cfg = ptnetmap_read_cfg(nmr);
- if (!cfg)
- break;
- - /* Create ptnetmap state (kthreads, ...) and switch parent
- + /* Create ptnetmap state (kctxs, ...) and switch parent
- * adapter to ptnetmap mode. */
- error = ptnetmap_create(pth_na, cfg);
- - free(cfg, M_DEVBUF);
- + nm_os_free(cfg);
- if (error)
- break;
- /* Start kthreads. */
- - error = ptnetmap_start_kthreads(pth_na);
- + error = ptnetmap_start_kctx_workers(pth_na);
- if (error)
- ptnetmap_delete(pth_na);
- break;
- case NETMAP_PT_HOST_DELETE:
- /* Stop kthreads. */
- - ptnetmap_stop_kthreads(pth_na);
- + ptnetmap_stop_kctx_workers(pth_na);
- /* Switch parent adapter back to normal mode and destroy
- * ptnetmap state (kthreads, ...). */
- ptnetmap_delete(pth_na);
- @@ -994,7 +1040,7 @@ nm_pt_host_notify(struct netmap_kring *kring, int flags)
- ND(1, "RX backend irq");
- IFRATE(ptns->rate_ctx.new.brxwu++);
- }
- - nm_os_kthread_wakeup_worker(ptns->kthreads[k]);
- + nm_os_kctx_worker_wakeup(ptns->kctxs[k]);
- return NM_IRQ_COMPLETED;
- }
- @@ -1136,7 +1182,7 @@ nm_pt_host_dtor(struct netmap_adapter *na)
- /* The equivalent of NETMAP_PT_HOST_DELETE if the hypervisor
- * didn't do it. */
- - ptnetmap_stop_kthreads(pth_na);
- + ptnetmap_stop_kctx_workers(pth_na);
- ptnetmap_delete(pth_na);
- parent->na_flags &= ~NAF_BUSY;
- @@ -1147,7 +1193,8 @@ nm_pt_host_dtor(struct netmap_adapter *na)
- /* check if nmr is a request for a ptnetmap adapter that we can satisfy */
- int
- -netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- +netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na,
- + struct netmap_mem_d *nmd, int create)
- {
- struct nmreq parent_nmr;
- struct netmap_adapter *parent; /* target adapter */
- @@ -1162,7 +1209,7 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- D("Requesting a ptnetmap host adapter");
- - pth_na = malloc(sizeof(*pth_na), M_DEVBUF, M_NOWAIT | M_ZERO);
- + pth_na = nm_os_malloc(sizeof(*pth_na));
- if (pth_na == NULL) {
- D("ERROR malloc");
- return ENOMEM;
- @@ -1174,7 +1221,7 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- */
- memcpy(&parent_nmr, nmr, sizeof(parent_nmr));
- parent_nmr.nr_flags &= ~(NR_PTNETMAP_HOST);
- - error = netmap_get_na(&parent_nmr, &parent, &ifp, create);
- + error = netmap_get_na(&parent_nmr, &parent, &ifp, nmd, create);
- if (error) {
- D("parent lookup failed: %d", error);
- goto put_out_noputparent;
- @@ -1216,7 +1263,7 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- * directly. */
- pth_na->up.nm_notify = nm_unused_notify;
- - pth_na->up.nm_mem = parent->nm_mem;
- + pth_na->up.nm_mem = netmap_mem_get(parent->nm_mem);
- pth_na->up.na_flags |= NAF_HOST_RINGS;
- @@ -1248,7 +1295,7 @@ put_out:
- if (ifp)
- if_rele(ifp);
- put_out_noputparent:
- - free(pth_na, M_DEVBUF);
- + nm_os_free(pth_na);
- return error;
- }
- #endif /* WITH_PTNETMAP_HOST */
- @@ -1290,8 +1337,8 @@ netmap_pt_guest_txsync(struct ptnet_ring *ptring, struct netmap_kring *kring,
- ptnetmap_guest_write_kring_csb(ptring, kring->rcur, kring->rhead);
- /* Ask for a kick from a guest to the host if needed. */
- - if ((kring->rhead != kring->nr_hwcur &&
- - NM_ACCESS_ONCE(ptring->host_need_kick)) ||
- + if (((kring->rhead != kring->nr_hwcur || nm_kr_txempty(kring))
- + && NM_ACCESS_ONCE(ptring->host_need_kick)) ||
- (flags & NAF_FORCE_RECLAIM)) {
- ptring->sync_flags = flags;
- notify = true;
- @@ -1320,9 +1367,9 @@ netmap_pt_guest_txsync(struct ptnet_ring *ptring, struct netmap_kring *kring,
- }
- }
- - ND(1, "TX - CSB: head:%u cur:%u hwtail:%u - KRING: head:%u cur:%u tail: %u",
- - ptring->head, ptring->cur, ptring->hwtail,
- - kring->rhead, kring->rcur, kring->nr_hwtail);
- + ND(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
- + kring->name, ptring->head, ptring->cur, ptring->hwtail,
- + kring->rhead, kring->rcur, kring->nr_hwtail);
- return notify;
- }
- @@ -1385,9 +1432,9 @@ netmap_pt_guest_rxsync(struct ptnet_ring *ptring, struct netmap_kring *kring,
- }
- }
- - ND(1, "RX - CSB: head:%u cur:%u hwtail:%u - KRING: head:%u cur:%u",
- - ptring->head, ptring->cur, ptring->hwtail,
- - kring->rhead, kring->rcur);
- + ND(1, "%s CSB(head:%u cur:%u hwtail:%u) KRING(head:%u cur:%u tail:%u)",
- + kring->name, ptring->head, ptring->cur, ptring->hwtail,
- + kring->rhead, kring->rcur, kring->nr_hwtail);
- return notify;
- }
- @@ -1445,9 +1492,43 @@ ptnet_nm_dtor(struct netmap_adapter *na)
- struct netmap_pt_guest_adapter *ptna =
- (struct netmap_pt_guest_adapter *)na;
- - netmap_mem_put(ptna->dr.up.nm_mem);
- + netmap_mem_put(ptna->dr.up.nm_mem); // XXX is this needed?
- memset(&ptna->dr, 0, sizeof(ptna->dr));
- netmap_mem_pt_guest_ifp_del(na->nm_mem, na->ifp);
- }
- +int
- +netmap_pt_guest_attach(struct netmap_adapter *arg, void *csb,
- + unsigned int nifp_offset, unsigned int memid)
- +{
- + struct netmap_pt_guest_adapter *ptna;
- + struct ifnet *ifp = arg ? arg->ifp : NULL;
- + int error;
- +
- + /* get allocator */
- + arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid);
- + if (arg->nm_mem == NULL)
- + return ENOMEM;
- + arg->na_flags |= NAF_MEM_OWNER;
- + error = netmap_attach_ext(arg, sizeof(struct netmap_pt_guest_adapter));
- + if (error)
- + return error;
- +
- + /* get the netmap_pt_guest_adapter */
- + ptna = (struct netmap_pt_guest_adapter *) NA(ifp);
- + ptna->csb = csb;
- +
- + /* Initialize a separate pass-through netmap adapter that is going to
- + * be used by the ptnet driver only, and so never exposed to netmap
- + * applications. We only need a subset of the available fields. */
- + memset(&ptna->dr, 0, sizeof(ptna->dr));
- + ptna->dr.up.ifp = ifp;
- + ptna->dr.up.nm_mem = netmap_mem_get(ptna->hwup.up.nm_mem);
- + ptna->dr.up.nm_config = ptna->hwup.up.nm_config;
- +
- + ptna->backend_regifs = 0;
- +
- + return 0;
- +}
- +
- #endif /* WITH_PTNETMAP_GUEST */
- diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c
- index 71b3aedddd4..a018f60ecb9 100644
- --- a/sys/dev/netmap/netmap_vale.c
- +++ b/sys/dev/netmap/netmap_vale.c
- @@ -58,7 +58,7 @@ ports attached to the switch)
- #if defined(__FreeBSD__)
- #include <sys/cdefs.h> /* prerequisite */
- -__FBSDID("$FreeBSD$");
- +__FBSDID("$FreeBSD: head/sys/dev/netmap/netmap.c 257176 2013-10-26 17:58:36Z glebius $");
- #include <sys/types.h>
- #include <sys/errno.h>
- @@ -161,7 +161,8 @@ SYSCTL_DECL(_dev_netmap);
- SYSCTL_INT(_dev_netmap, OID_AUTO, bridge_batch, CTLFLAG_RW, &bridge_batch, 0 , "");
- SYSEND;
- -static int netmap_vp_create(struct nmreq *, struct ifnet *, struct netmap_vp_adapter **);
- +static int netmap_vp_create(struct nmreq *, struct ifnet *,
- + struct netmap_mem_d *nmd, struct netmap_vp_adapter **);
- static int netmap_vp_reg(struct netmap_adapter *na, int onoff);
- static int netmap_bwrap_reg(struct netmap_adapter *, int onoff);
- @@ -393,7 +394,7 @@ nm_free_bdgfwd(struct netmap_adapter *na)
- kring = na->tx_rings;
- for (i = 0; i < nrings; i++) {
- if (kring[i].nkr_ft) {
- - free(kring[i].nkr_ft, M_DEVBUF);
- + nm_os_free(kring[i].nkr_ft);
- kring[i].nkr_ft = NULL; /* protect from freeing twice */
- }
- }
- @@ -423,7 +424,7 @@ nm_alloc_bdgfwd(struct netmap_adapter *na)
- struct nm_bdg_q *dstq;
- int j;
- - ft = malloc(l, M_DEVBUF, M_NOWAIT | M_ZERO);
- + ft = nm_os_malloc(l);
- if (!ft) {
- nm_free_bdgfwd(na);
- return ENOMEM;
- @@ -538,6 +539,13 @@ netmap_vp_dtor(struct netmap_adapter *na)
- if (b) {
- netmap_bdg_detach_common(b, vpna->bdg_port, -1);
- }
- +
- + if (vpna->autodelete && na->ifp != NULL) {
- + ND("releasing %s", na->ifp->if_xname);
- + NMG_UNLOCK();
- + nm_os_vi_detach(na->ifp);
- + NMG_LOCK();
- + }
- }
- /* remove a persistent VALE port from the system */
- @@ -545,6 +553,7 @@ static int
- nm_vi_destroy(const char *name)
- {
- struct ifnet *ifp;
- + struct netmap_vp_adapter *vpna;
- int error;
- ifp = ifunit_ref(name);
- @@ -557,18 +566,29 @@ nm_vi_destroy(const char *name)
- goto err;
- }
- - if (NA(ifp)->na_refcount > 1) {
- + vpna = (struct netmap_vp_adapter *)NA(ifp);
- +
- + /* we can only destroy ports that were created via NETMAP_BDG_NEWIF */
- + if (vpna->autodelete) {
- + error = EINVAL;
- + goto err;
- + }
- +
- + /* also make sure that nobody is using the inferface */
- + if (NETMAP_OWNED_BY_ANY(&vpna->up) ||
- + vpna->up.na_refcount > 1 /* any ref besides the one in nm_vi_create()? */) {
- error = EBUSY;
- goto err;
- }
- +
- NMG_UNLOCK();
- D("destroying a persistent vale interface %s", ifp->if_xname);
- /* Linux requires all the references are released
- * before unregister
- */
- - if_rele(ifp);
- netmap_detach(ifp);
- + if_rele(ifp);
- nm_os_vi_detach(ifp);
- return 0;
- @@ -578,15 +598,26 @@ err:
- return error;
- }
- +static int
- +nm_update_info(struct nmreq *nmr, struct netmap_adapter *na)
- +{
- + nmr->nr_rx_rings = na->num_rx_rings;
- + nmr->nr_tx_rings = na->num_tx_rings;
- + nmr->nr_rx_slots = na->num_rx_desc;
- + nmr->nr_tx_slots = na->num_tx_desc;
- + return netmap_mem_get_info(na->nm_mem, &nmr->nr_memsize, NULL, &nmr->nr_arg2);
- +}
- +
- /*
- * Create a virtual interface registered to the system.
- * The interface will be attached to a bridge later.
- */
- -static int
- -nm_vi_create(struct nmreq *nmr)
- +int
- +netmap_vi_create(struct nmreq *nmr, int autodelete)
- {
- struct ifnet *ifp;
- struct netmap_vp_adapter *vpna;
- + struct netmap_mem_d *nmd = NULL;
- int error;
- /* don't include VALE prefix */
- @@ -594,28 +625,64 @@ nm_vi_create(struct nmreq *nmr)
- return EINVAL;
- ifp = ifunit_ref(nmr->nr_name);
- if (ifp) { /* already exist, cannot create new one */
- + error = EEXIST;
- + NMG_LOCK();
- + if (NM_NA_VALID(ifp)) {
- + int update_err = nm_update_info(nmr, NA(ifp));
- + if (update_err)
- + error = update_err;
- + }
- + NMG_UNLOCK();
- if_rele(ifp);
- - return EEXIST;
- + return error;
- }
- error = nm_os_vi_persist(nmr->nr_name, &ifp);
- if (error)
- return error;
- NMG_LOCK();
- + if (nmr->nr_arg2) {
- + nmd = netmap_mem_find(nmr->nr_arg2);
- + if (nmd == NULL) {
- + error = EINVAL;
- + goto err_1;
- + }
- + }
- /* netmap_vp_create creates a struct netmap_vp_adapter */
- - error = netmap_vp_create(nmr, ifp, &vpna);
- + error = netmap_vp_create(nmr, ifp, nmd, &vpna);
- if (error) {
- D("error %d", error);
- - nm_os_vi_detach(ifp);
- - return error;
- + goto err_1;
- }
- /* persist-specific routines */
- vpna->up.nm_bdg_ctl = netmap_vp_bdg_ctl;
- - netmap_adapter_get(&vpna->up);
- + if (!autodelete) {
- + netmap_adapter_get(&vpna->up);
- + } else {
- + vpna->autodelete = 1;
- + }
- NM_ATTACH_NA(ifp, &vpna->up);
- + /* return the updated info */
- + error = nm_update_info(nmr, &vpna->up);
- + if (error) {
- + goto err_2;
- + }
- + D("returning nr_arg2 %d", nmr->nr_arg2);
- + if (nmd)
- + netmap_mem_put(nmd);
- NMG_UNLOCK();
- D("created %s", ifp->if_xname);
- return 0;
- +
- +err_2:
- + netmap_detach(ifp);
- +err_1:
- + if (nmd)
- + netmap_mem_put(nmd);
- + NMG_UNLOCK();
- + nm_os_vi_detach(ifp);
- +
- + return error;
- }
- /* Try to get a reference to a netmap adapter attached to a VALE switch.
- @@ -628,11 +695,12 @@ nm_vi_create(struct nmreq *nmr)
- * (*na != NULL && return == 0).
- */
- int
- -netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- +netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na,
- + struct netmap_mem_d *nmd, int create)
- {
- char *nr_name = nmr->nr_name;
- const char *ifname;
- - struct ifnet *ifp;
- + struct ifnet *ifp = NULL;
- int error = 0;
- struct netmap_vp_adapter *vpna, *hostna = NULL;
- struct nm_bridge *b;
- @@ -702,15 +770,15 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- */
- if (nmr->nr_cmd) {
- /* nr_cmd must be 0 for a virtual port */
- - return EINVAL;
- + error = EINVAL;
- + goto out;
- }
- /* bdg_netmap_attach creates a struct netmap_adapter */
- - error = netmap_vp_create(nmr, NULL, &vpna);
- + error = netmap_vp_create(nmr, NULL, nmd, &vpna);
- if (error) {
- D("error %d", error);
- - free(ifp, M_DEVBUF);
- - return error;
- + goto out;
- }
- /* shortcut - we can skip get_hw_na(),
- * ownership check and nm_bdg_attach()
- @@ -718,7 +786,7 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- } else {
- struct netmap_adapter *hw;
- - error = netmap_get_hw_na(ifp, &hw);
- + error = netmap_get_hw_na(ifp, nmd, &hw);
- if (error || hw == NULL)
- goto out;
- @@ -751,10 +819,10 @@ netmap_get_bdg_na(struct nmreq *nmr, struct netmap_adapter **na, int create)
- BDG_WUNLOCK(b);
- *na = &vpna->up;
- netmap_adapter_get(*na);
- - return 0;
- out:
- - if_rele(ifp);
- + if (ifp)
- + if_rele(ifp);
- return error;
- }
- @@ -765,11 +833,20 @@ static int
- nm_bdg_ctl_attach(struct nmreq *nmr)
- {
- struct netmap_adapter *na;
- + struct netmap_mem_d *nmd = NULL;
- int error;
- NMG_LOCK();
- - error = netmap_get_bdg_na(nmr, &na, 1 /* create if not exists */);
- + if (nmr->nr_arg2) {
- + nmd = netmap_mem_find(nmr->nr_arg2);
- + if (nmd == NULL) {
- + error = EINVAL;
- + goto unlock_exit;
- + }
- + }
- +
- + error = netmap_get_bdg_na(nmr, &na, nmd, 1 /* create if not exists */);
- if (error) /* no device */
- goto unlock_exit;
- @@ -816,7 +893,7 @@ nm_bdg_ctl_detach(struct nmreq *nmr)
- int error;
- NMG_LOCK();
- - error = netmap_get_bdg_na(nmr, &na, 0 /* don't create */);
- + error = netmap_get_bdg_na(nmr, &na, NULL, 0 /* don't create */);
- if (error) { /* no device, or another bridge or user owns the device */
- goto unlock_exit;
- }
- @@ -848,7 +925,7 @@ unlock_exit:
- struct nm_bdg_polling_state;
- struct
- nm_bdg_kthread {
- - struct nm_kthread *nmk;
- + struct nm_kctx *nmk;
- u_int qfirst;
- u_int qlast;
- struct nm_bdg_polling_state *bps;
- @@ -867,7 +944,7 @@ struct nm_bdg_polling_state {
- };
- static void
- -netmap_bwrap_polling(void *data)
- +netmap_bwrap_polling(void *data, int is_kthread)
- {
- struct nm_bdg_kthread *nbk = data;
- struct netmap_bwrap_adapter *bna;
- @@ -890,16 +967,16 @@ netmap_bwrap_polling(void *data)
- static int
- nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
- {
- - struct nm_kthread_cfg kcfg;
- + struct nm_kctx_cfg kcfg;
- int i, j;
- - bps->kthreads = malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus,
- - M_DEVBUF, M_NOWAIT | M_ZERO);
- + bps->kthreads = nm_os_malloc(sizeof(struct nm_bdg_kthread) * bps->ncpus);
- if (bps->kthreads == NULL)
- return ENOMEM;
- bzero(&kcfg, sizeof(kcfg));
- kcfg.worker_fn = netmap_bwrap_polling;
- + kcfg.use_kthread = 1;
- for (i = 0; i < bps->ncpus; i++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- int all = (bps->ncpus == 1 && bps->reg == NR_REG_ALL_NIC);
- @@ -913,24 +990,24 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps)
- kcfg.type = i;
- kcfg.worker_private = t;
- - t->nmk = nm_os_kthread_create(&kcfg, 0, NULL);
- + t->nmk = nm_os_kctx_create(&kcfg, 0, NULL);
- if (t->nmk == NULL) {
- goto cleanup;
- }
- - nm_os_kthread_set_affinity(t->nmk, affinity);
- + nm_os_kctx_worker_setaff(t->nmk, affinity);
- }
- return 0;
- cleanup:
- for (j = 0; j < i; j++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- - nm_os_kthread_delete(t->nmk);
- + nm_os_kctx_destroy(t->nmk);
- }
- - free(bps->kthreads, M_DEVBUF);
- + nm_os_free(bps->kthreads);
- return EFAULT;
- }
- -/* a version of ptnetmap_start_kthreads() */
- +/* A variant of ptnetmap_start_kthreads() */
- static int
- nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
- {
- @@ -944,7 +1021,7 @@ nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
- for (i = 0; i < bps->ncpus; i++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- - error = nm_os_kthread_start(t->nmk);
- + error = nm_os_kctx_worker_start(t->nmk);
- if (error) {
- D("error in nm_kthread_start()");
- goto cleanup;
- @@ -955,7 +1032,7 @@ nm_bdg_polling_start_kthreads(struct nm_bdg_polling_state *bps)
- cleanup:
- for (j = 0; j < i; j++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- - nm_os_kthread_stop(t->nmk);
- + nm_os_kctx_worker_stop(t->nmk);
- }
- bps->stopped = true;
- return error;
- @@ -971,8 +1048,8 @@ nm_bdg_polling_stop_delete_kthreads(struct nm_bdg_polling_state *bps)
- for (i = 0; i < bps->ncpus; i++) {
- struct nm_bdg_kthread *t = bps->kthreads + i;
- - nm_os_kthread_stop(t->nmk);
- - nm_os_kthread_delete(t->nmk);
- + nm_os_kctx_worker_stop(t->nmk);
- + nm_os_kctx_destroy(t->nmk);
- }
- bps->stopped = true;
- }
- @@ -1050,19 +1127,19 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
- return EFAULT;
- }
- - bps = malloc(sizeof(*bps), M_DEVBUF, M_NOWAIT | M_ZERO);
- + bps = nm_os_malloc(sizeof(*bps));
- if (!bps)
- return ENOMEM;
- bps->configured = false;
- bps->stopped = true;
- if (get_polling_cfg(nmr, na, bps)) {
- - free(bps, M_DEVBUF);
- + nm_os_free(bps);
- return EINVAL;
- }
- if (nm_bdg_create_kthreads(bps)) {
- - free(bps, M_DEVBUF);
- + nm_os_free(bps);
- return EFAULT;
- }
- @@ -1077,8 +1154,8 @@ nm_bdg_ctl_polling_start(struct nmreq *nmr, struct netmap_adapter *na)
- error = nm_bdg_polling_start_kthreads(bps);
- if (error) {
- D("ERROR nm_bdg_polling_start_kthread()");
- - free(bps->kthreads, M_DEVBUF);
- - free(bps, M_DEVBUF);
- + nm_os_free(bps->kthreads);
- + nm_os_free(bps);
- bna->na_polling_state = NULL;
- if (bna->hwna->nm_intr)
- bna->hwna->nm_intr(bna->hwna, 1);
- @@ -1099,7 +1176,7 @@ nm_bdg_ctl_polling_stop(struct nmreq *nmr, struct netmap_adapter *na)
- bps = bna->na_polling_state;
- nm_bdg_polling_stop_delete_kthreads(bna->na_polling_state);
- bps->configured = false;
- - free(bps, M_DEVBUF);
- + nm_os_free(bps);
- bna->na_polling_state = NULL;
- /* reenable interrupt */
- if (bna->hwna->nm_intr)
- @@ -1130,7 +1207,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
- switch (cmd) {
- case NETMAP_BDG_NEWIF:
- - error = nm_vi_create(nmr);
- + error = netmap_vi_create(nmr, 0 /* no autodelete */);
- break;
- case NETMAP_BDG_DELIF:
- @@ -1193,18 +1270,19 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
- NMG_LOCK();
- for (error = ENOENT; i < NM_BRIDGES; i++) {
- b = bridges + i;
- - if (j >= b->bdg_active_ports) {
- - j = 0; /* following bridges scan from 0 */
- - continue;
- + for ( ; j < NM_BDG_MAXPORTS; j++) {
- + if (b->bdg_ports[j] == NULL)
- + continue;
- + vpna = b->bdg_ports[j];
- + strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
- + error = 0;
- + goto out;
- }
- - nmr->nr_arg1 = i;
- - nmr->nr_arg2 = j;
- - j = b->bdg_port_index[j];
- - vpna = b->bdg_ports[j];
- - strncpy(name, vpna->up.name, (size_t)IFNAMSIZ);
- - error = 0;
- - break;
- + j = 0; /* following bridges scan from 0 */
- }
- + out:
- + nmr->nr_arg1 = i;
- + nmr->nr_arg2 = j;
- NMG_UNLOCK();
- }
- break;
- @@ -1238,7 +1316,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
- break;
- }
- NMG_LOCK();
- - error = netmap_get_bdg_na(nmr, &na, 0);
- + error = netmap_get_bdg_na(nmr, &na, NULL, 0);
- if (na && !error) {
- vpna = (struct netmap_vp_adapter *)na;
- na->virt_hdr_len = nmr->nr_arg1;
- @@ -1256,7 +1334,7 @@ netmap_bdg_ctl(struct nmreq *nmr, struct netmap_bdg_ops *bdg_ops)
- case NETMAP_BDG_POLLING_ON:
- case NETMAP_BDG_POLLING_OFF:
- NMG_LOCK();
- - error = netmap_get_bdg_na(nmr, &na, 0);
- + error = netmap_get_bdg_na(nmr, &na, NULL, 0);
- if (na && !error) {
- if (!nm_is_bwrap(na)) {
- error = EOPNOTSUPP;
- @@ -1384,7 +1462,7 @@ nm_bdg_preflush(struct netmap_kring *kring, u_int end)
- if (na->up.na_flags & NAF_BDG_MAYSLEEP)
- BDG_RLOCK(b);
- else if (!BDG_RTRYLOCK(b))
- - return 0;
- + return j;
- ND(5, "rlock acquired for %d packets", ((j > end ? lim+1 : 0) + end) - j);
- ft = kring->nkr_ft;
- @@ -1802,8 +1880,10 @@ nm_bdg_flush(struct nm_bdg_fwd *ft, u_int n, struct netmap_vp_adapter *na,
- needed = d->bq_len + brddst->bq_len;
- if (unlikely(dst_na->up.virt_hdr_len != na->up.virt_hdr_len)) {
- - RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
- - dst_na->up.virt_hdr_len);
- + if (netmap_verbose) {
- + RD(3, "virt_hdr_mismatch, src %d dst %d", na->up.virt_hdr_len,
- + dst_na->up.virt_hdr_len);
- + }
- /* There is a virtio-net header/offloadings mismatch between
- * source and destination. The slower mismatch datapath will
- * be used to cope with all the mismatches.
- @@ -2125,14 +2205,16 @@ netmap_vp_bdg_attach(const char *name, struct netmap_adapter *na)
- * Only persistent VALE ports have a non-null ifp.
- */
- static int
- -netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter **ret)
- +netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp,
- + struct netmap_mem_d *nmd,
- + struct netmap_vp_adapter **ret)
- {
- struct netmap_vp_adapter *vpna;
- struct netmap_adapter *na;
- - int error;
- + int error = 0;
- u_int npipes = 0;
- - vpna = malloc(sizeof(*vpna), M_DEVBUF, M_NOWAIT | M_ZERO);
- + vpna = nm_os_malloc(sizeof(*vpna));
- if (vpna == NULL)
- return ENOMEM;
- @@ -2183,7 +2265,10 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter
- na->nm_krings_create = netmap_vp_krings_create;
- na->nm_krings_delete = netmap_vp_krings_delete;
- na->nm_dtor = netmap_vp_dtor;
- - na->nm_mem = netmap_mem_private_new(na->name,
- + D("nr_arg2 %d", nmr->nr_arg2);
- + na->nm_mem = nmd ?
- + netmap_mem_get(nmd):
- + netmap_mem_private_new(
- na->num_tx_rings, na->num_tx_desc,
- na->num_rx_rings, na->num_rx_desc,
- nmr->nr_arg3, npipes, &error);
- @@ -2199,8 +2284,8 @@ netmap_vp_create(struct nmreq *nmr, struct ifnet *ifp, struct netmap_vp_adapter
- err:
- if (na->nm_mem != NULL)
- - netmap_mem_delete(na->nm_mem);
- - free(vpna, M_DEVBUF);
- + netmap_mem_put(na->nm_mem);
- + nm_os_free(vpna);
- return error;
- }
- @@ -2243,6 +2328,8 @@ netmap_bwrap_dtor(struct netmap_adapter *na)
- struct nm_bridge *b = bna->up.na_bdg,
- *bh = bna->host.na_bdg;
- + netmap_mem_put(bna->host.up.nm_mem);
- +
- if (b) {
- netmap_bdg_detach_common(b, bna->up.bdg_port,
- (bh ? bna->host.bdg_port : -1));
- @@ -2644,7 +2731,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
- return EBUSY;
- }
- - bna = malloc(sizeof(*bna), M_DEVBUF, M_NOWAIT | M_ZERO);
- + bna = nm_os_malloc(sizeof(*bna));
- if (bna == NULL) {
- return ENOMEM;
- }
- @@ -2652,6 +2739,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
- na = &bna->up.up;
- /* make bwrap ifp point to the real ifp */
- na->ifp = hwna->ifp;
- + if_ref(na->ifp);
- na->na_private = bna;
- strncpy(na->name, nr_name, sizeof(na->name));
- /* fill the ring data for the bwrap adapter with rx/tx meanings
- @@ -2673,7 +2761,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
- na->nm_notify = netmap_bwrap_notify;
- na->nm_bdg_ctl = netmap_bwrap_bdg_ctl;
- na->pdev = hwna->pdev;
- - na->nm_mem = hwna->nm_mem;
- + na->nm_mem = netmap_mem_get(hwna->nm_mem);
- na->virt_hdr_len = hwna->virt_hdr_len;
- bna->up.retry = 1; /* XXX maybe this should depend on the hwna */
- @@ -2697,7 +2785,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
- // hostna->nm_txsync = netmap_bwrap_host_txsync;
- // hostna->nm_rxsync = netmap_bwrap_host_rxsync;
- hostna->nm_notify = netmap_bwrap_notify;
- - hostna->nm_mem = na->nm_mem;
- + hostna->nm_mem = netmap_mem_get(na->nm_mem);
- hostna->na_private = bna;
- hostna->na_vp = &bna->up;
- na->na_hostvp = hwna->na_hostvp =
- @@ -2720,7 +2808,7 @@ netmap_bwrap_attach(const char *nr_name, struct netmap_adapter *hwna)
- err_free:
- hwna->na_vp = hwna->na_hostvp = NULL;
- netmap_adapter_put(hwna);
- - free(bna, M_DEVBUF);
- + nm_os_free(bna);
- return error;
- }
- @@ -2731,8 +2819,7 @@ netmap_init_bridges2(u_int n)
- int i;
- struct nm_bridge *b;
- - b = malloc(sizeof(struct nm_bridge) * n, M_DEVBUF,
- - M_NOWAIT | M_ZERO);
- + b = nm_os_malloc(sizeof(struct nm_bridge) * n);
- if (b == NULL)
- return NULL;
- for (i = 0; i < n; i++)
- @@ -2750,7 +2837,7 @@ netmap_uninit_bridges2(struct nm_bridge *b, u_int n)
- for (i = 0; i < n; i++)
- BDG_RWDESTROY(&b[i]);
- - free(b, M_DEVBUF);
- + nm_os_free(b);
- }
- int
- diff --git a/sys/modules/vmm/Makefile b/sys/modules/vmm/Makefile
- index cb9152f3d56..33855c709c8 100644
- --- a/sys/modules/vmm/Makefile
- +++ b/sys/modules/vmm/Makefile
- @@ -21,6 +21,7 @@ SRCS+= vmm.c \
- vmm_ioport.c \
- vmm_lapic.c \
- vmm_mem.c \
- + vmm_usermem.c \
- vmm_stat.c \
- vmm_util.c \
- x86.c
- diff --git a/sys/net/netmap.h b/sys/net/netmap.h
- index 3e0cdab4248..3543426b680 100644
- --- a/sys/net/netmap.h
- +++ b/sys/net/netmap.h
- @@ -25,7 +25,7 @@
- */
- /*
- - * $FreeBSD$
- + * $FreeBSD: head/sys/net/netmap.h 251139 2013-05-30 14:07:14Z luigi $
- *
- * Definitions of constants and the structures used by the netmap
- * framework, for the part visible to both kernel and userspace.
- diff --git a/sys/net/netmap_user.h b/sys/net/netmap_user.h
- index 4fbf38731d3..758084c1dcc 100644
- --- a/sys/net/netmap_user.h
- +++ b/sys/net/netmap_user.h
- @@ -309,16 +309,16 @@ typedef void (*nm_cb_t)(u_char *, const struct nm_pkthdr *, const u_char *d);
- * ifname (netmap:foo or vale:foo) is the port name
- * a suffix can indicate the follwing:
- * ^ bind the host (sw) ring pair
- - * * bind host and NIC ring pairs (transparent)
- + * * bind host and NIC ring pairs
- * -NN bind individual NIC ring pair
- * {NN bind master side of pipe NN
- * }NN bind slave side of pipe NN
- * a suffix starting with / and the following flags,
- * in any order:
- * x exclusive access
- - * z zero copy monitor
- - * t monitor tx side
- - * r monitor rx side
- + * z zero copy monitor (both tx and rx)
- + * t monitor tx side (copy monitor)
- + * r monitor rx side (copy monitor)
- * R bind only RX ring(s)
- * T bind only TX ring(s)
- *
- @@ -634,9 +634,10 @@ nm_open(const char *ifname, const struct nmreq *req,
- const char *vpname = NULL;
- #define MAXERRMSG 80
- char errmsg[MAXERRMSG] = "";
- - enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK } p_state;
- + enum { P_START, P_RNGSFXOK, P_GETNUM, P_FLAGS, P_FLAGSOK, P_MEMID } p_state;
- int is_vale;
- long num;
- + uint16_t nr_arg2 = 0;
- if (strncmp(ifname, "netmap:", 7) &&
- strncmp(ifname, NM_BDG_NAME, strlen(NM_BDG_NAME))) {
- @@ -665,7 +666,7 @@ nm_open(const char *ifname, const struct nmreq *req,
- }
- /* scan for a separator */
- - for (; *port && !index("-*^{}/", *port); port++)
- + for (; *port && !index("-*^{}/@", *port); port++)
- ;
- if (is_vale && !nm_is_identifier(vpname, port)) {
- @@ -707,6 +708,9 @@ nm_open(const char *ifname, const struct nmreq *req,
- case '/': /* start of flags */
- p_state = P_FLAGS;
- break;
- + case '@': /* start of memid */
- + p_state = P_MEMID;
- + break;
- default:
- snprintf(errmsg, MAXERRMSG, "unknown modifier: '%c'", *port);
- goto fail;
- @@ -718,6 +722,9 @@ nm_open(const char *ifname, const struct nmreq *req,
- case '/':
- p_state = P_FLAGS;
- break;
- + case '@':
- + p_state = P_MEMID;
- + break;
- default:
- snprintf(errmsg, MAXERRMSG, "unexpected character: '%c'", *port);
- goto fail;
- @@ -736,6 +743,11 @@ nm_open(const char *ifname, const struct nmreq *req,
- break;
- case P_FLAGS:
- case P_FLAGSOK:
- + if (*port == '@') {
- + port++;
- + p_state = P_MEMID;
- + break;
- + }
- switch (*port) {
- case 'x':
- nr_flags |= NR_EXCLUSIVE;
- @@ -762,17 +774,25 @@ nm_open(const char *ifname, const struct nmreq *req,
- port++;
- p_state = P_FLAGSOK;
- break;
- + case P_MEMID:
- + if (nr_arg2 != 0) {
- + snprintf(errmsg, MAXERRMSG, "double setting of memid");
- + goto fail;
- + }
- + num = strtol(port, (char **)&port, 10);
- + if (num <= 0) {
- + snprintf(errmsg, MAXERRMSG, "invalid memid %ld, must be >0", num);
- + goto fail;
- + }
- + nr_arg2 = num;
- + p_state = P_RNGSFXOK;
- + break;
- }
- }
- if (p_state != P_START && p_state != P_RNGSFXOK && p_state != P_FLAGSOK) {
- snprintf(errmsg, MAXERRMSG, "unexpected end of port name");
- goto fail;
- }
- - if ((nr_flags & NR_ZCOPY_MON) &&
- - !(nr_flags & (NR_MONITOR_TX|NR_MONITOR_RX))) {
- - snprintf(errmsg, MAXERRMSG, "'z' used but neither 'r', nor 't' found");
- - goto fail;
- - }
- ND("flags: %s %s %s %s",
- (nr_flags & NR_EXCLUSIVE) ? "EXCLUSIVE" : "",
- (nr_flags & NR_ZCOPY_MON) ? "ZCOPY_MON" : "",
- @@ -799,6 +819,8 @@ nm_open(const char *ifname, const struct nmreq *req,
- /* these fields are overridden by ifname and flags processing */
- d->req.nr_ringid |= nr_ringid;
- d->req.nr_flags |= nr_flags;
- + if (nr_arg2)
- + d->req.nr_arg2 = nr_arg2;
- memcpy(d->req.nr_name, ifname, namelen);
- d->req.nr_name[namelen] = '\0';
- /* optionally import info from parent */
- @@ -848,7 +870,7 @@ nm_open(const char *ifname, const struct nmreq *req,
- nr_reg = d->req.nr_flags & NR_REG_MASK;
- - if (nr_reg == NR_REG_SW) { /* host stack */
- + if (nr_reg == NR_REG_SW) { /* host stack */
- d->first_tx_ring = d->last_tx_ring = d->req.nr_tx_rings;
- d->first_rx_ring = d->last_rx_ring = d->req.nr_rx_rings;
- } else if (nr_reg == NR_REG_ALL_NIC) { /* only nic */
- diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
- index 8159d7ddbd7..dbd1781cee7 100644
- --- a/usr.sbin/bhyve/Makefile
- +++ b/usr.sbin/bhyve/Makefile
- @@ -29,6 +29,8 @@ SRCS= \
- mem.c \
- mevent.c \
- mptbl.c \
- + net_backends.c \
- + net_utils.c \
- pci_ahci.c \
- pci_e82545.c \
- pci_emul.c \
- @@ -37,6 +39,8 @@ SRCS= \
- pci_irq.c \
- pci_lpc.c \
- pci_passthru.c \
- + pci_ptnetmap_memdev.c \
- + pci_ptnetmap_netif.c \
- pci_virtio_block.c \
- pci_virtio_console.c \
- pci_virtio_net.c \
- @@ -74,6 +78,8 @@ LIBADD+= crypto
- CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/e1000
- CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/mii
- CFLAGS+= -I${BHYVE_SYSDIR}/sys/dev/usb/controller
- +CFLAGS+= -I${BHYVE_SYSDIR}/sys/
- +CFLAGS+= -DWITH_NETMAP
- WARNS?= 2
- diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c
- new file mode 100644
- index 00000000000..0322a7dd0a5
- --- /dev/null
- +++ b/usr.sbin/bhyve/net_backends.c
- @@ -0,0 +1,1082 @@
- +/*-
- + * Copyright (c) 2014-2016 Vincenzo Maffione
- + * All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + * 2. Redistributions in binary form must reproduce the above copyright
- + * notice, this list of conditions and the following disclaimer in the
- + * documentation and/or other materials provided with the distribution.
- + *
- + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- + */
- +
- +/*
- + * This file implements multiple network backends (null, tap, netmap, ...),
- + * to be used by network frontends such as virtio-net and ptnet.
- + * The API to access the backend (e.g. send/receive packets, negotiate
- + * features) is exported by net_backends.h.
- + */
- +
- +#include <sys/cdefs.h>
- +#include <sys/uio.h>
- +#include <sys/ioctl.h>
- +#include <sys/mman.h>
- +#include <sys/types.h> /* u_short etc */
- +#include <net/if.h>
- +
- +#include <errno.h>
- +#include <fcntl.h>
- +#include <stdio.h>
- +#include <stdlib.h>
- +#include <stdint.h>
- +#include <string.h>
- +#include <unistd.h>
- +#include <assert.h>
- +#include <pthread.h>
- +#include <pthread_np.h>
- +#include <poll.h>
- +#include <assert.h>
- +
- +#include "mevent.h"
- +#include "net_backends.h"
- +
- +#include <sys/linker_set.h>
- +
- +/*
- + * Each network backend registers a set of function pointers that are
- + * used to implement the net backends API.
- + * This might need to be exposed if we implement backends in separate files.
- + */
- +struct net_backend {
- + const char *name; /* name of the backend */
- + /*
- + * The init and cleanup functions are used internally;
- + * virtio-net should never call them.
- + */
- + int (*init)(struct net_backend *be, const char *devname,
- + net_backend_cb_t cb, void *param);
- + void (*cleanup)(struct net_backend *be);
- +
- +
- + /*
- + * Called to serve a guest transmit request. The scatter-gather
- + * vector provided by the caller has 'iovcnt' elements and contains
- + * the packet to send. 'len' is the length of whole packet in bytes.
- + */
- + int (*send)(struct net_backend *be, struct iovec *iov,
- + int iovcnt, uint32_t len, int more);
- +
- + /*
- + * Called to serve guest receive request. When the function
- + * returns a positive value, the scatter-gather vector
- + * provided by the caller (having 'iovcnt' elements in it) will
- + * contain a chunk of the received packet. The 'more' flag will
- + * be set if the returned chunk was the last one for the current
- + * packet, and 0 otherwise. The function returns the chunk size
- + * in bytes, or 0 if the backend doesn't have a new packet to
- + * receive.
- + * Note that it may be necessary to call this callback many
- + * times to receive a single packet, depending on how big
- + * the buffers you provide are.
- + */
- + int (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);
- +
- + /*
- + * Ask the backend for the virtio-net features it is able to
- + * support. Possible features are TSO, UFO and checksum offloading
- + * in both rx and tx direction and for both IPv4 and IPv6.
- + */
- + uint64_t (*get_cap)(struct net_backend *be);
- +
- + /*
- + * Tell the backend to enable/disable the specified virtio-net
- + * features (capabilities).
- + */
- + int (*set_cap)(struct net_backend *be, uint64_t features,
- + unsigned int vnet_hdr_len);
- +
- + struct pci_vtnet_softc *sc;
- + int fd;
- + unsigned int be_vnet_hdr_len;
- + unsigned int fe_vnet_hdr_len;
- + void *priv; /* Pointer to backend-specific data. */
- +};
- +
- +SET_DECLARE(net_backend_s, struct net_backend);
- +
- +#define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr)
- +
- +#define WPRINTF(params) printf params
- +
- +/* the null backend */
- +static int
- +netbe_null_init(struct net_backend *be, const char *devname,
- + net_backend_cb_t cb, void *param)
- +{
- + (void)devname; (void)cb; (void)param;
- + be->fd = -1;
- + return 0;
- +}
- +
- +static void
- +netbe_null_cleanup(struct net_backend *be)
- +{
- + (void)be;
- +}
- +
- +static uint64_t
- +netbe_null_get_cap(struct net_backend *be)
- +{
- + (void)be;
- + return 0;
- +}
- +
- +static int
- +netbe_null_set_cap(struct net_backend *be, uint64_t features,
- + unsigned vnet_hdr_len)
- +{
- + (void)be; (void)features; (void)vnet_hdr_len;
- + return 0;
- +}
- +
- +static int
- +netbe_null_send(struct net_backend *be, struct iovec *iov,
- + int iovcnt, uint32_t len, int more)
- +{
- + (void)be; (void)iov; (void)iovcnt; (void)len; (void)more;
- + return 0; /* pretend we send */
- +}
- +
- +static int
- +netbe_null_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
- +{
- + (void)be; (void)iov; (void)iovcnt;
- + fprintf(stderr, "netbe_null_recv called ?\n");
- + return -1; /* never called, i believe */
- +}
- +
- +static struct net_backend n_be = {
- + .name = "null",
- + .init = netbe_null_init,
- + .cleanup = netbe_null_cleanup,
- + .send = netbe_null_send,
- + .recv = netbe_null_recv,
- + .get_cap = netbe_null_get_cap,
- + .set_cap = netbe_null_set_cap,
- +};
- +
- +DATA_SET(net_backend_s, n_be);
- +
- +
- +/* the tap backend */
- +
- +struct tap_priv {
- + struct mevent *mevp;
- +};
- +
- +static void
- +tap_cleanup(struct net_backend *be)
- +{
- + struct tap_priv *priv = be->priv;
- +
- + if (be->priv) {
- + mevent_delete(priv->mevp);
- + free(be->priv);
- + be->priv = NULL;
- + }
- + if (be->fd != -1) {
- + close(be->fd);
- + be->fd = -1;
- + }
- +}
- +
- +static int
- +tap_init(struct net_backend *be, const char *devname,
- + net_backend_cb_t cb, void *param)
- +{
- + char tbuf[80];
- + int fd;
- + int opt = 1;
- + struct tap_priv *priv;
- +
- + if (cb == NULL) {
- + WPRINTF(("TAP backend requires non-NULL callback\n"));
- + return -1;
- + }
- +
- + priv = calloc(1, sizeof(struct tap_priv));
- + if (priv == NULL) {
- + WPRINTF(("tap_priv alloc failed\n"));
- + return -1;
- + }
- +
- + strcpy(tbuf, "/dev/");
- + strlcat(tbuf, devname, sizeof(tbuf));
- +
- + fd = open(tbuf, O_RDWR);
- + if (fd == -1) {
- + WPRINTF(("open of tap device %s failed\n", tbuf));
- + goto error;
- + }
- +
- + /*
- + * Set non-blocking and register for read
- + * notifications with the event loop
- + */
- + if (ioctl(fd, FIONBIO, &opt) < 0) {
- + WPRINTF(("tap device O_NONBLOCK failed\n"));
- + goto error;
- + }
- +
- + priv->mevp = mevent_add(fd, EVF_READ, cb, param);
- + if (priv->mevp == NULL) {
- + WPRINTF(("Could not register event\n"));
- + goto error;
- + }
- +
- + be->fd = fd;
- + be->priv = priv;
- +
- + return 0;
- +
- +error:
- + tap_cleanup(be);
- + return -1;
- +}
- +
- +/*
- + * Called to send a buffer chain out to the tap device
- + */
- +static int
- +tap_send(struct net_backend *be, struct iovec *iov, int iovcnt, uint32_t len,
- + int more)
- +{
- + static char pad[60]; /* all zero bytes */
- +
- + (void)more;
- + /*
- + * If the length is < 60, pad out to that and add the
- + * extra zero'd segment to the iov. It is guaranteed that
- + * there is always an extra iov available by the caller.
- + */
- + if (len < 60) {
- + iov[iovcnt].iov_base = pad;
- + iov[iovcnt].iov_len = (size_t)(60 - len);
- + iovcnt++;
- + }
- +
- + return (int)writev(be->fd, iov, iovcnt);
- +}
- +
- +static int
- +tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
- +{
- + int ret;
- +
- + /* Should never be called without a valid tap fd */
- + assert(be->fd != -1);
- +
- + ret = (int)readv(be->fd, iov, iovcnt);
- +
- + if (ret < 0 && errno == EWOULDBLOCK) {
- + return 0;
- + }
- +
- + return ret;
- +}
- +
- +static uint64_t
- +tap_get_cap(struct net_backend *be)
- +{
- + (void)be;
- + return 0; // nothing extra
- +}
- +
- +static int
- +tap_set_cap(struct net_backend *be, uint64_t features,
- + unsigned vnet_hdr_len)
- +{
- + (void)be;
- + return (features || vnet_hdr_len) ? -1 : 0;
- +}
- +
- +static struct net_backend tap_backend = {
- + .name = "tap|vmmnet",
- + .init = tap_init,
- + .cleanup = tap_cleanup,
- + .send = tap_send,
- + .recv = tap_recv,
- + .get_cap = tap_get_cap,
- + .set_cap = tap_set_cap,
- +};
- +
- +DATA_SET(net_backend_s, tap_backend);
- +
- +#ifdef WITH_NETMAP
- +
- +/*
- + * The netmap backend
- + */
- +
- +/* The virtio-net features supported by netmap. */
- +#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
- + VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
- + VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
- + VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
- +
- +#define NETMAP_POLLMASK (POLLIN | POLLRDNORM | POLLRDBAND)
- +
- +struct netmap_priv {
- + char ifname[IFNAMSIZ];
- + struct nm_desc *nmd;
- + uint16_t memid;
- + struct netmap_ring *rx;
- + struct netmap_ring *tx;
- + pthread_t evloop_tid;
- + net_backend_cb_t cb;
- + void *cb_param;
- +
- + struct ptnetmap_state ptnetmap;
- +};
- +
- +static void *
- +netmap_evloop_thread(void *param)
- +{
- + struct net_backend *be = param;
- + struct netmap_priv *priv = be->priv;
- + struct pollfd pfd;
- + int ret;
- +
- + for (;;) {
- + pfd.fd = be->fd;
- + pfd.events = NETMAP_POLLMASK;
- + ret = poll(&pfd, 1, INFTIM);
- + if (ret == -1 && errno != EINTR) {
- + WPRINTF(("netmap poll failed, %d\n", errno));
- + } else if (ret == 1 && (pfd.revents & NETMAP_POLLMASK)) {
- + priv->cb(pfd.fd, EVF_READ, priv->cb_param);
- + }
- + }
- +
- + return NULL;
- +}
- +
- +static void
- +nmreq_init(struct nmreq *req, char *ifname)
- +{
- + memset(req, 0, sizeof(*req));
- + strncpy(req->nr_name, ifname, sizeof(req->nr_name));
- + req->nr_version = NETMAP_API;
- +}
- +
- +static int
- +netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
- +{
- + int err;
- + struct nmreq req;
- + struct netmap_priv *priv = be->priv;
- +
- + nmreq_init(&req, priv->ifname);
- + req.nr_cmd = NETMAP_BDG_VNET_HDR;
- + req.nr_arg1 = vnet_hdr_len;
- + err = ioctl(be->fd, NIOCREGIF, &req);
- + if (err) {
- + WPRINTF(("Unable to set vnet header length %d\n",
- + vnet_hdr_len));
- + return err;
- + }
- +
- + be->be_vnet_hdr_len = vnet_hdr_len;
- +
- + return 0;
- +}
- +
- +static int
- +netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
- +{
- + int prev_hdr_len = be->be_vnet_hdr_len;
- + int ret;
- +
- + if (vnet_hdr_len == prev_hdr_len) {
- + return 1;
- + }
- +
- + ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
- + if (ret) {
- + return 0;
- + }
- +
- + netmap_set_vnet_hdr_len(be, prev_hdr_len);
- +
- + return 1;
- +}
- +
- +static uint64_t
- +netmap_get_cap(struct net_backend *be)
- +{
- + return netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
- + NETMAP_FEATURES : 0;
- +}
- +
- +static int
- +netmap_set_cap(struct net_backend *be, uint64_t features,
- + unsigned vnet_hdr_len)
- +{
- + return netmap_set_vnet_hdr_len(be, vnet_hdr_len);
- +}
- +
- +/* Store and return the features we agreed upon. */
- +uint32_t
- +ptnetmap_ack_features(struct ptnetmap_state *ptn, uint32_t wanted_features)
- +{
- + ptn->acked_features = ptn->features & wanted_features;
- +
- + return ptn->acked_features;
- +}
- +
- +struct ptnetmap_state *
- +get_ptnetmap(struct net_backend *be)
- +{
- + struct netmap_priv *priv = be ? be->priv : NULL;
- + struct netmap_pools_info pi;
- + struct nmreq req;
- + int err;
- +
- + /* Check that this is a ptnetmap backend. */
- + if (!be || be->set_cap != netmap_set_cap ||
- + !(priv->nmd->req.nr_flags & NR_PTNETMAP_HOST)) {
- + return NULL;
- + }
- +
- + nmreq_init(&req, priv->ifname);
- + req.nr_cmd = NETMAP_POOLS_INFO_GET;
- + nmreq_pointer_put(&req, &pi);
- + err = ioctl(priv->nmd->fd, NIOCREGIF, &req);
- + if (err) {
- + return NULL;
- + }
- +
- + err = ptn_memdev_attach(priv->nmd->mem, &pi);
- + if (err) {
- + return NULL;
- + }
- +
- + return &priv->ptnetmap;
- +}
- +
- +int
- +ptnetmap_get_netmap_if(struct ptnetmap_state *ptn, struct netmap_if_info *nif)
- +{
- + struct netmap_priv *priv = ptn->netmap_priv;
- +
- + memset(nif, 0, sizeof(*nif));
- + if (priv->nmd == NULL) {
- + return EINVAL;
- + }
- +
- + nif->nifp_offset = priv->nmd->req.nr_offset;
- + nif->num_tx_rings = priv->nmd->req.nr_tx_rings;
- + nif->num_rx_rings = priv->nmd->req.nr_rx_rings;
- + nif->num_tx_slots = priv->nmd->req.nr_tx_slots;
- + nif->num_rx_slots = priv->nmd->req.nr_rx_slots;
- +
- + return 0;
- +}
- +
- +int
- +ptnetmap_get_hostmemid(struct ptnetmap_state *ptn)
- +{
- + struct netmap_priv *priv = ptn->netmap_priv;
- +
- + if (priv->nmd == NULL) {
- + return EINVAL;
- + }
- +
- + return priv->memid;
- +}
- +
- +int
- +ptnetmap_create(struct ptnetmap_state *ptn, struct ptnetmap_cfg *cfg)
- +{
- + struct netmap_priv *priv = ptn->netmap_priv;
- + struct nmreq req;
- + int err;
- +
- + if (ptn->running) {
- + return 0;
- + }
- +
- + /* XXX We should stop the netmap evloop here. */
- +
- + /* Ask netmap to create kthreads for this interface. */
- + nmreq_init(&req, priv->ifname);
- + nmreq_pointer_put(&req, cfg);
- + req.nr_cmd = NETMAP_PT_HOST_CREATE;
- + err = ioctl(priv->nmd->fd, NIOCREGIF, &req);
- + if (err) {
- + fprintf(stderr, "%s: Unable to create ptnetmap kthreads on "
- + "%s [errno=%d]", __func__, priv->ifname, errno);
- + return err;
- + }
- +
- + ptn->running = 1;
- +
- + return 0;
- +}
- +
- +int
- +ptnetmap_delete(struct ptnetmap_state *ptn)
- +{
- + struct netmap_priv *priv = ptn->netmap_priv;
- + struct nmreq req;
- + int err;
- +
- + if (!ptn->running) {
- + return 0;
- + }
- +
- + /* Ask netmap to delete kthreads for this interface. */
- + nmreq_init(&req, priv->ifname);
- + req.nr_cmd = NETMAP_PT_HOST_DELETE;
- + err = ioctl(priv->nmd->fd, NIOCREGIF, &req);
- + if (err) {
- +		fprintf(stderr, "%s: Unable to delete ptnetmap kthreads on "
- + "%s [errno=%d]", __func__, priv->ifname, errno);
- + return err;
- + }
- +
- + ptn->running = 0;
- +
- + return 0;
- +}
- +
- +static int
- +netmap_init(struct net_backend *be, const char *devname,
- + net_backend_cb_t cb, void *param)
- +{
- + const char *ndname = "/dev/netmap";
- + struct netmap_priv *priv = NULL;
- + struct nmreq req;
- + int ptnetmap = (cb == NULL);
- +
- + priv = calloc(1, sizeof(struct netmap_priv));
- + if (priv == NULL) {
- + WPRINTF(("Unable alloc netmap private data\n"));
- + return -1;
- + }
- +
- + strncpy(priv->ifname, devname, sizeof(priv->ifname));
- + priv->ifname[sizeof(priv->ifname) - 1] = '\0';
- +
- + memset(&req, 0, sizeof(req));
- + req.nr_flags = ptnetmap ? NR_PTNETMAP_HOST : 0;
- +
- + priv->nmd = nm_open(priv->ifname, &req, NETMAP_NO_TX_POLL, NULL);
- + if (priv->nmd == NULL) {
- + WPRINTF(("Unable to nm_open(): device '%s', "
- + "interface '%s', errno (%s)\n",
- + ndname, devname, strerror(errno)));
- + free(priv);
- + return -1;
- + }
- +
- + priv->memid = priv->nmd->req.nr_arg2;
- + priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
- + priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
- + priv->cb = cb;
- + priv->cb_param = param;
- + be->fd = priv->nmd->fd;
- + be->priv = priv;
- +
- + priv->ptnetmap.netmap_priv = priv;
- + priv->ptnetmap.features = 0;
- + priv->ptnetmap.acked_features = 0;
- + priv->ptnetmap.running = 0;
- + if (ptnetmap) {
- + if (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN)) {
- + priv->ptnetmap.features |= PTNETMAP_F_VNET_HDR;
- + }
- + } else {
- + char tname[40];
- +
- + /* Create a thread for netmap poll. */
- + pthread_create(&priv->evloop_tid, NULL, netmap_evloop_thread, (void *)be);
- + snprintf(tname, sizeof(tname), "netmap-evloop-%p", priv);
- + pthread_set_name_np(priv->evloop_tid, tname);
- + }
- +
- + return 0;
- +}
- +
- +static void
- +netmap_cleanup(struct net_backend *be)
- +{
- + struct netmap_priv *priv = be->priv;
- +
- + if (be->priv) {
- + if (priv->ptnetmap.running) {
- + ptnetmap_delete(&priv->ptnetmap);
- + }
- + nm_close(priv->nmd);
- + free(be->priv);
- + be->priv = NULL;
- + }
- + be->fd = -1;
- +}
- +
- +/* A fast copy routine only for multiples of 64 bytes, non overlapped. */
- +static inline void
- +pkt_copy(const void *_src, void *_dst, int l)
- +{
- + const uint64_t *src = _src;
- + uint64_t *dst = _dst;
- + if (l >= 1024) {
- + bcopy(src, dst, l);
- + return;
- + }
- + for (; l > 0; l -= 64) {
- + *dst++ = *src++;
- + *dst++ = *src++;
- + *dst++ = *src++;
- + *dst++ = *src++;
- + *dst++ = *src++;
- + *dst++ = *src++;
- + *dst++ = *src++;
- + *dst++ = *src++;
- + }
- +}
- +
- +static int
- +netmap_send(struct net_backend *be, struct iovec *iov,
- + int iovcnt, uint32_t size, int more)
- +{
- + struct netmap_priv *priv = be->priv;
- + struct netmap_ring *ring;
- + int nm_buf_size;
- + int nm_buf_len;
- + uint32_t head;
- + void *nm_buf;
- + int j;
- +
- + if (iovcnt <= 0 || size <= 0) {
- + D("Wrong iov: iovcnt %d size %d", iovcnt, size);
- + return 0;
- + }
- +
- + ring = priv->tx;
- + head = ring->head;
- + if (head == ring->tail) {
- + RD(1, "No space, drop %d bytes", size);
- + goto txsync;
- + }
- + nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
- + nm_buf_size = ring->nr_buf_size;
- + nm_buf_len = 0;
- +
- + for (j = 0; j < iovcnt; j++) {
- + int iov_frag_size = iov[j].iov_len;
- + void *iov_frag_buf = iov[j].iov_base;
- +
- + /* Split each iovec fragment over more netmap slots, if
- + necessary. */
- + for (;;) {
- + int copylen;
- +
- + copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
- + pkt_copy(iov_frag_buf, nm_buf, copylen);
- +
- + iov_frag_buf += copylen;
- + iov_frag_size -= copylen;
- + nm_buf += copylen;
- + nm_buf_size -= copylen;
- + nm_buf_len += copylen;
- +
- + if (iov_frag_size == 0) {
- + break;
- + }
- +
- + ring->slot[head].len = nm_buf_len;
- + ring->slot[head].flags = NS_MOREFRAG;
- + head = nm_ring_next(ring, head);
- + if (head == ring->tail) {
- + /* We ran out of netmap slots while
- + * splitting the iovec fragments. */
- + RD(1, "No space, drop %d bytes", size);
- + goto txsync;
- + }
- + nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
- + nm_buf_size = ring->nr_buf_size;
- + nm_buf_len = 0;
- + }
- + }
- +
- + /* Complete the last slot, which must not have NS_MOREFRAG set. */
- + ring->slot[head].len = nm_buf_len;
- + ring->slot[head].flags = 0;
- + head = nm_ring_next(ring, head);
- +
- + /* Now update ring->head and ring->cur. */
- + ring->head = ring->cur = head;
- +
- + if (more) {// && nm_ring_space(ring) > 64
- + return 0;
- + }
- +txsync:
- + ioctl(be->fd, NIOCTXSYNC, NULL);
- +
- + return 0;
- +}
- +
- +static int
- +netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
- +{
- + struct netmap_priv *priv = be->priv;
- + struct netmap_slot *slot = NULL;
- + struct netmap_ring *ring;
- + void *iov_frag_buf;
- + int iov_frag_size;
- + int totlen = 0;
- + uint32_t head;
- +
- + assert(iovcnt);
- +
- + ring = priv->rx;
- + head = ring->head;
- + iov_frag_buf = iov->iov_base;
- + iov_frag_size = iov->iov_len;
- +
- + do {
- + int nm_buf_len;
- + void *nm_buf;
- +
- + if (head == ring->tail) {
- + return 0;
- + }
- +
- + slot = ring->slot + head;
- + nm_buf = NETMAP_BUF(ring, slot->buf_idx);
- + nm_buf_len = slot->len;
- +
- + for (;;) {
- + int copylen = nm_buf_len < iov_frag_size ? nm_buf_len : iov_frag_size;
- +
- + pkt_copy(nm_buf, iov_frag_buf, copylen);
- + nm_buf += copylen;
- + nm_buf_len -= copylen;
- + iov_frag_buf += copylen;
- + iov_frag_size -= copylen;
- + totlen += copylen;
- +
- + if (nm_buf_len == 0) {
- + break;
- + }
- +
- + iov++;
- + iovcnt--;
- + if (iovcnt == 0) {
- + /* No space to receive. */
- + D("Short iov, drop %d bytes", totlen);
- + return -ENOSPC;
- + }
- + iov_frag_buf = iov->iov_base;
- + iov_frag_size = iov->iov_len;
- + }
- +
- + head = nm_ring_next(ring, head);
- +
- + } while (slot->flags & NS_MOREFRAG);
- +
- + /* Release slots to netmap. */
- + ring->head = ring->cur = head;
- +
- + return totlen;
- +}
- +
- +static struct net_backend netmap_backend = {
- + .name = "netmap|vale",
- + .init = netmap_init,
- + .cleanup = netmap_cleanup,
- + .send = netmap_send,
- + .recv = netmap_recv,
- + .get_cap = netmap_get_cap,
- + .set_cap = netmap_set_cap,
- +};
- +
- +DATA_SET(net_backend_s, netmap_backend);
- +
- +#endif /* WITH_NETMAP */
- +
- +/*
- + * make sure a backend is properly initialized
- + */
- +static void
- +netbe_fix(struct net_backend *be)
- +{
- + if (be == NULL)
- + return;
- + if (be->name == NULL) {
- + fprintf(stderr, "missing name for %p\n", be);
- + be->name = "unnamed netbe";
- + }
- + if (be->init == NULL) {
- + fprintf(stderr, "missing init for %p %s\n", be, be->name);
- + be->init = netbe_null_init;
- + }
- + if (be->cleanup == NULL) {
- + fprintf(stderr, "missing cleanup for %p %s\n", be, be->name);
- + be->cleanup = netbe_null_cleanup;
- + }
- + if (be->send == NULL) {
- + fprintf(stderr, "missing send for %p %s\n", be, be->name);
- + be->send = netbe_null_send;
- + }
- + if (be->recv == NULL) {
- + fprintf(stderr, "missing recv for %p %s\n", be, be->name);
- + be->recv = netbe_null_recv;
- + }
- + if (be->get_cap == NULL) {
- + fprintf(stderr, "missing get_cap for %p %s\n",
- + be, be->name);
- + be->get_cap = netbe_null_get_cap;
- + }
- + if (be->set_cap == NULL) {
- + fprintf(stderr, "missing set_cap for %p %s\n",
- + be, be->name);
- + be->set_cap = netbe_null_set_cap;
- + }
- +}
- +
- +/*
- + * keys is a set of prefixes separated by '|',
- + * return a non-NULL pointer if the leftmost part of name matches one prefix.
- + */
- +static const char *
- +netbe_name_match(const char *keys, const char *name)
- +{
- + const char *n = name, *good = keys;
- + char c;
- +
- + if (!keys || !name)
- + return NULL;
- + while ( (c = *keys++) ) {
- + if (c == '|') { /* reached the separator */
- + if (good)
- + break;
- + /* prepare for new round */
- + n = name;
- + good = keys;
- + } else if (good && c != *n++) {
- + good = NULL; /* drop till next keyword */
- + }
- + }
- + return good;
- +}
- +
- +/*
- + * Initialize a backend and attach to the frontend.
- + * This is called during frontend initialization.
- + * devname is the backend-name as supplied on the command line,
- + * e.g. -s 2:0,frontend-name,backend-name[,other-args]
- + * cb is the receive callback supplied by the frontend,
- + * and it is invoked in the event loop when a receive
- + * event is generated in the hypervisor,
- + * param is a pointer to the frontend, and normally used as
- + * the argument for the callback.
- + */
- +struct net_backend *
- +netbe_init(const char *devname, net_backend_cb_t cb, void *param)
- +{
- + struct net_backend **pbe, *be, *tbe = NULL;
- + int err;
- +
- + /*
- + * Find the network backend depending on the user-provided
- + * device name. net_backend_s is built using a linker set.
- + */
- + SET_FOREACH(pbe, net_backend_s) {
- + if (netbe_name_match((*pbe)->name, devname)) {
- + tbe = *pbe;
- + break;
- + }
- + }
- + if (tbe == NULL)
- + return NULL; /* or null backend ? */
- + be = calloc(1, sizeof(*be));
- + *be = *tbe; /* copy the template */
- + netbe_fix(be); /* make sure we have all fields */
- + be->fd = -1;
- + be->priv = NULL;
- + be->sc = param;
- + be->be_vnet_hdr_len = 0;
- + be->fe_vnet_hdr_len = 0;
- +
- + /* initialize the backend */
- + err = be->init(be, devname, cb, param);
- + if (err) {
- + free(be);
- + be = NULL;
- + }
- + return be;
- +}
- +
- +void
- +netbe_cleanup(struct net_backend *be)
- +{
- + if (be == NULL)
- + return;
- + be->cleanup(be);
- + free(be);
- +}
- +
- +uint64_t
- +netbe_get_cap(struct net_backend *be)
- +{
- + if (be == NULL)
- + return 0;
- + return be->get_cap(be);
- +}
- +
- +int
- +netbe_set_cap(struct net_backend *be, uint64_t features,
- + unsigned vnet_hdr_len)
- +{
- + int ret;
- +
- + if (be == NULL)
- + return 0;
- +
- + /* There are only three valid lengths. */
- + if (vnet_hdr_len && vnet_hdr_len != VNET_HDR_LEN
- + && vnet_hdr_len != (VNET_HDR_LEN - sizeof(uint16_t)))
- + return -1;
- +
- + be->fe_vnet_hdr_len = vnet_hdr_len;
- +
- + ret = be->set_cap(be, features, vnet_hdr_len);
- + assert(be->be_vnet_hdr_len == 0 ||
- + be->be_vnet_hdr_len == be->fe_vnet_hdr_len);
- +
- + return ret;
- +}
- +
- +static __inline struct iovec *
- +iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen)
- +{
- + struct iovec *riov;
- +
- + /* XXX short-cut: assume first segment is >= tlen */
- + assert(iov[0].iov_len >= tlen);
- +
- + iov[0].iov_len -= tlen;
- + if (iov[0].iov_len == 0) {
- + assert(*iovcnt > 1);
- + *iovcnt -= 1;
- + riov = &iov[1];
- + } else {
- + iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
- + riov = &iov[0];
- + }
- +
- + return (riov);
- +}
- +
- +void
- +netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt, uint32_t len,
- + int more)
- +{
- + if (be == NULL)
- + return;
- +#if 0
- + int i;
- + D("sending iovcnt %d len %d iovec %p", iovcnt, len, iov);
- + for (i=0; i < iovcnt; i++)
- + D(" %3d: %4d %p", i, (int)iov[i].iov_len, iov[i].iov_base);
- +#endif
- + if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
- + /* Here we are sure be->be_vnet_hdr_len is 0. */
- + iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len);
- + }
- +
- + be->send(be, iov, iovcnt, len, more);
- +}
- +
- +/*
- + * can return -1 in case of errors
- + */
- +int
- +netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
- +{
- + unsigned int hlen = 0; /* length of prepended virtio-net header */
- + int ret;
- +
- + if (be == NULL)
- + return -1;
- +
- + if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
- + struct virtio_net_rxhdr *vh;
- +
- + /* Here we are sure be->be_vnet_hdr_len is 0. */
- + hlen = be->fe_vnet_hdr_len;
- + /*
- + * Get a pointer to the rx header, and use the
- + * data immediately following it for the packet buffer.
- + */
- + vh = iov[0].iov_base;
- + iov = iov_trim(iov, &iovcnt, hlen);
- +
- + /*
- +		 * Here we are sure be->be_vnet_hdr_len is 0.
- + * The only valid field in the rx packet header is the
- + * number of buffers if merged rx bufs were negotiated.
- + */
- + memset(vh, 0, hlen);
- +
- + if (hlen == VNET_HDR_LEN) {
- + vh->vrh_bufs = 1;
- + }
- + }
- +
- + ret = be->recv(be, iov, iovcnt);
- + if (ret > 0) {
- + ret += hlen;
- + }
- +
- + return ret;
- +}
- +
- +/*
- + * Read a packet from the backend and discard it.
- + * Returns the size of the discarded packet or zero if no packet was available.
- + * A negative error code is returned in case of read error.
- + */
- +int
- +netbe_rx_discard(struct net_backend *be)
- +{
- + /*
- + * MP note: the dummybuf is only used to discard frames,
- + * so there is no need for it to be per-vtnet or locked.
- + * We only make it large enough for TSO-sized segment.
- + */
- + static uint8_t dummybuf[65536+64];
- + struct iovec iov;
- +
- + iov.iov_base = dummybuf;
- + iov.iov_len = sizeof(dummybuf);
- +
- + return netbe_recv(be, &iov, 1);
- +}
- +
- diff --git a/usr.sbin/bhyve/net_backends.h b/usr.sbin/bhyve/net_backends.h
- new file mode 100644
- index 00000000000..03c083bdbfd
- --- /dev/null
- +++ b/usr.sbin/bhyve/net_backends.h
- @@ -0,0 +1,144 @@
- +/*-
- + * Copyright (c) 2014 Vincenzo Maffione <v.maffione@gmail.com>
- + * All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + * 2. Redistributions in binary form must reproduce the above copyright
- + * notice, this list of conditions and the following disclaimer in the
- + * documentation and/or other materials provided with the distribution.
- + *
- + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
- + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- + */
- +
- +#ifndef __NET_BACKENDS_H__
- +#define __NET_BACKENDS_H__
- +
- +#include <stdint.h>
- +
- +#ifdef WITH_NETMAP
- +#include <net/netmap.h>
- +#include <net/netmap_virt.h>
- +#define NETMAP_WITH_LIBS
- +#include <net/netmap_user.h>
- +#if (NETMAP_API < 11)
- +#error "Netmap API version must be >= 11"
- +#endif
- +#endif /* WITH_NETMAP */
- +
- +#include "mevent.h"
- +
- +extern int netmap_ioctl_counter;
- +
- +typedef void (*net_backend_cb_t)(int, enum ev_type, void *param);
- +
- +/* Interface between virtio-net and the network backend. */
- +struct net_backend;
- +
- +struct net_backend *netbe_init(const char *devname,
- + net_backend_cb_t cb, void *param);
- +void netbe_cleanup(struct net_backend *be);
- +uint64_t netbe_get_cap(struct net_backend *be);
- +int netbe_set_cap(struct net_backend *be, uint64_t cap,
- + unsigned vnet_hdr_len);
- +void netbe_send(struct net_backend *be, struct iovec *iov,
- + int iovcnt, uint32_t len, int more);
- +int netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt);
- +int netbe_rx_discard(struct net_backend *be);
- +
- +
- +/*
- + * Network device capabilities taken from VirtIO standard.
- + * Despite the name, these capabilities can be used by different frontends
- + * (virtio-net, ptnet) and supported by different backends (netmap, tap, ...).
- + */
- +#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */
- +#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */
- +#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */
- +#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */
- +#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */
- +#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */
- +#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */
- +#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */
- +#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */
- +#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */
- +#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */
- +#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */
- +#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */
- +#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */
- +#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */
- +#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */
- +#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */
- +#define VIRTIO_NET_F_GUEST_ANNOUNCE \
- + (1 << 21) /* guest can send gratuitous pkts */
- +
- +/*
- + * Fixed network header size
- + */
- +struct virtio_net_rxhdr {
- + uint8_t vrh_flags;
- + uint8_t vrh_gso_type;
- + uint16_t vrh_hdr_len;
- + uint16_t vrh_gso_size;
- + uint16_t vrh_csum_start;
- + uint16_t vrh_csum_offset;
- + uint16_t vrh_bufs;
- +} __packed;
- +
- +/*
- + * ptnetmap definitions
- + */
- +struct ptnetmap_state {
- + void *netmap_priv;
- +
- + /* True if ptnetmap kthreads are running. */
- + int running;
- +
- +	/* Feature acknowledgement support. */
- + unsigned long features;
- + unsigned long acked_features;
- +
- + /* Info about netmap memory. */
- + uint32_t memsize;
- + void *mem;
- +};
- +
- +#ifdef WITH_NETMAP
- +/* Used to get read-only info. */
- +struct netmap_if_info {
- + uint32_t nifp_offset;
- + uint16_t num_tx_rings;
- + uint16_t num_rx_rings;
- + uint16_t num_tx_slots;
- + uint16_t num_rx_slots;
- +};
- +
- +int ptn_memdev_attach(void *mem_ptr, struct netmap_pools_info *);
- +int ptnetmap_get_netmap_if(struct ptnetmap_state *ptn,
- + struct netmap_if_info *nif);
- +struct ptnetmap_state * get_ptnetmap(struct net_backend *be);
- +uint32_t ptnetmap_ack_features(struct ptnetmap_state *ptn,
- + uint32_t wanted_features);
- +int ptnetmap_get_hostmemid(struct ptnetmap_state *ptn);
- +int ptnetmap_create(struct ptnetmap_state *ptn, struct ptnetmap_cfg *cfg);
- +int ptnetmap_delete(struct ptnetmap_state *ptn);
- +#endif /* WITH_NETMAP */
- +
- +#include "pci_emul.h"
- +int net_parsemac(char *mac_str, uint8_t *mac_addr);
- +void net_genmac(struct pci_devinst *pi, uint8_t *macaddr);
- +
- +#endif /* __NET_BACKENDS_H__ */
- diff --git a/usr.sbin/bhyve/net_utils.c b/usr.sbin/bhyve/net_utils.c
- new file mode 100644
- index 00000000000..a5a004a1a78
- --- /dev/null
- +++ b/usr.sbin/bhyve/net_utils.c
- @@ -0,0 +1,86 @@
- +/*-
- + * Copyright (c) 2011 NetApp, Inc.
- + * All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + * 2. Redistributions in binary form must reproduce the above copyright
- + * notice, this list of conditions and the following disclaimer in the
- + * documentation and/or other materials provided with the distribution.
- + *
- + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
- + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- + */
- +#include "net_utils.h"
- +#include "bhyverun.h"
- +#include <md5.h>
- +#include <net/ethernet.h>
- +#include <string.h>
- +#include <stdio.h>
- +#include <errno.h>
- +
- +/*
- + * Some utils functions, used by net front-ends. Originally, they were
- + * in pci_virtio_net.c.
- + */
- +
- +int
- +net_parsemac(char *mac_str, uint8_t *mac_addr)
- +{
- + struct ether_addr *ea;
- + char *tmpstr;
- + char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
- +
- + tmpstr = strsep(&mac_str,"=");
- +
- + if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
- + ea = ether_aton(mac_str);
- +
- + if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
- + memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
- + fprintf(stderr, "Invalid MAC %s\n", mac_str);
- + return (EINVAL);
- + } else
- + memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
- + }
- +
- + return (0);
- +}
- +
- +void
- +net_genmac(struct pci_devinst *pi, uint8_t *macaddr)
- +{
- + /*
- + * The default MAC address is the standard NetApp OUI of 00-a0-98,
- + * followed by an MD5 of the PCI slot/func number and dev name
- + */
- + MD5_CTX mdctx;
- + unsigned char digest[16];
- + char nstr[80];
- +
- + snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
- + pi->pi_func, vmname);
- +
- + MD5Init(&mdctx);
- + MD5Update(&mdctx, nstr, (unsigned int)strlen(nstr));
- + MD5Final(digest, &mdctx);
- +
- + macaddr[0] = 0x00;
- + macaddr[1] = 0xa0;
- + macaddr[2] = 0x98;
- + macaddr[3] = digest[0];
- + macaddr[4] = digest[1];
- + macaddr[5] = digest[2];
- +}
- diff --git a/usr.sbin/bhyve/net_utils.h b/usr.sbin/bhyve/net_utils.h
- new file mode 100644
- index 00000000000..2a0f3a86efb
- --- /dev/null
- +++ b/usr.sbin/bhyve/net_utils.h
- @@ -0,0 +1,34 @@
- +/*-
- + * Copyright (c) 2011 NetApp, Inc.
- + * All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + * 2. Redistributions in binary form must reproduce the above copyright
- + * notice, this list of conditions and the following disclaimer in the
- + * documentation and/or other materials provided with the distribution.
- + *
- + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
- + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
- + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
- + * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- + * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
- + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
- + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- + */
- +#ifndef _NET_UTILS_H_
- +#define _NET_UTILS_H_
- +
- +#include <stdint.h>
- +#include "pci_emul.h"
- +
- +void net_genmac(struct pci_devinst *pi, uint8_t *macaddr);
- +int net_parsemac(char *mac_str, uint8_t *mac_addr);
- +#endif /* _NET_UTILS_H_ */
- diff --git a/usr.sbin/bhyve/pci_e82545.c b/usr.sbin/bhyve/pci_e82545.c
- index 7db7cab4e74..899b273ddca 100644
- --- a/usr.sbin/bhyve/pci_e82545.c
- +++ b/usr.sbin/bhyve/pci_e82545.c
- @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
- #include <net/ethernet.h>
- #include <netinet/in.h>
- #include <netinet/tcp.h>
- +#include <net/if.h> /* IFNAMSIZ */
- #include <err.h>
- #include <errno.h>
- @@ -60,6 +61,8 @@ __FBSDID("$FreeBSD$");
- #include "bhyverun.h"
- #include "pci_emul.h"
- #include "mevent.h"
- +#include "net_utils.h" /* MAC address generation */
- +#include "net_backends.h"
- /* Hardware/register definitions XXX: move some to common code. */
- #define E82545_VENDOR_ID_INTEL 0x8086
- @@ -239,11 +242,10 @@ struct eth_uni {
- struct e82545_softc {
- struct pci_devinst *esc_pi;
- struct vmctx *esc_ctx;
- - struct mevent *esc_mevp;
- struct mevent *esc_mevpitr;
- pthread_mutex_t esc_mtx;
- struct ether_addr esc_mac;
- - int esc_tapfd;
- + struct net_backend *esc_be;
- /* General */
- uint32_t esc_CTRL; /* x0000 device ctl */
- @@ -349,7 +351,7 @@ struct e82545_softc {
- static void e82545_reset(struct e82545_softc *sc, int dev);
- static void e82545_rx_enable(struct e82545_softc *sc);
- static void e82545_rx_disable(struct e82545_softc *sc);
- -static void e82545_tap_callback(int fd, enum ev_type type, void *param);
- +static void e82545_rx_callback(int fd, enum ev_type type, void *param);
- static void e82545_tx_start(struct e82545_softc *sc);
- static void e82545_tx_enable(struct e82545_softc *sc);
- static void e82545_tx_disable(struct e82545_softc *sc);
- @@ -818,11 +820,9 @@ e82545_bufsz(uint32_t rctl)
- return (256); /* Forbidden value. */
- }
- -static uint8_t dummybuf[2048];
- -
- /* XXX one packet at a time until this is debugged */
- static void
- -e82545_tap_callback(int fd, enum ev_type type, void *param)
- +e82545_rx_callback(int fd, enum ev_type type, void *param)
- {
- struct e82545_softc *sc = param;
- struct e1000_rx_desc *rxd;
- @@ -837,7 +837,7 @@ e82545_tap_callback(int fd, enum ev_type type, void *param)
- if (!sc->esc_rx_enabled || sc->esc_rx_loopback) {
- DPRINTF("rx disabled (!%d || %d) -- packet(s) dropped\r\n",
- sc->esc_rx_enabled, sc->esc_rx_loopback);
- - while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
- + while (netbe_rx_discard(sc->esc_be) > 0) {
- }
- goto done1;
- }
- @@ -850,7 +850,7 @@ e82545_tap_callback(int fd, enum ev_type type, void *param)
- if (left < maxpktdesc) {
- DPRINTF("rx overflow (%d < %d) -- packet(s) dropped\r\n",
- left, maxpktdesc);
- - while (read(sc->esc_tapfd, dummybuf, sizeof(dummybuf)) > 0) {
- + while (netbe_rx_discard(sc->esc_be) > 0) {
- }
- goto done1;
- }
- @@ -867,9 +867,9 @@ e82545_tap_callback(int fd, enum ev_type type, void *param)
- rxd->buffer_addr, bufsz);
- vec[i].iov_len = bufsz;
- }
- - len = readv(sc->esc_tapfd, vec, maxpktdesc);
- + len = netbe_recv(sc->esc_be, vec, maxpktdesc);
- if (len <= 0) {
- - DPRINTF("tap: readv() returned %d\n", len);
- + DPRINTF("be: recv() returned %d\n", len);
- goto done;
- }
- @@ -1041,13 +1041,10 @@ e82545_transmit_checksum(struct iovec *iov, int iovcnt, struct ck_info *ck)
- }
- static void
- -e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt)
- +e82545_transmit_backend(struct e82545_softc *sc, struct iovec *iov, int iovcnt,
- + uint32_t len)
- {
- -
- - if (sc->esc_tapfd == -1)
- - return;
- -
- - (void) writev(sc->esc_tapfd, iov, iovcnt);
- + netbe_send(sc->esc_be, iov, iovcnt, len, 0);
- }
- static void
- @@ -1083,7 +1080,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
- ckinfo[0].ck_valid = ckinfo[1].ck_valid = 0;
- iovcnt = 0;
- - tlen = 0;
- + tlen = 0; /* total length */
- ntype = 0;
- tso = 0;
- ohead = head;
- @@ -1208,6 +1205,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
- hdrlen = ETHER_ADDR_LEN*2;
- vlen = ETHER_VLAN_ENCAP_LEN;
- }
- + tlen += vlen;
- if (!tso) {
- /* Estimate required writable space for checksums. */
- if (ckinfo[0].ck_valid)
- @@ -1273,7 +1271,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
- e82545_transmit_checksum(iov, iovcnt, &ckinfo[0]);
- if (ckinfo[1].ck_valid)
- e82545_transmit_checksum(iov, iovcnt, &ckinfo[1]);
- - e82545_transmit_backend(sc, iov, iovcnt);
- + e82545_transmit_backend(sc, iov, iovcnt, tlen);
- goto done;
- }
- @@ -1297,13 +1295,14 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
- /* Construct IOVs for the segment. */
- /* Include whole original header. */
- tiov[0].iov_base = hdr;
- - tiov[0].iov_len = hdrlen;
- + tiov[0].iov_len = tlen = hdrlen;
- tiovcnt = 1;
- /* Include respective part of payload IOV. */
- for (nleft = now; pv < iovcnt && nleft > 0; nleft -= nnow) {
- nnow = MIN(nleft, iov[pv].iov_len - pvoff);
- tiov[tiovcnt].iov_base = iov[pv].iov_base + pvoff;
- tiov[tiovcnt++].iov_len = nnow;
- + tlen += nnow;
- if (pvoff + nnow == iov[pv].iov_len) {
- pv++;
- pvoff = 0;
- @@ -1356,7 +1355,7 @@ e82545_transmit(struct e82545_softc *sc, uint16_t head, uint16_t tail,
- e82545_carry(tcpsum);
- e82545_transmit_checksum(tiov, tiovcnt, &ckinfo[1]);
- }
- - e82545_transmit_backend(sc, tiov, tiovcnt);
- + e82545_transmit_backend(sc, tiov, tiovcnt, tlen);
- }
- done:
- @@ -2203,89 +2202,17 @@ e82545_reset(struct e82545_softc *sc, int drvr)
- sc->esc_TXDCTL = 0;
- }
- -static void
- -e82545_open_tap(struct e82545_softc *sc, char *opts)
- -{
- - char tbuf[80];
- -#ifndef WITHOUT_CAPSICUM
- - cap_rights_t rights;
- -#endif
- -
- - if (opts == NULL) {
- - sc->esc_tapfd = -1;
- - return;
- - }
- -
- - strcpy(tbuf, "/dev/");
- - strlcat(tbuf, opts, sizeof(tbuf));
- -
- - sc->esc_tapfd = open(tbuf, O_RDWR);
- - if (sc->esc_tapfd == -1) {
- - DPRINTF("unable to open tap device %s\n", opts);
- - exit(1);
- - }
- -
- - /*
- - * Set non-blocking and register for read
- - * notifications with the event loop
- - */
- - int opt = 1;
- - if (ioctl(sc->esc_tapfd, FIONBIO, &opt) < 0) {
- - WPRINTF("tap device O_NONBLOCK failed: %d\n", errno);
- - close(sc->esc_tapfd);
- - sc->esc_tapfd = -1;
- - }
- -
- -#ifndef WITHOUT_CAPSICUM
- - cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
- - if (cap_rights_limit(sc->esc_tapfd, &rights) == -1 && errno != ENOSYS)
- - errx(EX_OSERR, "Unable to apply rights for sandbox");
- -#endif
- -
- - sc->esc_mevp = mevent_add(sc->esc_tapfd,
- - EVF_READ,
- - e82545_tap_callback,
- - sc);
- - if (sc->esc_mevp == NULL) {
- - DPRINTF("Could not register mevent %d\n", EVF_READ);
- - close(sc->esc_tapfd);
- - sc->esc_tapfd = -1;
- - }
- -}
- -
- -static int
- -e82545_parsemac(char *mac_str, uint8_t *mac_addr)
- -{
- - struct ether_addr *ea;
- - char *tmpstr;
- - char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
- -
- - tmpstr = strsep(&mac_str,"=");
- - if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
- - ea = ether_aton(mac_str);
- - if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
- - memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
- - fprintf(stderr, "Invalid MAC %s\n", mac_str);
- - return (1);
- - } else
- - memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
- - }
- - return (0);
- -}
- -
- static int
- e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- {
- - DPRINTF("Loading with options: %s\r\n", opts);
- -
- - MD5_CTX mdctx;
- - unsigned char digest[16];
- char nstr[80];
- struct e82545_softc *sc;
- char *devname;
- char *vtopts;
- int mac_provided;
- + DPRINTF("Loading with options: %s\r\n", opts);
- +
- /* Setup our softc */
- sc = calloc(1, sizeof(*sc));
- @@ -2323,11 +2250,10 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- E82545_BAR_IO_LEN);
- /*
- - * Attempt to open the tap device and read the MAC address
- - * if specified. Copied from virtio-net, slightly modified.
- + * Attempt to open the backend device and read the MAC address
- + * if specified. Copied from virtio-net, slightly modified.
- */
- mac_provided = 0;
- - sc->esc_tapfd = -1;
- if (opts != NULL) {
- int err;
- @@ -2335,7 +2261,7 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- (void) strsep(&vtopts, ",");
- if (vtopts != NULL) {
- - err = e82545_parsemac(vtopts, sc->esc_mac.octet);
- + err = net_parsemac(vtopts, sc->esc_mac.octet);
- if (err != 0) {
- free(devname);
- return (err);
- @@ -2343,9 +2269,11 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- mac_provided = 1;
- }
- - if (strncmp(devname, "tap", 3) == 0 ||
- - strncmp(devname, "vmnet", 5) == 0)
- - e82545_open_tap(sc, devname);
- + sc->esc_be = netbe_init(devname, e82545_rx_callback, sc);
- + if (!sc->esc_be) {
- + WPRINTF("net backend '%s' initialization failed\n",
- + devname);
- + }
- free(devname);
- }
- @@ -2355,19 +2283,7 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- * followed by an MD5 of the PCI slot/func number and dev name
- */
- if (!mac_provided) {
- - snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
- - pi->pi_func, vmname);
- -
- - MD5Init(&mdctx);
- - MD5Update(&mdctx, nstr, strlen(nstr));
- - MD5Final(digest, &mdctx);
- -
- - sc->esc_mac.octet[0] = 0x00;
- - sc->esc_mac.octet[1] = 0xa0;
- - sc->esc_mac.octet[2] = 0x98;
- - sc->esc_mac.octet[3] = digest[0];
- - sc->esc_mac.octet[4] = digest[1];
- - sc->esc_mac.octet[5] = digest[2];
- + net_genmac(pi, sc->esc_mac.octet);
- }
- /* H/w initiated reset */
- diff --git a/usr.sbin/bhyve/pci_ptnetmap_memdev.c b/usr.sbin/bhyve/pci_ptnetmap_memdev.c
- new file mode 100644
- index 00000000000..a1e95a1ed0f
- --- /dev/null
- +++ b/usr.sbin/bhyve/pci_ptnetmap_memdev.c
- @@ -0,0 +1,341 @@
- +/*
- + * Copyright (C) 2015 Stefano Garzarella (stefano.garzarella@gmail.com)
- + * All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + * 2. Redistributions in binary form must reproduce the above copyright
- + * notice, this list of conditions and the following disclaimer in the
- + * documentation and/or other materials provided with the distribution.
- + *
- + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- + * SUCH DAMAGE.
- + */
- +
- +#ifdef WITH_NETMAP
- +
- +#include <sys/cdefs.h>
- +__FBSDID("$FreeBSD$");
- +
- +#include <errno.h>
- +#include <stdio.h>
- +#include <stdlib.h>
- +#include <stdint.h>
- +
- +#include <net/if.h> /* IFNAMSIZ */
- +#include <net/netmap.h>
- +#include <net/netmap_virt.h>
- +
- +#include <machine/vmm.h>
- +#include <vmmapi.h>
- +
- +#include "bhyverun.h"
- +#include "pci_emul.h"
- +
- +/*
- + * ptnetmap memdev PCI device
- + *
- + * This device is used to map a netmap memory allocator on the guest VM
- + * through PCI_BAR. The same allocator can be shared between multiple ptnetmap
- + * ports in the guest.
- + *
- + * Each netmap allocator has a unique ID assigned by the netmap host module.
- + *
- + * The implementation here is based on the QEMU/KVM one.
- + */
- +struct ptn_memdev_softc {
- + struct pci_devinst *pi; /* PCI device instance */
- +
- + void *mem_ptr; /* netmap shared memory */
- + struct netmap_pools_info info;
- +
- + TAILQ_ENTRY(ptn_memdev_softc) next;
- +};
- +static TAILQ_HEAD(, ptn_memdev_softc) ptn_memdevs = TAILQ_HEAD_INITIALIZER(ptn_memdevs);
- +
- +/*
- + * ptn_memdev_softc can be created by pe_init or ptnetmap backend,
- + * this depends on the order of initialization.
- + */
- +static struct ptn_memdev_softc *
- +ptn_memdev_create()
- +{
- + struct ptn_memdev_softc *sc;
- +
- + sc = calloc(1, sizeof(struct ptn_memdev_softc));
- + if (sc != NULL) {
- + TAILQ_INSERT_TAIL(&ptn_memdevs, sc, next);
- + }
- +
- + return sc;
- +}
- +
- +static void
- +ptn_memdev_delete(struct ptn_memdev_softc *sc)
- +{
- + TAILQ_REMOVE(&ptn_memdevs, sc, next);
- +
- + free(sc);
- +}
- +
- +/*
- + * Find ptn_memdev through memid (netmap memory allocator ID)
- + */
- +static struct ptn_memdev_softc *
- +ptn_memdev_find_memid(uint32_t mem_id)
- +{
- + struct ptn_memdev_softc *sc;
- +
- + TAILQ_FOREACH(sc, &ptn_memdevs, next) {
- + if (sc->mem_ptr != NULL && mem_id == sc->info.memid) {
- + return sc;
- + }
- + }
- +
- + return NULL;
- +}
- +
- +/*
- + * Find ptn_memdev that has no netmap memory yet (attached by ptnetmap backend)
- + */
- +static struct ptn_memdev_softc *
- +ptn_memdev_find_empty_mem()
- +{
- + struct ptn_memdev_softc *sc;
- +
- + TAILQ_FOREACH(sc, &ptn_memdevs, next) {
- + if (sc->mem_ptr == NULL) {
- + return sc;
- + }
- + }
- +
- + return NULL;
- +}
- +
- +/*
- + * Find ptn_memdev that has no PCI device instance yet (created by pe_init)
- + */
- +static struct ptn_memdev_softc *
- +ptn_memdev_find_empty_pi()
- +{
- + struct ptn_memdev_softc *sc;
- +
- + TAILQ_FOREACH(sc, &ptn_memdevs, next) {
- + if (sc->pi == NULL) {
- + return sc;
- + }
- + }
- +
- + return NULL;
- +}
- +
- +/*
- + * Handle read on ptnetmap-memdev register
- + */
- +static uint64_t
- +ptn_pci_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- + int baridx, uint64_t offset, int size)
- +{
- + struct ptn_memdev_softc *sc = pi->pi_arg;
- +
- + if (sc == NULL)
- + return 0;
- +
- + if (baridx == PTNETMAP_IO_PCI_BAR) {
- + switch (offset) {
- + case PTNET_MDEV_IO_MEMSIZE_LO:
- + return sc->info.memsize & 0xffffffff;
- + case PTNET_MDEV_IO_MEMSIZE_HI:
- + return sc->info.memsize >> 32;
- + case PTNET_MDEV_IO_MEMID:
- + return sc->info.memid;
- + case PTNET_MDEV_IO_IF_POOL_OFS:
- + return sc->info.if_pool_offset;
- + case PTNET_MDEV_IO_IF_POOL_OBJNUM:
- + return sc->info.if_pool_objtotal;
- + case PTNET_MDEV_IO_IF_POOL_OBJSZ:
- + return sc->info.if_pool_objsize;
- + case PTNET_MDEV_IO_RING_POOL_OFS:
- + return sc->info.ring_pool_offset;
- + case PTNET_MDEV_IO_RING_POOL_OBJNUM:
- + return sc->info.ring_pool_objtotal;
- + case PTNET_MDEV_IO_RING_POOL_OBJSZ:
- + return sc->info.ring_pool_objsize;
- + case PTNET_MDEV_IO_BUF_POOL_OFS:
- + return sc->info.buf_pool_offset;
- + case PTNET_MDEV_IO_BUF_POOL_OBJNUM:
- + return sc->info.buf_pool_objtotal;
- + case PTNET_MDEV_IO_BUF_POOL_OBJSZ:
- + return sc->info.buf_pool_objsize;
- + }
- + }
- +
- + printf("%s: Unexpected register read [bar %u, offset %lx size %d]\n",
- + __func__, baridx, offset, size);
- +
- + return 0;
- +}
- +
- +/*
- + * Handle write on ptnetmap-memdev register (unused for now)
- + */
- +static void
- +ptn_pci_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- + int baridx, uint64_t offset, int size, uint64_t value)
- +{
- + struct ptn_memdev_softc *sc = pi->pi_arg;
- +
- + if (sc == NULL)
- + return;
- +
- + printf("%s: Unexpected register write [bar %u, offset %lx size %d "
- + "value %lx]\n", __func__, baridx, offset, size, value);
- +}
- +
- +/*
- + * Configure the ptnetmap-memdev PCI BARs. PCI BARs can only be created
- + * when the PCI device is created and the netmap memory is attached.
- + */
- +static int
- +ptn_memdev_configure_bars(struct ptn_memdev_softc *sc)
- +{
- + int ret;
- +
- + if (sc->pi == NULL || sc->mem_ptr == NULL)
- + return 0;
- +
- + /* Allocate a BAR for an I/O region. */
- + ret = pci_emul_alloc_bar(sc->pi, PTNETMAP_IO_PCI_BAR, PCIBAR_IO,
- + PTNET_MDEV_IO_END);
- + if (ret) {
- + printf("ptnetmap_memdev: iobar allocation error %d\n", ret);
- + return ret;
- + }
- +
- + /* Allocate a BAR for a memory region. */
- + ret = pci_emul_alloc_bar(sc->pi, PTNETMAP_MEM_PCI_BAR, PCIBAR_MEM32,
- + sc->info.memsize);
- + if (ret) {
- + printf("ptnetmap_memdev: membar allocation error %d\n", ret);
- + return ret;
- + }
- +
- + /* Map netmap memory on the memory BAR. */
- + ret = vm_map_user_buf(sc->pi->pi_vmctx,
- + sc->pi->pi_bar[PTNETMAP_MEM_PCI_BAR].addr,
- + sc->info.memsize, sc->mem_ptr, 1);
- + if (ret) {
- + printf("ptnetmap_memdev: membar map error %d\n", ret);
- + return ret;
- + }
- +
- + return 0;
- +}
- +
- +/*
- + * PCI device initialization
- + */
- +static int
- +ptn_memdev_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- +{
- + struct ptn_memdev_softc *sc;
- + int ret;
- +
- + sc = ptn_memdev_find_empty_pi();
- + if (sc == NULL) {
- + sc = ptn_memdev_create();
- + if (sc == NULL) {
- + printf("ptnetmap_memdev: calloc error\n");
- + return (ENOMEM);
- + }
- + }
- +
- + /* Link our softc in the pci_devinst. */
- + pi->pi_arg = sc;
- + sc->pi = pi;
- +
- + /* Initialize PCI configuration space. */
- + pci_set_cfgdata16(pi, PCIR_VENDOR, PTNETMAP_PCI_VENDOR_ID);
- + pci_set_cfgdata16(pi, PCIR_DEVICE, PTNETMAP_PCI_DEVICE_ID);
- + pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
- + pci_set_cfgdata16(pi, PCIR_SUBDEV_0, 1);
- + pci_set_cfgdata16(pi, PCIR_SUBVEND_0, PTNETMAP_PCI_VENDOR_ID);
- +
- + /* Configure PCI-BARs. */
- + ret = ptn_memdev_configure_bars(sc);
- + if (ret) {
- + printf("ptnetmap_memdev: configure error\n");
- + goto err;
- + }
- +
- + return 0;
- +err:
- + ptn_memdev_delete(sc);
- + pi->pi_arg = NULL;
- + return ret;
- +}
- +
- +/*
- + * used by ptnetmap backend to attach the netmap memory allocator to the
- + * ptnetmap-memdev. (shared with the guest VM through PCI-BAR)
- + */
- +int
- +ptn_memdev_attach(void *mem_ptr, struct netmap_pools_info *info)
- +{
- + struct ptn_memdev_softc *sc;
- + int ret;
- +
- + /* if a device with the same mem_id is already attached, we are done */
- + if (ptn_memdev_find_memid(info->memid)) {
- + printf("ptnetmap_memdev: already attched\n");
- + return 0;
- + }
- +
- + sc = ptn_memdev_find_empty_mem();
- + if (sc == NULL) {
- + sc = ptn_memdev_create();
- + if (sc == NULL) {
- + printf("ptnetmap_memdev: calloc error\n");
- + return (ENOMEM);
- + }
- + }
- +
- + sc->mem_ptr = mem_ptr;
- + sc->info = *info;
- +
- + /* configure device PCI-BARs */
- + ret = ptn_memdev_configure_bars(sc);
- + if (ret) {
- + printf("ptnetmap_memdev: configure error\n");
- + goto err;
- + }
- +
- +
- + return 0;
- +err:
- + ptn_memdev_delete(sc);
- + sc->pi->pi_arg = NULL;
- + return ret;
- +}
- +
- +struct pci_devemu pci_de_ptnetmap = {
- + .pe_emu = PTNETMAP_MEMDEV_NAME,
- + .pe_init = ptn_memdev_init,
- + .pe_barwrite = ptn_pci_write,
- + .pe_barread = ptn_pci_read
- +};
- +PCI_EMUL_SET(pci_de_ptnetmap);
- +
- +#endif /* WITH_NETMAP */
- diff --git a/usr.sbin/bhyve/pci_ptnetmap_netif.c b/usr.sbin/bhyve/pci_ptnetmap_netif.c
- new file mode 100644
- index 00000000000..060062f2dfd
- --- /dev/null
- +++ b/usr.sbin/bhyve/pci_ptnetmap_netif.c
- @@ -0,0 +1,411 @@
- +/*
- + * Copyright (C) 2016 Vincenzo Maffione
- + * All rights reserved.
- + *
- + * Redistribution and use in source and binary forms, with or without
- + * modification, are permitted provided that the following conditions
- + * are met:
- + * 1. Redistributions of source code must retain the above copyright
- + * notice, this list of conditions and the following disclaimer.
- + * 2. Redistributions in binary form must reproduce the above copyright
- + * notice, this list of conditions and the following disclaimer in the
- + * documentation and/or other materials provided with the distribution.
- + *
- + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- + * SUCH DAMAGE.
- + */
- +
- +/*
- + * This file contains the emulation of the ptnet network frontend, to be used
- + * with netmap backend.
- + */
- +
- +#ifdef WITH_NETMAP
- +
- +#include <sys/cdefs.h>
- +__FBSDID("$FreeBSD$");
- +
- +#include <errno.h>
- +#include <stdio.h>
- +#include <stdlib.h>
- +#include <stdint.h>
- +#include <string.h>
- +
- +#include <net/if.h> /* IFNAMSIZ */
- +#include <net/netmap.h>
- +#include <net/netmap_virt.h>
- +
- +#include <sys/ioctl.h>
- +#include <sys/param.h>
- +#include <sys/_cpuset.h>
- +#include <machine/vmm.h>
- +#include <machine/vmm_dev.h> /* VM_LAPIC_MSI */
- +#include <vmmapi.h>
- +
- +#include "bhyverun.h"
- +#include "pci_emul.h"
- +#include "net_utils.h"
- +#include "net_backends.h"
- +
- +#ifndef PTNET_CSB_ALLOC
- +#error "Hypervisor-allocated CSB not supported"
- +#endif
- +
- +
- +struct ptnet_softc {
- + struct pci_devinst *pi;
- +
- + struct net_backend *be;
- + struct ptnetmap_state *ptbe;
- +
- + unsigned int num_rings;
- + uint32_t ioregs[PTNET_IO_END >> 2];
- + void *csb;
- +};
- +
- +static int
- +ptnet_get_netmap_if(struct ptnet_softc *sc)
- +{
- + unsigned int num_rings;
- + struct netmap_if_info nif;
- + int ret;
- +
- + ret = ptnetmap_get_netmap_if(sc->ptbe, &nif);
- + if (ret) {
- + return ret;
- + }
- +
- + sc->ioregs[PTNET_IO_NIFP_OFS >> 2] = nif.nifp_offset;
- + sc->ioregs[PTNET_IO_NUM_TX_RINGS >> 2] = nif.num_tx_rings;
- + sc->ioregs[PTNET_IO_NUM_RX_RINGS >> 2] = nif.num_rx_rings;
- + sc->ioregs[PTNET_IO_NUM_TX_SLOTS >> 2] = nif.num_tx_slots;
- + sc->ioregs[PTNET_IO_NUM_RX_SLOTS >> 2] = nif.num_rx_slots;
- +
- + num_rings = sc->ioregs[PTNET_IO_NUM_TX_RINGS >> 2] +
- + sc->ioregs[PTNET_IO_NUM_RX_RINGS >> 2];
- + if (sc->num_rings && num_rings && sc->num_rings != num_rings) {
- + fprintf(stderr, "Number of rings changed: not supported\n");
- + return EINVAL;
- + }
- + sc->num_rings = num_rings;
- +
- + return 0;
- +}
- +
- +static int
- +ptnet_ptctl_create(struct ptnet_softc *sc)
- +{
- + struct ptnetmap_cfgentry_bhyve *cfgentry;
- + struct pci_devinst *pi = sc->pi;
- + struct vmctx *vmctx = pi->pi_vmctx;
- + struct ptnetmap_cfg *cfg;
- + unsigned int kick_addr;
- + int ret;
- + int i;
- +
- + if (sc->csb == NULL) {
- + fprintf(stderr, "%s: Unexpected NULL CSB", __func__);
- + return -1;
- + }
- +
- + cfg = calloc(1, sizeof(*cfg) + sc->num_rings * sizeof(*cfgentry));
- +
- + cfg->cfgtype = PTNETMAP_CFGTYPE_BHYVE;
- + cfg->entry_size = sizeof(*cfgentry);
- + cfg->num_rings = sc->num_rings;
- + cfg->ptrings = sc->csb;
- +
- + kick_addr = pi->pi_bar[PTNETMAP_IO_PCI_BAR].addr + PTNET_IO_KICK_BASE;
- + cfgentry = (struct ptnetmap_cfgentry_bhyve *)(cfg + 1);
- +
- + for (i = 0; i < sc->num_rings; i++, kick_addr += 4, cfgentry++) {
- + struct msix_table_entry *mte;
- + uint64_t cookie = sc->ioregs[PTNET_IO_MAC_LO >> 2] + 4*i;
- +
- + cfgentry->ioctl_fd = vm_get_fd(vmctx);
- + cfgentry->ioctl_cmd = VM_LAPIC_MSI;
- + mte = &pi->pi_msix.table[i];
- + cfgentry->ioctl_data.addr = mte->addr;
- + cfgentry->ioctl_data.msg_data = mte->msg_data;
- +
- + fprintf(stderr, "%s: vector %u, addr %lu, data %u, "
- + "kick_addr %u, cookie: %p\n",
- + __func__, i, mte->addr, mte->msg_data, kick_addr,
- + (void*)cookie);
- +
- + ret = vm_io_reg_handler(vmctx, kick_addr /* ioaddr */,
- + 0 /* in */, 0 /* mask_data */,
- + 0 /* data */, VM_IO_REGH_KWEVENTS,
- + (void*)cookie /* cookie */);
- + if (ret) {
- + fprintf(stderr, "%s: vm_io_reg_handler %d\n",
- + __func__, ret);
- + }
- + cfgentry->wchan = (uint64_t) cookie;
- + }
- +
- + ret = ptnetmap_create(sc->ptbe, cfg);
- + free(cfg);
- +
- + return ret;
- +}
- +
- +static int
- +ptnet_ptctl_delete(struct ptnet_softc *sc)
- +{
- + struct pci_devinst *pi = sc->pi;
- + struct vmctx *vmctx = pi->pi_vmctx;
- + unsigned int kick_addr;
- + int i;
- +
- + kick_addr = pi->pi_bar[PTNETMAP_IO_PCI_BAR].addr + PTNET_IO_KICK_BASE;
- +
- + for (i = 0; i < sc->num_rings; i++, kick_addr += 4) {
- + vm_io_reg_handler(vmctx, kick_addr, 0, 0, 0,
- + VM_IO_REGH_DELETE, 0);
- + }
- +
- + return ptnetmap_delete(sc->ptbe);
- +}
- +
- +static void
- +ptnet_ptctl(struct ptnet_softc *sc, uint64_t cmd)
- +{
- + int ret = EINVAL;
- +
- + switch (cmd) {
- + case PTNETMAP_PTCTL_CREATE:
- + /* React to a REGIF in the guest. */
- + ret = ptnet_ptctl_create(sc);
- + break;
- +
- + case PTNETMAP_PTCTL_DELETE:
- + /* React to an UNREGIF in the guest. */
- + ret = ptnet_ptctl_delete(sc);
- + break;
- + }
- +
- + sc->ioregs[PTNET_IO_PTCTL >> 2] = ret;
- +}
- +
- +static void
- +ptnet_csb_mapping(struct ptnet_softc *sc)
- +{
- + uint64_t base = ((uint64_t)sc->ioregs[PTNET_IO_CSBBAH >> 2] << 32) |
- + sc->ioregs[PTNET_IO_CSBBAL >> 2];
- + uint64_t len = 4096;
- +
- + sc->csb = NULL;
- + if (base) {
- + sc->csb = paddr_guest2host(sc->pi->pi_vmctx, base, len);
- + }
- +}
- +
- +static void
- +ptnet_bar_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- + int baridx, uint64_t offset, int size, uint64_t value)
- +{
- + struct ptnet_softc *sc = pi->pi_arg;
- + unsigned int index;
- +
- + /* Redirect to MSI-X emulation code. */
- + if (baridx == pci_msix_table_bar(pi) ||
- + baridx == pci_msix_pba_bar(pi)) {
- + pci_emul_msix_twrite(pi, offset, size, value);
- + return;
- + }
- +
- + if (sc == NULL)
- + return;
- +
- + offset &= PTNET_IO_MASK;
- + index = offset >> 2;
- +
- + if (baridx != PTNETMAP_IO_PCI_BAR || offset >= PTNET_IO_END) {
- + fprintf(stderr, "%s: Unexpected register write [bar %u, "
- + "offset %lx size %d value %lx]\n", __func__, baridx,
- + offset, size, value);
- + return;
- + }
- +
- + switch (offset) {
- + case PTNET_IO_PTFEAT:
- + value = ptnetmap_ack_features(sc->ptbe, value);
- + sc->ioregs[index] = value;
- + break;
- +
- + case PTNET_IO_PTCTL:
- + ptnet_ptctl(sc, value);
- + break;
- +
- + case PTNET_IO_CSBBAH:
- + sc->ioregs[index] = value;
- + break;
- +
- + case PTNET_IO_CSBBAL:
- + sc->ioregs[index] = value;
- + ptnet_csb_mapping(sc);
- + break;
- +
- + case PTNET_IO_VNET_HDR_LEN:
- + if (netbe_set_cap(sc->be, netbe_get_cap(sc->be),
- + value) == 0) {
- + sc->ioregs[index] = value;
- + }
- + break;
- + }
- +}
- +
- +static uint64_t
- +ptnet_bar_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
- + int baridx, uint64_t offset, int size)
- +{
- + struct ptnet_softc *sc = pi->pi_arg;
- + uint64_t index = offset >> 2;
- +
- + if (baridx == pci_msix_table_bar(pi) ||
- + baridx == pci_msix_pba_bar(pi)) {
- + return pci_emul_msix_tread(pi, offset, size);
- + }
- +
- + if (sc == NULL)
- + return 0;
- +
- + offset &= PTNET_IO_MASK;
- +
- + if (baridx != PTNETMAP_IO_PCI_BAR || offset >= PTNET_IO_END) {
- + fprintf(stderr, "%s: Unexpected register read [bar %u, "
- + "offset %lx size %d]\n", __func__, baridx, offset,
- + size);
- + return 0;
- + }
- +
- + switch (offset) {
- + case PTNET_IO_NIFP_OFS:
- + case PTNET_IO_NUM_TX_RINGS:
- + case PTNET_IO_NUM_RX_RINGS:
- + case PTNET_IO_NUM_TX_SLOTS:
- + case PTNET_IO_NUM_RX_SLOTS:
- + /* Fill in device registers with information about
- + * nifp_offset, num_*x_rings, and num_*x_slots. */
- + ptnet_get_netmap_if(sc);
- + break;
- +
- + case PTNET_IO_HOSTMEMID:
- + sc->ioregs[index] = ptnetmap_get_hostmemid(sc->ptbe);
- + break;
- + }
- +
- + return sc->ioregs[index];
- +}
- +
- +/* PCI device initialization. */
- +static int
- +ptnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- +{
- + struct ptnet_softc *sc;
- + char *ptopts, *devname;
- + uint8_t macaddr[6];
- + int mac_provided = 0;
- + int ret;
- +
- + sc = calloc(1, sizeof(*sc));
- + if (sc == NULL) {
- + fprintf(stderr, "%s: out of memory\n", __func__);
- + return -1;
- + }
- +
- + /* Link our softc in the pci_devinst. */
- + pi->pi_arg = sc;
- + sc->pi = pi;
- +
- + /* Parse command line options. */
- + if (opts == NULL) {
- + fprintf(stderr, "%s: No backend specified\n", __func__);
- + return -1;
- + }
- +
- + devname = ptopts = strdup(opts);
- + (void) strsep(&ptopts, ",");
- +
- + if (ptopts != NULL) {
- + ret = net_parsemac(ptopts, macaddr);
- + if (ret != 0) {
- + free(devname);
- + return ret;
- + }
- + mac_provided = 1;
- + }
- +
- + if (!mac_provided) {
- + net_genmac(pi, macaddr);
- + }
- +
- + /* Initialize backend. A NULL callback is used here to tell
- + * the netmap backend to use ptnetmap. */
- + sc->be = netbe_init(devname, NULL, sc);
- + if (!sc->be) {
- + fprintf(stderr, "net backend initialization failed\n");
- + return -1;
- + }
- +
- + free(devname);
- +
- + sc->ptbe = get_ptnetmap(sc->be);
- + if (!sc->ptbe) {
- + fprintf(stderr, "%s: failed to get ptnetmap\n", __func__);
- + return -1;
- + }
- +
- + /* Initialize PCI configuration space. */
- + pci_set_cfgdata16(pi, PCIR_VENDOR, PTNETMAP_PCI_VENDOR_ID);
- + pci_set_cfgdata16(pi, PCIR_DEVICE, PTNETMAP_PCI_NETIF_ID);
- + pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
- + pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_NETWORK_ETHERNET);
- + pci_set_cfgdata16(pi, PCIR_SUBDEV_0, 1);
- + pci_set_cfgdata16(pi, PCIR_SUBVEND_0, PTNETMAP_PCI_VENDOR_ID);
- +
- + /* Allocate a BAR for an I/O region. */
- + ret = pci_emul_alloc_bar(pi, PTNETMAP_IO_PCI_BAR, PCIBAR_IO,
- + PTNET_IO_MASK + 1);
- + if (ret) {
- + fprintf(stderr, "%s: failed to allocate BAR [%d]\n",
- + __func__, ret);
- + return ret;
- + }
- +
- + /* Initialize registers and data structures. */
- + memset(sc->ioregs, 0, sizeof(sc->ioregs));
- + sc->csb = NULL;
- + sc->ioregs[PTNET_IO_MAC_HI >> 2] = (macaddr[0] << 8) | macaddr[1];
- + sc->ioregs[PTNET_IO_MAC_LO >> 2] = (macaddr[2] << 24) |
- + (macaddr[3] << 16) |
- + (macaddr[4] << 8) | macaddr[5];
- +
- + sc->num_rings = 0;
- + ptnet_get_netmap_if(sc);
- +
- + /* Allocate a BAR for MSI-X vectors. */
- + pci_emul_add_msixcap(pi, sc->num_rings, PTNETMAP_MSIX_PCI_BAR);
- +
- + return 0;
- +}
- +
- +struct pci_devemu pci_de_ptnet = {
- + .pe_emu = "ptnet",
- + .pe_init = ptnet_init,
- + .pe_barwrite = ptnet_bar_write,
- + .pe_barread = ptnet_bar_read,
- +};
- +PCI_EMUL_SET(pci_de_ptnet);
- +
- +#endif /* WITH_NETMAP */
- diff --git a/usr.sbin/bhyve/pci_virtio_net.c b/usr.sbin/bhyve/pci_virtio_net.c
- index c6104a646a5..91a6d9c59d1 100644
- --- a/usr.sbin/bhyve/pci_virtio_net.c
- +++ b/usr.sbin/bhyve/pci_virtio_net.c
- @@ -26,6 +26,22 @@
- * $FreeBSD$
- */
- +/*
- + * This file contains the emulation of the virtio-net network frontend. Network
- + * backends are in net_backends.c.
- + *
- + * The frontend is selected using the pe_emu field of the descriptor,
- + * Upon a match, the pe_init function is invoked, which initializes
- + * the emulated PCI device, attaches to the backend, and calls virtio
- + * initialization functions.
- + *
- + * PCI register read/writes are handled through generic PCI methods
- + *
- + * virtio TX is handled by a dedicated thread, pci_vtnet_tx_thread()
- + * virtio RX is handled by the backend (often with some helper thread),
- + * which in turn calls a frontend callback, pci_vtnet_rx_callback()
- + */
- +
- #include <sys/cdefs.h>
- __FBSDID("$FreeBSD$");
- @@ -39,10 +55,7 @@ __FBSDID("$FreeBSD$");
- #include <sys/ioctl.h>
- #include <machine/atomic.h>
- #include <net/ethernet.h>
- -#ifndef NETMAP_WITH_LIBS
- -#define NETMAP_WITH_LIBS
- -#endif
- -#include <net/netmap_user.h>
- +#include <net/if.h> /* IFNAMSIZ */
- #include <err.h>
- #include <errno.h>
- @@ -54,7 +67,6 @@ __FBSDID("$FreeBSD$");
- #include <strings.h>
- #include <unistd.h>
- #include <assert.h>
- -#include <md5.h>
- #include <pthread.h>
- #include <pthread_np.h>
- #include <sysexits.h>
- @@ -63,36 +75,16 @@ __FBSDID("$FreeBSD$");
- #include "pci_emul.h"
- #include "mevent.h"
- #include "virtio.h"
- +#include "net_utils.h" /* MAC address generation */
- +#include "net_backends.h" /* VirtIO capabilities */
- #define VTNET_RINGSZ 1024
- #define VTNET_MAXSEGS 256
- -/*
- - * Host capabilities. Note that we only offer a few of these.
- - */
- -#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */
- -#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */
- -#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */
- -#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */
- -#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */
- -#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */
- -#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */
- -#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */
- -#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */
- -#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */
- -#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */
- -#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */
- -#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */
- -#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */
- -#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */
- -#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */
- -#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */
- -#define VIRTIO_NET_F_GUEST_ANNOUNCE \
- - (1 << 21) /* guest can send gratuitous pkts */
- -
- +/* Our capabilities: we don't support VIRTIO_NET_F_MRG_RXBUF at the moment. */
- #define VTNET_S_HOSTCAPS \
- - ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
- + ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
- VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)
- /*
- @@ -101,6 +93,7 @@ __FBSDID("$FreeBSD$");
- struct virtio_net_config {
- uint8_t mac[6];
- uint16_t status;
- + uint16_t max_virtqueue_pairs;
- } __packed;
- /*
- @@ -112,19 +105,6 @@ struct virtio_net_config {
- #define VTNET_MAXQ 3
- -/*
- - * Fixed network header size
- - */
- -struct virtio_net_rxhdr {
- - uint8_t vrh_flags;
- - uint8_t vrh_gso_type;
- - uint16_t vrh_hdr_len;
- - uint16_t vrh_gso_size;
- - uint16_t vrh_csum_start;
- - uint16_t vrh_csum_offset;
- - uint16_t vrh_bufs;
- -} __packed;
- -
- /*
- * Debug printf
- */
- @@ -139,31 +119,24 @@ struct pci_vtnet_softc {
- struct virtio_softc vsc_vs;
- struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
- pthread_mutex_t vsc_mtx;
- - struct mevent *vsc_mevp;
- - int vsc_tapfd;
- - struct nm_desc *vsc_nmd;
- + struct net_backend *vsc_be;
- int vsc_rx_ready;
- volatile int resetting; /* set and checked outside lock */
- uint64_t vsc_features; /* negotiated features */
- - struct virtio_net_config vsc_config;
- -
- pthread_mutex_t rx_mtx;
- - int rx_in_progress;
- - int rx_vhdrlen;
- + unsigned int rx_vhdrlen;
- int rx_merge; /* merged rx bufs in use */
- pthread_t tx_tid;
- pthread_mutex_t tx_mtx;
- pthread_cond_t tx_cond;
- int tx_in_progress;
- + struct virtio_net_config vsc_config;
- - void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc);
- - void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov,
- - int iovcnt, int len);
- };
- static void pci_vtnet_reset(void *);
- @@ -186,6 +159,7 @@ static struct virtio_consts vtnet_vi_consts = {
- /*
- * If the transmit thread is active then stall until it is done.
- + * Only used once in pci_vtnet_reset()
- */
- static void
- pci_vtnet_txwait(struct pci_vtnet_softc *sc)
- @@ -202,20 +176,18 @@ pci_vtnet_txwait(struct pci_vtnet_softc *sc)
- /*
- * If the receive thread is active then stall until it is done.
- + * It is enough to lock and unlock the RX mutex.
- + * Only used once in pci_vtnet_reset()
- */
- static void
- pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
- {
- pthread_mutex_lock(&sc->rx_mtx);
- - while (sc->rx_in_progress) {
- - pthread_mutex_unlock(&sc->rx_mtx);
- - usleep(10000);
- - pthread_mutex_lock(&sc->rx_mtx);
- - }
- pthread_mutex_unlock(&sc->rx_mtx);
- }
- +/* handler for virtio_reset */
- static void
- pci_vtnet_reset(void *vsc)
- {
- @@ -242,360 +214,80 @@ pci_vtnet_reset(void *vsc)
- sc->resetting = 0;
- }
- -/*
- - * Called to send a buffer chain out to the tap device
- - */
- static void
- -pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
- - int len)
- +pci_vtnet_rx(struct pci_vtnet_softc *sc)
- {
- - static char pad[60]; /* all zero bytes */
- -
- - if (sc->vsc_tapfd == -1)
- - return;
- -
- - /*
- - * If the length is < 60, pad out to that and add the
- - * extra zero'd segment to the iov. It is guaranteed that
- - * there is always an extra iov available by the caller.
- - */
- - if (len < 60) {
- - iov[iovcnt].iov_base = pad;
- - iov[iovcnt].iov_len = 60 - len;
- - iovcnt++;
- - }
- - (void) writev(sc->vsc_tapfd, iov, iovcnt);
- -}
- -
- -/*
- - * Called when there is read activity on the tap file descriptor.
- - * Each buffer posted by the guest is assumed to be able to contain
- - * an entire ethernet frame + rx header.
- - * MP note: the dummybuf is only used for discarding frames, so there
- - * is no need for it to be per-vtnet or locked.
- - */
- -static uint8_t dummybuf[2048];
- -
- -static __inline struct iovec *
- -rx_iov_trim(struct iovec *iov, int *niov, int tlen)
- -{
- - struct iovec *riov;
- -
- - /* XXX short-cut: assume first segment is >= tlen */
- - assert(iov[0].iov_len >= tlen);
- -
- - iov[0].iov_len -= tlen;
- - if (iov[0].iov_len == 0) {
- - assert(*niov > 1);
- - *niov -= 1;
- - riov = &iov[1];
- - } else {
- - iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
- - riov = &iov[0];
- - }
- -
- - return (riov);
- -}
- -
- -static void
- -pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
- -{
- - struct iovec iov[VTNET_MAXSEGS], *riov;
- + struct iovec iov[VTNET_MAXSEGS + 1];
- struct vqueue_info *vq;
- - void *vrx;
- int len, n;
- uint16_t idx;
- - /*
- - * Should never be called without a valid tap fd
- - */
- - assert(sc->vsc_tapfd != -1);
- -
- - /*
- - * But, will be called when the rx ring hasn't yet
- - * been set up or the guest is resetting the device.
- - */
- if (!sc->vsc_rx_ready || sc->resetting) {
- /*
- - * Drop the packet and try later.
- + * The rx ring has not yet been set up or the guest is
- + * resetting the device. Drop the packet and try later.
- */
- - (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
- + netbe_rx_discard(sc->vsc_be);
- return;
- }
- - /*
- - * Check for available rx buffers
- - */
- vq = &sc->vsc_queues[VTNET_RXQ];
- if (!vq_has_descs(vq)) {
- /*
- - * Drop the packet and try later. Interrupt on
- - * empty, if that's negotiated.
- + * No available rx buffers. Drop the packet and try later.
- + * Interrupt on empty, if that's negotiated.
- */
- - (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
- + netbe_rx_discard(sc->vsc_be);
- vq_endchains(vq, 1);
- return;
- }
- do {
- - /*
- - * Get descriptor chain.
- - */
- + /* Get descriptor chain into iov */
- n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
- assert(n >= 1 && n <= VTNET_MAXSEGS);
- - /*
- - * Get a pointer to the rx header, and use the
- - * data immediately following it for the packet buffer.
- - */
- - vrx = iov[0].iov_base;
- - riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
- -
- - len = readv(sc->vsc_tapfd, riov, n);
- -
- - if (len < 0 && errno == EWOULDBLOCK) {
- - /*
- - * No more packets, but still some avail ring
- - * entries. Interrupt if needed/appropriate.
- - */
- - vq_retchain(vq);
- - vq_endchains(vq, 0);
- - return;
- - }
- -
- - /*
- - * The only valid field in the rx packet header is the
- - * number of buffers if merged rx bufs were negotiated.
- - */
- - memset(vrx, 0, sc->rx_vhdrlen);
- -
- - if (sc->rx_merge) {
- - struct virtio_net_rxhdr *vrxh;
- -
- - vrxh = vrx;
- - vrxh->vrh_bufs = 1;
- - }
- -
- - /*
- - * Release this chain and handle more chains.
- - */
- - vq_relchain(vq, idx, len + sc->rx_vhdrlen);
- - } while (vq_has_descs(vq));
- -
- - /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
- - vq_endchains(vq, 1);
- -}
- + len = netbe_recv(sc->vsc_be, iov, n);
- -static __inline int
- -pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
- -{
- - int r, i;
- - int len = 0;
- -
- - for (r = nmd->cur_tx_ring; ; ) {
- - struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
- - uint32_t cur, idx;
- - char *buf;
- -
- - if (nm_ring_empty(ring)) {
- - r++;
- - if (r > nmd->last_tx_ring)
- - r = nmd->first_tx_ring;
- - if (r == nmd->cur_tx_ring)
- - break;
- - continue;
- + if (len < 0) {
- + break;
- }
- - cur = ring->cur;
- - idx = ring->slot[cur].buf_idx;
- - buf = NETMAP_BUF(ring, idx);
- -
- - for (i = 0; i < iovcnt; i++) {
- - if (len + iov[i].iov_len > 2048)
- - break;
- - memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
- - len += iov[i].iov_len;
- - }
- - ring->slot[cur].len = len;
- - ring->head = ring->cur = nm_ring_next(ring, cur);
- - nmd->cur_tx_ring = r;
- - ioctl(nmd->fd, NIOCTXSYNC, NULL);
- - break;
- - }
- -
- - return (len);
- -}
- -
- -static __inline int
- -pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
- -{
- - int len = 0;
- - int i = 0;
- - int r;
- -
- - for (r = nmd->cur_rx_ring; ; ) {
- - struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
- - uint32_t cur, idx;
- - char *buf;
- - size_t left;
- -
- - if (nm_ring_empty(ring)) {
- - r++;
- - if (r > nmd->last_rx_ring)
- - r = nmd->first_rx_ring;
- - if (r == nmd->cur_rx_ring)
- - break;
- - continue;
- - }
- - cur = ring->cur;
- - idx = ring->slot[cur].buf_idx;
- - buf = NETMAP_BUF(ring, idx);
- - left = ring->slot[cur].len;
- -
- - for (i = 0; i < iovcnt && left > 0; i++) {
- - if (iov[i].iov_len > left)
- - iov[i].iov_len = left;
- - memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len);
- - len += iov[i].iov_len;
- - left -= iov[i].iov_len;
- - }
- - ring->head = ring->cur = nm_ring_next(ring, cur);
- - nmd->cur_rx_ring = r;
- - ioctl(nmd->fd, NIOCRXSYNC, NULL);
- - break;
- - }
- - for (; i < iovcnt; i++)
- - iov[i].iov_len = 0;
- -
- - return (len);
- -}
- -
- -/*
- - * Called to send a buffer chain out to the vale port
- - */
- -static void
- -pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
- - int len)
- -{
- - static char pad[60]; /* all zero bytes */
- -
- - if (sc->vsc_nmd == NULL)
- - return;
- -
- - /*
- - * If the length is < 60, pad out to that and add the
- - * extra zero'd segment to the iov. It is guaranteed that
- - * there is always an extra iov available by the caller.
- - */
- - if (len < 60) {
- - iov[iovcnt].iov_base = pad;
- - iov[iovcnt].iov_len = 60 - len;
- - iovcnt++;
- - }
- - (void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
- -}
- -
- -static void
- -pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
- -{
- - struct iovec iov[VTNET_MAXSEGS], *riov;
- - struct vqueue_info *vq;
- - void *vrx;
- - int len, n;
- - uint16_t idx;
- -
- - /*
- - * Should never be called without a valid netmap descriptor
- - */
- - assert(sc->vsc_nmd != NULL);
- -
- - /*
- - * But, will be called when the rx ring hasn't yet
- - * been set up or the guest is resetting the device.
- - */
- - if (!sc->vsc_rx_ready || sc->resetting) {
- - /*
- - * Drop the packet and try later.
- - */
- - (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
- - return;
- - }
- -
- - /*
- - * Check for available rx buffers
- - */
- - vq = &sc->vsc_queues[VTNET_RXQ];
- - if (!vq_has_descs(vq)) {
- - /*
- - * Drop the packet and try later. Interrupt on
- - * empty, if that's negotiated.
- - */
- - (void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
- - vq_endchains(vq, 1);
- - return;
- - }
- -
- - do {
- - /*
- - * Get descriptor chain.
- - */
- - n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
- - assert(n >= 1 && n <= VTNET_MAXSEGS);
- -
- - /*
- - * Get a pointer to the rx header, and use the
- - * data immediately following it for the packet buffer.
- - */
- - vrx = iov[0].iov_base;
- - riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
- -
- - len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
- if (len == 0) {
- /*
- * No more packets, but still some avail ring
- * entries. Interrupt if needed/appropriate.
- */
- - vq_retchain(vq);
- + vq_retchain(vq); /* return the slot to the vq */
- vq_endchains(vq, 0);
- return;
- }
- - /*
- - * The only valid field in the rx packet header is the
- - * number of buffers if merged rx bufs were negotiated.
- - */
- - memset(vrx, 0, sc->rx_vhdrlen);
- -
- - if (sc->rx_merge) {
- - struct virtio_net_rxhdr *vrxh;
- -
- - vrxh = vrx;
- - vrxh->vrh_bufs = 1;
- - }
- -
- - /*
- - * Release this chain and handle more chains.
- - */
- - vq_relchain(vq, idx, len + sc->rx_vhdrlen);
- + /* Publish the info to the guest */
- + vq_relchain(vq, idx, (uint32_t)len);
- } while (vq_has_descs(vq));
- /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
- vq_endchains(vq, 1);
- }
- +/*
- + * Called when there is read activity on the tap file descriptor.
- + * Each buffer posted by the guest is assumed to be able to contain
- + * an entire ethernet frame + rx header.
- + */
- static void
- pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
- {
- struct pci_vtnet_softc *sc = param;
- + (void)fd; (void)type;
- pthread_mutex_lock(&sc->rx_mtx);
- - sc->rx_in_progress = 1;
- - sc->pci_vtnet_rx(sc);
- - sc->rx_in_progress = 0;
- + pci_vtnet_rx(sc);
- pthread_mutex_unlock(&sc->rx_mtx);
- -
- }
- +/* callback when writing to the PCI register */
- static void
- pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
- {
- @@ -610,35 +302,33 @@ pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
- }
- }
- +/* TX processing (guest to host), called in the tx thread */
- static void
- pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
- {
- struct iovec iov[VTNET_MAXSEGS + 1];
- int i, n;
- - int plen, tlen;
- + uint32_t len;
- uint16_t idx;
- /*
- - * Obtain chain of descriptors. The first one is
- - * really the header descriptor, so we need to sum
- - * up two lengths: packet length and transfer length.
- + * Obtain chain of descriptors. The first descriptor also
- + * contains the virtio-net header.
- */
- n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
- assert(n >= 1 && n <= VTNET_MAXSEGS);
- - plen = 0;
- - tlen = iov[0].iov_len;
- - for (i = 1; i < n; i++) {
- - plen += iov[i].iov_len;
- - tlen += iov[i].iov_len;
- + len = 0;
- + for (i = 0; i < n; i++) {
- + len += iov[i].iov_len;
- }
- - DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
- - sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen);
- + netbe_send(sc->vsc_be, iov, n, len, 0 /* more */);
- - /* chain is processed, release it and set tlen */
- - vq_relchain(vq, idx, tlen);
- + /* chain is processed, release it and set len */
- + vq_relchain(vq, idx, len);
- }
- +/* callback when writing to the PCI register */
- static void
- pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
- {
- @@ -668,6 +358,14 @@ pci_vtnet_tx_thread(void *param)
- struct vqueue_info *vq;
- int error;
- + {
- + struct pci_devinst *pi = sc->vsc_vs.vs_pi;
- + char tname[MAXCOMLEN + 1];
- + snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
- + pi->pi_func);
- + pthread_set_name_np(pthread_self(), tname);
- + }
- +
- vq = &sc->vsc_queues[VTNET_TXQ];
- /*
- @@ -721,119 +419,28 @@ pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
- }
- #endif
- -static int
- -pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
- -{
- - struct ether_addr *ea;
- - char *tmpstr;
- - char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
- -
- - tmpstr = strsep(&mac_str,"=");
- -
- - if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
- - ea = ether_aton(mac_str);
- -
- - if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
- - memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
- - fprintf(stderr, "Invalid MAC %s\n", mac_str);
- - return (EINVAL);
- - } else
- - memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
- - }
- -
- - return (0);
- -}
- -
- -static void
- -pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname)
- -{
- - char tbuf[80];
- -#ifndef WITHOUT_CAPSICUM
- - cap_rights_t rights;
- -#endif
- -
- - strcpy(tbuf, "/dev/");
- - strlcat(tbuf, devname, sizeof(tbuf));
- -
- - sc->pci_vtnet_rx = pci_vtnet_tap_rx;
- - sc->pci_vtnet_tx = pci_vtnet_tap_tx;
- -
- - sc->vsc_tapfd = open(tbuf, O_RDWR);
- - if (sc->vsc_tapfd == -1) {
- - WPRINTF(("open of tap device %s failed\n", tbuf));
- - return;
- - }
- -
- - /*
- - * Set non-blocking and register for read
- - * notifications with the event loop
- - */
- - int opt = 1;
- - if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
- - WPRINTF(("tap device O_NONBLOCK failed\n"));
- - close(sc->vsc_tapfd);
- - sc->vsc_tapfd = -1;
- - }
- -
- -#ifndef WITHOUT_CAPSICUM
- - cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
- - if (cap_rights_limit(sc->vsc_tapfd, &rights) == -1 && errno != ENOSYS)
- - errx(EX_OSERR, "Unable to apply rights for sandbox");
- -#endif
- -
- - sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
- - EVF_READ,
- - pci_vtnet_rx_callback,
- - sc);
- - if (sc->vsc_mevp == NULL) {
- - WPRINTF(("Could not register event\n"));
- - close(sc->vsc_tapfd);
- - sc->vsc_tapfd = -1;
- - }
- -}
- -
- -static void
- -pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname)
- -{
- - sc->pci_vtnet_rx = pci_vtnet_netmap_rx;
- - sc->pci_vtnet_tx = pci_vtnet_netmap_tx;
- -
- - sc->vsc_nmd = nm_open(ifname, NULL, 0, 0);
- - if (sc->vsc_nmd == NULL) {
- - WPRINTF(("open of netmap device %s failed\n", ifname));
- - return;
- - }
- -
- - sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd,
- - EVF_READ,
- - pci_vtnet_rx_callback,
- - sc);
- - if (sc->vsc_mevp == NULL) {
- - WPRINTF(("Could not register event\n"));
- - nm_close(sc->vsc_nmd);
- - sc->vsc_nmd = NULL;
- - }
- -}
- -
- static int
- pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- {
- - MD5_CTX mdctx;
- - unsigned char digest[16];
- - char nstr[80];
- - char tname[MAXCOMLEN + 1];
- struct pci_vtnet_softc *sc;
- char *devname;
- char *vtopts;
- int mac_provided;
- + struct virtio_consts *vc;
- - sc = calloc(1, sizeof(struct pci_vtnet_softc));
- + /*
- + * Allocate data structures for further virtio initializations.
- + * sc also contains a copy of the vtnet_vi_consts,
- + * because the capabilities change depending on
- + * the backend.
- + */
- + sc = calloc(1, sizeof(struct pci_vtnet_softc) +
- + sizeof(struct virtio_consts));
- + vc = (struct virtio_consts *)(sc + 1);
- + memcpy(vc, &vtnet_vi_consts, sizeof(*vc));
- pthread_mutex_init(&sc->vsc_mtx, NULL);
- - vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
- - sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
- -
- sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
- sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
- sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
- @@ -844,12 +451,10 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- #endif
- /*
- - * Attempt to open the tap device and read the MAC address
- + * Attempt to open the backend device and read the MAC address
- * if specified
- */
- mac_provided = 0;
- - sc->vsc_tapfd = -1;
- - sc->vsc_nmd = NULL;
- if (opts != NULL) {
- int err;
- @@ -857,7 +462,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- (void) strsep(&vtopts, ",");
- if (vtopts != NULL) {
- - err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
- + err = net_parsemac(vtopts, sc->vsc_config.mac);
- if (err != 0) {
- free(devname);
- return (err);
- @@ -865,33 +470,18 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- mac_provided = 1;
- }
- - if (strncmp(devname, "vale", 4) == 0)
- - pci_vtnet_netmap_setup(sc, devname);
- - if (strncmp(devname, "tap", 3) == 0 ||
- - strncmp(devname, "vmnet", 5) == 0)
- - pci_vtnet_tap_setup(sc, devname);
- + sc->vsc_be = netbe_init(devname, pci_vtnet_rx_callback, sc);
- + if (!sc->vsc_be) {
- + WPRINTF(("net backend initialization failed\n"));
- + } else {
- + vc->vc_hv_caps |= netbe_get_cap(sc->vsc_be);
- + }
- free(devname);
- }
- - /*
- - * The default MAC address is the standard NetApp OUI of 00-a0-98,
- - * followed by an MD5 of the PCI slot/func number and dev name
- - */
- if (!mac_provided) {
- - snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
- - pi->pi_func, vmname);
- -
- - MD5Init(&mdctx);
- - MD5Update(&mdctx, nstr, strlen(nstr));
- - MD5Final(digest, &mdctx);
- -
- - sc->vsc_config.mac[0] = 0x00;
- - sc->vsc_config.mac[1] = 0xa0;
- - sc->vsc_config.mac[2] = 0x98;
- - sc->vsc_config.mac[3] = digest[0];
- - sc->vsc_config.mac[4] = digest[1];
- - sc->vsc_config.mac[5] = digest[2];
- + net_genmac(pi, sc->vsc_config.mac);
- }
- /* initialize config space */
- @@ -901,22 +491,23 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
- pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
- - /* Link is up if we managed to open tap device or vale port. */
- - sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0 ||
- - sc->vsc_nmd != NULL);
- + /* Link is up if we managed to open backend device. */
- + sc->vsc_config.status = (opts == NULL || sc->vsc_be);
- + vi_softc_linkup(&sc->vsc_vs, vc, sc, pi, sc->vsc_queues);
- + sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
- +
- /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
- if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
- return (1);
- /* use BAR 0 to map config regs in IO space */
- - vi_set_io_bar(&sc->vsc_vs, 0);
- + vi_set_io_bar(&sc->vsc_vs, 0); /* calls into virtio */
- sc->resetting = 0;
- sc->rx_merge = 1;
- sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
- - sc->rx_in_progress = 0;
- pthread_mutex_init(&sc->rx_mtx, NULL);
- /*
- @@ -928,9 +519,6 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
- pthread_mutex_init(&sc->tx_mtx, NULL);
- pthread_cond_init(&sc->tx_cond, NULL);
- pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
- - snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
- - pi->pi_func);
- - pthread_set_name_np(sc->tx_tid, tname);
- return (0);
- }
- @@ -941,8 +529,8 @@ pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
- struct pci_vtnet_softc *sc = vsc;
- void *ptr;
- - if (offset < 6) {
- - assert(offset + size <= 6);
- + if (offset < (int)sizeof(sc->vsc_config.mac)) {
- + assert(offset + size <= (int)sizeof(sc->vsc_config.mac));
- /*
- * The driver is allowed to change the MAC address
- */
- @@ -974,14 +562,17 @@ pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
- sc->vsc_features = negotiated_features;
- - if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) {
- + if (!(negotiated_features & VIRTIO_NET_F_MRG_RXBUF)) {
- sc->rx_merge = 0;
- /* non-merge rx header is 2 bytes shorter */
- sc->rx_vhdrlen -= 2;
- }
- +
- + /* Tell the backend to enable some capabilities it has advertised. */
- + netbe_set_cap(sc->vsc_be, negotiated_features, sc->rx_vhdrlen);
- }
- -struct pci_devemu pci_de_vnet = {
- +static struct pci_devemu pci_de_vnet = {
- .pe_emu = "virtio-net",
- .pe_init = pci_vtnet_init,
- .pe_barwrite = vi_pci_write,
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement