diff --git a/sys/kern/kern_mbuf.c b/sys/kern/kern_mbuf.c
index a1ab9229bd1..172668a433d 100644
--- a/sys/kern/kern_mbuf.c
+++ b/sys/kern/kern_mbuf.c
@@ -444,10 +444,11 @@ mb_dtor_mbuf(void *mem, int size, void *arg)

m = (struct mbuf *)mem;
flags = (unsigned long)arg;
-
- KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
- if (!(flags & MB_DTOR_SKIP) && (m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
- m_tag_delete_chain(m, NULL);
+ if (!(flags & MB_DTOR_SKIP)) {
+ KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
+ if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
+ m_tag_delete_chain(m, NULL);
+ }
#ifdef INVARIANTS
trash_dtor(mem, size, arg);
#endif
@@ -653,6 +654,16 @@ mb_free_ext(struct mbuf *m)

/* Free attached storage if this mbuf is the only reference to it. */
if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) {
+ /*
+ * An mvec has been converted to an mbuf chain, but is still
+ * owned by the mvec.
+ */
+ if (__predict_false(m->m_ext.ext_flags & EXT_FLAG_MVECREF)) {
+ MPASS(!(m->m_ext.ext_flags & EXT_FLAG_EMBREF));
+ MPASS(mref != m);
+ mvec_free((struct mbuf_ext *)mref);
+ goto skip;
+ }
switch (m->m_ext.ext_type) {
case EXT_PACKET:
/* The packet zone is special. */
@@ -676,6 +687,18 @@ mb_free_ext(struct mbuf *m)
uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
uma_zfree(zone_mbuf, mref);
break;
+ case EXT_MBUF:
+ uma_zfree(zone_mbuf, m->m_ext.ext_buf);
+ break;
+ case EXT_MVEC:
+ if (m->m_ext.ext_flags & EXT_FLAG_EXTFREE) {
+ KASSERT(m->m_ext.ext_free != NULL,
+ ("%s: ext_free not set", __func__));
+ m->m_ext.ext_free(m);
+ }
+ mvec_free((struct mbuf_ext *)m);
+ return;
+ break;
case EXT_SFBUF:
case EXT_NET_DRV:
case EXT_MOD_TYPE:
@@ -695,7 +718,7 @@ mb_free_ext(struct mbuf *m)
("%s: unknown ext_type", __func__));
}
}
-
+ skip:
if (freembuf && m != mref)
uma_zfree(zone_mbuf, m);
}
@@ -796,36 +819,6 @@ m_get2(int size, int how, short type, int flags)
return (m);
}

-/*
- * m_getjcl() returns an mbuf with a cluster of the specified size attached.
- * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
- */
-struct mbuf *
-m_getjcl(int how, short type, int flags, int size)
-{
- struct mb_args args;
- struct mbuf *m, *n;
- uma_zone_t zone;
-
- if (size == MCLBYTES)
- return m_getcl(how, type, flags);
-
- args.flags = flags;
- args.type = type;
-
- m = uma_zalloc_arg(zone_mbuf, &args, how);
- if (m == NULL)
- return (NULL);
-
- zone = m_getzone(size);
- n = uma_zalloc_arg(zone, m, how);
- if (n == NULL) {
- uma_zfree(zone_mbuf, m);
- return (NULL);
- }
- return (m);
-}
-
/*
* Allocate a given length worth of mbufs and/or clusters (whatever fits
* best) and return a pointer to the top of the allocated chain. If an
diff --git a/sys/kern/kern_switch.c b/sys/kern/kern_switch.c
index ea2791673ed..a035f6d4eb3 100644
--- a/sys/kern/kern_switch.c
+++ b/sys/kern/kern_switch.c
@@ -188,71 +188,38 @@ choosethread(void)
return (td);
}

-/*
- * Kernel thread preemption implementation. Critical sections mark
- * regions of code in which preemptions are not allowed.
- *
- * It might seem a good idea to inline critical_enter() but, in order
- * to prevent instructions reordering by the compiler, a __compiler_membar()
- * would have to be used here (the same as sched_pin()). The performance
- * penalty imposed by the membar could, then, produce slower code than
- * the function call itself, for most cases.
- */
void
-critical_enter(void)
+critical_preempt(struct thread *td)
{
- struct thread *td;
+ int flags;

- td = curthread;
- td->td_critnest++;
- CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
- (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
+ /*
+ * Microoptimization: we committed to switch,
+ * disable preemption in interrupt handlers
+ * while spinning for the thread lock.
+ */
+ td->td_critnest = 1;
+ thread_lock(td);
+ td->td_critnest--;
+ flags = SW_INVOL | SW_PREEMPT;
+ if (TD_IS_IDLETHREAD(td))
+ flags |= SWT_IDLE;
+ else
+ flags |= SWT_OWEPREEMPT;
+ mi_switch(flags, NULL);
+ thread_unlock(td);
}

void
-critical_exit(void)
+critical_enter(void)
{
- struct thread *td;
- int flags;
-
- td = curthread;
- KASSERT(td->td_critnest != 0,
- ("critical_exit: td_critnest == 0"));
-
- if (td->td_critnest == 1) {
- td->td_critnest = 0;
-
- /*
- * Interrupt handlers execute critical_exit() on
- * leave, and td_owepreempt may be left set by an
- * interrupt handler only when td_critnest > 0. If we
- * are decrementing td_critnest from 1 to 0, read
- * td_owepreempt after decrementing, to not miss the
- * preempt. Disallow compiler to reorder operations.
- */
- __compiler_membar();
- if (td->td_owepreempt && !kdb_active) {
- /*
- * Microoptimization: we committed to switch,
- * disable preemption in interrupt handlers
- * while spinning for the thread lock.
- */
- td->td_critnest = 1;
- thread_lock(td);
- td->td_critnest--;
- flags = SW_INVOL | SW_PREEMPT;
- if (TD_IS_IDLETHREAD(td))
- flags |= SWT_IDLE;
- else
- flags |= SWT_OWEPREEMPT;
- mi_switch(flags, NULL);
- thread_unlock(td);
- }
- } else
- td->td_critnest--;
+ _critical_enter();
+}

- CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
- (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
+void
+critical_exit(void)
+{
+ _critical_exit();
}

/************************************************************************
diff --git a/sys/kern/subr_sglist.c b/sys/kern/subr_sglist.c
index ff002038393..894d7272a2d 100644
--- a/sys/kern/subr_sglist.c
+++ b/sys/kern/subr_sglist.c
@@ -319,6 +319,37 @@ sglist_append_phys(struct sglist *sg, vm_paddr_t paddr, size_t len)
return (error);
}

+int
+sglist_append_mvec(struct sglist *sg, struct mbuf *m0)
+{
+ struct sgsave save;
+ struct mbuf_ext *mext;
+ struct mvec_header *mh;
+ struct mvec_ent *me;
+ int i, error;
+
+ MPASS(m0 != NULL);
+
+ mext = (void*)m0;
+ mh = &mext->me_mh;
+ me = &mext->me_ents[mh->mh_start];
+
+ if (__predict_false(sg->sg_maxseg == 0))
+ return (EINVAL);
+
+ SGLIST_SAVE(sg, save);
+ for (i = 0; i < mh->mh_used; i++, me++) {
+ if (__predict_false(me->me_len == 0))
+ continue;
+ error = sglist_append(sg, me_data(me), me->me_len);
+ if (__predict_false(error)) {
+ SGLIST_RESTORE(sg, save);
+ return (error);
+ }
+ }
+ return (0);
+}
+
/*
* Append the segments that describe a single mbuf chain to a
* scatter/gather list. If there are insufficient segments, then this
@@ -334,6 +365,9 @@ sglist_append_mbuf(struct sglist *sg, struct mbuf *m0)
if (sg->sg_maxseg == 0)
return (EINVAL);

+ if (m_ismvec(m0))
+ return (sglist_append_mvec(sg, m0));
+
error = 0;
SGLIST_SAVE(sg, save);
for (m = m0; m != NULL; m = m->m_next) {
diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c
index a4bdf6f062a..04d0c1c267e 100644
--- a/sys/kern/uipc_mbuf.c
+++ b/sys/kern/uipc_mbuf.c
@@ -724,6 +724,10 @@ m_adj(struct mbuf *mp, int req_len)

if ((m = mp) == NULL)
return;
+ if (m_ismvec(mp)) {
+ mvec_adj(mp, req_len);
+ return;
+ }
if (len >= 0) {
/*
* Trim from head.
@@ -803,6 +807,9 @@ m_pullup(struct mbuf *n, int len)
int count;
int space;

+ if (m_ismvec(n))
+ return (mvec_pullup(n, 0, len));
+
/*
* If first mbuf has no cluster, and has room for len bytes
* without shifting current data, pullup into it,
diff --git a/sys/kern/uipc_mvec.c b/sys/kern/uipc_mvec.c
new file mode 100644
index 00000000000..2968d40a33d
--- /dev/null
+++ b/sys/kern/uipc_mvec.c
@@ -0,0 +1,1258 @@
+/*
+ * Copyright (C) 2017 Matthew Macy <matt.macy@joyent.com>
+ * Copyright (C) 2017 Joyent Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/malloc.h>
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <machine/in_cksum.h>
+
+
+#define MVEC_DEBUG
+
+#ifdef MVEC_DEBUG
+#define DPRINTF printf
+#else
+#define DPRINTF(...)
+#endif
+
+static MALLOC_DEFINE(M_MVEC, "mvec", "mbuf vector");
+
+static int type2len[] = {-1, MCLBYTES, -1, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES, -1, MSIZE};
+#ifdef INVARIANTS
+static int validtypes = ((1<<EXT_CLUSTER)|(1<<EXT_JUMBOP)|(1<<EXT_JUMBO9)|(1<<EXT_JUMBO16)|(1<<EXT_MBUF));
+#endif
+
+#define ADDCARRY(x) (x > 65535 ? x -= 65535 : x)
+#define REDUCE32 \
+ { \
+ q_util.q = sum; \
+ sum = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+ }
+#define REDUCE16 \
+ { \
+ q_util.q = sum; \
+ l_util.l = q_util.s[0] + q_util.s[1] + q_util.s[2] + q_util.s[3]; \
+ sum = l_util.s[0] + l_util.s[1]; \
+ ADDCARRY(sum); \
+ }
+
+uint64_t in_cksumdata(const void *buf, int len);
+
+union l_util {
+ u_int16_t s[2];
+ u_int32_t l;
+};
+union q_util {
+ u_int16_t s[4];
+ u_int32_t l[2];
+ u_int64_t q;
+};
+
+
+#ifdef INVARIANTS
+void
+mvec_sanity(struct mbuf *m)
+{
+ struct mbuf_ext *mext;
+ struct mvec_header *mh;
+ struct mvec_ent *me;
+ int i, total;
+
+ mext = (void*)m;
+ mh = &mext->me_mh;
+ me = &mext->me_ents[mh->mh_start];
+ total = 0;
+ MPASS(m->m_len == me->me_len);
+ MPASS(m->m_data == (me->me_cl + me->me_off));
+ MPASS(mh->mh_count >= (mh->mh_start + mh->mh_used));
+ for (i = mh->mh_start; i < mh->mh_used + mh->mh_start; i++, me++) {
+ if (__predict_false(me->me_len == 0))
+ continue;
+
+ MPASS(me->me_cl);
+ MPASS(me->me_cl != (void *)0xdeadc0dedeadc0de);
+ total += me->me_len;
+ }
+ MPASS(total == m->m_pkthdr.len);
+}
+#endif
+
+static void
+mvec_buffer_free(struct mbuf *m)
+{
+ struct mvec_header *mh;
+
+ mh = MBUF2MH(m);
+ switch (mh->mh_mvtype) {
+ case MVALLOC_MALLOC:
+ free(m, M_MVEC);
+ break;
+ case MVALLOC_MBUF:
+ uma_zfree_arg(zone_mbuf, m, (void *)MB_DTOR_SKIP);
+ break;
+ }
+}
+
+
+static void
+mvec_clfree(struct mvec_ent *me, m_refcnt_t *refcntp, bool dupref)
+{
+ bool free = true;
+ struct mbuf *mref;
+ volatile uint32_t *refcnt;
+
+ mref = NULL;
+ if (dupref) {
+ if (me->me_ext_flags & EXT_FLAG_EMBREF) {
+ refcnt = &refcntp->ext_count;
+ } else {
+ refcnt = refcntp->ext_cnt;
+ }
+ free = (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1);
+ }
+ if (!free)
+ return;
+ if (!(me->me_ext_flags & EXT_FLAG_NOFREE))
+ mref = __containerof(refcnt, struct mbuf, m_ext.ext_count);
+
+ switch (me->me_ext_type) {
+ case EXT_CLUSTER:
+ uma_zfree(zone_clust, me->me_cl);
+ break;
+ case EXT_JUMBOP:
+ uma_zfree(zone_jumbop, me->me_cl);
+ break;
+ case EXT_JUMBO9:
+ uma_zfree(zone_jumbo9, me->me_cl);
+ break;
+ case EXT_JUMBO16:
+ uma_zfree(zone_jumbo16, me->me_cl);
+ break;
+ default:
+ panic("unsupported ext_type: %d\n", me->me_ext_type);
+ }
+ if (mref != NULL)
+ uma_zfree_arg(zone_mbuf, mref, (void *)MB_DTOR_SKIP);
+}
+
+static void
+mvec_ent_free(struct mvec_header *mh, int idx)
+{
+ struct mvec_ent *me = (struct mvec_ent *)(mh + 1);
+ m_refcnt_t *me_count = (m_refcnt_t *)(me + mh->mh_count);
+
+ me += idx;
+ me_count += idx;
+ switch (me->me_type) {
+ case MVEC_MBUF:
+ uma_zfree_arg(zone_mbuf, me->me_cl, (void *)MB_DTOR_SKIP);
+ break;
+ case MVEC_MANAGED:
+ mvec_clfree(me, me_count, mh->mh_multiref);
+ break;
+ default:
+ /* ... */
+ break;
+ }
+}
+
+void *
+mvec_seek(struct mbuf *m, struct mvec_cursor *mc, int offset)
+{
+ struct mvec_ent *me = MBUF2ME(m);
+ struct mvec_header *mh = MBUF2MH(m);
+ int rem;
+
+ mc->mc_idx = mc->mc_off = 0;
+ MPASS(offset <= m->m_pkthdr.len);
+ rem = offset;
+
+ me = MHMEI(m, mh, 0);
+ do {
+ if (rem > me->me_len) {
+ rem -= me->me_len;
+ me++;
+ mc->mc_idx++;
+ } else if (rem < me->me_len) {
+ mc->mc_off = rem;
+ rem = 0;
+ } else {
+ rem = 0;
+ mc->mc_idx++;
+ me++;
+ }
+ } while(rem);
+
+ return (void *)(me_data(me) + mc->mc_off);
+}
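+
+/*
+ * Example use of mvec_seek() (illustrative, not from the original
+ * sources): for an mvec whose entries have lengths {100, 200},
+ * mvec_seek(m, &mc, 150) leaves mc_idx == 1 and mc_off == 50 and
+ * returns a pointer 50 bytes into the second entry's data; an offset
+ * landing exactly on an entry boundary advances to the start of the
+ * next entry (mc_off == 0).
+ */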
+
+static void
+mvec_trim_head(struct mbuf *m, int offset)
+{
+ struct mvec_header *mh = MBUF2MH(m);
+ struct mvec_ent *me = MBUF2ME(m);
+ int rem;
+ bool owned;
+
+ MPASS(offset <= m->m_pkthdr.len);
+ rem = offset;
+ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+ owned = (m->m_ext.ext_count == 1);
+ } else {
+ owned = (*(m->m_ext.ext_cnt) == 1);
+ }
+ do {
+ if (rem > me->me_len) {
+ rem -= me->me_len;
+ if (owned)
+ mvec_ent_free(mh, mh->mh_start);
+ mh->mh_start++;
+ mh->mh_used--;
+ me++;
+ } else if (rem < me->me_len) {
+ me->me_off += rem;
+ me->me_len -= rem;
+ rem = 0;
+ } else {
+ rem = 0;
+ mvec_ent_free(mh, mh->mh_start);
+ mh->mh_start++;
+ mh->mh_used--;
+ }
+ } while(rem);
+ m->m_pkthdr.len -= offset;
+ m->m_data = ME_SEG(m, mh, 0);
+}
+
+static void
+mvec_trim_tail(struct mbuf *m, int offset)
+{
+ struct mvec_header *mh = MBUF2MH(m);
+ struct mvec_ent *me = MBUF2ME(m);
+ int i, rem;
+ bool owned;
+
+ MPASS(offset <= m->m_pkthdr.len);
+ rem = offset;
+ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+ owned = (m->m_ext.ext_count == 1);
+ } else {
+ owned = (*(m->m_ext.ext_cnt) == 1);
+ }
+ i = mh->mh_count-1;
+ me = &me[i];
+ do {
+ if (rem > me->me_len) {
+ rem -= me->me_len;
+ me->me_len = 0;
+ if (owned)
+ mvec_ent_free(mh, i);
+ me--;
+ mh->mh_used--;
+ } else if (rem < me->me_len) {
+ me->me_len -= rem;
+ rem = 0;
+ } else {
+ rem = 0;
+ me->me_len = 0;
+ if (owned)
+ mvec_ent_free(mh, i);
+ mh->mh_used--;
+ }
+ i--;
+ } while(rem);
+ m->m_pkthdr.len -= offset;
+}
+
+void
+mvec_adj(struct mbuf *m, int req_len)
+{
+ if (__predict_false(req_len == 0))
+ return;
+ if (req_len > 0)
+ mvec_trim_head(m, req_len);
+ else
+ mvec_trim_tail(m, -req_len);
+}
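+
+/*
+ * Usage sketch for mvec_adj() (illustrative): like m_adj(9),
+ * mvec_adj(m, 14) strips a 14-byte header from the front and
+ * mvec_adj(m, -4) trims a 4-byte trailer from the tail; entries
+ * that are consumed entirely are freed immediately when this mvec
+ * is the sole owner of its clusters.
+ */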
+
+void
+mvec_copydata(const struct mbuf *m, int off, int len, caddr_t cp)
+{
+ panic("%s unimplemented", __func__);
+}
+
+struct mbuf *
+mvec_dup(const struct mbuf *m, int how)
+{
+ panic("%s unimplemented", __func__);
+ return (NULL);
+}
+
+struct mbuf *
+mvec_defrag(const struct mbuf *m, int how)
+{
+ panic("%s unimplemented", __func__);
+ return (NULL);
+}
+
+struct mbuf *
+mvec_collapse(struct mbuf *m, int how, int maxfrags)
+{
+ panic("%s unimplemented", __func__);
+ return (NULL);
+}
+
+uint16_t
+mvec_cksum_skip(struct mbuf *m, int len, int skip)
+{
+ u_int64_t sum = 0;
+ int mlen = 0;
+ int clen = 0;
+ caddr_t addr;
+ union q_util q_util;
+ union l_util l_util;
+ struct mvec_cursor mc;
+ struct mvec_header mh;
+ struct mvec_ent *me;
+
+ MPASS(m_ismvec(m));
+
+ len -= skip;
+ mvec_seek(m, &mc, skip);
+ mh = *(MBUF2MH(m));
+
+ /* XXX */
+ if (mh.mh_multipkt)
+ return (0);
+
+ me = MHMEI(m, &mh, mc.mc_idx);
+ mlen = me->me_len - mc.mc_off;
+ addr = me->me_cl + me->me_off + mc.mc_off;
+ goto skip_start;
+
+ for (; mh.mh_used && len; me++) {
+ mh.mh_used--;
+ if (me->me_len == 0)
+ continue;
+ mlen = me->me_len;
+ addr = me->me_cl + me->me_off;
+skip_start:
+ if (len < mlen)
+ mlen = len;
+ if ((clen ^ (long) addr) & 1)
+ sum += in_cksumdata(addr, mlen) << 8;
+ else
+ sum += in_cksumdata(addr, mlen);
+
+ clen += mlen;
+ len -= mlen;
+ }
+ REDUCE16;
+ return (~sum & 0xffff);
+}
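+
+/*
+ * Note on mvec_cksum_skip() (editorial): the (clen ^ (long) addr) & 1
+ * test above mirrors the mbuf-chain in_cksum_skip(); when a segment
+ * starts with opposite byte parity from the running total, its
+ * partial sum is byte-swapped (<< 8) before folding so that 16-bit
+ * alignment is preserved across entry boundaries.
+ */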
+
+struct mbuf *
+mvec_prepend(struct mbuf *m, int size)
+{
+ struct mvec_header *mh;
+ struct mvec_ent *me;
+ struct mbuf *data;
+ struct mbuf_ext *mext;
+
+ MPASS(size <= MSIZE);
+ if (__predict_false((data = m_get(M_NOWAIT, MT_NOINIT)) == NULL))
+ return (NULL);
+
+ mext = (struct mbuf_ext *)m;
+ mh = &mext->me_mh;
+ if (__predict_true(mh->mh_start)) {
+ mh->mh_start--;
+ mh->mh_used++;
+ me = MHMEI(m, mh, 0);
+ me->me_len = size;
+ me->me_cl = (caddr_t)data;
+ me->me_off = 0;
+ me->me_type = MVEC_MBUF;
+ me->me_eop = 0;
+ me->me_ext_flags = 0;
+ me->me_ext_type = EXT_MBUF;
+ m->m_pkthdr.len += size;
+ m->m_len = size;
+ m->m_data = me->me_cl;
+ } else {
+ panic("implement fallback path for %s", __func__);
+ }
+ return (m);
+}
+
+struct mbuf *
+mvec_append(struct mbuf *m, caddr_t cl, uint16_t off,
+ uint16_t len, uint8_t cltype)
+{
+ struct mvec_header *mh;
+ struct mvec_ent *me;
+
+ mh = MBUF2MH(m);
+ KASSERT(mh->mh_used < mh->mh_count,
+ ("need to add support for growing mvec on append"));
+ me = MHMEI(m, mh, mh->mh_used);
+ me->me_cl = cl;
+ me->me_off = off;
+ me->me_len = len;
+ me->me_ext_type = cltype;
+ me->me_ext_flags = 0;
+ m->m_pkthdr.len += len;
+ if (mh->mh_used == 0) {
+ m->m_len = len;
+ m->m_data = (cl + off);
+ }
+ mh->mh_used++;
+ return (m);
+}
+
+static int
+mvec_init_mbuf_(struct mbuf *m, uint8_t count, uint8_t type, int len)
+{
+ struct mvec_header *mh;
+ int rc;
+
+ mh = MBUF2MH(m);
+ *((uint64_t *)mh) = 0;
+ if (type == MVALLOC_MBUF && len == 0)
+ mh->mh_count = MBUF_ME_MAX;
+ else
+ mh->mh_count = count;
+ mh->mh_mvtype = type;
+ /* leave room for prepend */
+ mh->mh_start = 1;
+ rc = m_init(m, M_NOWAIT, MT_DATA, M_PKTHDR);
+ if (__predict_false(rc))
+ return (rc);
+
+ m->m_next = m->m_nextpkt = NULL;
+ m->m_len = 0;
+ m->m_data = NULL;
+ m->m_flags = M_PKTHDR|M_EXT;
+ m->m_ext.ext_free = NULL;
+ m->m_ext.ext_arg1 = m->m_ext.ext_arg2 = NULL;
+ m->m_ext.ext_flags = EXT_FLAG_EMBREF;
+ m->m_ext.ext_type = EXT_MVEC;
+ m->m_ext.ext_size = MSIZE;
+ m->m_ext.ext_buf = (caddr_t)m;
+ m->m_ext.ext_cnt = NULL;
+ m->m_ext.ext_count = 1;
+ return (0);
+}
+
+int
+mvec_init_mbuf(struct mbuf *m, uint8_t count, uint8_t type)
+{
+
+ return (mvec_init_mbuf_(m, count, type, 0));
+}
+
+struct mbuf_ext *
+mvec_alloc(uint8_t count, int len, int how)
+{
+ int size;
+ uint8_t type;
+ struct mbuf_ext *m;
+
+ size = sizeof(*m) + count*sizeof(struct mvec_ent);
+ size += len;
+ if (size <= MSIZE) {
+ m = (void*)m_get(how, MT_NOINIT);
+ type = MVALLOC_MBUF;
+ } else {
+ m = malloc(size, M_MVEC, how);
+ type = MVALLOC_MALLOC;
+ }
+ if (__predict_false(m == NULL))
+ return (NULL);
+ mvec_init_mbuf_((struct mbuf *)m, count, type, len);
+ return (m);
+}
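+
+/*
+ * Sizing example for mvec_alloc() (illustrative): a request for a
+ * handful of entries, e.g. sizeof(struct mbuf_ext) +
+ * 8 * sizeof(struct mvec_ent), fits within MSIZE and is satisfied
+ * from zone_mbuf (MVALLOC_MBUF); larger requests, or ones carrying
+ * extra `len` bytes for refcounts and copied headers, fall back to
+ * malloc(9) with type M_MVEC (MVALLOC_MALLOC).
+ */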
+
+static int
+mvec_ent_size(struct mvec_ent *me)
+{
+ int type;
+
+ MPASS(me->me_ext_type && (me->me_ext_type < 32));
+
+ type = me->me_ext_type;
+ MPASS((1<<type) & validtypes);
+ return (type2len[type]);
+}
+
+struct mbuf *
+mvec_pullup(struct mbuf *m, int idx, int count)
+{
+ struct mvec_header *mh;
+ struct mvec_ent *mecur, *menxt;
+ int tailroom, size, copylen, doff, i, len;
+
+ /* XXX --- fix */
+ MPASS(idx == 0);
+ mvec_sanity(m);
+ MPASS(count <= m->m_pkthdr.len);
+ mh = MBUF2MH(m);
+ mecur = MHMEI(m, mh, 0);
+ size = mvec_ent_size(mecur);
+ tailroom = size - mecur->me_off - mecur->me_len;
+ MPASS(tailroom >= 0);
+ copylen = count - mecur->me_len;
+
+ if (__predict_false(count <= mecur->me_len))
+ return (m);
+ /*
+ * XXX - If we're not the exclusive owner we need to allocate a new
+ * buffer regardless.
+ */
+ if (copylen > size) {
+ /* allocate new buffer */
+ panic("allocate new buffer copylen=%d size=%d", copylen, size);
+ } else if (copylen > tailroom) {
+ /*
+ * move data up if possible
+ * else allocate new buffer
+ */
+ panic("relocate data copylen=%d size=%d tailroom=%d", copylen, size, tailroom);
+ }
+ doff = mecur->me_off + mecur->me_len;
+ i = 1;
+ do {
+ menxt = MHMEI(m, mh, i);
+ len = min(copylen, menxt->me_len);
+ bcopy(ME_SEG(m, mh, i), mecur->me_cl + doff, len);
+ doff += len;
+ mecur->me_len += len;
+ menxt->me_off += len;
+ menxt->me_len -= len;
+ copylen -= len;
+ i++;
+ } while (copylen);
+ m->m_data = ME_SEG(m, mh, 0);
+ m->m_len = ME_LEN(m, mh, 0);
+ mvec_sanity(m);
+ return (m);
+}
+
+void
+mvec_free(struct mbuf_ext *m)
+{
+ struct mvec_header *mh;
+ struct mvec_ent *me;
+ m_refcnt_t *me_count;
+ int i;
+
+ mh = &m->me_mh;
+ me = m->me_ents;
+ me_count = (m_refcnt_t *)(me + mh->mh_count);
+
+ for (i = 0; i < mh->mh_count; i++, me_count++, me++) {
+ if (__predict_false(me->me_cl == NULL))
+ continue;
+ switch (me->me_type) {
+ case MVEC_MBUF:
+ uma_zfree_arg(zone_mbuf, me->me_cl, (void *)MB_DTOR_SKIP);
+ break;
+ case MVEC_MANAGED:
+ mvec_clfree(me, me_count, mh->mh_multiref);
+ break;
+ default:
+ /* ... */
+ break;
+ }
+ }
+ mvec_buffer_free((void*)m);
+}
+
+struct mbuf_ext *
+mchain_to_mvec(struct mbuf *m, int how)
+{
+ struct mbuf *mp, *mnext;
+ struct mbuf_ext *mnew;
+ struct mvec_header *mh;
+ struct mvec_ent *me;
+ int count, size;
+ bool dupref;
+ m_refcnt_t *me_count;
+
+ if (__predict_false(m_ismvec(m)))
+ return ((struct mbuf_ext *)m);
+
+ size = count = 0;
+ mp = m;
+ dupref = false;
+ do {
+ mnext = mp->m_next;
+ count++;
+ if (mp->m_flags & M_EXT) {
+ /*
+ * bail on ext_free -- we can't efficiently pass an mbuf
+ * at free time and m_ext adds up to a lot of space
+ */
+ if (mp->m_ext.ext_free != NULL) {
+ DPRINTF("%s ext_free is set: %p\n", __func__, mp->m_ext.ext_free);
+ return (NULL);
+ }
+ if (!(mp->m_ext.ext_flags & EXT_FLAG_EMBREF && mp->m_ext.ext_count == 1))
+ dupref = true;
+ }
+ mp = mnext;
+ } while (mp);
+
+ /* add spare */
+ count++;
+ if (dupref)
+ size = count*sizeof(void*);
+ mnew = mvec_alloc(count, size, how);
+
+ if (mnew == NULL) {
+ DPRINTF("%s malloc failed\n", __func__);
+ return (NULL);
+ }
+ mh = &mnew->me_mh;
+ mh->mh_used = count-1;
+ MPASS(mh->mh_count == mh->mh_used+1);
+ mh->mh_multiref = dupref;
+ /* leave first entry open for encap */
+ bcopy(&m->m_pkthdr, &mnew->me_mbuf.m_pkthdr, sizeof(struct pkthdr));
+
+ me = mnew->me_ents;
+ MPASS(mh->mh_start == 1);
+ me->me_cl = NULL;
+ me->me_off = me->me_len = 0;
+ me->me_ext_type = me->me_ext_flags = 0;
+ me++;
+ me_count = MBUF2REF(mnew);
+ if (dupref)
+ bzero(me_count, count*sizeof(void *));
+ me_count++;
+ mp = m;
+ do {
+ mnext = mp->m_next;
+ if (mp->m_flags & M_EXT) {
+ me->me_cl = mp->m_ext.ext_buf;
+ me->me_off = ((uintptr_t)mp->m_data - (uintptr_t)mp->m_ext.ext_buf);
+ me->me_type = MVEC_MANAGED;
+ me->me_ext_flags = mp->m_ext.ext_flags;
+ MPASS(mp->m_ext.ext_type < 32);
+ me->me_ext_type = mp->m_ext.ext_type;
+#ifdef INVARIANTS
+ (void)mvec_ent_size(me);
+#endif
+ } else {
+ me->me_cl = (caddr_t)mp;
+ me->me_off = ((uintptr_t)(mp->m_data) - (uintptr_t)mp);
+ me->me_type = MVEC_MBUF;
+ me->me_ext_flags = 0;
+ me->me_ext_type = EXT_MBUF;
+ }
+ me->me_len = mp->m_len;
+ me->me_eop = 0;
+ if (dupref) {
+ if (mp->m_flags & M_EXT) {
+ if (mp->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+ me_count->ext_cnt = &mp->m_ext.ext_count;
+ me->me_ext_flags &= ~EXT_FLAG_EMBREF;
+ } else
+ me_count->ext_cnt = mp->m_ext.ext_cnt;
+ }
+ if (mp->m_flags & M_NOFREE)
+ me->me_ext_flags |= EXT_FLAG_NOFREE;
+ }
+ me_count++;
+ mp = mnext;
+ me++;
+ } while (mp);
+ mnew->me_mbuf.m_len = mnew->me_ents[1].me_len;
+ mnew->me_mbuf.m_data = (mnew->me_ents[1].me_cl + mnew->me_ents[1].me_off);
+ mh = MBUF2MH(mnew);
+ MPASS(mh->mh_count == mh->mh_start + mh->mh_used);
+ mvec_sanity((void*)mnew);
+ return (mnew);
+}
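+
+/*
+ * Example for mchain_to_mvec() (illustrative): a chain of three
+ * cluster-backed mbufs converts to one mvec with mh_count == 4:
+ * one spare entry reserved at the front for later encapsulation
+ * (mh_start == 1, see mvec_prepend()) plus one entry per source
+ * mbuf or cluster.
+ */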
+
+struct mbuf_ext *
+pktchain_to_mvec(struct mbuf *m, int mtu, int how)
+{
+ struct mbuf *mp, *mnext;
+ struct mbuf_ext *mnew, *mh, *mt;
+
+ mp = m;
+ mh = mt = NULL;
+ while (mp) {
+ mnext = mp->m_nextpkt;
+ mnew = mchain_to_mvec(mp, how);
+ if (__predict_false(mnew == NULL)) {
+ m_freem(mp);
+ mp = mnext;
+ continue;
+ }
+ if (mh == NULL) {
+ mh = mt = mnew;
+ } else {
+ mt->me_mbuf.m_nextpkt = (void*)mnew;
+ mt = mnew;
+ }
+ mp = mnext;
+ }
+ return (mh);
+}
+
+static void
+m_ext_init(struct mbuf *m, struct mbuf_ext *head, struct mvec_header *mh)
+{
+ struct mvec_ent *me;
+ struct mbuf *headm;
+ bool doref;
+
+ headm = &head->me_mbuf;
+ doref = true;
+ me = &head->me_ents[mh->mh_start];
+ m->m_ext.ext_buf = me->me_cl;
+ m->m_ext.ext_arg1 = headm->m_ext.ext_arg1;
+ m->m_ext.ext_arg2 = headm->m_ext.ext_arg2;
+ m->m_ext.ext_free = headm->m_ext.ext_free;
+ m->m_ext.ext_type = me->me_ext_type;
+ if (me->me_ext_type) {
+ m->m_ext.ext_flags = me->me_ext_flags;
+ m->m_ext.ext_size = mvec_ent_size(me);
+ } else {
+ m->m_ext.ext_flags = EXT_FLAG_NOFREE;
+ /* Only used by m_sanity so just call it our size */
+ m->m_ext.ext_size = me->me_len + me->me_off;
+ }
+ /*
+ * There are 2 cases for refcount transfer:
+ * 1) all clusters are owned by the mvec [default]
+ * - point at mvec refcnt and increment
+ * 2) cluster has a normal external refcount
+ */
+ if (__predict_true(!head->me_mh.mh_multiref)) {
+ m->m_ext.ext_flags = EXT_FLAG_MVECREF;
+ if (headm->m_ext.ext_flags & EXT_FLAG_EMBREF)
+ m->m_ext.ext_cnt = &headm->m_ext.ext_count;
+ else
+ m->m_ext.ext_cnt = headm->m_ext.ext_cnt;
+ } else {
+ m_refcnt_t *ref = MHREFI(headm, mh, 0);
+
+ m->m_ext.ext_cnt = ref->ext_cnt;
+ if (ref->ext_cnt == NULL) {
+ m->m_ext.ext_flags |= EXT_FLAG_EMBREF;
+ m->m_ext.ext_type = 0;
+ m->m_ext.ext_count = 1;
+ doref = false;
+ }
+ }
+ if (doref)
+ atomic_add_int(m->m_ext.ext_cnt, 1);
+}
+
+static struct mbuf *
+mvec_to_mchain_pkt(struct mbuf_ext *mp, struct mvec_header *mhdr, int how)
+{
+ struct mvec_ent *me;
+ struct mbuf *m, *mh, *mt, *mpm;
+
+ if (__predict_false((mh = m_gethdr(how, MT_DATA)) == NULL))
+ return (NULL);
+
+ mpm = &mp->me_mbuf;
+ me = MHMEI(mp, mhdr, 0);
+ mh->m_flags |= M_EXT;
+ mh->m_flags |= mpm->m_flags & (M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG|M_VXLANTAG);
+ /* XXX update csum_data after encap */
+ mh->m_pkthdr.csum_data = mpm->m_pkthdr.csum_data;
+ mh->m_pkthdr.csum_flags = mpm->m_pkthdr.csum_flags;
+ mh->m_pkthdr.vxlanid = mpm->m_pkthdr.vxlanid;
+ m_ext_init(mh, mp, mhdr);
+ mh->m_data = me->me_cl + me->me_off;
+ mh->m_pkthdr.len = mh->m_len = me->me_len;
+ mhdr->mh_start++;
+ mhdr->mh_used--;
+ mt = mh;
+ while (!me->me_eop && mhdr->mh_used) {
+ if (__predict_false((m = m_get(how, MT_DATA)) == NULL))
+ goto fail;
+ me++;
+ mt->m_next = m;
+ mt = m;
+ mt->m_flags |= M_EXT;
+ m_ext_init(mt, mp, mhdr);
+ mt->m_len = me->me_len;
+ mh->m_pkthdr.len += mt->m_len;
+ mt->m_data = me->me_cl + me->me_off;
+ mhdr->mh_start++;
+ mhdr->mh_used--;
+ }
+#ifdef INVARIANTS
+ m_sanity(mh, 0);
+#endif
+ return (mh);
+ fail:
+ if (mh)
+ m_freem(mh);
+ return (NULL);
+}
+
+struct mbuf *
+mvec_to_mchain(struct mbuf *mp, int how)
+{
+ struct mvec_header *pmhdr, mhdr;
+ struct mbuf *mh, *mt, *m;
+#ifdef INVARIANTS
+ int count = 0;
+#endif
+
+ mvec_sanity(mp);
+ pmhdr = MBUF2MH(mp);
+ bcopy(pmhdr, &mhdr, sizeof(mhdr));
+ mh = mt = NULL;
+ while (mhdr.mh_used) {
+#ifdef INVARIANTS
+ count++;
+#endif
+ if (__predict_false((m = mvec_to_mchain_pkt((struct mbuf_ext *)mp, &mhdr, how)) == NULL)) {
+ DPRINTF("mvec_to_mchain_pkt failed\n");
+ goto fail;
+ }
+ if (mh != NULL) {
+ mt->m_nextpkt = m;
+ mt = m;
+ } else
+ mh = mt = m;
+ }
+#ifdef INVARIANTS
+ m = mh;
+ while (m) {
+ MPASS(m->m_data);
+ m_sanity(m, 0);
+ m = m->m_nextpkt;
+ count--;
+ }
+ MPASS(count == 0);
+#endif
+ return (mh);
+ fail:
+ m_freechain(mh);
+ return (NULL);
+}
+
+/*
+ * Move the below to net/ once working
+ */
+
+#include <sys/socket.h>
+#include <net/if.h>
+#include <net/if_var.h>
+#include <net/ethernet.h>
+#include <net/iflib.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include <machine/in_cksum.h>
+
+#define MIN_HDR_LEN (ETHER_HDR_LEN + sizeof(struct ip) + sizeof(struct tcphdr))
+
+static int
+mvec_parse_header(struct mbuf_ext *mp, int prehdrlen, if_pkt_info_t pi)
+{
+ struct ether_vlan_header *evh;
+ struct mvec_header *mh = &mp->me_mh;
+ struct mbuf *m;
+
+ m = (void*)mp;
+ mvec_sanity(m);
+ if (__predict_false(m->m_len < MIN_HDR_LEN + prehdrlen) &&
+ __predict_false(mvec_pullup(m, 0, prehdrlen + MIN_HDR_LEN) == NULL))
+ return (ENOMEM);
+ evh = (struct ether_vlan_header *)(ME_SEG(m, mh, 0) + prehdrlen);
+ if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
+ pi->ipi_etype = ntohs(evh->evl_proto);
+ pi->ipi_ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
+ } else {
+ pi->ipi_etype = ntohs(evh->evl_encap_proto);
+ pi->ipi_ehdrlen = ETHER_HDR_LEN;
+ }
+ switch (pi->ipi_etype) {
+ case ETHERTYPE_IP: {
+ struct ip *ip = NULL;
+ struct tcphdr *th = NULL;
+ int minthlen;
+
+ minthlen = pi->ipi_ehdrlen + sizeof(*ip) + sizeof(*th);
+ if (__predict_false(m->m_len < minthlen + prehdrlen) &&
+ __predict_false(mvec_pullup(m, 0, prehdrlen + minthlen) == NULL))
+ return (ENOMEM);
+ ip = (struct ip *)(ME_SEG(m, mh, 0) + prehdrlen + pi->ipi_ehdrlen);
+ pi->ipi_ip_hlen = ip->ip_hl << 2;
+ pi->ipi_ipproto = ip->ip_p;
+ if (ip->ip_p != IPPROTO_TCP)
+ return (EINVAL);
+ minthlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + sizeof(*th);
+ if (__predict_false(m->m_len < minthlen + prehdrlen) &&
+ __predict_false(mvec_pullup(m, 0, prehdrlen + minthlen) == NULL))
+ return (ENOMEM);
+ th = (struct tcphdr *)(ME_SEG(m, mh, 0) + prehdrlen + pi->ipi_ehdrlen + pi->ipi_ip_hlen);
+ pi->ipi_tcp_hflags = th->th_flags;
+ pi->ipi_tcp_hlen = th->th_off << 2;
+ pi->ipi_tcp_seq = th->th_seq;
+ minthlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen + pi->ipi_tcp_hlen;
+ if (__predict_false(m->m_len < minthlen + prehdrlen) &&
+ __predict_false(mvec_pullup(m, 0, prehdrlen + minthlen) == NULL))
+ return (ENOMEM);
+ if (prehdrlen == 0) {
+ th->th_sum = in_pseudo(ip->ip_src.s_addr,
+ ip->ip_dst.s_addr, htons(IPPROTO_TCP));
+ ip->ip_sum = 0;
+ ip->ip_len = htons(pi->ipi_ip_hlen + pi->ipi_tcp_hlen + pi->ipi_tso_segsz);
+
+ }
+ break;
+ }
+ case ETHERTYPE_IPV6: {
+ break;
+ }
+ default:
+ /* XXX unsupported -- error */
+ break;
+ }
+ mvec_sanity(m);
+ return (0);
+}
+
+struct tso_state {
+ if_pkt_info_t ts_pi;
+ tcp_seq ts_seq;
+ uint16_t ts_idx;
+ uint16_t ts_prehdrlen;
+ uint16_t ts_hdrlen;
+ uint16_t ts_ip_len_off;
+ uint16_t ts_uh_len_off;
+};
+
+static void
+tso_init(struct tso_state *state, caddr_t hdr, if_pkt_info_t pi, int prehdrlen, int hdrlen)
+{
+ struct ip *ip;
+
+ MPASS(hdrlen > prehdrlen);
+ ip = (struct ip *)(hdr + prehdrlen + pi->ipi_ehdrlen);
+ state->ts_pi = pi;
+ state->ts_idx = ntohs(ip->ip_id);
+ state->ts_prehdrlen = prehdrlen;
+ state->ts_hdrlen = hdrlen;
+ state->ts_seq = ntohl(pi->ipi_tcp_seq);
+ state->ts_uh_len_off = state->ts_ip_len_off = 0;
+ /* XXX assuming !VLAN */
+ if (prehdrlen) {
+ state->ts_uh_len_off = ETHER_HDR_LEN + sizeof(*ip) + offsetof(struct udphdr, uh_ulen);
+ state->ts_ip_len_off = ETHER_HDR_LEN + offsetof(struct ip, ip_len);
+ }
+}
+
+static void
+tso_fixup(struct tso_state *state, caddr_t hdr, int len, bool last)
+{
+ if_pkt_info_t pi = state->ts_pi;
+ struct ip *ip;
+ struct tcphdr *th;
+ uint16_t encap_len, *hdr_lenp;
+
+ encap_len = len + state->ts_hdrlen - state->ts_prehdrlen - pi->ipi_ehdrlen;
+ if (state->ts_prehdrlen) {
+ hdr_lenp = (uint16_t *)(hdr + state->ts_uh_len_off);
+ *hdr_lenp = htons(len + state->ts_hdrlen - ETHER_HDR_LEN - sizeof(*ip));
+ hdr_lenp = (uint16_t *)(hdr + state->ts_ip_len_off);
+ *hdr_lenp = htons(len + state->ts_hdrlen - ETHER_HDR_LEN);
+ }
+ if (pi->ipi_etype == ETHERTYPE_IP) {
+ ip = (struct ip *)(hdr + state->ts_prehdrlen + pi->ipi_ehdrlen);
+ ip->ip_len = htons(encap_len);
+ ip->ip_id = htons(state->ts_idx);
+ ip->ip_sum = 0;
+ state->ts_idx++;
+ } else if (pi->ipi_etype == ETHERTYPE_IPV6) {
+ /* XXX notyet */
+ } else {
+ panic("bad ethertype %d in tso_fixup", pi->ipi_etype);
+ }
+ if (pi->ipi_ipproto == IPPROTO_TCP) {
+ th = (struct tcphdr *)(hdr + state->ts_prehdrlen + pi->ipi_ehdrlen + pi->ipi_ip_hlen);
+ th->th_seq = htonl(state->ts_seq);
+ state->ts_seq += len;
+ th->th_sum = 0;
+
+ /* Zero the PSH and FIN TCP flags if this is not the last
+ segment. */
+ if (!last)
+ th->th_flags &= ~(TH_PUSH | TH_FIN);
+ } else {
+ panic("non TCP IPPROTO %d in tso_fixup", pi->ipi_ipproto);
+ }
+}
+
+struct mbuf_ext *
+mvec_tso(struct mbuf_ext *mprev, int prehdrlen, bool freesrc)
+{
+ struct mvec_header *mh, *newmh;
+ struct mvec_cursor mc;
+ struct mvec_ent *me, *mesrc, *medst, *newme;
+ struct mbuf_ext *mnew;
+ struct mbuf *m;
+ struct if_pkt_info pi;
+ struct tso_state state;
+ m_refcnt_t *newme_count, *medst_count, *mesrc_count;
+ int segcount, soff, segrem, srem;
+ int i, segsz, nheaders, hdrsize;
+ int refsize, count, pktrem, srci, dsti;
+ volatile uint32_t *refcnt;
+ bool dupref, dofree;
+ caddr_t hdrbuf;
+
+ m = (void*)mprev;
+ mvec_sanity(m);
+ dofree = false;
+ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) {
+ refcnt = &m->m_ext.ext_count;
+ } else {
+ refcnt = m->m_ext.ext_cnt;
+ }
+ if (freesrc && (*refcnt == 1))
+ dofree = true;
+
+ segsz = m->m_pkthdr.tso_segsz;
+ mh = &mprev->me_mh;
+ me = mprev->me_ents;
+ dupref = mh->mh_multiref;
+ pi.ipi_tso_segsz = segsz;
+ if (mvec_parse_header(mprev, prehdrlen, &pi))
+ return (NULL);
+ hdrsize = prehdrlen + pi.ipi_ehdrlen + pi.ipi_ip_hlen + pi.ipi_tcp_hlen;
+ pktrem = m->m_pkthdr.len - hdrsize;
+ nheaders = pktrem / segsz;
+ if (nheaders*segsz != pktrem)
+ nheaders++;
+ segrem = segsz;
+ segcount = refsize = 0;
+ mvec_seek(m, &mc, hdrsize);
+ soff = mc.mc_off;
+ srci = mc.mc_idx;
+ while (pktrem > 0) {
+ MPASS(pktrem >= segrem);
+ MPASS(srci < mprev->me_mh.mh_count);
+ if (__predict_false(me[srci].me_len == 0)) {
+ srci++;
+ continue;
+ }
+ segrem = min(pktrem, segsz);
+ do {
+ int used;
+
+ srem = me[srci].me_len - soff;
+ used = min(segrem, srem);
+ srem -= used;
+ if (srem) {
+ soff += segrem;
+ } else {
+ srci++;
+ soff = 0;
+ }
+ segrem -= used;
+ pktrem -= used;
+ segcount++;
+ } while (segrem);
+ }
+
+ count = segcount + nheaders;
+ if (mh->mh_multiref)
+ refsize = count*sizeof(void*);
+
+ mnew = mvec_alloc(count, refsize + (nheaders * hdrsize), M_NOWAIT);
+ if (__predict_false(mnew == NULL))
+ return (NULL);
+ bcopy(&m->m_pkthdr, &mnew->me_mbuf.m_pkthdr, sizeof(struct pkthdr));
+ newmh = &mnew->me_mh;
+ newmh->mh_start = 0;
+ newmh->mh_used = count;
+ newmh->mh_multiref = mh->mh_multiref;
+ newmh->mh_multipkt = true;
+ newme = mnew->me_ents;
+ newme_count = MBUF2REF(mnew);
+ __builtin_prefetch(newme_count);
+ medst_count = newme_count;
+ medst = newme;
+
+ /*
+ * skip past header info
+ */
+ mvec_seek(m, &mc, hdrsize);
+ mesrc = mprev->me_ents;
+ mesrc_count = &MBUF2REF(m)[mc.mc_idx];
+ if (dupref) {
+ bzero(medst_count, count*sizeof(void *));
+ medst_count++;
+ }
+ medst[0].me_cl = NULL;
+ medst[0].me_len = 0;
+ /*
+ * Packet segmentation loop
+ */
+ srci = mc.mc_idx;
+ soff = mc.mc_off;
+ pktrem = m->m_pkthdr.len - hdrsize;
+ for (dsti = i = 0; i < nheaders; i++) {
+ /* skip header */
+ medst[dsti].me_cl = NULL;
+ medst[dsti].me_len = 0;
+ dsti++;
+ medst_count++;
+
+ MPASS(pktrem > 0);
+ segrem = min(segsz, pktrem);
+ do {
+ int used;
+
+ MPASS(pktrem > 0);
+ MPASS(srci < mprev->me_mh.mh_count);
+ MPASS(dsti < mnew->me_mh.mh_count);
+ /*
+ * Skip past any empty slots
+ */
+ if (__predict_false(mesrc[srci].me_len == 0)) {
+ srci++;
+ mesrc_count++;
+ continue;
+ }
+ /*
+ * At the start of a source descriptor:
+ * copy its attributes and, if dupref,
+ * its refcnt
+ */
+ if (soff == 0) {
+ if (dupref) {
+ *medst_count = *mesrc_count;
+ if (!dofree && (mesrc_count->ext_cnt != NULL))
+ atomic_add_int(mesrc_count->ext_cnt, 1);
+ }
+ medst[dsti].me_type = mesrc[srci].me_type;
+ medst[dsti].me_ext_flags = mesrc[srci].me_ext_flags;
+ medst[dsti].me_ext_type = mesrc[srci].me_ext_type;
+ } else {
+ medst[dsti].me_type = MVEC_UNMANAGED;
+ medst[dsti].me_ext_flags = 0;
+ medst[dsti].me_ext_type = 0;
+ }
+ /*
+ * Remaining value is len - off
+ */
+ srem = mesrc[srci].me_len - soff;
+ medst[dsti].me_cl = mesrc[srci].me_cl;
+ medst[dsti].me_off = mesrc[srci].me_off + soff;
+ used = min(segrem, srem);
+ srem -= used;
+ if (srem) {
+ soff += segrem;
+ } else {
+ srci++;
+ mesrc_count++;
+ soff = 0;
+ }
+ segrem -= used;
+ pktrem -= used;
+ medst[dsti].me_eop = (segrem == 0);
+ medst[dsti].me_len = used;
+ dsti++;
+ medst_count++;
+ } while (segrem);
+ }
+ /*
+ * Special case first header
+ */
+ medst = newme;
+ mesrc = MHMEI(m, MBUF2MH(m), 0);
+ /*
+ * Header initialization loop
+ */
+ hdrbuf = ((caddr_t)(newme + count)) + refsize;
+ tso_init(&state, mesrc->me_cl + mesrc->me_off, &pi, prehdrlen, hdrsize);
+ pktrem = m->m_pkthdr.len - hdrsize;
+ for (dsti = i = 0; i < nheaders; i++) {
+ MPASS(pktrem > 0);
+ /* skip ahead to next header slot */
+ while (medst[dsti].me_cl != NULL)
+ dsti++;
+ bcopy(mesrc->me_cl + mesrc->me_off, hdrbuf, hdrsize);
+ tso_fixup(&state, hdrbuf, min(pktrem, segsz), (pktrem <= segsz));
+ pktrem -= segsz;
+ medst[dsti].me_cl = hdrbuf;
+ medst[dsti].me_off = 0;
+ medst[dsti].me_len = hdrsize;
+ medst[dsti].me_type = MVEC_UNMANAGED;
+ medst[dsti].me_ext_flags = 0;
+ medst[dsti].me_ext_type = 0;
+ medst[dsti].me_eop = 0;
+ hdrbuf += hdrsize;
+ }
+
+ mnew->me_mbuf.m_len = mnew->me_ents->me_len;
+ mnew->me_mbuf.m_data = (mnew->me_ents->me_cl + mnew->me_ents->me_off);
+ mnew->me_mbuf.m_pkthdr.len = m->m_pkthdr.len + (nheaders - 1)*hdrsize;
+ mvec_sanity((struct mbuf *)mnew);
+ if (dofree) {
+ if (mesrc->me_cl && (mesrc->me_type == MVEC_MBUF) && mesrc->me_len == hdrsize)
+ uma_zfree_arg(zone_mbuf, mesrc->me_cl, (void *)MB_DTOR_SKIP);
+ mnew->me_mbuf.m_ext.ext_count = 1;
+ if (!(m->m_ext.ext_flags & EXT_FLAG_EMBREF))
+ mvec_buffer_free(__containerof(refcnt, struct mbuf, m_ext.ext_count));
+ /* XXX we're leaking here */
+ mvec_buffer_free(m);
+ } else {
+ if (m->m_ext.ext_flags & EXT_FLAG_EMBREF)
+ mnew->me_mbuf.m_ext.ext_cnt = &m->m_ext.ext_count;
+ else
+ mnew->me_mbuf.m_ext.ext_cnt = m->m_ext.ext_cnt;
+ atomic_add_int(mnew->me_mbuf.m_ext.ext_cnt, 1);
+ }
+ return (mnew);
+}
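+
+/*
+ * Worked example for mvec_tso() (illustrative): a 4344-byte TSO
+ * payload with tso_segsz == 1448 yields nheaders == 3; the new mvec
+ * interleaves one copied, fixed-up header per segment with the data
+ * entries, so m_pkthdr.len grows by (nheaders - 1) * hdrsize.
+ */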
diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h
index ba1e88c6175..b7d23354eb3 100644
--- a/sys/sys/mbuf.h
+++ b/sys/sys/mbuf.h
@@ -167,7 +167,8 @@ struct pkthdr {
uint8_t l3hlen; /* layer 3 hdr len */
uint8_t l4hlen; /* layer 4 hdr len */
uint8_t l5hlen; /* layer 5 hdr len */
- uint32_t spare;
+ uint32_t spare:8;
+ uint32_t vxlanid:24;
};
};
union {
@@ -285,6 +286,172 @@ struct mbuf {
};
};

+#define MVEC_MANAGED 0x0 /* cluster should be freed when refcnt goes to 0 */
+#define MVEC_UNMANAGED 0x1 /* memory managed elsewhere */
+#define MVEC_MBUF 0x2 /* free to mbuf zone */
+
+#define MVALLOC_MALLOC 0x0 /* mvec was malloced with type M_MVEC */
+#define MVALLOC_MBUF 0x1 /* mvec was allocated from zone_mbuf */
+
+/*
+ * | mbuf { }| pkthdr { } | m_ext { }| mvec_header { } | mvec_ent[] | refcnt[] (optional) |
+ */
+struct mvec_header {
+ uint64_t mh_count:7; /* number of segments */
+ uint64_t mh_start:7; /* starting segment */
+ uint64_t mh_used:7; /* segments in use */
+ uint64_t mh_mvtype:3; /* mvec allocation */
+ uint64_t mh_multiref:1; /* the clusters have independent ref counts so
+ * an array of refcounts sits after the mvec_ents
+ */
+ uint64_t mh_multipkt:1; /* contains multiple packets */
+ uint64_t mh_flags:38;
+};
+
+struct mvec_ent {
+ caddr_t me_cl;
+ uint16_t me_off;
+ uint16_t me_len;
+ uint16_t me_eop:1;
+ uint16_t me_type:2;
+ uint16_t me_spare:13;
+ uint8_t me_ext_flags;
+ uint8_t me_ext_type;
+};
+
+struct mbuf_ext {
+ struct mbuf me_mbuf;
+ struct mvec_header me_mh;
+ struct mvec_ent me_ents[0];
+};
+
+#ifdef _KERNEL
+#define MBUF2MH(m_) (&(((struct mbuf_ext *)(m_))->me_mh))
+#define MBUF2ME(m_) (((struct mbuf_ext *)(m_))->me_ents)
+#define MBUF2REF(m_) ((m_refcnt_t *)(MBUF2ME(m_) + MBUF2MH(m_)->mh_count))
+
+#define MHMEI(m_, mh_, idx_) (MBUF2ME(m_) + (mh_)->mh_start + (idx_))
+#define MHREFI(m_, mh_, idx_) (MBUF2REF(m_) + (mh_)->mh_start + (idx_))
+
+#define ME_SEG(m_, mh_, idx_) (MHMEI(m_, mh_,idx_)->me_cl + MHMEI(m_, mh_, idx_)->me_off)
+#define ME_LEN(m_, mh_, idx_) (MHMEI(m_, mh_,idx_)->me_len)
+
+#define MBUF_ME_MAX ((MHLEN - sizeof(struct m_ext) - sizeof(struct mvec_header))/sizeof(struct mvec_ent))
+
+#define m_ismvec(m) (((m)->m_flags & M_EXT) && ((m)->m_ext.ext_type == EXT_MVEC))
+#define me_data(me) ((me)->me_cl + (me)->me_off)
+/* XXX --- fix */
+#define ME_WRITABLE(m, i) (0)
+
+struct mvec_cursor {
+ uint16_t mc_idx;
+ uint16_t mc_off;
+};
+
+typedef union {
+ /*
+ * If EXT_FLAG_EMBREF is set, then we use refcount in the
+ * mbuf, the 'ext_count' member. Otherwise, we have a
+ * shadow copy and we use pointer 'ext_cnt'. The original
+ * mbuf is responsible to carry the pointer to free routine
+ * and its arguments. They aren't copied into shadows in
+ * mb_dupcl() to avoid dereferencing next cachelines.
+ */
+ volatile u_int ext_count;
+ volatile u_int *ext_cnt;
+} m_refcnt_t;
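+
+/*
+ * When mh_multiref is set, an m_refcnt_t array with one entry per
+ * mvec_ent sits immediately after the mvec_ent array, so that e.g.
+ * MBUF2REF(m)[i] tracks the reference count of the cluster behind
+ * MBUF2ME(m)[i].
+ */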
+
+/*
+ * Get the index and relative offset of `off` into mvec `m`.
+ */
+void *mvec_seek(struct mbuf *m, struct mvec_cursor *mc, int off);
+
+/*
+ * Trim (destructively if unshared) `req_len` bytes from `m`.
+ * Trims from the front if req_len is positive and from the tail
+ * if req_len is negative.
+ */
+void mvec_adj(struct mbuf *m, int req_len);
+
+/*
+ * Make the first `count` bytes of `m` at index `idx` contiguous.
+ */
+struct mbuf *mvec_pullup(struct mbuf *m, int idx, int count);
+
+/*
+ * Perform the accounting necessary to free all references contained
+ * in `m`, and `m` itself.
+ */
+void mvec_free(struct mbuf_ext *m);
+
+/*
+ * Convert mbuf chain `m` to mvec non-destructively. Returns
+ * NULL on failure. It is the caller's responsibility to free
+ * the source on success.
+ */
+struct mbuf_ext *mchain_to_mvec(struct mbuf *m, int how);
+
+struct mbuf_ext *pktchain_to_mvec(struct mbuf *m, int mtu, int how);
+
+
+/*
+ * Convert mvec `m` to mbuf chain non-destructively.
+ * Returns NULL if not successful. It is the caller's
+ * responsibility to free the source on success.
+ */
+struct mbuf *mvec_to_mchain(struct mbuf *m, int how);
+/*
+ * Given an mvec `m` returns a new mvec of segmented packets.
+ * If prehdrlen is non-zero the first prehdrlen bytes are
+ * treated as encapsulation and copied to the front of every
+ * packet. Non-destructive.
+ */
+struct mbuf_ext *mvec_tso(struct mbuf_ext *m, int prehdrlen, bool freesrc);
+
+/*
+ * Create `size` bytes of room at the front of `m`. Will allocate a
+ * new mvec if there is no room for an additional mvec_ent.
+ */
+struct mbuf *mvec_prepend(struct mbuf *m, int size);
+
+/*
+ * Append `cl` of type `cltype` and length `len` starting at `off`
+ * to mvec `m` - return a new mvec if `cl` won't fit in the existing
+ * entries.
+ */
+struct mbuf *mvec_append(struct mbuf *m, caddr_t cl, uint16_t off,
+ uint16_t len, uint8_t cltype);
+
+/*
+ * Allocate mvec with `count` entries and `len` additional bytes.
+ */
+struct mbuf_ext *mvec_alloc(uint8_t count, int len, int how);
+
+/*
+ * Initialize an mbuf `m` from zone_mbuf as an mvec.
+ */
+int mvec_init_mbuf(struct mbuf *m, uint8_t count, uint8_t type);
+
+
+uint16_t mvec_cksum_skip(struct mbuf *m, int len, int skip);
+
+
+/*
+ * Mvec analogs to mbuf helpers that should be implemented sooner
+ * rather than later.
+ */
+void mvec_copydata(const struct mbuf *m, int off, int len, caddr_t cp);
+struct mbuf *mvec_dup(const struct mbuf *m, int how);
+struct mbuf *mvec_defrag(const struct mbuf *m, int how);
+struct mbuf *mvec_collapse(struct mbuf *m, int how, int maxfrags);
+
+#ifdef INVARIANTS
+void mvec_sanity(struct mbuf *m);
+#else
+static __inline void mvec_sanity(struct mbuf *m __unused) {}
+#endif
+
+#endif
/*
* mbuf flags of global significance and layer crossing.
* Those of only protocol/layer specific significance are to be mapped
@@ -317,7 +484,7 @@ struct mbuf {
#define M_PROTO9 0x00100000 /* protocol-specific */
#define M_PROTO10 0x00200000 /* protocol-specific */
#define M_PROTO11 0x00400000 /* protocol-specific */
-#define M_PROTO12 0x00800000 /* protocol-specific */
+#define M_VXLANTAG 0x00800000 /* vxlanid is valid */

#define MB_DTOR_SKIP 0x1 /* don't pollute the cache by touching a freed mbuf */

@@ -326,14 +493,14 @@ struct mbuf {
*/
#define M_PROTOFLAGS \
(M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\
- M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12)
+ M_PROTO9|M_PROTO10|M_PROTO11)

/*
* Flags preserved when copying m_pkthdr.
*/
#define M_COPYFLAGS \
(M_PKTHDR|M_EOR|M_RDONLY|M_BCAST|M_MCAST|M_PROMISC|M_VLANTAG|M_TSTMP| \
- M_TSTMP_HPREC|M_PROTOFLAGS)
+ M_TSTMP_HPREC|M_PROTOFLAGS|M_VXLANTAG)

/*
* Mbuf flag description for use with printf(9) %b identifier.
@@ -438,6 +605,7 @@ struct mbuf {
#define EXT_JUMBO16 5 /* jumbo cluster 16184 bytes */
#define EXT_PACKET 6 /* mbuf+cluster from packet zone */
#define EXT_MBUF 7 /* external mbuf reference */
+#define EXT_MVEC 8 /* pointer to mbuf vector */

#define EXT_VENDOR1 224 /* for vendor-internal use */
#define EXT_VENDOR2 225 /* for vendor-internal use */
@@ -460,6 +628,8 @@ struct mbuf {
*/
#define EXT_FLAG_EMBREF 0x000001 /* embedded ext_count */
#define EXT_FLAG_EXTREF 0x000002 /* external ext_cnt, notyet */
+#define EXT_FLAG_MVECREF 0x000004 /* reference is an mvec */
+#define EXT_FLAG_EXTFREE 0x000008 /* ext_free is valid */

#define EXT_FLAG_NOFREE 0x000010 /* don't free mbuf to pool, notyet */

@@ -637,7 +807,6 @@ u_int m_fixhdr(struct mbuf *);
struct mbuf *m_fragment(struct mbuf *, int, int);
void m_freem(struct mbuf *);
struct mbuf *m_get2(int, int, short, int);
-struct mbuf *m_getjcl(int, short, int, int);
struct mbuf *m_getm2(struct mbuf *, int, int, short, int);
struct mbuf *m_getptr(struct mbuf *, int, int *);
u_int m_length(struct mbuf *, struct mbuf **);
@@ -751,6 +920,7 @@ m_init(struct mbuf *m, int how, short type, int flags)
m->m_len = 0;
m->m_flags = flags;
m->m_type = type;
+ m->m_ext.ext_free = NULL;
if (flags & M_PKTHDR)
error = m_pkthdr_init(m, how);
else
@@ -786,19 +956,6 @@ m_gethdr(int how, short type)
return (m);
}

-static __inline struct mbuf *
-m_getcl(int how, short type, int flags)
-{
- struct mbuf *m;
- struct mb_args args;
-
- args.flags = flags;
- args.type = type;
- m = uma_zalloc_arg(zone_pack, &args, how);
- MBUF_PROBE4(m__getcl, how, type, flags, m);
- return (m);
-}
-
/*
* XXX: m_cljset() is a dangerous API. One must attach only a new,
* unreferenced cluster to an mbuf(9). It is not possible to assert
@@ -838,12 +995,37 @@ m_cljset(struct mbuf *m, void *cl, int type)
m->m_flags |= M_EXT;
MBUF_PROBE3(m__cljset, m, cl, type);
}
-
-static __inline void
-m_chtype(struct mbuf *m, short new_type)
+/*
+ * m_getjcl() returns an mbuf with a cluster of the specified size attached.
+ * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
+ */
+static __inline struct mbuf *
+m_getjcl(int how, short type, int flags, int size)
{
+ struct mb_args args;
+ struct mbuf *m, *n;
+ uma_zone_t zone;
+
+ args.flags = flags;
+ args.type = type;
+
+ m = uma_zalloc_arg(zone_mbuf, &args, how);
+ if (m == NULL)
  1833. + return (NULL);
  1834. +
  1835. + zone = m_getzone(size);
  1836. + n = uma_zalloc_arg(zone, m, how);
  1837. + if (n == NULL) {
  1838. + uma_zfree(zone_mbuf, m);
  1839. + return (NULL);
  1840. + }
  1841. + return (m);
  1842. +}
  1843.  
  1844. - m->m_type = new_type;
  1845. +static __inline struct mbuf *
  1846. +m_getcl(int how, short type, int flags)
  1847. +{
  1848. + return (m_getjcl(how, type, flags, MCLBYTES));
  1849. }
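With m_getcl() now a thin wrapper around m_getjcl(), both allocate the mbuf
and the cluster from their separate zones; standard mbuf(9) usage is
unchanged:

        struct mbuf *m, *mj;

        m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);               /* 2K */
        mj = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MJUM9BYTES); /* 9K */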
  1850.  
  1851. static __inline void
  1852. @@ -1008,12 +1190,6 @@ m_align(struct mbuf *m, int len)
  1853. *_mmp = _mm; \
  1854. } while (0)
  1855.  
  1856. -/*
  1857. - * Change mbuf to new type. This is a relatively expensive operation and
  1858. - * should be avoided.
  1859. - */
  1860. -#define MCHTYPE(m, t) m_chtype((m), (t))
  1861. -
  1862. /* Length to m_copy to copy all. */
  1863. #define M_COPYALL 1000000000
  1864.  
  1865. @@ -1219,6 +1395,19 @@ m_free(struct mbuf *m)
  1866. return (n);
  1867. }
  1868.  
  1869. +static __inline void
  1870. +m_freechain(struct mbuf *m)
  1871. +{
  1872. + struct mbuf *mp, *mnext;
  1873. +
  1874. + mp = m;
  1875. + while (mp != NULL) {
  1876. + mnext = mp->m_nextpkt;
  1877. + m_freem(mp);
  1878. + mp = mnext;
  1879. + }
  1880. +}
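A typical use is disposing of a whole batch of packets linked through
m_nextpkt, e.g. on a transmit-path error (a sketch):

        /* Drop every packet queued behind `batch`. */
        if (error != 0) {
                m_freechain(batch);
                return (error);
        }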
  1881. +
  1882. static __inline int
  1883. rt_m_getfib(struct mbuf *m)
  1884. {
  1885. diff --git a/sys/sys/proc.h b/sys/sys/proc.h
  1886. index 4af92a8297b..c015889570c 100644
  1887. --- a/sys/sys/proc.h
  1888. +++ b/sys/sys/proc.h
  1889. @@ -51,6 +51,7 @@
  1890. #include <sys/lock_profile.h>
  1891. #include <sys/_mutex.h>
  1892. #include <sys/osd.h>
  1893. +#include <sys/ktr.h>
  1894. #include <sys/priority.h>
  1895. #include <sys/rtprio.h> /* XXX. */
  1896. #include <sys/runq.h>
  1897. @@ -1136,6 +1137,52 @@ td_softdep_cleanup(struct thread *td)
  1898. softdep_ast_cleanup(td);
  1899. }
  1900.  
  1901. +extern u_char kdb_active;
  1902. +void critical_preempt(struct thread *td);
  1903. +
  1904. +static __inline void
  1905. +_critical_enter(void)
  1906. +{
  1907. + struct thread *td;
  1908. +
  1909. + td = curthread;
  1910. + td->td_critnest++;
  1911. + __compiler_membar();
  1912. +
  1913. + CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
  1914. + (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
  1915. +}
  1916. +
  1917. +static __inline void
  1918. +_critical_exit(void)
  1919. +{
  1920. + struct thread *td;
  1921. +
  1922. + td = curthread;
  1923. + KASSERT(td->td_critnest != 0,
  1924. + ("critical_exit: td_critnest == 0"));
  1925. + __compiler_membar();
  1926. + if (__predict_true(td->td_critnest == 1)) {
  1927. + td->td_critnest = 0;
  1928. +
  1929. + /*
  1930. + * Interrupt handlers execute critical_exit() on
  1931. + * leave, and td_owepreempt may be left set by an
  1932. + * interrupt handler only when td_critnest > 0. If we
  1933. + * are decrementing td_critnest from 1 to 0, read
  1934. + * td_owepreempt after decrementing, to not miss the
  1935. + * preempt. Disallow compiler to reorder operations.
  1936. + */
  1937. + __compiler_membar();
  1938. + if (__predict_false(td->td_owepreempt && !kdb_active))
  1939. + critical_preempt(td);
  1940. + } else
  1941. + td->td_critnest--;
  1942. +
  1943. + CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
  1944. + (long)td->td_proc->p_pid, td->td_name, td->td_critnest);
  1945. +}
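Assuming critical_enter()/critical_exit() are mapped onto these inlines,
the usual pattern is unchanged, with the preemption itself now handled out
of line by critical_preempt():

        u_int cpu;

        critical_enter();
        /* Per-CPU state is stable here; the thread cannot migrate. */
        cpu = PCPU_GET(cpuid);
        critical_exit();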
  1946. +
  1947. #endif /* _KERNEL */
  1948.  
  1949. #endif /* !_SYS_PROC_H_ */
  1950. diff --git a/sys/sys/sglist.h b/sys/sys/sglist.h
  1951. index 5674416c07a..ba1dad1bc7d 100644
  1952. --- a/sys/sys/sglist.h
  1953. +++ b/sys/sys/sglist.h
  1954. @@ -88,6 +88,7 @@ struct sglist *sglist_alloc(int nsegs, int mflags);
  1955. int sglist_append(struct sglist *sg, void *buf, size_t len);
  1956. int sglist_append_bio(struct sglist *sg, struct bio *bp);
  1957. int sglist_append_mbuf(struct sglist *sg, struct mbuf *m0);
  1958. +int sglist_append_mvec(struct sglist *sg, struct mbuf *m0);
  1959. int sglist_append_phys(struct sglist *sg, vm_paddr_t paddr,
  1960. size_t len);
  1961. int sglist_append_sglist(struct sglist *sg, struct sglist *source,
  1962. diff --git a/sys/sys/sockio.h b/sys/sys/sockio.h
  1963. index cc335e88707..14e5e7d8da6 100644