Guest User

Untitled

a guest
Mar 1st, 2018
169
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 124.81 KB | None | 0 0
  1. diff -u linux-2.6.20.y/net/ipv4/ipvs/Kconfig linux-2.6.27.y/net/ipv4/ipvs/Kconfig
  2. --- linux-2.6.20.y/net/ipv4/ipvs/Kconfig 2009-02-16 11:57:22.000000000 -0400
  3. +++ linux-2.6.27.y/net/ipv4/ipvs/Kconfig 2009-02-16 12:56:22.000000000 -0400
  4. @@ -1,10 +1,7 @@
  5. #
  6. # IP Virtual Server configuration
  7. #
  8. -menu "IP: Virtual Server Configuration"
  9. - depends on NETFILTER
  10. -
  11. -config IP_VS
  12. +menuconfig IP_VS
  13. tristate "IP virtual server support (EXPERIMENTAL)"
  14. depends on NETFILTER
  15. ---help---
  16. @@ -25,9 +22,10 @@
  17. If you want to compile it in kernel, say Y. To compile it as a
  18. module, choose M here. If unsure, say N.
  19.  
  20. +if IP_VS
  21. +
  22. config IP_VS_DEBUG
  23. bool "IP virtual server debugging"
  24. - depends on IP_VS
  25. ---help---
  26. Say Y here if you want to get additional messages useful in
  27. debugging the IP virtual server code. You can change the debug
  28. @@ -35,7 +33,6 @@
  29.  
  30. config IP_VS_TAB_BITS
  31. int "IPVS connection table size (the Nth power of 2)"
  32. - depends on IP_VS
  33. default "12"
  34. ---help---
  35. The IPVS connection hash table uses the chaining scheme to handle
  36. @@ -61,42 +58,35 @@
  37. needed for your box.
  38.  
  39. comment "IPVS transport protocol load balancing support"
  40. - depends on IP_VS
  41.  
  42. config IP_VS_PROTO_TCP
  43. bool "TCP load balancing support"
  44. - depends on IP_VS
  45. ---help---
  46. This option enables support for load balancing TCP transport
  47. protocol. Say Y if unsure.
  48.  
  49. config IP_VS_PROTO_UDP
  50. bool "UDP load balancing support"
  51. - depends on IP_VS
  52. ---help---
  53. This option enables support for load balancing UDP transport
  54. protocol. Say Y if unsure.
  55.  
  56. config IP_VS_PROTO_ESP
  57. bool "ESP load balancing support"
  58. - depends on IP_VS
  59. ---help---
  60. This option enables support for load balancing ESP (Encapsulation
  61. Security Payload) transport protocol. Say Y if unsure.
  62.  
  63. config IP_VS_PROTO_AH
  64. bool "AH load balancing support"
  65. - depends on IP_VS
  66. ---help---
  67. This option enables support for load balancing AH (Authentication
  68. Header) transport protocol. Say Y if unsure.
  69.  
  70. comment "IPVS scheduler"
  71. - depends on IP_VS
  72.  
  73. config IP_VS_RR
  74. tristate "round-robin scheduling"
  75. - depends on IP_VS
  76. ---help---
  77. The robin-robin scheduling algorithm simply directs network
  78. connections to different real servers in a round-robin manner.
  79. @@ -106,7 +96,6 @@
  80.  
  81. config IP_VS_WRR
  82. tristate "weighted round-robin scheduling"
  83. - depends on IP_VS
  84. ---help---
  85. The weighted robin-robin scheduling algorithm directs network
  86. connections to different real servers based on server weights
  87. @@ -120,7 +109,6 @@
  88.  
  89. config IP_VS_LC
  90. tristate "least-connection scheduling"
  91. - depends on IP_VS
  92. ---help---
  93. The least-connection scheduling algorithm directs network
  94. connections to the server with the least number of active
  95. @@ -131,7 +119,6 @@
  96.  
  97. config IP_VS_WLC
  98. tristate "weighted least-connection scheduling"
  99. - depends on IP_VS
  100. ---help---
  101. The weighted least-connection scheduling algorithm directs network
  102. connections to the server with the least active connections
  103. @@ -142,7 +129,6 @@
  104.  
  105. config IP_VS_LBLC
  106. tristate "locality-based least-connection scheduling"
  107. - depends on IP_VS
  108. ---help---
  109. The locality-based least-connection scheduling algorithm is for
  110. destination IP load balancing. It is usually used in cache cluster.
  111. @@ -157,7 +143,6 @@
  112.  
  113. config IP_VS_LBLCR
  114. tristate "locality-based least-connection with replication scheduling"
  115. - depends on IP_VS
  116. ---help---
  117. The locality-based least-connection with replication scheduling
  118. algorithm is also for destination IP load balancing. It is
  119. @@ -176,7 +161,6 @@
  120.  
  121. config IP_VS_DH
  122. tristate "destination hashing scheduling"
  123. - depends on IP_VS
  124. ---help---
  125. The destination hashing scheduling algorithm assigns network
  126. connections to the servers through looking up a statically assigned
  127. @@ -187,7 +171,6 @@
  128.  
  129. config IP_VS_SH
  130. tristate "source hashing scheduling"
  131. - depends on IP_VS
  132. ---help---
  133. The source hashing scheduling algorithm assigns network
  134. connections to the servers through looking up a statically assigned
  135. @@ -198,7 +181,6 @@
  136.  
  137. config IP_VS_SED
  138. tristate "shortest expected delay scheduling"
  139. - depends on IP_VS
  140. ---help---
  141. The shortest expected delay scheduling algorithm assigns network
  142. connections to the server with the shortest expected delay. The
  143. @@ -212,7 +194,6 @@
  144.  
  145. config IP_VS_NQ
  146. tristate "never queue scheduling"
  147. - depends on IP_VS
  148. ---help---
  149. The never queue scheduling algorithm adopts a two-speed model.
  150. When there is an idle server available, the job will be sent to
  151. @@ -225,11 +206,10 @@
  152. module, choose M here. If unsure, say N.
  153.  
  154. comment 'IPVS application helper'
  155. - depends on IP_VS
  156.  
  157. config IP_VS_FTP
  158. tristate "FTP protocol helper"
  159. - depends on IP_VS && IP_VS_PROTO_TCP
  160. + depends on IP_VS_PROTO_TCP
  161. ---help---
  162. FTP is a protocol that transfers IP address and/or port number in
  163. the payload. In the virtual server via Network Address Translation,
  164. @@ -241,4 +221,4 @@
  165. If you want to compile it in kernel, say Y. To compile it as a
  166. module, choose M here. If unsure, say N.
  167.  
  168. -endmenu
  169. +endif # IP_VS
  170. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_app.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_app.c
  171. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_app.c 2009-02-16 11:57:22.000000000 -0400
  172. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_app.c 2009-02-16 12:56:22.000000000 -0400
  173. @@ -1,8 +1,6 @@
  174. /*
  175. * ip_vs_app.c: Application module support for IPVS
  176. *
  177. - * Version: $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $
  178. - *
  179. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  180. *
  181. * This program is free software; you can redistribute it and/or
  182. @@ -25,6 +23,8 @@
  183. #include <linux/skbuff.h>
  184. #include <linux/in.h>
  185. #include <linux/ip.h>
  186. +#include <linux/netfilter.h>
  187. +#include <net/net_namespace.h>
  188. #include <net/protocol.h>
  189. #include <net/tcp.h>
  190. #include <asm/system.h>
  191. @@ -49,18 +49,13 @@
  192. */
  193. static inline int ip_vs_app_get(struct ip_vs_app *app)
  194. {
  195. - /* test and get the module atomically */
  196. - if (app->module)
  197. - return try_module_get(app->module);
  198. - else
  199. - return 1;
  200. + return try_module_get(app->module);
  201. }
  202.  
  203.  
  204. static inline void ip_vs_app_put(struct ip_vs_app *app)
  205. {
  206. - if (app->module)
  207. - module_put(app->module);
  208. + module_put(app->module);
  209. }
  210.  
  211.  
  212. @@ -327,18 +322,18 @@
  213. spin_unlock(&cp->lock);
  214. }
  215.  
  216. -static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
  217. +static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
  218. struct ip_vs_app *app)
  219. {
  220. int diff;
  221. - unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
  222. + const unsigned int tcp_offset = ip_hdrlen(skb);
  223. struct tcphdr *th;
  224. __u32 seq;
  225.  
  226. - if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
  227. + if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
  228. return 0;
  229.  
  230. - th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
  231. + th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
  232.  
  233. /*
  234. * Remember seq number in case this pkt gets resized
  235. @@ -359,7 +354,7 @@
  236. if (app->pkt_out == NULL)
  237. return 1;
  238.  
  239. - if (!app->pkt_out(app, cp, pskb, &diff))
  240. + if (!app->pkt_out(app, cp, skb, &diff))
  241. return 0;
  242.  
  243. /*
  244. @@ -377,7 +372,7 @@
  245. * called by ipvs packet handler, assumes previously checked cp!=NULL
  246. * returns false if it can't handle packet (oom)
  247. */
  248. -int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
  249. +int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
  250. {
  251. struct ip_vs_app *app;
  252.  
  253. @@ -390,7 +385,7 @@
  254.  
  255. /* TCP is complicated */
  256. if (cp->protocol == IPPROTO_TCP)
  257. - return app_tcp_pkt_out(cp, pskb, app);
  258. + return app_tcp_pkt_out(cp, skb, app);
  259.  
  260. /*
  261. * Call private output hook function
  262. @@ -398,22 +393,22 @@
  263. if (app->pkt_out == NULL)
  264. return 1;
  265.  
  266. - return app->pkt_out(app, cp, pskb, NULL);
  267. + return app->pkt_out(app, cp, skb, NULL);
  268. }
  269.  
  270.  
  271. -static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
  272. +static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
  273. struct ip_vs_app *app)
  274. {
  275. int diff;
  276. - unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
  277. + const unsigned int tcp_offset = ip_hdrlen(skb);
  278. struct tcphdr *th;
  279. __u32 seq;
  280.  
  281. - if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
  282. + if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
  283. return 0;
  284.  
  285. - th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
  286. + th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
  287.  
  288. /*
  289. * Remember seq number in case this pkt gets resized
  290. @@ -434,7 +429,7 @@
  291. if (app->pkt_in == NULL)
  292. return 1;
  293.  
  294. - if (!app->pkt_in(app, cp, pskb, &diff))
  295. + if (!app->pkt_in(app, cp, skb, &diff))
  296. return 0;
  297.  
  298. /*
  299. @@ -452,7 +447,7 @@
  300. * called by ipvs packet handler, assumes previously checked cp!=NULL.
  301. * returns false if can't handle packet (oom).
  302. */
  303. -int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
  304. +int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
  305. {
  306. struct ip_vs_app *app;
  307.  
  308. @@ -465,7 +460,7 @@
  309.  
  310. /* TCP is complicated */
  311. if (cp->protocol == IPPROTO_TCP)
  312. - return app_tcp_pkt_in(cp, pskb, app);
  313. + return app_tcp_pkt_in(cp, skb, app);
  314.  
  315. /*
  316. * Call private input hook function
  317. @@ -473,7 +468,7 @@
  318. if (app->pkt_in == NULL)
  319. return 1;
  320.  
  321. - return app->pkt_in(app, cp, pskb, NULL);
  322. + return app->pkt_in(app, cp, skb, NULL);
  323. }
  324.  
  325.  
  326. @@ -549,7 +544,7 @@
  327. return 0;
  328. }
  329.  
  330. -static struct seq_operations ip_vs_app_seq_ops = {
  331. +static const struct seq_operations ip_vs_app_seq_ops = {
  332. .start = ip_vs_app_seq_start,
  333. .next = ip_vs_app_seq_next,
  334. .stop = ip_vs_app_seq_stop,
  335. @@ -561,7 +556,7 @@
  336. return seq_open(file, &ip_vs_app_seq_ops);
  337. }
  338.  
  339. -static struct file_operations ip_vs_app_fops = {
  340. +static const struct file_operations ip_vs_app_fops = {
  341. .owner = THIS_MODULE,
  342. .open = ip_vs_app_open,
  343. .read = seq_read,
  344. @@ -577,7 +572,6 @@
  345. int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
  346. char *o_buf, int o_len, char *n_buf, int n_len)
  347. {
  348. - struct iphdr *iph;
  349. int diff;
  350. int o_offset;
  351. int o_left;
  352. @@ -603,27 +597,26 @@
  353. skb_put(skb, diff);
  354. memmove(skb->data + o_offset + n_len,
  355. skb->data + o_offset + o_len, o_left);
  356. - memcpy(skb->data + o_offset, n_buf, n_len);
  357. + skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
  358. }
  359.  
  360. /* must update the iph total length here */
  361. - iph = skb->nh.iph;
  362. - iph->tot_len = htons(skb->len);
  363. + ip_hdr(skb)->tot_len = htons(skb->len);
  364.  
  365. LeaveFunction(9);
  366. return 0;
  367. }
  368.  
  369.  
  370. -int ip_vs_app_init(void)
  371. +int __init ip_vs_app_init(void)
  372. {
  373. /* we will replace it with proc_net_ipvs_create() soon */
  374. - proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops);
  375. + proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
  376. return 0;
  377. }
  378.  
  379.  
  380. void ip_vs_app_cleanup(void)
  381. {
  382. - proc_net_remove("ip_vs_app");
  383. + proc_net_remove(&init_net, "ip_vs_app");
  384. }
  385. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_conn.c
  386. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_conn.c 2009-02-16 11:57:22.000000000 -0400
  387. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_conn.c 2009-02-16 12:56:22.000000000 -0400
  388. @@ -5,8 +5,6 @@
  389. * high-performance and highly available server based on a
  390. * cluster of servers.
  391. *
  392. - * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $
  393. - *
  394. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  395. * Peter Kese <peter.kese@ijs.si>
  396. * Julian Anastasov <ja@ssi.bg>
  397. @@ -35,6 +33,7 @@
  398. #include <linux/jhash.h>
  399. #include <linux/random.h>
  400.  
  401. +#include <net/net_namespace.h>
  402. #include <net/ip_vs.h>
  403.  
  404.  
  405. @@ -392,7 +391,15 @@
  406. atomic_inc(&dest->refcnt);
  407.  
  408. /* Bind with the destination and its corresponding transmitter */
  409. - cp->flags |= atomic_read(&dest->conn_flags);
  410. + if ((cp->flags & IP_VS_CONN_F_SYNC) &&
  411. + (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
  412. + /* if the connection is not template and is created
  413. + * by sync, preserve the activity flag.
  414. + */
  415. + cp->flags |= atomic_read(&dest->conn_flags) &
  416. + (~IP_VS_CONN_F_INACTIVE);
  417. + else
  418. + cp->flags |= atomic_read(&dest->conn_flags);
  419. cp->dest = dest;
  420.  
  421. IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
  422. @@ -411,7 +418,11 @@
  423. /* It is a normal connection, so increase the inactive
  424. connection counter because it is in TCP SYNRECV
  425. state (inactive) or other protocol inacive state */
  426. - atomic_inc(&dest->inactconns);
  427. + if ((cp->flags & IP_VS_CONN_F_SYNC) &&
  428. + (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
  429. + atomic_inc(&dest->activeconns);
  430. + else
  431. + atomic_inc(&dest->inactconns);
  432. } else {
  433. /* It is a persistent connection/template, so increase
  434. the peristent connection counter */
  435. @@ -425,6 +436,24 @@
  436.  
  437.  
  438. /*
  439. + * Check if there is a destination for the connection, if so
  440. + * bind the connection to the destination.
  441. + */
  442. +struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
  443. +{
  444. + struct ip_vs_dest *dest;
  445. +
  446. + if ((cp) && (!cp->dest)) {
  447. + dest = ip_vs_find_dest(cp->daddr, cp->dport,
  448. + cp->vaddr, cp->vport, cp->protocol);
  449. + ip_vs_bind_dest(cp, dest);
  450. + return dest;
  451. + } else
  452. + return NULL;
  453. +}
  454. +
  455. +
  456. +/*
  457. * Unbind a connection entry with its VS destination
  458. * Called by the ip_vs_conn_expire function.
  459. */
  460. @@ -494,8 +523,8 @@
  461. * Checking the dest server status.
  462. */
  463. if ((dest == NULL) ||
  464. - !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
  465. - (sysctl_ip_vs_expire_quiescent_template &&
  466. + !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
  467. + (sysctl_ip_vs_expire_quiescent_template &&
  468. (atomic_read(&dest->weight) == 0))) {
  469. IP_VS_DBG(9, "check_template: dest not available for "
  470. "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
  471. @@ -603,17 +632,14 @@
  472. struct ip_vs_conn *cp;
  473. struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
  474.  
  475. - cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
  476. + cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
  477. if (cp == NULL) {
  478. IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
  479. return NULL;
  480. }
  481.  
  482. - memset(cp, 0, sizeof(*cp));
  483. INIT_LIST_HEAD(&cp->c_list);
  484. - init_timer(&cp->timer);
  485. - cp->timer.data = (unsigned long)cp;
  486. - cp->timer.function = ip_vs_conn_expire;
  487. + setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
  488. cp->protocol = proto;
  489. cp->caddr = caddr;
  490. cp->cport = cport;
  491. @@ -667,7 +693,7 @@
  492. {
  493. int idx;
  494. struct ip_vs_conn *cp;
  495. -
  496. +
  497. for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
  498. ct_read_lock_bh(idx);
  499. list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
  500. @@ -695,7 +721,7 @@
  501. int idx;
  502.  
  503. ++*pos;
  504. - if (v == SEQ_START_TOKEN)
  505. + if (v == SEQ_START_TOKEN)
  506. return ip_vs_conn_array(seq, 0);
  507.  
  508. /* more on same hash chain? */
  509. @@ -710,7 +736,7 @@
  510. list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
  511. seq->private = &ip_vs_conn_tab[idx];
  512. return cp;
  513. - }
  514. + }
  515. ct_read_unlock_bh(idx);
  516. }
  517. seq->private = NULL;
  518. @@ -746,7 +772,7 @@
  519. return 0;
  520. }
  521.  
  522. -static struct seq_operations ip_vs_conn_seq_ops = {
  523. +static const struct seq_operations ip_vs_conn_seq_ops = {
  524. .start = ip_vs_conn_seq_start,
  525. .next = ip_vs_conn_seq_next,
  526. .stop = ip_vs_conn_seq_stop,
  527. @@ -758,13 +784,64 @@
  528. return seq_open(file, &ip_vs_conn_seq_ops);
  529. }
  530.  
  531. -static struct file_operations ip_vs_conn_fops = {
  532. +static const struct file_operations ip_vs_conn_fops = {
  533. .owner = THIS_MODULE,
  534. .open = ip_vs_conn_open,
  535. .read = seq_read,
  536. .llseek = seq_lseek,
  537. .release = seq_release,
  538. };
  539. +
  540. +static const char *ip_vs_origin_name(unsigned flags)
  541. +{
  542. + if (flags & IP_VS_CONN_F_SYNC)
  543. + return "SYNC";
  544. + else
  545. + return "LOCAL";
  546. +}
  547. +
  548. +static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
  549. +{
  550. +
  551. + if (v == SEQ_START_TOKEN)
  552. + seq_puts(seq,
  553. + "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
  554. + else {
  555. + const struct ip_vs_conn *cp = v;
  556. +
  557. + seq_printf(seq,
  558. + "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
  559. + ip_vs_proto_name(cp->protocol),
  560. + ntohl(cp->caddr), ntohs(cp->cport),
  561. + ntohl(cp->vaddr), ntohs(cp->vport),
  562. + ntohl(cp->daddr), ntohs(cp->dport),
  563. + ip_vs_state_name(cp->protocol, cp->state),
  564. + ip_vs_origin_name(cp->flags),
  565. + (cp->timer.expires-jiffies)/HZ);
  566. + }
  567. + return 0;
  568. +}
  569. +
  570. +static const struct seq_operations ip_vs_conn_sync_seq_ops = {
  571. + .start = ip_vs_conn_seq_start,
  572. + .next = ip_vs_conn_seq_next,
  573. + .stop = ip_vs_conn_seq_stop,
  574. + .show = ip_vs_conn_sync_seq_show,
  575. +};
  576. +
  577. +static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
  578. +{
  579. + return seq_open(file, &ip_vs_conn_sync_seq_ops);
  580. +}
  581. +
  582. +static const struct file_operations ip_vs_conn_sync_fops = {
  583. + .owner = THIS_MODULE,
  584. + .open = ip_vs_conn_sync_open,
  585. + .read = seq_read,
  586. + .llseek = seq_lseek,
  587. + .release = seq_release,
  588. +};
  589. +
  590. #endif
  591.  
  592.  
  593. @@ -888,7 +965,7 @@
  594. }
  595.  
  596.  
  597. -int ip_vs_conn_init(void)
  598. +int __init ip_vs_conn_init(void)
  599. {
  600. int idx;
  601.  
  602. @@ -902,7 +979,7 @@
  603. /* Allocate ip_vs_conn slab cache */
  604. ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
  605. sizeof(struct ip_vs_conn), 0,
  606. - SLAB_HWCACHE_ALIGN, NULL, NULL);
  607. + SLAB_HWCACHE_ALIGN, NULL);
  608. if (!ip_vs_conn_cachep) {
  609. vfree(ip_vs_conn_tab);
  610. return -ENOMEM;
  611. @@ -923,7 +1000,8 @@
  612. rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
  613. }
  614.  
  615. - proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops);
  616. + proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
  617. + proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
  618.  
  619. /* calculate the random value for connection hash */
  620. get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
  621. @@ -939,6 +1017,7 @@
  622.  
  623. /* Release the empty cache */
  624. kmem_cache_destroy(ip_vs_conn_cachep);
  625. - proc_net_remove("ip_vs_conn");
  626. + proc_net_remove(&init_net, "ip_vs_conn");
  627. + proc_net_remove(&init_net, "ip_vs_conn_sync");
  628. vfree(ip_vs_conn_tab);
  629. }
  630. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_core.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_core.c
  631. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_core.c 2009-02-16 11:57:22.000000000 -0400
  632. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_core.c 2009-02-16 12:56:22.000000000 -0400
  633. @@ -5,8 +5,6 @@
  634. * high-performance and highly available server based on a
  635. * cluster of servers.
  636. *
  637. - * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $
  638. - *
  639. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  640. * Peter Kese <peter.kese@ijs.si>
  641. * Julian Anastasov <ja@ssi.bg>
  642. @@ -58,7 +56,6 @@
  643. #ifdef CONFIG_IP_VS_DEBUG
  644. EXPORT_SYMBOL(ip_vs_get_debug_level);
  645. #endif
  646. -EXPORT_SYMBOL(ip_vs_make_skb_writable);
  647.  
  648.  
  649. /* ID used in ICMP lookups */
  650. @@ -163,42 +160,6 @@
  651. }
  652.  
  653.  
  654. -int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
  655. -{
  656. - struct sk_buff *skb = *pskb;
  657. -
  658. - /* skb is already used, better copy skb and its payload */
  659. - if (unlikely(skb_shared(skb) || skb->sk))
  660. - goto copy_skb;
  661. -
  662. - /* skb data is already used, copy it */
  663. - if (unlikely(skb_cloned(skb)))
  664. - goto copy_data;
  665. -
  666. - return pskb_may_pull(skb, writable_len);
  667. -
  668. - copy_data:
  669. - if (unlikely(writable_len > skb->len))
  670. - return 0;
  671. - return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
  672. -
  673. - copy_skb:
  674. - if (unlikely(writable_len > skb->len))
  675. - return 0;
  676. - skb = skb_copy(skb, GFP_ATOMIC);
  677. - if (!skb)
  678. - return 0;
  679. - BUG_ON(skb_is_nonlinear(skb));
  680. -
  681. - /* Rest of kernel will get very unhappy if we pass it a
  682. - suddenly-orphaned skbuff */
  683. - if ((*pskb)->sk)
  684. - skb_set_owner_w(skb, (*pskb)->sk);
  685. - kfree_skb(*pskb);
  686. - *pskb = skb;
  687. - return 1;
  688. -}
  689. -
  690. /*
  691. * IPVS persistent scheduling function
  692. * It creates a connection entry according to its template if exists,
  693. @@ -212,7 +173,7 @@
  694. __be16 ports[2])
  695. {
  696. struct ip_vs_conn *cp = NULL;
  697. - struct iphdr *iph = skb->nh.iph;
  698. + struct iphdr *iph = ip_hdr(skb);
  699. struct ip_vs_dest *dest;
  700. struct ip_vs_conn *ct;
  701. __be16 dport; /* destination port to forward */
  702. @@ -381,7 +342,7 @@
  703. ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
  704. {
  705. struct ip_vs_conn *cp = NULL;
  706. - struct iphdr *iph = skb->nh.iph;
  707. + struct iphdr *iph = ip_hdr(skb);
  708. struct ip_vs_dest *dest;
  709. __be16 _ports[2], *pptr;
  710.  
  711. @@ -447,7 +408,7 @@
  712. struct ip_vs_protocol *pp)
  713. {
  714. __be16 _ports[2], *pptr;
  715. - struct iphdr *iph = skb->nh.iph;
  716. + struct iphdr *iph = ip_hdr(skb);
  717.  
  718. pptr = skb_header_pointer(skb, iph->ihl*4,
  719. sizeof(_ports), _ports);
  720. @@ -460,7 +421,7 @@
  721. and the destination is RTN_UNICAST (and not local), then create
  722. a cache_bypass connection entry */
  723. if (sysctl_ip_vs_cache_bypass && svc->fwmark
  724. - && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
  725. + && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
  726. int ret, cs;
  727. struct ip_vs_conn *cp;
  728.  
  729. @@ -518,19 +479,19 @@
  730.  
  731.  
  732. /*
  733. - * It is hooked before NF_IP_PRI_NAT_SRC at the NF_IP_POST_ROUTING
  734. + * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
  735. * chain, and is used for VS/NAT.
  736. * It detects packets for VS/NAT connections and sends the packets
  737. * immediately. This can avoid that iptable_nat mangles the packets
  738. * for VS/NAT.
  739. */
  740. static unsigned int ip_vs_post_routing(unsigned int hooknum,
  741. - struct sk_buff **pskb,
  742. + struct sk_buff *skb,
  743. const struct net_device *in,
  744. const struct net_device *out,
  745. int (*okfn)(struct sk_buff *))
  746. {
  747. - if (!((*pskb)->ipvs_property))
  748. + if (!skb->ipvs_property)
  749. return NF_ACCEPT;
  750. /* The packet was sent from IPVS, exit this chain */
  751. return NF_STOP;
  752. @@ -541,13 +502,14 @@
  753. return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
  754. }
  755.  
  756. -static inline struct sk_buff *
  757. -ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
  758. +static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
  759. {
  760. - skb = ip_defrag(skb, user);
  761. - if (skb)
  762. - ip_send_check(skb->nh.iph);
  763. - return skb;
  764. + int err = ip_defrag(skb, user);
  765. +
  766. + if (!err)
  767. + ip_send_check(ip_hdr(skb));
  768. +
  769. + return err;
  770. }
  771.  
  772. /*
  773. @@ -557,9 +519,10 @@
  774. void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
  775. struct ip_vs_conn *cp, int inout)
  776. {
  777. - struct iphdr *iph = skb->nh.iph;
  778. + struct iphdr *iph = ip_hdr(skb);
  779. unsigned int icmp_offset = iph->ihl*4;
  780. - struct icmphdr *icmph = (struct icmphdr *)(skb->nh.raw + icmp_offset);
  781. + struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) +
  782. + icmp_offset);
  783. struct iphdr *ciph = (struct iphdr *)(icmph + 1);
  784.  
  785. if (inout) {
  786. @@ -604,9 +567,8 @@
  787. * Currently handles error types - unreachable, quench, ttl exceeded.
  788. * (Only used in VS/NAT)
  789. */
  790. -static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
  791. +static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
  792. {
  793. - struct sk_buff *skb = *pskb;
  794. struct iphdr *iph;
  795. struct icmphdr _icmph, *ic;
  796. struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
  797. @@ -617,14 +579,12 @@
  798. *related = 1;
  799.  
  800. /* reassemble IP fragments */
  801. - if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
  802. - skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
  803. - if (!skb)
  804. + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
  805. + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
  806. return NF_STOLEN;
  807. - *pskb = skb;
  808. }
  809.  
  810. - iph = skb->nh.iph;
  811. + iph = ip_hdr(skb);
  812. offset = ihl = iph->ihl * 4;
  813. ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
  814. if (ic == NULL)
  815. @@ -659,7 +619,7 @@
  816. return NF_ACCEPT;
  817.  
  818. /* Is the embedded protocol header present? */
  819. - if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
  820. + if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
  821. pp->dont_defrag))
  822. return NF_ACCEPT;
  823.  
  824. @@ -675,13 +635,12 @@
  825. verdict = NF_DROP;
  826.  
  827. if (IP_VS_FWD_METHOD(cp) != 0) {
  828. - IP_VS_ERR("shouldn't reach here, because the box is on the"
  829. + IP_VS_ERR("shouldn't reach here, because the box is on the "
  830. "half connection in the tun/dr module.\n");
  831. }
  832.  
  833. /* Ensure the checksum is correct */
  834. - if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
  835. - ip_vs_checksum_complete(skb, ihl)) {
  836. + if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
  837. /* Failed checksum! */
  838. IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
  839. NIPQUAD(iph->saddr));
  840. @@ -690,9 +649,8 @@
  841.  
  842. if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
  843. offset += 2 * sizeof(__u16);
  844. - if (!ip_vs_make_skb_writable(pskb, offset))
  845. + if (!skb_make_writable(skb, offset))
  846. goto out;
  847. - skb = *pskb;
  848.  
  849. ip_vs_nat_icmp(skb, pp, cp, 1);
  850.  
  851. @@ -712,24 +670,22 @@
  852. {
  853. struct tcphdr _tcph, *th;
  854.  
  855. - th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
  856. - sizeof(_tcph), &_tcph);
  857. + th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
  858. if (th == NULL)
  859. return 0;
  860. return th->rst;
  861. }
  862.  
  863. /*
  864. - * It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT.
  865. + * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
  866. * Check if outgoing packet belongs to the established ip_vs_conn,
  867. * rewrite addresses of the packet and send it on its way...
  868. */
  869. static unsigned int
  870. -ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
  871. +ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
  872. const struct net_device *in, const struct net_device *out,
  873. int (*okfn)(struct sk_buff *))
  874. {
  875. - struct sk_buff *skb = *pskb;
  876. struct iphdr *iph;
  877. struct ip_vs_protocol *pp;
  878. struct ip_vs_conn *cp;
  879. @@ -740,14 +696,13 @@
  880. if (skb->ipvs_property)
  881. return NF_ACCEPT;
  882.  
  883. - iph = skb->nh.iph;
  884. + iph = ip_hdr(skb);
  885. if (unlikely(iph->protocol == IPPROTO_ICMP)) {
  886. - int related, verdict = ip_vs_out_icmp(pskb, &related);
  887. + int related, verdict = ip_vs_out_icmp(skb, &related);
  888.  
  889. if (related)
  890. return verdict;
  891. - skb = *pskb;
  892. - iph = skb->nh.iph;
  893. + iph = ip_hdr(skb);
  894. }
  895.  
  896. pp = ip_vs_proto_get(iph->protocol);
  897. @@ -755,13 +710,11 @@
  898. return NF_ACCEPT;
  899.  
  900. /* reassemble IP fragments */
  901. - if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
  902. + if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
  903. !pp->dont_defrag)) {
  904. - skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
  905. - if (!skb)
  906. + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
  907. return NF_STOLEN;
  908. - iph = skb->nh.iph;
  909. - *pskb = skb;
  910. + iph = ip_hdr(skb);
  911. }
  912.  
  913. ihl = iph->ihl << 2;
  914. @@ -803,25 +756,23 @@
  915.  
  916. IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
  917.  
  918. - if (!ip_vs_make_skb_writable(pskb, ihl))
  919. + if (!skb_make_writable(skb, ihl))
  920. goto drop;
  921.  
  922. /* mangle the packet */
  923. - if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
  924. + if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
  925. + goto drop;
  926. + ip_hdr(skb)->saddr = cp->vaddr;
  927. + ip_send_check(ip_hdr(skb));
  928. +
  929. + /* For policy routing, packets originating from this
  930. + * machine itself may be routed differently to packets
  931. + * passing through. We want this packet to be routed as
  932. + * if it came from this machine itself. So re-compute
  933. + * the routing information.
  934. + */
  935. + if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
  936. goto drop;
  937. - skb = *pskb;
  938. - skb->nh.iph->saddr = cp->vaddr;
  939. - ip_send_check(skb->nh.iph);
  940. -
  941. - /* For policy routing, packets originating from this
  942. - * machine itself may be routed differently to packets
  943. - * passing through. We want this packet to be routed as
  944. - * if it came from this machine itself. So re-compute
  945. - * the routing information.
  946. - */
  947. - if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
  948. - goto drop;
  949. - skb = *pskb;
  950.  
  951. IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
  952.  
  953. @@ -836,7 +787,7 @@
  954.  
  955. drop:
  956. ip_vs_conn_put(cp);
  957. - kfree_skb(*pskb);
  958. + kfree_skb(skb);
  959. return NF_STOLEN;
  960. }
  961.  
  962. @@ -847,10 +798,9 @@
  963. * forward to the right destination host if relevant.
  964. * Currently handles error types - unreachable, quench, ttl exceeded.
  965. */
  966. -static int
  967. -ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
  968. +static int
  969. +ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
  970. {
  971. - struct sk_buff *skb = *pskb;
  972. struct iphdr *iph;
  973. struct icmphdr _icmph, *ic;
  974. struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
  975. @@ -861,16 +811,13 @@
  976. *related = 1;
  977.  
  978. /* reassemble IP fragments */
  979. - if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
  980. - skb = ip_vs_gather_frags(skb,
  981. - hooknum == NF_IP_LOCAL_IN ?
  982. - IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
  983. - if (!skb)
  984. + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
  985. + if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
  986. + IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
  987. return NF_STOLEN;
  988. - *pskb = skb;
  989. }
  990.  
  991. - iph = skb->nh.iph;
  992. + iph = ip_hdr(skb);
  993. offset = ihl = iph->ihl * 4;
  994. ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
  995. if (ic == NULL)
  996. @@ -905,7 +852,7 @@
  997. return NF_ACCEPT;
  998.  
  999. /* Is the embedded protocol header present? */
  1000. - if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
  1001. + if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
  1002. pp->dont_defrag))
  1003. return NF_ACCEPT;
  1004.  
  1005. @@ -921,8 +868,7 @@
  1006. verdict = NF_DROP;
  1007.  
  1008. /* Ensure the checksum is correct */
  1009. - if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
  1010. - ip_vs_checksum_complete(skb, ihl)) {
  1011. + if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
  1012. /* Failed checksum! */
  1013. IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
  1014. NIPQUAD(iph->saddr));
  1015. @@ -947,11 +893,10 @@
  1016. * and send it on its way...
  1017. */
  1018. static unsigned int
  1019. -ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
  1020. +ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
  1021. const struct net_device *in, const struct net_device *out,
  1022. int (*okfn)(struct sk_buff *))
  1023. {
  1024. - struct sk_buff *skb = *pskb;
  1025. struct iphdr *iph;
  1026. struct ip_vs_protocol *pp;
  1027. struct ip_vs_conn *cp;
  1028. @@ -963,22 +908,21 @@
  1029. * ... don't know why 1st test DOES NOT include 2nd (?)
  1030. */
  1031. if (unlikely(skb->pkt_type != PACKET_HOST
  1032. - || skb->dev == &loopback_dev || skb->sk)) {
  1033. + || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
  1034. IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
  1035. skb->pkt_type,
  1036. - skb->nh.iph->protocol,
  1037. - NIPQUAD(skb->nh.iph->daddr));
  1038. + ip_hdr(skb)->protocol,
  1039. + NIPQUAD(ip_hdr(skb)->daddr));
  1040. return NF_ACCEPT;
  1041. }
  1042.  
  1043. - iph = skb->nh.iph;
  1044. + iph = ip_hdr(skb);
  1045. if (unlikely(iph->protocol == IPPROTO_ICMP)) {
  1046. - int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
  1047. + int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
  1048.  
  1049. if (related)
  1050. return verdict;
  1051. - skb = *pskb;
  1052. - iph = skb->nh.iph;
  1053. + iph = ip_hdr(skb);
  1054. }
  1055.  
  1056. /* Protocol supported? */
  1057. @@ -1033,15 +977,24 @@
  1058. ret = NF_ACCEPT;
  1059. }
  1060.  
  1061. - /* increase its packet counter and check if it is needed
  1062. - to be synchronized */
  1063. + /* Increase its packet counter and check if it is needed
  1064. + * to be synchronized
  1065. + *
  1066. + * Sync connection if it is about to close to
  1067. + * encourage the standby servers to update the connections timeout
  1068. + */
  1069. atomic_inc(&cp->in_pkts);
  1070. if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
  1071. - (cp->protocol != IPPROTO_TCP ||
  1072. - cp->state == IP_VS_TCP_S_ESTABLISHED) &&
  1073. - (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
  1074. - == sysctl_ip_vs_sync_threshold[0]))
  1075. + (((cp->protocol != IPPROTO_TCP ||
  1076. + cp->state == IP_VS_TCP_S_ESTABLISHED) &&
  1077. + (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
  1078. + == sysctl_ip_vs_sync_threshold[0])) ||
  1079. + ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
  1080. + ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
  1081. + (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
  1082. + (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
  1083. ip_vs_sync_conn(cp);
  1084. + cp->old_state = cp->state;
  1085.  
  1086. ip_vs_conn_put(cp);
  1087. return ret;
  1088. @@ -1049,65 +1002,64 @@
  1089.  
  1090.  
  1091. /*
  1092. - * It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP
  1093. + * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
  1094. * related packets destined for 0.0.0.0/0.
  1095. * When fwmark-based virtual service is used, such as transparent
  1096. * cache cluster, TCP packets can be marked and routed to ip_vs_in,
  1097. * but ICMP destined for 0.0.0.0/0 cannot not be easily marked and
  1098. - * sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain
  1099. + * sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
  1100. * and send them to ip_vs_in_icmp.
  1101. */
  1102. static unsigned int
  1103. -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
  1104. +ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
  1105. const struct net_device *in, const struct net_device *out,
  1106. int (*okfn)(struct sk_buff *))
  1107. {
  1108. int r;
  1109.  
  1110. - if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
  1111. + if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
  1112. return NF_ACCEPT;
  1113.  
  1114. - return ip_vs_in_icmp(pskb, &r, hooknum);
  1115. + return ip_vs_in_icmp(skb, &r, hooknum);
  1116. }
  1117.  
  1118.  
  1119. -/* After packet filtering, forward packet through VS/DR, VS/TUN,
  1120. - or VS/NAT(change destination), so that filtering rules can be
  1121. - applied to IPVS. */
  1122. -static struct nf_hook_ops ip_vs_in_ops = {
  1123. - .hook = ip_vs_in,
  1124. - .owner = THIS_MODULE,
  1125. - .pf = PF_INET,
  1126. - .hooknum = NF_IP_LOCAL_IN,
  1127. - .priority = 100,
  1128. -};
  1129. -
  1130. -/* After packet filtering, change source only for VS/NAT */
  1131. -static struct nf_hook_ops ip_vs_out_ops = {
  1132. - .hook = ip_vs_out,
  1133. - .owner = THIS_MODULE,
  1134. - .pf = PF_INET,
  1135. - .hooknum = NF_IP_FORWARD,
  1136. - .priority = 100,
  1137. -};
  1138. -
  1139. -/* After packet filtering (but before ip_vs_out_icmp), catch icmp
  1140. - destined for 0.0.0.0/0, which is for incoming IPVS connections */
  1141. -static struct nf_hook_ops ip_vs_forward_icmp_ops = {
  1142. - .hook = ip_vs_forward_icmp,
  1143. - .owner = THIS_MODULE,
  1144. - .pf = PF_INET,
  1145. - .hooknum = NF_IP_FORWARD,
  1146. - .priority = 99,
  1147. -};
  1148. -
  1149. -/* Before the netfilter connection tracking, exit from POST_ROUTING */
  1150. -static struct nf_hook_ops ip_vs_post_routing_ops = {
  1151. - .hook = ip_vs_post_routing,
  1152. - .owner = THIS_MODULE,
  1153. - .pf = PF_INET,
  1154. - .hooknum = NF_IP_POST_ROUTING,
  1155. - .priority = NF_IP_PRI_NAT_SRC-1,
  1156. +static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
  1157. + /* After packet filtering, forward packet through VS/DR, VS/TUN,
  1158. + * or VS/NAT(change destination), so that filtering rules can be
  1159. + * applied to IPVS. */
  1160. + {
  1161. + .hook = ip_vs_in,
  1162. + .owner = THIS_MODULE,
  1163. + .pf = PF_INET,
  1164. + .hooknum = NF_INET_LOCAL_IN,
  1165. + .priority = 100,
  1166. + },
  1167. + /* After packet filtering, change source only for VS/NAT */
  1168. + {
  1169. + .hook = ip_vs_out,
  1170. + .owner = THIS_MODULE,
  1171. + .pf = PF_INET,
  1172. + .hooknum = NF_INET_FORWARD,
  1173. + .priority = 100,
  1174. + },
  1175. + /* After packet filtering (but before ip_vs_out_icmp), catch icmp
  1176. + * destined for 0.0.0.0/0, which is for incoming IPVS connections */
  1177. + {
  1178. + .hook = ip_vs_forward_icmp,
  1179. + .owner = THIS_MODULE,
  1180. + .pf = PF_INET,
  1181. + .hooknum = NF_INET_FORWARD,
  1182. + .priority = 99,
  1183. + },
  1184. + /* Before the netfilter connection tracking, exit from POST_ROUTING */
  1185. + {
  1186. + .hook = ip_vs_post_routing,
  1187. + .owner = THIS_MODULE,
  1188. + .pf = PF_INET,
  1189. + .hooknum = NF_INET_POST_ROUTING,
  1190. + .priority = NF_IP_PRI_NAT_SRC-1,
  1191. + },
  1192. };
  1193.  
  1194.  
  1195. @@ -1138,37 +1090,15 @@
  1196. goto cleanup_app;
  1197. }
  1198.  
  1199. - ret = nf_register_hook(&ip_vs_in_ops);
  1200. + ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
  1201. if (ret < 0) {
  1202. - IP_VS_ERR("can't register in hook.\n");
  1203. + IP_VS_ERR("can't register hooks.\n");
  1204. goto cleanup_conn;
  1205. }
  1206.  
  1207. - ret = nf_register_hook(&ip_vs_out_ops);
  1208. - if (ret < 0) {
  1209. - IP_VS_ERR("can't register out hook.\n");
  1210. - goto cleanup_inops;
  1211. - }
  1212. - ret = nf_register_hook(&ip_vs_post_routing_ops);
  1213. - if (ret < 0) {
  1214. - IP_VS_ERR("can't register post_routing hook.\n");
  1215. - goto cleanup_outops;
  1216. - }
  1217. - ret = nf_register_hook(&ip_vs_forward_icmp_ops);
  1218. - if (ret < 0) {
  1219. - IP_VS_ERR("can't register forward_icmp hook.\n");
  1220. - goto cleanup_postroutingops;
  1221. - }
  1222. -
  1223. IP_VS_INFO("ipvs loaded.\n");
  1224. return ret;
  1225.  
  1226. - cleanup_postroutingops:
  1227. - nf_unregister_hook(&ip_vs_post_routing_ops);
  1228. - cleanup_outops:
  1229. - nf_unregister_hook(&ip_vs_out_ops);
  1230. - cleanup_inops:
  1231. - nf_unregister_hook(&ip_vs_in_ops);
  1232. cleanup_conn:
  1233. ip_vs_conn_cleanup();
  1234. cleanup_app:
  1235. @@ -1182,10 +1112,7 @@
  1236.  
  1237. static void __exit ip_vs_cleanup(void)
  1238. {
  1239. - nf_unregister_hook(&ip_vs_forward_icmp_ops);
  1240. - nf_unregister_hook(&ip_vs_post_routing_ops);
  1241. - nf_unregister_hook(&ip_vs_out_ops);
  1242. - nf_unregister_hook(&ip_vs_in_ops);
  1243. + nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
  1244. ip_vs_conn_cleanup();
  1245. ip_vs_app_cleanup();
  1246. ip_vs_protocol_cleanup();
  1247. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ctl.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ctl.c
  1248. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ctl.c 2009-02-16 11:57:22.000000000 -0400
  1249. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ctl.c 2009-02-16 12:56:22.000000000 -0400
  1250. @@ -5,8 +5,6 @@
  1251. * high-performance and highly available server based on a
  1252. * cluster of servers.
  1253. *
  1254. - * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
  1255. - *
  1256. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  1257. * Peter Kese <peter.kese@ijs.si>
  1258. * Julian Anastasov <ja@ssi.bg>
  1259. @@ -29,13 +27,13 @@
  1260. #include <linux/proc_fs.h>
  1261. #include <linux/workqueue.h>
  1262. #include <linux/swap.h>
  1263. -#include <linux/proc_fs.h>
  1264. #include <linux/seq_file.h>
  1265.  
  1266. #include <linux/netfilter.h>
  1267. #include <linux/netfilter_ipv4.h>
  1268. #include <linux/mutex.h>
  1269.  
  1270. +#include <net/net_namespace.h>
  1271. #include <net/ip.h>
  1272. #include <net/route.h>
  1273. #include <net/sock.h>
  1274. @@ -579,6 +577,31 @@
  1275. return NULL;
  1276. }
  1277.  
  1278. +/*
  1279. + * Find destination by {daddr,dport,vaddr,protocol}
  1280. + * Created to be used in ip_vs_process_message() in
  1281. + * the backup synchronization daemon. It finds the
  1282. + * destination to be bound to the received connection
  1283. + * on the backup.
  1284. + *
  1285. + * ip_vs_lookup_real_service() looked promising, but
  1286. + * seems not to work as expected.
  1287. + */
  1288. +struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
  1289. + __be32 vaddr, __be16 vport, __u16 protocol)
  1290. +{
  1291. + struct ip_vs_dest *dest;
  1292. + struct ip_vs_service *svc;
  1293. +
  1294. + svc = ip_vs_service_get(0, protocol, vaddr, vport);
  1295. + if (!svc)
  1296. + return NULL;
  1297. + dest = ip_vs_lookup_dest(svc, daddr, dport);
  1298. + if (dest)
  1299. + atomic_inc(&dest->refcnt);
  1300. + ip_vs_service_put(svc);
  1301. + return dest;
  1302. +}
  1303.  
  1304. /*
  1305. * Lookup dest by {svc,addr,port} in the destination trash.
  1306. @@ -660,9 +683,22 @@
  1307. ip_vs_zero_stats(struct ip_vs_stats *stats)
  1308. {
  1309. spin_lock_bh(&stats->lock);
  1310. - memset(stats, 0, (char *)&stats->lock - (char *)stats);
  1311. - spin_unlock_bh(&stats->lock);
  1312. +
  1313. + stats->conns = 0;
  1314. + stats->inpkts = 0;
  1315. + stats->outpkts = 0;
  1316. + stats->inbytes = 0;
  1317. + stats->outbytes = 0;
  1318. +
  1319. + stats->cps = 0;
  1320. + stats->inpps = 0;
  1321. + stats->outpps = 0;
  1322. + stats->inbps = 0;
  1323. + stats->outbps = 0;
  1324. +
  1325. ip_vs_zero_estimator(stats);
  1326. +
  1327. + spin_unlock_bh(&stats->lock);
  1328. }
  1329.  
  1330. /*
  1331. @@ -679,7 +715,7 @@
  1332. conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
  1333.  
  1334. /* check if local node and update the flags */
  1335. - if (inet_addr_type(udest->addr) == RTN_LOCAL) {
  1336. + if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
  1337. conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
  1338. | IP_VS_CONN_F_LOCALNODE;
  1339. }
  1340. @@ -731,7 +767,7 @@
  1341.  
  1342. EnterFunction(2);
  1343.  
  1344. - atype = inet_addr_type(udest->addr);
  1345. + atype = inet_addr_type(&init_net, udest->addr);
  1346. if (atype != RTN_LOCAL && atype != RTN_UNICAST)
  1347. return -EINVAL;
  1348.  
  1349. @@ -909,7 +945,7 @@
  1350. write_lock_bh(&__ip_vs_svc_lock);
  1351.  
  1352. /* Wait until all other svc users go away */
  1353. - while (atomic_read(&svc->usecnt) > 1) {};
  1354. + IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
  1355.  
  1356. /* call the update_service, because server weight may be changed */
  1357. svc->scheduler->update_service(svc);
  1358. @@ -1399,7 +1435,6 @@
  1359.  
  1360. static struct ctl_table vs_vars[] = {
  1361. {
  1362. - .ctl_name = NET_IPV4_VS_AMEMTHRESH,
  1363. .procname = "amemthresh",
  1364. .data = &sysctl_ip_vs_amemthresh,
  1365. .maxlen = sizeof(int),
  1366. @@ -1408,7 +1443,6 @@
  1367. },
  1368. #ifdef CONFIG_IP_VS_DEBUG
  1369. {
  1370. - .ctl_name = NET_IPV4_VS_DEBUG_LEVEL,
  1371. .procname = "debug_level",
  1372. .data = &sysctl_ip_vs_debug_level,
  1373. .maxlen = sizeof(int),
  1374. @@ -1417,7 +1451,6 @@
  1375. },
  1376. #endif
  1377. {
  1378. - .ctl_name = NET_IPV4_VS_AMDROPRATE,
  1379. .procname = "am_droprate",
  1380. .data = &sysctl_ip_vs_am_droprate,
  1381. .maxlen = sizeof(int),
  1382. @@ -1425,7 +1458,6 @@
  1383. .proc_handler = &proc_dointvec,
  1384. },
  1385. {
  1386. - .ctl_name = NET_IPV4_VS_DROP_ENTRY,
  1387. .procname = "drop_entry",
  1388. .data = &sysctl_ip_vs_drop_entry,
  1389. .maxlen = sizeof(int),
  1390. @@ -1433,7 +1465,6 @@
  1391. .proc_handler = &proc_do_defense_mode,
  1392. },
  1393. {
  1394. - .ctl_name = NET_IPV4_VS_DROP_PACKET,
  1395. .procname = "drop_packet",
  1396. .data = &sysctl_ip_vs_drop_packet,
  1397. .maxlen = sizeof(int),
  1398. @@ -1441,7 +1472,6 @@
  1399. .proc_handler = &proc_do_defense_mode,
  1400. },
  1401. {
  1402. - .ctl_name = NET_IPV4_VS_SECURE_TCP,
  1403. .procname = "secure_tcp",
  1404. .data = &sysctl_ip_vs_secure_tcp,
  1405. .maxlen = sizeof(int),
  1406. @@ -1450,7 +1480,6 @@
  1407. },
  1408. #if 0
  1409. {
  1410. - .ctl_name = NET_IPV4_VS_TO_ES,
  1411. .procname = "timeout_established",
  1412. .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
  1413. .maxlen = sizeof(int),
  1414. @@ -1458,7 +1487,6 @@
  1415. .proc_handler = &proc_dointvec_jiffies,
  1416. },
  1417. {
  1418. - .ctl_name = NET_IPV4_VS_TO_SS,
  1419. .procname = "timeout_synsent",
  1420. .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
  1421. .maxlen = sizeof(int),
  1422. @@ -1466,7 +1494,6 @@
  1423. .proc_handler = &proc_dointvec_jiffies,
  1424. },
  1425. {
  1426. - .ctl_name = NET_IPV4_VS_TO_SR,
  1427. .procname = "timeout_synrecv",
  1428. .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
  1429. .maxlen = sizeof(int),
  1430. @@ -1474,7 +1501,6 @@
  1431. .proc_handler = &proc_dointvec_jiffies,
  1432. },
  1433. {
  1434. - .ctl_name = NET_IPV4_VS_TO_FW,
  1435. .procname = "timeout_finwait",
  1436. .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
  1437. .maxlen = sizeof(int),
  1438. @@ -1482,7 +1508,6 @@
  1439. .proc_handler = &proc_dointvec_jiffies,
  1440. },
  1441. {
  1442. - .ctl_name = NET_IPV4_VS_TO_TW,
  1443. .procname = "timeout_timewait",
  1444. .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
  1445. .maxlen = sizeof(int),
  1446. @@ -1490,7 +1515,6 @@
  1447. .proc_handler = &proc_dointvec_jiffies,
  1448. },
  1449. {
  1450. - .ctl_name = NET_IPV4_VS_TO_CL,
  1451. .procname = "timeout_close",
  1452. .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
  1453. .maxlen = sizeof(int),
  1454. @@ -1498,7 +1522,6 @@
  1455. .proc_handler = &proc_dointvec_jiffies,
  1456. },
  1457. {
  1458. - .ctl_name = NET_IPV4_VS_TO_CW,
  1459. .procname = "timeout_closewait",
  1460. .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
  1461. .maxlen = sizeof(int),
  1462. @@ -1506,7 +1529,6 @@
  1463. .proc_handler = &proc_dointvec_jiffies,
  1464. },
  1465. {
  1466. - .ctl_name = NET_IPV4_VS_TO_LA,
  1467. .procname = "timeout_lastack",
  1468. .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
  1469. .maxlen = sizeof(int),
  1470. @@ -1514,7 +1536,6 @@
  1471. .proc_handler = &proc_dointvec_jiffies,
  1472. },
  1473. {
  1474. - .ctl_name = NET_IPV4_VS_TO_LI,
  1475. .procname = "timeout_listen",
  1476. .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
  1477. .maxlen = sizeof(int),
  1478. @@ -1522,7 +1543,6 @@
  1479. .proc_handler = &proc_dointvec_jiffies,
  1480. },
  1481. {
  1482. - .ctl_name = NET_IPV4_VS_TO_SA,
  1483. .procname = "timeout_synack",
  1484. .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
  1485. .maxlen = sizeof(int),
  1486. @@ -1530,7 +1550,6 @@
  1487. .proc_handler = &proc_dointvec_jiffies,
  1488. },
  1489. {
  1490. - .ctl_name = NET_IPV4_VS_TO_UDP,
  1491. .procname = "timeout_udp",
  1492. .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
  1493. .maxlen = sizeof(int),
  1494. @@ -1538,7 +1557,6 @@
  1495. .proc_handler = &proc_dointvec_jiffies,
  1496. },
  1497. {
  1498. - .ctl_name = NET_IPV4_VS_TO_ICMP,
  1499. .procname = "timeout_icmp",
  1500. .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
  1501. .maxlen = sizeof(int),
  1502. @@ -1547,7 +1565,6 @@
  1503. },
  1504. #endif
  1505. {
  1506. - .ctl_name = NET_IPV4_VS_CACHE_BYPASS,
  1507. .procname = "cache_bypass",
  1508. .data = &sysctl_ip_vs_cache_bypass,
  1509. .maxlen = sizeof(int),
  1510. @@ -1555,7 +1572,6 @@
  1511. .proc_handler = &proc_dointvec,
  1512. },
  1513. {
  1514. - .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN,
  1515. .procname = "expire_nodest_conn",
  1516. .data = &sysctl_ip_vs_expire_nodest_conn,
  1517. .maxlen = sizeof(int),
  1518. @@ -1563,7 +1579,6 @@
  1519. .proc_handler = &proc_dointvec,
  1520. },
  1521. {
  1522. - .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
  1523. .procname = "expire_quiescent_template",
  1524. .data = &sysctl_ip_vs_expire_quiescent_template,
  1525. .maxlen = sizeof(int),
  1526. @@ -1571,7 +1586,6 @@
  1527. .proc_handler = &proc_dointvec,
  1528. },
  1529. {
  1530. - .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD,
  1531. .procname = "sync_threshold",
  1532. .data = &sysctl_ip_vs_sync_threshold,
  1533. .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
  1534. @@ -1579,7 +1593,6 @@
  1535. .proc_handler = &proc_do_sync_threshold,
  1536. },
  1537. {
  1538. - .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND,
  1539. .procname = "nat_icmp_send",
  1540. .data = &sysctl_ip_vs_nat_icmp_send,
  1541. .maxlen = sizeof(int),
  1542. @@ -1589,35 +1602,13 @@
  1543. { .ctl_name = 0 }
  1544. };
  1545.  
  1546. -static ctl_table vs_table[] = {
  1547. - {
  1548. - .ctl_name = NET_IPV4_VS,
  1549. - .procname = "vs",
  1550. - .mode = 0555,
  1551. - .child = vs_vars
  1552. - },
  1553. - { .ctl_name = 0 }
  1554. -};
  1555. -
  1556. -static ctl_table ipvs_ipv4_table[] = {
  1557. - {
  1558. - .ctl_name = NET_IPV4,
  1559. - .procname = "ipv4",
  1560. - .mode = 0555,
  1561. - .child = vs_table,
  1562. - },
  1563. - { .ctl_name = 0 }
  1564. -};
  1565. -
  1566. -static ctl_table vs_root_table[] = {
  1567. - {
  1568. - .ctl_name = CTL_NET,
  1569. - .procname = "net",
  1570. - .mode = 0555,
  1571. - .child = ipvs_ipv4_table,
  1572. - },
  1573. - { .ctl_name = 0 }
  1574. +const struct ctl_path net_vs_ctl_path[] = {
  1575. + { .procname = "net", .ctl_name = CTL_NET, },
  1576. + { .procname = "ipv4", .ctl_name = NET_IPV4, },
  1577. + { .procname = "vs", },
  1578. + { }
  1579. };
  1580. +EXPORT_SYMBOL_GPL(net_vs_ctl_path);
  1581.  
  1582. static struct ctl_table_header * sysctl_header;
  1583.  
  1584. @@ -1783,7 +1774,7 @@
  1585. return 0;
  1586. }
  1587.  
  1588. -static struct seq_operations ip_vs_info_seq_ops = {
  1589. +static const struct seq_operations ip_vs_info_seq_ops = {
  1590. .start = ip_vs_info_seq_start,
  1591. .next = ip_vs_info_seq_next,
  1592. .stop = ip_vs_info_seq_stop,
  1593. @@ -1792,27 +1783,11 @@
  1594.  
  1595. static int ip_vs_info_open(struct inode *inode, struct file *file)
  1596. {
  1597. - struct seq_file *seq;
  1598. - int rc = -ENOMEM;
  1599. - struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL);
  1600. -
  1601. - if (!s)
  1602. - goto out;
  1603. -
  1604. - rc = seq_open(file, &ip_vs_info_seq_ops);
  1605. - if (rc)
  1606. - goto out_kfree;
  1607. -
  1608. - seq = file->private_data;
  1609. - seq->private = s;
  1610. -out:
  1611. - return rc;
  1612. -out_kfree:
  1613. - kfree(s);
  1614. - goto out;
  1615. + return seq_open_private(file, &ip_vs_info_seq_ops,
  1616. + sizeof(struct ip_vs_iter));
  1617. }
  1618.  
  1619. -static struct file_operations ip_vs_info_fops = {
  1620. +static const struct file_operations ip_vs_info_fops = {
  1621. .owner = THIS_MODULE,
  1622. .open = ip_vs_info_open,
  1623. .read = seq_read,
  1624. @@ -1822,7 +1797,9 @@
  1625.  
  1626. #endif
  1627.  
  1628. -struct ip_vs_stats ip_vs_stats;
  1629. +struct ip_vs_stats ip_vs_stats = {
  1630. + .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
  1631. +};
  1632.  
  1633. #ifdef CONFIG_PROC_FS
  1634. static int ip_vs_stats_show(struct seq_file *seq, void *v)
  1635. @@ -1859,7 +1836,7 @@
  1636. return single_open(file, ip_vs_stats_show, NULL);
  1637. }
  1638.  
  1639. -static struct file_operations ip_vs_stats_fops = {
  1640. +static const struct file_operations ip_vs_stats_fops = {
  1641. .owner = THIS_MODULE,
  1642. .open = ip_vs_stats_seq_open,
  1643. .read = seq_read,
  1644. @@ -2340,10 +2317,11 @@
  1645. .get_optmin = IP_VS_BASE_CTL,
  1646. .get_optmax = IP_VS_SO_GET_MAX+1,
  1647. .get = do_ip_vs_get_ctl,
  1648. + .owner = THIS_MODULE,
  1649. };
  1650.  
  1651.  
  1652. -int ip_vs_control_init(void)
  1653. +int __init ip_vs_control_init(void)
  1654. {
  1655. int ret;
  1656. int idx;
  1657. @@ -2356,10 +2334,10 @@
  1658. return ret;
  1659. }
  1660.  
  1661. - proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
  1662. - proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);
  1663. + proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
  1664. + proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
  1665.  
  1666. - sysctl_header = register_sysctl_table(vs_root_table, 0);
  1667. + sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
  1668.  
  1669. /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
  1670. for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
  1671. @@ -2370,8 +2348,6 @@
  1672. INIT_LIST_HEAD(&ip_vs_rtable[idx]);
  1673. }
  1674.  
  1675. - memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
  1676. - spin_lock_init(&ip_vs_stats.lock);
  1677. ip_vs_new_estimator(&ip_vs_stats);
  1678.  
  1679. /* Hook the defense timer */
  1680. @@ -2387,10 +2363,11 @@
  1681. EnterFunction(2);
  1682. ip_vs_trash_cleanup();
  1683. cancel_rearming_delayed_work(&defense_work);
  1684. + cancel_work_sync(&defense_work.work);
  1685. ip_vs_kill_estimator(&ip_vs_stats);
  1686. unregister_sysctl_table(sysctl_header);
  1687. - proc_net_remove("ip_vs_stats");
  1688. - proc_net_remove("ip_vs");
  1689. + proc_net_remove(&init_net, "ip_vs_stats");
  1690. + proc_net_remove(&init_net, "ip_vs");
  1691. nf_unregister_sockopt(&ip_vs_sockopts);
  1692. LeaveFunction(2);
  1693. }
  1694. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_dh.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_dh.c
  1695. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_dh.c 2009-02-16 11:57:22.000000000 -0400
  1696. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_dh.c 2009-02-16 12:56:22.000000000 -0400
  1697. @@ -1,8 +1,6 @@
  1698. /*
  1699. * IPVS: Destination Hashing scheduling module
  1700. *
  1701. - * Version: $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
  1702. - *
  1703. * Authors: Wensong Zhang <wensong@gnuchina.org>
  1704. *
  1705. * Inspired by the consistent hashing scheduler patch from
  1706. @@ -204,7 +202,7 @@
  1707. {
  1708. struct ip_vs_dest *dest;
  1709. struct ip_vs_dh_bucket *tbl;
  1710. - struct iphdr *iph = skb->nh.iph;
  1711. + struct iphdr *iph = ip_hdr(skb);
  1712.  
  1713. IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
  1714.  
  1715. @@ -235,6 +233,7 @@
  1716. .name = "dh",
  1717. .refcnt = ATOMIC_INIT(0),
  1718. .module = THIS_MODULE,
  1719. + .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
  1720. .init_service = ip_vs_dh_init_svc,
  1721. .done_service = ip_vs_dh_done_svc,
  1722. .update_service = ip_vs_dh_update_svc,
  1723. @@ -244,7 +243,6 @@
  1724.  
  1725. static int __init ip_vs_dh_init(void)
  1726. {
  1727. - INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list);
  1728. return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
  1729. }
  1730.  
  1731. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_est.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_est.c
  1732. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_est.c 2009-02-16 11:57:22.000000000 -0400
  1733. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_est.c 2009-02-16 12:56:22.000000000 -0400
  1734. @@ -1,8 +1,6 @@
  1735. /*
  1736. * ip_vs_est.c: simple rate estimator for IPVS
  1737. *
  1738. - * Version: $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $
  1739. - *
  1740. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  1741. *
  1742. * This program is free software; you can redistribute it and/or
  1743. @@ -18,6 +16,8 @@
  1744. #include <linux/slab.h>
  1745. #include <linux/types.h>
  1746. #include <linux/interrupt.h>
  1747. +#include <linux/sysctl.h>
  1748. +#include <linux/list.h>
  1749.  
  1750. #include <net/ip_vs.h>
  1751.  
  1752. @@ -45,28 +45,11 @@
  1753. */
  1754.  
  1755.  
  1756. -struct ip_vs_estimator
  1757. -{
  1758. - struct ip_vs_estimator *next;
  1759. - struct ip_vs_stats *stats;
  1760. +static void estimation_timer(unsigned long arg);
  1761.  
  1762. - u32 last_conns;
  1763. - u32 last_inpkts;
  1764. - u32 last_outpkts;
  1765. - u64 last_inbytes;
  1766. - u64 last_outbytes;
  1767. -
  1768. - u32 cps;
  1769. - u32 inpps;
  1770. - u32 outpps;
  1771. - u32 inbps;
  1772. - u32 outbps;
  1773. -};
  1774. -
  1775. -
  1776. -static struct ip_vs_estimator *est_list = NULL;
  1777. -static DEFINE_RWLOCK(est_lock);
  1778. -static struct timer_list est_timer;
  1779. +static LIST_HEAD(est_list);
  1780. +static DEFINE_SPINLOCK(est_lock);
  1781. +static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
  1782.  
  1783. static void estimation_timer(unsigned long arg)
  1784. {
  1785. @@ -77,9 +60,9 @@
  1786. u64 n_inbytes, n_outbytes;
  1787. u32 rate;
  1788.  
  1789. - read_lock(&est_lock);
  1790. - for (e = est_list; e; e = e->next) {
  1791. - s = e->stats;
  1792. + spin_lock(&est_lock);
  1793. + list_for_each_entry(e, &est_list, list) {
  1794. + s = container_of(e, struct ip_vs_stats, est);
  1795.  
  1796. spin_lock(&s->lock);
  1797. n_conns = s->conns;
  1798. @@ -115,19 +98,16 @@
  1799. s->outbps = (e->outbps+0xF)>>5;
  1800. spin_unlock(&s->lock);
  1801. }
  1802. - read_unlock(&est_lock);
  1803. + spin_unlock(&est_lock);
  1804. mod_timer(&est_timer, jiffies + 2*HZ);
  1805. }
  1806.  
  1807. -int ip_vs_new_estimator(struct ip_vs_stats *stats)
  1808. +void ip_vs_new_estimator(struct ip_vs_stats *stats)
  1809. {
  1810. - struct ip_vs_estimator *est;
  1811. + struct ip_vs_estimator *est = &stats->est;
  1812.  
  1813. - est = kzalloc(sizeof(*est), GFP_KERNEL);
  1814. - if (est == NULL)
  1815. - return -ENOMEM;
  1816. + INIT_LIST_HEAD(&est->list);
  1817.  
  1818. - est->stats = stats;
  1819. est->last_conns = stats->conns;
  1820. est->cps = stats->cps<<10;
  1821.  
  1822. @@ -143,60 +123,40 @@
  1823. est->last_outbytes = stats->outbytes;
  1824. est->outbps = stats->outbps<<5;
  1825.  
  1826. - write_lock_bh(&est_lock);
  1827. - est->next = est_list;
  1828. - if (est->next == NULL) {
  1829. - init_timer(&est_timer);
  1830. - est_timer.expires = jiffies + 2*HZ;
  1831. - est_timer.function = estimation_timer;
  1832. - add_timer(&est_timer);
  1833. - }
  1834. - est_list = est;
  1835. - write_unlock_bh(&est_lock);
  1836. - return 0;
  1837. + spin_lock_bh(&est_lock);
  1838. + if (list_empty(&est_list))
  1839. + mod_timer(&est_timer, jiffies + 2 * HZ);
  1840. + list_add(&est->list, &est_list);
  1841. + spin_unlock_bh(&est_lock);
  1842. }
  1843.  
  1844. void ip_vs_kill_estimator(struct ip_vs_stats *stats)
  1845. {
  1846. - struct ip_vs_estimator *est, **pest;
  1847. - int killed = 0;
  1848. + struct ip_vs_estimator *est = &stats->est;
  1849.  
  1850. - write_lock_bh(&est_lock);
  1851. - pest = &est_list;
  1852. - while ((est=*pest) != NULL) {
  1853. - if (est->stats != stats) {
  1854. - pest = &est->next;
  1855. - continue;
  1856. - }
  1857. - *pest = est->next;
  1858. - kfree(est);
  1859. - killed++;
  1860. + spin_lock_bh(&est_lock);
  1861. + list_del(&est->list);
  1862. + while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) {
  1863. + spin_unlock_bh(&est_lock);
  1864. + cpu_relax();
  1865. + spin_lock_bh(&est_lock);
  1866. }
  1867. - if (killed && est_list == NULL)
  1868. - del_timer_sync(&est_timer);
  1869. - write_unlock_bh(&est_lock);
  1870. + spin_unlock_bh(&est_lock);
  1871. }
  1872.  
  1873. void ip_vs_zero_estimator(struct ip_vs_stats *stats)
  1874. {
  1875. - struct ip_vs_estimator *e;
  1876. + struct ip_vs_estimator *est = &stats->est;
  1877.  
  1878. - write_lock_bh(&est_lock);
  1879. - for (e = est_list; e; e = e->next) {
  1880. - if (e->stats != stats)
  1881. - continue;
  1882. -
  1883. - /* set counters zero */
  1884. - e->last_conns = 0;
  1885. - e->last_inpkts = 0;
  1886. - e->last_outpkts = 0;
  1887. - e->last_inbytes = 0;
  1888. - e->last_outbytes = 0;
  1889. - e->cps = 0;
  1890. - e->inpps = 0;
  1891. - e->outpps = 0;
  1892. - e->inbps = 0;
  1893. - e->outbps = 0;
  1894. - }
  1895. - write_unlock_bh(&est_lock);
  1896. + /* set counters zero, caller must hold the stats->lock lock */
  1897. + est->last_inbytes = 0;
  1898. + est->last_outbytes = 0;
  1899. + est->last_conns = 0;
  1900. + est->last_inpkts = 0;
  1901. + est->last_outpkts = 0;
  1902. + est->cps = 0;
  1903. + est->inpps = 0;
  1904. + est->outpps = 0;
  1905. + est->inbps = 0;
  1906. + est->outbps = 0;
  1907. }
  1908. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ftp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ftp.c
  1909. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ftp.c 2009-02-16 11:57:22.000000000 -0400
  1910. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ftp.c 2009-02-16 12:56:22.000000000 -0400
  1911. @@ -1,8 +1,6 @@
  1912. /*
  1913. * ip_vs_ftp.c: IPVS ftp application module
  1914. *
  1915. - * Version: $Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $
  1916. - *
  1917. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  1918. *
  1919. * Changes:
  1920. @@ -30,6 +28,7 @@
  1921. #include <linux/skbuff.h>
  1922. #include <linux/in.h>
  1923. #include <linux/ip.h>
  1924. +#include <linux/netfilter.h>
  1925. #include <net/protocol.h>
  1926. #include <net/tcp.h>
  1927. #include <asm/unaligned.h>
  1928. @@ -135,7 +134,7 @@
  1929. * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
  1930. */
  1931. static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
  1932. - struct sk_buff **pskb, int *diff)
  1933. + struct sk_buff *skb, int *diff)
  1934. {
  1935. struct iphdr *iph;
  1936. struct tcphdr *th;
  1937. @@ -155,14 +154,14 @@
  1938. return 1;
  1939.  
  1940. /* Linear packets are much easier to deal with. */
  1941. - if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
  1942. + if (!skb_make_writable(skb, skb->len))
  1943. return 0;
  1944.  
  1945. if (cp->app_data == &ip_vs_ftp_pasv) {
  1946. - iph = (*pskb)->nh.iph;
  1947. + iph = ip_hdr(skb);
  1948. th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
  1949. data = (char *)th + (th->doff << 2);
  1950. - data_limit = (*pskb)->tail;
  1951. + data_limit = skb_tail_pointer(skb);
  1952.  
  1953. if (ip_vs_ftp_get_addrport(data, data_limit,
  1954. SERVER_STRING,
  1955. @@ -213,7 +212,7 @@
  1956. memcpy(start, buf, buf_len);
  1957. ret = 1;
  1958. } else {
  1959. - ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
  1960. + ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
  1961. end-start, buf, buf_len);
  1962. }
  1963.  
  1964. @@ -238,7 +237,7 @@
  1965. * the client.
  1966. */
  1967. static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
  1968. - struct sk_buff **pskb, int *diff)
  1969. + struct sk_buff *skb, int *diff)
  1970. {
  1971. struct iphdr *iph;
  1972. struct tcphdr *th;
  1973. @@ -256,20 +255,20 @@
  1974. return 1;
  1975.  
  1976. /* Linear packets are much easier to deal with. */
  1977. - if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
  1978. + if (!skb_make_writable(skb, skb->len))
  1979. return 0;
  1980.  
  1981. /*
  1982. * Detecting whether it is passive
  1983. */
  1984. - iph = (*pskb)->nh.iph;
  1985. + iph = ip_hdr(skb);
  1986. th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
  1987.  
  1988. /* Since there may be OPTIONS in the TCP packet and the HLEN is
  1989. the length of the header in 32-bit multiples, it is accurate
  1990. to calculate data address by th+HLEN*4 */
  1991. data = data_start = (char *)th + (th->doff << 2);
  1992. - data_limit = (*pskb)->tail;
  1993. + data_limit = skb_tail_pointer(skb);
  1994.  
  1995. while (data <= data_limit - 6) {
  1996. if (strnicmp(data, "PASV\r\n", 6) == 0) {
  1997. @@ -370,7 +369,7 @@
  1998. if (ret)
  1999. break;
  2000. IP_VS_INFO("%s: loaded support on port[%d] = %d\n",
  2001. - app->name, i, ports[i]);
  2002. + app->name, i, ports[i]);
  2003. }
  2004.  
  2005. if (ret)
  2006. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblc.c
  2007. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblc.c 2009-02-16 11:57:22.000000000 -0400
  2008. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblc.c 2009-02-16 12:56:22.000000000 -0400
  2009. @@ -1,8 +1,6 @@
  2010. /*
  2011. * IPVS: Locality-Based Least-Connection scheduling module
  2012. *
  2013. - * Version: $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $
  2014. - *
  2015. * Authors: Wensong Zhang <wensong@gnuchina.org>
  2016. *
  2017. * This program is free software; you can redistribute it and/or
  2018. @@ -114,46 +112,15 @@
  2019.  
  2020. static ctl_table vs_vars_table[] = {
  2021. {
  2022. - .ctl_name = NET_IPV4_VS_LBLC_EXPIRE,
  2023. .procname = "lblc_expiration",
  2024. .data = &sysctl_ip_vs_lblc_expiration,
  2025. .maxlen = sizeof(int),
  2026. - .mode = 0644,
  2027. + .mode = 0644,
  2028. .proc_handler = &proc_dointvec_jiffies,
  2029. },
  2030. { .ctl_name = 0 }
  2031. };
  2032.  
  2033. -static ctl_table vs_table[] = {
  2034. - {
  2035. - .ctl_name = NET_IPV4_VS,
  2036. - .procname = "vs",
  2037. - .mode = 0555,
  2038. - .child = vs_vars_table
  2039. - },
  2040. - { .ctl_name = 0 }
  2041. -};
  2042. -
  2043. -static ctl_table ipvs_ipv4_table[] = {
  2044. - {
  2045. - .ctl_name = NET_IPV4,
  2046. - .procname = "ipv4",
  2047. - .mode = 0555,
  2048. - .child = vs_table
  2049. - },
  2050. - { .ctl_name = 0 }
  2051. -};
  2052. -
  2053. -static ctl_table lblc_root_table[] = {
  2054. - {
  2055. - .ctl_name = CTL_NET,
  2056. - .procname = "net",
  2057. - .mode = 0555,
  2058. - .child = ipvs_ipv4_table
  2059. - },
  2060. - { .ctl_name = 0 }
  2061. -};
  2062. -
  2063. static struct ctl_table_header * sysctl_header;
  2064.  
  2065. /*
  2066. @@ -288,7 +255,7 @@
  2067.  
  2068. write_lock(&tbl->lock);
  2069. list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
  2070. - if (time_before(now,
  2071. + if (time_before(now,
  2072. en->lastuse + sysctl_ip_vs_lblc_expiration))
  2073. continue;
  2074.  
  2075. @@ -393,9 +360,8 @@
  2076. /*
  2077. * Hook periodic timer for garbage collection
  2078. */
  2079. - init_timer(&tbl->periodic_timer);
  2080. - tbl->periodic_timer.data = (unsigned long)tbl;
  2081. - tbl->periodic_timer.function = ip_vs_lblc_check_expire;
  2082. + setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
  2083. + (unsigned long)tbl);
  2084. tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
  2085. add_timer(&tbl->periodic_timer);
  2086.  
  2087. @@ -521,7 +487,7 @@
  2088. struct ip_vs_dest *dest;
  2089. struct ip_vs_lblc_table *tbl;
  2090. struct ip_vs_lblc_entry *en;
  2091. - struct iphdr *iph = skb->nh.iph;
  2092. + struct iphdr *iph = ip_hdr(skb);
  2093.  
  2094. IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
  2095.  
  2096. @@ -573,6 +539,7 @@
  2097. .name = "lblc",
  2098. .refcnt = ATOMIC_INIT(0),
  2099. .module = THIS_MODULE,
  2100. + .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
  2101. .init_service = ip_vs_lblc_init_svc,
  2102. .done_service = ip_vs_lblc_done_svc,
  2103. .update_service = ip_vs_lblc_update_svc,
  2104. @@ -582,9 +549,13 @@
  2105.  
  2106. static int __init ip_vs_lblc_init(void)
  2107. {
  2108. - INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
  2109. - sysctl_header = register_sysctl_table(lblc_root_table, 0);
  2110. - return register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
  2111. + int ret;
  2112. +
  2113. + sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
  2114. + ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
  2115. + if (ret)
  2116. + unregister_sysctl_table(sysctl_header);
  2117. + return ret;
  2118. }
  2119.  
  2120.  
  2121. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblcr.c
  2122. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblcr.c 2009-02-16 11:57:22.000000000 -0400
  2123. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblcr.c 2009-02-16 12:56:22.000000000 -0400
  2124. @@ -1,8 +1,6 @@
  2125. /*
  2126. * IPVS: Locality-Based Least-Connection with Replication scheduler
  2127. *
  2128. - * Version: $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $
  2129. - *
  2130. * Authors: Wensong Zhang <wensong@gnuchina.org>
  2131. *
  2132. * This program is free software; you can redistribute it and/or
  2133. @@ -48,8 +46,7 @@
  2134. /* for sysctl */
  2135. #include <linux/fs.h>
  2136. #include <linux/sysctl.h>
  2137. -/* for proc_net_create/proc_net_remove */
  2138. -#include <linux/proc_fs.h>
  2139. +#include <net/net_namespace.h>
  2140.  
  2141. #include <net/ip_vs.h>
  2142.  
  2143. @@ -303,46 +300,15 @@
  2144.  
  2145. static ctl_table vs_vars_table[] = {
  2146. {
  2147. - .ctl_name = NET_IPV4_VS_LBLCR_EXPIRE,
  2148. .procname = "lblcr_expiration",
  2149. .data = &sysctl_ip_vs_lblcr_expiration,
  2150. .maxlen = sizeof(int),
  2151. - .mode = 0644,
  2152. + .mode = 0644,
  2153. .proc_handler = &proc_dointvec_jiffies,
  2154. },
  2155. { .ctl_name = 0 }
  2156. };
  2157.  
  2158. -static ctl_table vs_table[] = {
  2159. - {
  2160. - .ctl_name = NET_IPV4_VS,
  2161. - .procname = "vs",
  2162. - .mode = 0555,
  2163. - .child = vs_vars_table
  2164. - },
  2165. - { .ctl_name = 0 }
  2166. -};
  2167. -
  2168. -static ctl_table ipvs_ipv4_table[] = {
  2169. - {
  2170. - .ctl_name = NET_IPV4,
  2171. - .procname = "ipv4",
  2172. - .mode = 0555,
  2173. - .child = vs_table
  2174. - },
  2175. - { .ctl_name = 0 }
  2176. -};
  2177. -
  2178. -static ctl_table lblcr_root_table[] = {
  2179. - {
  2180. - .ctl_name = CTL_NET,
  2181. - .procname = "net",
  2182. - .mode = 0555,
  2183. - .child = ipvs_ipv4_table
  2184. - },
  2185. - { .ctl_name = 0 }
  2186. -};
  2187. -
  2188. static struct ctl_table_header * sysctl_header;
  2189.  
  2190. /*
  2191. @@ -546,71 +512,6 @@
  2192. mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
  2193. }
  2194.  
  2195. -
  2196. -#ifdef CONFIG_IP_VS_LBLCR_DEBUG
  2197. -static struct ip_vs_lblcr_table *lblcr_table_list;
  2198. -
  2199. -/*
  2200. - * /proc/net/ip_vs_lblcr to display the mappings of
  2201. - * destination IP address <==> its serverSet
  2202. - */
  2203. -static int
  2204. -ip_vs_lblcr_getinfo(char *buffer, char **start, off_t offset, int length)
  2205. -{
  2206. - off_t pos=0, begin;
  2207. - int len=0, size;
  2208. - struct ip_vs_lblcr_table *tbl;
  2209. - unsigned long now = jiffies;
  2210. - int i;
  2211. - struct ip_vs_lblcr_entry *en;
  2212. -
  2213. - tbl = lblcr_table_list;
  2214. -
  2215. - size = sprintf(buffer, "LastTime Dest IP address Server set\n");
  2216. - pos += size;
  2217. - len += size;
  2218. -
  2219. - for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
  2220. - read_lock_bh(&tbl->lock);
  2221. - list_for_each_entry(en, &tbl->bucket[i], list) {
  2222. - char tbuf[16];
  2223. - struct ip_vs_dest_list *d;
  2224. -
  2225. - sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(en->addr));
  2226. - size = sprintf(buffer+len, "%8lu %-16s ",
  2227. - now-en->lastuse, tbuf);
  2228. -
  2229. - read_lock(&en->set.lock);
  2230. - for (d=en->set.list; d!=NULL; d=d->next) {
  2231. - size += sprintf(buffer+len+size,
  2232. - "%u.%u.%u.%u ",
  2233. - NIPQUAD(d->dest->addr));
  2234. - }
  2235. - read_unlock(&en->set.lock);
  2236. - size += sprintf(buffer+len+size, "\n");
  2237. - len += size;
  2238. - pos += size;
  2239. - if (pos <= offset)
  2240. - len=0;
  2241. - if (pos >= offset+length) {
  2242. - read_unlock_bh(&tbl->lock);
  2243. - goto done;
  2244. - }
  2245. - }
  2246. - read_unlock_bh(&tbl->lock);
  2247. - }
  2248. -
  2249. - done:
  2250. - begin = len - (pos - offset);
  2251. - *start = buffer + begin;
  2252. - len -= begin;
  2253. - if(len>length)
  2254. - len = length;
  2255. - return len;
  2256. -}
  2257. -#endif
  2258. -
  2259. -
  2260. static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
  2261. {
  2262. int i;
  2263. @@ -643,15 +544,11 @@
  2264. /*
  2265. * Hook periodic timer for garbage collection
  2266. */
  2267. - init_timer(&tbl->periodic_timer);
  2268. - tbl->periodic_timer.data = (unsigned long)tbl;
  2269. - tbl->periodic_timer.function = ip_vs_lblcr_check_expire;
  2270. + setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
  2271. + (unsigned long)tbl);
  2272. tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
  2273. add_timer(&tbl->periodic_timer);
  2274.  
  2275. -#ifdef CONFIG_IP_VS_LBLCR_DEBUG
  2276. - lblcr_table_list = tbl;
  2277. -#endif
  2278. return 0;
  2279. }
  2280.  
  2281. @@ -775,7 +672,7 @@
  2282. struct ip_vs_dest *dest;
  2283. struct ip_vs_lblcr_table *tbl;
  2284. struct ip_vs_lblcr_entry *en;
  2285. - struct iphdr *iph = skb->nh.iph;
  2286. + struct iphdr *iph = ip_hdr(skb);
  2287.  
  2288. IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
  2289.  
  2290. @@ -831,6 +728,7 @@
  2291. .name = "lblcr",
  2292. .refcnt = ATOMIC_INIT(0),
  2293. .module = THIS_MODULE,
  2294. + .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
  2295. .init_service = ip_vs_lblcr_init_svc,
  2296. .done_service = ip_vs_lblcr_done_svc,
  2297. .update_service = ip_vs_lblcr_update_svc,
  2298. @@ -840,20 +738,18 @@
  2299.  
  2300. static int __init ip_vs_lblcr_init(void)
  2301. {
  2302. - INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
  2303. - sysctl_header = register_sysctl_table(lblcr_root_table, 0);
  2304. -#ifdef CONFIG_IP_VS_LBLCR_DEBUG
  2305. - proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);
  2306. -#endif
  2307. - return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
  2308. + int ret;
  2309. +
  2310. + sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
  2311. + ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
  2312. + if (ret)
  2313. + unregister_sysctl_table(sysctl_header);
  2314. + return ret;
  2315. }
  2316.  
  2317.  
  2318. static void __exit ip_vs_lblcr_cleanup(void)
  2319. {
  2320. -#ifdef CONFIG_IP_VS_LBLCR_DEBUG
  2321. - proc_net_remove("ip_vs_lblcr");
  2322. -#endif
  2323. unregister_sysctl_table(sysctl_header);
  2324. unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
  2325. }
  2326. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lc.c
  2327. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lc.c 2009-02-16 11:57:22.000000000 -0400
  2328. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lc.c 2009-02-16 12:56:22.000000000 -0400
  2329. @@ -1,8 +1,6 @@
  2330. /*
  2331. * IPVS: Least-Connection Scheduling module
  2332. *
  2333. - * Version: $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $
  2334. - *
  2335. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  2336. *
  2337. * This program is free software; you can redistribute it and/or
  2338. @@ -100,6 +98,7 @@
  2339. .name = "lc",
  2340. .refcnt = ATOMIC_INIT(0),
  2341. .module = THIS_MODULE,
  2342. + .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
  2343. .init_service = ip_vs_lc_init_svc,
  2344. .done_service = ip_vs_lc_done_svc,
  2345. .update_service = ip_vs_lc_update_svc,
  2346. @@ -109,7 +108,6 @@
  2347.  
  2348. static int __init ip_vs_lc_init(void)
  2349. {
  2350. - INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list);
  2351. return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;
  2352. }
  2353.  
  2354. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_nq.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_nq.c
  2355. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_nq.c 2009-02-16 11:57:22.000000000 -0400
  2356. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_nq.c 2009-02-16 12:56:22.000000000 -0400
  2357. @@ -1,8 +1,6 @@
  2358. /*
  2359. * IPVS: Never Queue scheduling module
  2360. *
  2361. - * Version: $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $
  2362. - *
  2363. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  2364. *
  2365. * This program is free software; you can redistribute it and/or
  2366. @@ -138,6 +136,7 @@
  2367. .name = "nq",
  2368. .refcnt = ATOMIC_INIT(0),
  2369. .module = THIS_MODULE,
  2370. + .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
  2371. .init_service = ip_vs_nq_init_svc,
  2372. .done_service = ip_vs_nq_done_svc,
  2373. .update_service = ip_vs_nq_update_svc,
  2374. @@ -147,7 +146,6 @@
  2375.  
  2376. static int __init ip_vs_nq_init(void)
  2377. {
  2378. - INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list);
  2379. return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
  2380. }
  2381.  
  2382. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto.c
  2383. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto.c 2009-02-16 11:57:22.000000000 -0400
  2384. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto.c 2009-02-16 12:56:22.000000000 -0400
  2385. @@ -1,8 +1,6 @@
  2386. /*
  2387. * ip_vs_proto.c: transport protocol load balancing support for IPVS
  2388. *
  2389. - * Version: $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $
  2390. - *
  2391. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  2392. * Julian Anastasov <ja@ssi.bg>
  2393. *
  2394. @@ -45,7 +43,7 @@
  2395. /*
  2396. * register an ipvs protocol
  2397. */
  2398. -static int register_ip_vs_protocol(struct ip_vs_protocol *pp)
  2399. +static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
  2400. {
  2401. unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
  2402.  
  2403. @@ -148,7 +146,7 @@
  2404. struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
  2405.  
  2406. if (pp == NULL || pp->state_name == NULL)
  2407. - return "ERR!";
  2408. + return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
  2409. return pp->state_name(state);
  2410. }
  2411.  
  2412. @@ -165,7 +163,7 @@
  2413. ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
  2414. if (ih == NULL)
  2415. sprintf(buf, "%s TRUNCATED", pp->name);
  2416. - else if (ih->frag_off & __constant_htons(IP_OFFSET))
  2417. + else if (ih->frag_off & htons(IP_OFFSET))
  2418. sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
  2419. pp->name, NIPQUAD(ih->saddr),
  2420. NIPQUAD(ih->daddr));
  2421. @@ -192,7 +190,7 @@
  2422. }
  2423.  
  2424.  
  2425. -int ip_vs_protocol_init(void)
  2426. +int __init ip_vs_protocol_init(void)
  2427. {
  2428. char protocols[64];
  2429. #define REGISTER_PROTOCOL(p) \
  2430. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_ah.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_ah.c
  2431. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_ah.c 2009-02-16 11:57:22.000000000 -0400
  2432. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_ah.c 2009-02-16 12:56:22.000000000 -0400
  2433. @@ -1,8 +1,6 @@
  2434. /*
  2435. * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS
  2436. *
  2437. - * Version: $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
  2438. - *
  2439. * Authors: Julian Anastasov <ja@ssi.bg>, February 2002
  2440. * Wensong Zhang <wensong@linuxvirtualserver.org>
  2441. *
  2442. @@ -52,15 +50,15 @@
  2443. if (likely(!inverse)) {
  2444. cp = ip_vs_conn_in_get(IPPROTO_UDP,
  2445. iph->saddr,
  2446. - __constant_htons(PORT_ISAKMP),
  2447. + htons(PORT_ISAKMP),
  2448. iph->daddr,
  2449. - __constant_htons(PORT_ISAKMP));
  2450. + htons(PORT_ISAKMP));
  2451. } else {
  2452. cp = ip_vs_conn_in_get(IPPROTO_UDP,
  2453. iph->daddr,
  2454. - __constant_htons(PORT_ISAKMP),
  2455. + htons(PORT_ISAKMP),
  2456. iph->saddr,
  2457. - __constant_htons(PORT_ISAKMP));
  2458. + htons(PORT_ISAKMP));
  2459. }
  2460.  
  2461. if (!cp) {
  2462. @@ -89,15 +87,15 @@
  2463. if (likely(!inverse)) {
  2464. cp = ip_vs_conn_out_get(IPPROTO_UDP,
  2465. iph->saddr,
  2466. - __constant_htons(PORT_ISAKMP),
  2467. + htons(PORT_ISAKMP),
  2468. iph->daddr,
  2469. - __constant_htons(PORT_ISAKMP));
  2470. + htons(PORT_ISAKMP));
  2471. } else {
  2472. cp = ip_vs_conn_out_get(IPPROTO_UDP,
  2473. iph->daddr,
  2474. - __constant_htons(PORT_ISAKMP),
  2475. + htons(PORT_ISAKMP),
  2476. iph->saddr,
  2477. - __constant_htons(PORT_ISAKMP));
  2478. + htons(PORT_ISAKMP));
  2479. }
  2480.  
  2481. if (!cp) {
  2482. @@ -160,6 +158,7 @@
  2483. struct ip_vs_protocol ip_vs_protocol_ah = {
  2484. .name = "AH",
  2485. .protocol = IPPROTO_AH,
  2486. + .num_states = 1,
  2487. .dont_defrag = 1,
  2488. .init = ah_init,
  2489. .exit = ah_exit,
  2490. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_esp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_esp.c
  2491. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_esp.c 2009-02-16 11:57:22.000000000 -0400
  2492. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_esp.c 2009-02-16 12:56:22.000000000 -0400
  2493. @@ -1,8 +1,6 @@
  2494. /*
  2495. * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS
  2496. *
  2497. - * Version: $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
  2498. - *
  2499. * Authors: Julian Anastasov <ja@ssi.bg>, February 2002
  2500. * Wensong Zhang <wensong@linuxvirtualserver.org>
  2501. *
  2502. @@ -52,15 +50,15 @@
  2503. if (likely(!inverse)) {
  2504. cp = ip_vs_conn_in_get(IPPROTO_UDP,
  2505. iph->saddr,
  2506. - __constant_htons(PORT_ISAKMP),
  2507. + htons(PORT_ISAKMP),
  2508. iph->daddr,
  2509. - __constant_htons(PORT_ISAKMP));
  2510. + htons(PORT_ISAKMP));
  2511. } else {
  2512. cp = ip_vs_conn_in_get(IPPROTO_UDP,
  2513. iph->daddr,
  2514. - __constant_htons(PORT_ISAKMP),
  2515. + htons(PORT_ISAKMP),
  2516. iph->saddr,
  2517. - __constant_htons(PORT_ISAKMP));
  2518. + htons(PORT_ISAKMP));
  2519. }
  2520.  
  2521. if (!cp) {
  2522. @@ -89,15 +87,15 @@
  2523. if (likely(!inverse)) {
  2524. cp = ip_vs_conn_out_get(IPPROTO_UDP,
  2525. iph->saddr,
  2526. - __constant_htons(PORT_ISAKMP),
  2527. + htons(PORT_ISAKMP),
  2528. iph->daddr,
  2529. - __constant_htons(PORT_ISAKMP));
  2530. + htons(PORT_ISAKMP));
  2531. } else {
  2532. cp = ip_vs_conn_out_get(IPPROTO_UDP,
  2533. iph->daddr,
  2534. - __constant_htons(PORT_ISAKMP),
  2535. + htons(PORT_ISAKMP),
  2536. iph->saddr,
  2537. - __constant_htons(PORT_ISAKMP));
  2538. + htons(PORT_ISAKMP));
  2539. }
  2540.  
  2541. if (!cp) {
  2542. @@ -159,6 +157,7 @@
  2543. struct ip_vs_protocol ip_vs_protocol_esp = {
  2544. .name = "ESP",
  2545. .protocol = IPPROTO_ESP,
  2546. + .num_states = 1,
  2547. .dont_defrag = 1,
  2548. .init = esp_init,
  2549. .exit = esp_exit,
  2550. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_tcp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_tcp.c
  2551. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_tcp.c 2009-02-16 11:57:22.000000000 -0400
  2552. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_tcp.c 2009-02-16 12:56:22.000000000 -0400
  2553. @@ -1,8 +1,6 @@
  2554. /*
  2555. * ip_vs_proto_tcp.c: TCP load balancing support for IPVS
  2556. *
  2557. - * Version: $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
  2558. - *
  2559. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  2560. * Julian Anastasov <ja@ssi.bg>
  2561. *
  2562. @@ -20,6 +18,7 @@
  2563. #include <linux/tcp.h> /* for tcphdr */
  2564. #include <net/ip.h>
  2565. #include <net/tcp.h> /* for csum_tcpudp_magic */
  2566. +#include <linux/netfilter.h>
  2567. #include <linux/netfilter_ipv4.h>
  2568.  
  2569. #include <net/ip_vs.h>
  2570. @@ -76,16 +75,15 @@
  2571. struct ip_vs_service *svc;
  2572. struct tcphdr _tcph, *th;
  2573.  
  2574. - th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
  2575. - sizeof(_tcph), &_tcph);
  2576. + th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
  2577. if (th == NULL) {
  2578. *verdict = NF_DROP;
  2579. return 0;
  2580. }
  2581.  
  2582. if (th->syn &&
  2583. - (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
  2584. - skb->nh.iph->daddr, th->dest))) {
  2585. + (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
  2586. + ip_hdr(skb)->daddr, th->dest))) {
  2587. if (ip_vs_todrop()) {
  2588. /*
  2589. * It seems that we are very loaded.
  2590. @@ -123,27 +121,27 @@
  2591.  
  2592.  
  2593. static int
  2594. -tcp_snat_handler(struct sk_buff **pskb,
  2595. +tcp_snat_handler(struct sk_buff *skb,
  2596. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  2597. {
  2598. struct tcphdr *tcph;
  2599. - unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
  2600. + const unsigned int tcphoff = ip_hdrlen(skb);
  2601.  
  2602. /* csum_check requires unshared skb */
  2603. - if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
  2604. + if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
  2605. return 0;
  2606.  
  2607. if (unlikely(cp->app != NULL)) {
  2608. /* Some checks before mangling */
  2609. - if (pp->csum_check && !pp->csum_check(*pskb, pp))
  2610. + if (pp->csum_check && !pp->csum_check(skb, pp))
  2611. return 0;
  2612.  
  2613. /* Call application helper if needed */
  2614. - if (!ip_vs_app_pkt_out(cp, pskb))
  2615. + if (!ip_vs_app_pkt_out(cp, skb))
  2616. return 0;
  2617. }
  2618.  
  2619. - tcph = (void *)(*pskb)->nh.iph + tcphoff;
  2620. + tcph = (void *)ip_hdr(skb) + tcphoff;
  2621. tcph->source = cp->vport;
  2622.  
  2623. /* Adjust TCP checksums */
  2624. @@ -151,17 +149,15 @@
  2625. /* Only port and addr are changed, do fast csum update */
  2626. tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
  2627. cp->dport, cp->vport);
  2628. - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
  2629. - (*pskb)->ip_summed = CHECKSUM_NONE;
  2630. + if (skb->ip_summed == CHECKSUM_COMPLETE)
  2631. + skb->ip_summed = CHECKSUM_NONE;
  2632. } else {
  2633. /* full checksum calculation */
  2634. tcph->check = 0;
  2635. - (*pskb)->csum = skb_checksum(*pskb, tcphoff,
  2636. - (*pskb)->len - tcphoff, 0);
  2637. + skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
  2638. tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
  2639. - (*pskb)->len - tcphoff,
  2640. - cp->protocol,
  2641. - (*pskb)->csum);
  2642. + skb->len - tcphoff,
  2643. + cp->protocol, skb->csum);
  2644. IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
  2645. pp->name, tcph->check,
  2646. (char*)&(tcph->check) - (char*)tcph);
  2647. @@ -171,30 +167,30 @@
  2648.  
  2649.  
  2650. static int
  2651. -tcp_dnat_handler(struct sk_buff **pskb,
  2652. +tcp_dnat_handler(struct sk_buff *skb,
  2653. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  2654. {
  2655. struct tcphdr *tcph;
  2656. - unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
  2657. + const unsigned int tcphoff = ip_hdrlen(skb);
  2658.  
  2659. /* csum_check requires unshared skb */
  2660. - if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
  2661. + if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
  2662. return 0;
  2663.  
  2664. if (unlikely(cp->app != NULL)) {
  2665. /* Some checks before mangling */
  2666. - if (pp->csum_check && !pp->csum_check(*pskb, pp))
  2667. + if (pp->csum_check && !pp->csum_check(skb, pp))
  2668. return 0;
  2669.  
  2670. /*
  2671. * Attempt ip_vs_app call.
  2672. * It will fix ip_vs_conn and iph ack_seq stuff
  2673. */
  2674. - if (!ip_vs_app_pkt_in(cp, pskb))
  2675. + if (!ip_vs_app_pkt_in(cp, skb))
  2676. return 0;
  2677. }
  2678.  
  2679. - tcph = (void *)(*pskb)->nh.iph + tcphoff;
  2680. + tcph = (void *)ip_hdr(skb) + tcphoff;
  2681. tcph->dest = cp->dport;
  2682.  
  2683. /*
  2684. @@ -204,18 +200,16 @@
  2685. /* Only port and addr are changed, do fast csum update */
  2686. tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
  2687. cp->vport, cp->dport);
  2688. - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
  2689. - (*pskb)->ip_summed = CHECKSUM_NONE;
  2690. + if (skb->ip_summed == CHECKSUM_COMPLETE)
  2691. + skb->ip_summed = CHECKSUM_NONE;
  2692. } else {
  2693. /* full checksum calculation */
  2694. tcph->check = 0;
  2695. - (*pskb)->csum = skb_checksum(*pskb, tcphoff,
  2696. - (*pskb)->len - tcphoff, 0);
  2697. + skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
  2698. tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
  2699. - (*pskb)->len - tcphoff,
  2700. - cp->protocol,
  2701. - (*pskb)->csum);
  2702. - (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
  2703. + skb->len - tcphoff,
  2704. + cp->protocol, skb->csum);
  2705. + skb->ip_summed = CHECKSUM_UNNECESSARY;
  2706. }
  2707. return 1;
  2708. }
  2709. @@ -224,15 +218,15 @@
  2710. static int
  2711. tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
  2712. {
  2713. - unsigned int tcphoff = skb->nh.iph->ihl*4;
  2714. + const unsigned int tcphoff = ip_hdrlen(skb);
  2715.  
  2716. switch (skb->ip_summed) {
  2717. case CHECKSUM_NONE:
  2718. skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
  2719. case CHECKSUM_COMPLETE:
  2720. - if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
  2721. + if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
  2722. skb->len - tcphoff,
  2723. - skb->nh.iph->protocol, skb->csum)) {
  2724. + ip_hdr(skb)->protocol, skb->csum)) {
  2725. IP_VS_DBG_RL_PKT(0, pp, skb, 0,
  2726. "Failed checksum for");
  2727. return 0;
  2728. @@ -467,8 +461,7 @@
  2729. {
  2730. struct tcphdr _tcph, *th;
  2731.  
  2732. - th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
  2733. - sizeof(_tcph), &_tcph);
  2734. + th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
  2735. if (th == NULL)
  2736. return 0;
  2737.  
  2738. @@ -555,7 +548,7 @@
  2739.  
  2740. IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
  2741. "%u.%u.%u.%u:%u to app %s on port %u\n",
  2742. - __FUNCTION__,
  2743. + __func__,
  2744. NIPQUAD(cp->caddr), ntohs(cp->cport),
  2745. NIPQUAD(cp->vaddr), ntohs(cp->vport),
  2746. inc->name, ntohs(inc->port));
  2747. @@ -599,6 +592,7 @@
  2748. struct ip_vs_protocol ip_vs_protocol_tcp = {
  2749. .name = "TCP",
  2750. .protocol = IPPROTO_TCP,
  2751. + .num_states = IP_VS_TCP_S_LAST,
  2752. .dont_defrag = 0,
  2753. .appcnt = ATOMIC_INIT(0),
  2754. .init = ip_vs_tcp_init,
  2755. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_udp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_udp.c
  2756. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_udp.c 2009-02-16 11:57:22.000000000 -0400
  2757. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_udp.c 2009-02-16 12:56:22.000000000 -0400
  2758. @@ -1,8 +1,6 @@
  2759. /*
  2760. * ip_vs_proto_udp.c: UDP load balancing support for IPVS
  2761. *
  2762. - * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
  2763. - *
  2764. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  2765. * Julian Anastasov <ja@ssi.bg>
  2766. *
  2767. @@ -18,11 +16,12 @@
  2768. #include <linux/in.h>
  2769. #include <linux/ip.h>
  2770. #include <linux/kernel.h>
  2771. +#include <linux/netfilter.h>
  2772. #include <linux/netfilter_ipv4.h>
  2773. #include <linux/udp.h>
  2774.  
  2775. #include <net/ip_vs.h>
  2776. -
  2777. +#include <net/ip.h>
  2778.  
  2779. static struct ip_vs_conn *
  2780. udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
  2781. @@ -56,7 +55,7 @@
  2782. struct ip_vs_conn *cp;
  2783. __be16 _ports[2], *pptr;
  2784.  
  2785. - pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
  2786. + pptr = skb_header_pointer(skb, ip_hdrlen(skb),
  2787. sizeof(_ports), _ports);
  2788. if (pptr == NULL)
  2789. return NULL;
  2790. @@ -82,15 +81,15 @@
  2791. struct ip_vs_service *svc;
  2792. struct udphdr _udph, *uh;
  2793.  
  2794. - uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
  2795. + uh = skb_header_pointer(skb, ip_hdrlen(skb),
  2796. sizeof(_udph), &_udph);
  2797. if (uh == NULL) {
  2798. *verdict = NF_DROP;
  2799. return 0;
  2800. }
  2801.  
  2802. - if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
  2803. - skb->nh.iph->daddr, uh->dest))) {
  2804. + if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
  2805. + ip_hdr(skb)->daddr, uh->dest))) {
  2806. if (ip_vs_todrop()) {
  2807. /*
  2808. * It seems that we are very loaded.
  2809. @@ -129,29 +128,29 @@
  2810. }
  2811.  
  2812. static int
  2813. -udp_snat_handler(struct sk_buff **pskb,
  2814. +udp_snat_handler(struct sk_buff *skb,
  2815. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  2816. {
  2817. struct udphdr *udph;
  2818. - unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
  2819. + const unsigned int udphoff = ip_hdrlen(skb);
  2820.  
  2821. /* csum_check requires unshared skb */
  2822. - if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
  2823. + if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
  2824. return 0;
  2825.  
  2826. if (unlikely(cp->app != NULL)) {
  2827. /* Some checks before mangling */
  2828. - if (pp->csum_check && !pp->csum_check(*pskb, pp))
  2829. + if (pp->csum_check && !pp->csum_check(skb, pp))
  2830. return 0;
  2831.  
  2832. /*
  2833. * Call application helper if needed
  2834. */
  2835. - if (!ip_vs_app_pkt_out(cp, pskb))
  2836. + if (!ip_vs_app_pkt_out(cp, skb))
  2837. return 0;
  2838. }
  2839.  
  2840. - udph = (void *)(*pskb)->nh.iph + udphoff;
  2841. + udph = (void *)ip_hdr(skb) + udphoff;
  2842. udph->source = cp->vport;
  2843.  
  2844. /*
  2845. @@ -161,17 +160,15 @@
  2846. /* Only port and addr are changed, do fast csum update */
  2847. udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
  2848. cp->dport, cp->vport);
  2849. - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
  2850. - (*pskb)->ip_summed = CHECKSUM_NONE;
  2851. + if (skb->ip_summed == CHECKSUM_COMPLETE)
  2852. + skb->ip_summed = CHECKSUM_NONE;
  2853. } else {
  2854. /* full checksum calculation */
  2855. udph->check = 0;
  2856. - (*pskb)->csum = skb_checksum(*pskb, udphoff,
  2857. - (*pskb)->len - udphoff, 0);
  2858. + skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
  2859. udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
  2860. - (*pskb)->len - udphoff,
  2861. - cp->protocol,
  2862. - (*pskb)->csum);
  2863. + skb->len - udphoff,
  2864. + cp->protocol, skb->csum);
  2865. if (udph->check == 0)
  2866. udph->check = CSUM_MANGLED_0;
  2867. IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
  2868. @@ -183,30 +180,30 @@
  2869.  
  2870.  
  2871. static int
  2872. -udp_dnat_handler(struct sk_buff **pskb,
  2873. +udp_dnat_handler(struct sk_buff *skb,
  2874. struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
  2875. {
  2876. struct udphdr *udph;
  2877. - unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
  2878. + unsigned int udphoff = ip_hdrlen(skb);
  2879.  
  2880. /* csum_check requires unshared skb */
  2881. - if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
  2882. + if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
  2883. return 0;
  2884.  
  2885. if (unlikely(cp->app != NULL)) {
  2886. /* Some checks before mangling */
  2887. - if (pp->csum_check && !pp->csum_check(*pskb, pp))
  2888. + if (pp->csum_check && !pp->csum_check(skb, pp))
  2889. return 0;
  2890.  
  2891. /*
  2892. * Attempt ip_vs_app call.
  2893. * It will fix ip_vs_conn
  2894. */
  2895. - if (!ip_vs_app_pkt_in(cp, pskb))
  2896. + if (!ip_vs_app_pkt_in(cp, skb))
  2897. return 0;
  2898. }
  2899.  
  2900. - udph = (void *)(*pskb)->nh.iph + udphoff;
  2901. + udph = (void *)ip_hdr(skb) + udphoff;
  2902. udph->dest = cp->dport;
  2903.  
  2904. /*
  2905. @@ -216,20 +213,18 @@
  2906. /* Only port and addr are changed, do fast csum update */
  2907. udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
  2908. cp->vport, cp->dport);
  2909. - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
  2910. - (*pskb)->ip_summed = CHECKSUM_NONE;
  2911. + if (skb->ip_summed == CHECKSUM_COMPLETE)
  2912. + skb->ip_summed = CHECKSUM_NONE;
  2913. } else {
  2914. /* full checksum calculation */
  2915. udph->check = 0;
  2916. - (*pskb)->csum = skb_checksum(*pskb, udphoff,
  2917. - (*pskb)->len - udphoff, 0);
  2918. + skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
  2919. udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
  2920. - (*pskb)->len - udphoff,
  2921. - cp->protocol,
  2922. - (*pskb)->csum);
  2923. + skb->len - udphoff,
  2924. + cp->protocol, skb->csum);
  2925. if (udph->check == 0)
  2926. udph->check = CSUM_MANGLED_0;
  2927. - (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
  2928. + skb->ip_summed = CHECKSUM_UNNECESSARY;
  2929. }
  2930. return 1;
  2931. }
  2932. @@ -239,7 +234,7 @@
  2933. udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
  2934. {
  2935. struct udphdr _udph, *uh;
  2936. - unsigned int udphoff = skb->nh.iph->ihl*4;
  2937. + const unsigned int udphoff = ip_hdrlen(skb);
  2938.  
  2939. uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
  2940. if (uh == NULL)
  2941. @@ -251,10 +246,10 @@
  2942. skb->csum = skb_checksum(skb, udphoff,
  2943. skb->len - udphoff, 0);
  2944. case CHECKSUM_COMPLETE:
  2945. - if (csum_tcpudp_magic(skb->nh.iph->saddr,
  2946. - skb->nh.iph->daddr,
  2947. + if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
  2948. + ip_hdr(skb)->daddr,
  2949. skb->len - udphoff,
  2950. - skb->nh.iph->protocol,
  2951. + ip_hdr(skb)->protocol,
  2952. skb->csum)) {
  2953. IP_VS_DBG_RL_PKT(0, pp, skb, 0,
  2954. "Failed checksum for");
  2955. @@ -347,7 +342,7 @@
  2956.  
  2957. IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
  2958. "%u.%u.%u.%u:%u to app %s on port %u\n",
  2959. - __FUNCTION__,
  2960. + __func__,
  2961. NIPQUAD(cp->caddr), ntohs(cp->cport),
  2962. NIPQUAD(cp->vaddr), ntohs(cp->vport),
  2963. inc->name, ntohs(inc->port));
  2964. @@ -412,6 +407,7 @@
  2965. struct ip_vs_protocol ip_vs_protocol_udp = {
  2966. .name = "UDP",
  2967. .protocol = IPPROTO_UDP,
  2968. + .num_states = IP_VS_UDP_S_LAST,
  2969. .dont_defrag = 0,
  2970. .init = udp_init,
  2971. .exit = udp_exit,
  2972. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_rr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_rr.c
  2973. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_rr.c 2009-02-16 11:57:22.000000000 -0400
  2974. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_rr.c 2009-02-16 12:56:22.000000000 -0400
  2975. @@ -1,8 +1,6 @@
  2976. /*
  2977. * IPVS: Round-Robin Scheduling module
  2978. *
  2979. - * Version: $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $
  2980. - *
  2981. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  2982. * Peter Kese <peter.kese@ijs.si>
  2983. *
  2984. @@ -68,7 +66,7 @@
  2985. q = q->next;
  2986. continue;
  2987. }
  2988. -
  2989. +
  2990. dest = list_entry(q, struct ip_vs_dest, n_list);
  2991. if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
  2992. atomic_read(&dest->weight) > 0)
  2993. @@ -96,6 +94,7 @@
  2994. .name = "rr", /* name */
  2995. .refcnt = ATOMIC_INIT(0),
  2996. .module = THIS_MODULE,
  2997. + .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
  2998. .init_service = ip_vs_rr_init_svc,
  2999. .done_service = ip_vs_rr_done_svc,
  3000. .update_service = ip_vs_rr_update_svc,
  3001. @@ -104,7 +103,6 @@
  3002.  
  3003. static int __init ip_vs_rr_init(void)
  3004. {
  3005. - INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list);
  3006. return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
  3007. }
  3008.  
  3009. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sched.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sched.c
  3010. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sched.c 2009-02-16 11:57:22.000000000 -0400
  3011. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sched.c 2009-02-16 12:56:22.000000000 -0400
  3012. @@ -5,8 +5,6 @@
  3013. * high-performance and highly available server based on a
  3014. * cluster of servers.
  3015. *
  3016. - * Version: $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $
  3017. - *
  3018. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  3019. * Peter Kese <peter.kese@ijs.si>
  3020. *
  3021. @@ -20,11 +18,11 @@
  3022. */
  3023.  
  3024. #include <linux/module.h>
  3025. -#include <linux/sched.h>
  3026. #include <linux/spinlock.h>
  3027. #include <linux/interrupt.h>
  3028. #include <asm/string.h>
  3029. #include <linux/kmod.h>
  3030. +#include <linux/sysctl.h>
  3031.  
  3032. #include <net/ip_vs.h>
  3033.  
  3034. @@ -184,22 +182,9 @@
  3035. /* increase the module use count */
  3036. ip_vs_use_count_inc();
  3037.  
  3038. - /*
  3039. - * Make sure that the scheduler with this name doesn't exist
  3040. - * in the scheduler list.
  3041. - */
  3042. - sched = ip_vs_sched_getbyname(scheduler->name);
  3043. - if (sched) {
  3044. - ip_vs_scheduler_put(sched);
  3045. - ip_vs_use_count_dec();
  3046. - IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
  3047. - "already existed in the system\n", scheduler->name);
  3048. - return -EINVAL;
  3049. - }
  3050. -
  3051. write_lock_bh(&__ip_vs_sched_lock);
  3052.  
  3053. - if (scheduler->n_list.next != &scheduler->n_list) {
  3054. + if (!list_empty(&scheduler->n_list)) {
  3055. write_unlock_bh(&__ip_vs_sched_lock);
  3056. ip_vs_use_count_dec();
  3057. IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
  3058. @@ -208,6 +193,20 @@
  3059. }
  3060.  
  3061. /*
  3062. + * Make sure that the scheduler with this name doesn't exist
  3063. + * in the scheduler list.
  3064. + */
  3065. + list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
  3066. + if (strcmp(scheduler->name, sched->name) == 0) {
  3067. + write_unlock_bh(&__ip_vs_sched_lock);
  3068. + ip_vs_use_count_dec();
  3069. + IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
  3070. + "already existed in the system\n",
  3071. + scheduler->name);
  3072. + return -EINVAL;
  3073. + }
  3074. + }
  3075. + /*
  3076. * Add it into the d-linked scheduler list
  3077. */
  3078. list_add(&scheduler->n_list, &ip_vs_schedulers);
  3079. @@ -230,7 +229,7 @@
  3080. }
  3081.  
  3082. write_lock_bh(&__ip_vs_sched_lock);
  3083. - if (scheduler->n_list.next == &scheduler->n_list) {
  3084. + if (list_empty(&scheduler->n_list)) {
  3085. write_unlock_bh(&__ip_vs_sched_lock);
  3086. IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
  3087. "is not in the list. failed\n", scheduler->name);
  3088. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sed.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sed.c
  3089. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sed.c 2009-02-16 11:57:22.000000000 -0400
  3090. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sed.c 2009-02-16 12:56:22.000000000 -0400
  3091. @@ -1,8 +1,6 @@
  3092. /*
  3093. * IPVS: Shortest Expected Delay scheduling module
  3094. *
  3095. - * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $
  3096. - *
  3097. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  3098. *
  3099. * This program is free software; you can redistribute it and/or
  3100. @@ -18,7 +16,7 @@
  3101. * The SED algorithm attempts to minimize each job's expected delay until
  3102. * completion. The expected delay that the job will experience is
  3103. * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
  3104. - * jobs on the the ith server and Ui is the fixed service rate (weight) of
  3105. + * jobs on the ith server and Ui is the fixed service rate (weight) of
  3106. * the ith server. The SED algorithm adopts a greedy policy that each does
  3107. * what is in its own best interest, i.e. to join the queue which would
  3108. * minimize its expected delay of completion.
  3109. @@ -140,6 +138,7 @@
  3110. .name = "sed",
  3111. .refcnt = ATOMIC_INIT(0),
  3112. .module = THIS_MODULE,
  3113. + .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
  3114. .init_service = ip_vs_sed_init_svc,
  3115. .done_service = ip_vs_sed_done_svc,
  3116. .update_service = ip_vs_sed_update_svc,
  3117. @@ -149,7 +148,6 @@
  3118.  
  3119. static int __init ip_vs_sed_init(void)
  3120. {
  3121. - INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list);
  3122. return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
  3123. }
  3124.  
  3125. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sh.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sh.c
  3126. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sh.c 2009-02-16 11:57:22.000000000 -0400
  3127. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sh.c 2009-02-16 12:56:22.000000000 -0400
  3128. @@ -1,8 +1,6 @@
  3129. /*
  3130. * IPVS: Source Hashing scheduling module
  3131. *
  3132. - * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
  3133. - *
  3134. * Authors: Wensong Zhang <wensong@gnuchina.org>
  3135. *
  3136. * This program is free software; you can redistribute it and/or
  3137. @@ -201,7 +199,7 @@
  3138. {
  3139. struct ip_vs_dest *dest;
  3140. struct ip_vs_sh_bucket *tbl;
  3141. - struct iphdr *iph = skb->nh.iph;
  3142. + struct iphdr *iph = ip_hdr(skb);
  3143.  
  3144. IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
  3145.  
  3146. @@ -232,6 +230,7 @@
  3147. .name = "sh",
  3148. .refcnt = ATOMIC_INIT(0),
  3149. .module = THIS_MODULE,
  3150. + .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
  3151. .init_service = ip_vs_sh_init_svc,
  3152. .done_service = ip_vs_sh_done_svc,
  3153. .update_service = ip_vs_sh_update_svc,
  3154. @@ -241,7 +240,6 @@
  3155.  
  3156. static int __init ip_vs_sh_init(void)
  3157. {
  3158. - INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list);
  3159. return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
  3160. }
  3161.  
  3162. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sync.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sync.c
  3163. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sync.c 2009-02-16 11:57:22.000000000 -0400
  3164. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sync.c 2009-02-16 12:56:22.000000000 -0400
  3165. @@ -5,8 +5,6 @@
  3166. * high-performance and highly available server based on a
  3167. * cluster of servers.
  3168. *
  3169. - * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $
  3170. - *
  3171. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  3172. *
  3173. * ip_vs_sync: sync connection info from master load balancer to backups
  3174. @@ -29,10 +27,12 @@
  3175. #include <linux/in.h>
  3176. #include <linux/igmp.h> /* for ip_mc_join_group */
  3177. #include <linux/udp.h>
  3178. +#include <linux/err.h>
  3179. +#include <linux/kthread.h>
  3180. +#include <linux/wait.h>
  3181.  
  3182. #include <net/ip.h>
  3183. #include <net/sock.h>
  3184. -#include <asm/uaccess.h> /* for get_fs and set_fs */
  3185.  
  3186. #include <net/ip_vs.h>
  3187.  
  3188. @@ -67,7 +67,11 @@
  3189. struct ip_vs_seq out_seq; /* outgoing seq. struct */
  3190. };
  3191.  
  3192. -#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ)
  3193. +struct ip_vs_sync_thread_data {
  3194. + struct socket *sock;
  3195. + char *buf;
  3196. +};
  3197. +
  3198. #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
  3199. #define FULL_CONN_SIZE \
  3200. (sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
  3201. @@ -136,18 +140,19 @@
  3202. char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
  3203. char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
  3204.  
  3205. -/* multicast addr */
  3206. -static struct sockaddr_in mcast_addr;
  3207. +/* sync daemon tasks */
  3208. +static struct task_struct *sync_master_thread;
  3209. +static struct task_struct *sync_backup_thread;
  3210.  
  3211. +/* multicast addr */
  3212. +static struct sockaddr_in mcast_addr = {
  3213. + .sin_family = AF_INET,
  3214. + .sin_port = __constant_htons(IP_VS_SYNC_PORT),
  3215. + .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP),
  3216. +};
  3217.  
  3218. -static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
  3219. -{
  3220. - spin_lock(&ip_vs_sync_lock);
  3221. - list_add_tail(&sb->list, &ip_vs_sync_queue);
  3222. - spin_unlock(&ip_vs_sync_lock);
  3223. -}
  3224.  
  3225. -static inline struct ip_vs_sync_buff * sb_dequeue(void)
  3226. +static inline struct ip_vs_sync_buff *sb_dequeue(void)
  3227. {
  3228. struct ip_vs_sync_buff *sb;
  3229.  
  3230. @@ -191,6 +196,16 @@
  3231. kfree(sb);
  3232. }
  3233.  
  3234. +static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
  3235. +{
  3236. + spin_lock(&ip_vs_sync_lock);
  3237. + if (ip_vs_sync_state & IP_VS_STATE_MASTER)
  3238. + list_add_tail(&sb->list, &ip_vs_sync_queue);
  3239. + else
  3240. + ip_vs_sync_buff_release(sb);
  3241. + spin_unlock(&ip_vs_sync_lock);
  3242. +}
  3243. +
  3244. /*
  3245. * Get the current sync buffer if it has been created for more
  3246. * than the specified time or the specified time is zero.
  3247. @@ -279,14 +294,21 @@
  3248. struct ip_vs_sync_conn *s;
  3249. struct ip_vs_sync_conn_options *opt;
  3250. struct ip_vs_conn *cp;
  3251. + struct ip_vs_protocol *pp;
  3252. + struct ip_vs_dest *dest;
  3253. char *p;
  3254. int i;
  3255.  
  3256. + if (buflen < sizeof(struct ip_vs_sync_mesg)) {
  3257. + IP_VS_ERR_RL("sync message header too short\n");
  3258. + return;
  3259. + }
  3260. +
  3261. /* Convert size back to host byte order */
  3262. m->size = ntohs(m->size);
  3263.  
  3264. if (buflen != m->size) {
  3265. - IP_VS_ERR("bogus message\n");
  3266. + IP_VS_ERR_RL("bogus sync message size\n");
  3267. return;
  3268. }
  3269.  
  3270. @@ -299,10 +321,50 @@
  3271.  
  3272. p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
  3273. for (i=0; i<m->nr_conns; i++) {
  3274. - unsigned flags;
  3275. + unsigned flags, state;
  3276. +
  3277. + if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
  3278. + IP_VS_ERR_RL("bogus conn in sync message\n");
  3279. + return;
  3280. + }
  3281. + s = (struct ip_vs_sync_conn *) p;
  3282. + flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
  3283. + flags &= ~IP_VS_CONN_F_HASHED;
  3284. + if (flags & IP_VS_CONN_F_SEQ_MASK) {
  3285. + opt = (struct ip_vs_sync_conn_options *)&s[1];
  3286. + p += FULL_CONN_SIZE;
  3287. + if (p > buffer+buflen) {
  3288. + IP_VS_ERR_RL("bogus conn options in sync message\n");
  3289. + return;
  3290. + }
  3291. + } else {
  3292. + opt = NULL;
  3293. + p += SIMPLE_CONN_SIZE;
  3294. + }
  3295. +
  3296. + state = ntohs(s->state);
  3297. + if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
  3298. + pp = ip_vs_proto_get(s->protocol);
  3299. + if (!pp) {
  3300. + IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
  3301. + s->protocol);
  3302. + continue;
  3303. + }
  3304. + if (state >= pp->num_states) {
  3305. + IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
  3306. + pp->name, state);
  3307. + continue;
  3308. + }
  3309. + } else {
  3310. + /* protocol in templates is not used for state/timeout */
  3311. + pp = NULL;
  3312. + if (state > 0) {
  3313. + IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
  3314. + state);
  3315. + state = 0;
  3316. + }
  3317. + }
  3318.  
  3319. - s = (struct ip_vs_sync_conn *)p;
  3320. - flags = ntohs(s->flags);
  3321. if (!(flags & IP_VS_CONN_F_TEMPLATE))
  3322. cp = ip_vs_conn_in_get(s->protocol,
  3323. s->caddr, s->cport,
  3324. @@ -312,38 +374,69 @@
  3325. s->caddr, s->cport,
  3326. s->vaddr, s->vport);
  3327. if (!cp) {
  3328. + /*
  3329. + * Find the appropriate destination for the connection.
  3330. + * If it is not found the connection will remain unbound
  3331. + * but still handled.
  3332. + */
  3333. + dest = ip_vs_find_dest(s->daddr, s->dport,
  3334. + s->vaddr, s->vport,
  3335. + s->protocol);
  3336. + /* Set the approprite ativity flag */
  3337. + if (s->protocol == IPPROTO_TCP) {
  3338. + if (state != IP_VS_TCP_S_ESTABLISHED)
  3339. + flags |= IP_VS_CONN_F_INACTIVE;
  3340. + else
  3341. + flags &= ~IP_VS_CONN_F_INACTIVE;
  3342. + }
  3343. cp = ip_vs_conn_new(s->protocol,
  3344. s->caddr, s->cport,
  3345. s->vaddr, s->vport,
  3346. s->daddr, s->dport,
  3347. - flags, NULL);
  3348. + flags, dest);
  3349. + if (dest)
  3350. + atomic_dec(&dest->refcnt);
  3351. if (!cp) {
  3352. IP_VS_ERR("ip_vs_conn_new failed\n");
  3353. return;
  3354. }
  3355. - cp->state = ntohs(s->state);
  3356. } else if (!cp->dest) {
  3357. - /* it is an entry created by the synchronization */
  3358. - cp->state = ntohs(s->state);
  3359. - cp->flags = flags | IP_VS_CONN_F_HASHED;
  3360. - } /* Note that we don't touch its state and flags
  3361. - if it is a normal entry. */
  3362. + dest = ip_vs_try_bind_dest(cp);
  3363. + if (dest)
  3364. + atomic_dec(&dest->refcnt);
  3365. + } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
  3366. + (cp->state != state)) {
  3367. + /* update active/inactive flag for the connection */
  3368. + dest = cp->dest;
  3369. + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
  3370. + (state != IP_VS_TCP_S_ESTABLISHED)) {
  3371. + atomic_dec(&dest->activeconns);
  3372. + atomic_inc(&dest->inactconns);
  3373. + cp->flags |= IP_VS_CONN_F_INACTIVE;
  3374. + } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
  3375. + (state == IP_VS_TCP_S_ESTABLISHED)) {
  3376. + atomic_inc(&dest->activeconns);
  3377. + atomic_dec(&dest->inactconns);
  3378. + cp->flags &= ~IP_VS_CONN_F_INACTIVE;
  3379. + }
  3380. + }
  3381.  
  3382. - if (flags & IP_VS_CONN_F_SEQ_MASK) {
  3383. - opt = (struct ip_vs_sync_conn_options *)&s[1];
  3384. + if (opt)
  3385. memcpy(&cp->in_seq, opt, sizeof(*opt));
  3386. - p += FULL_CONN_SIZE;
  3387. - } else
  3388. - p += SIMPLE_CONN_SIZE;
  3389. -
  3390. atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
  3391. - cp->timeout = IP_VS_SYNC_CONN_TIMEOUT;
  3392. + cp->state = state;
  3393. + cp->old_state = cp->state;
  3394. + /*
  3395. + * We can not recover the right timeout for templates
  3396. + * in all cases, we can not find the right fwmark
  3397. + * virtual service. If needed, we can do it for
  3398. + * non-fwmark persistent services.
  3399. + */
  3400. + if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
  3401. + cp->timeout = pp->timeout_table[state];
  3402. + else
  3403. + cp->timeout = (3*60*HZ);
  3404. ip_vs_conn_put(cp);
  3405. -
  3406. - if (p > buffer+buflen) {
  3407. - IP_VS_ERR("bogus message\n");
  3408. - return;
  3409. - }
  3410. }
  3411. }
  3412.  
  3413. @@ -382,7 +475,7 @@
  3414. struct net_device *dev;
  3415. struct inet_sock *inet = inet_sk(sk);
  3416.  
  3417. - if ((dev = __dev_get_by_name(ifname)) == NULL)
  3418. + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
  3419. return -ENODEV;
  3420.  
  3421. if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
  3422. @@ -407,7 +500,7 @@
  3423. int num;
  3424.  
  3425. if (sync_state == IP_VS_STATE_MASTER) {
  3426. - if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL)
  3427. + if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
  3428. return -ENODEV;
  3429.  
  3430. num = (dev->mtu - sizeof(struct iphdr) -
  3431. @@ -418,7 +511,7 @@
  3432. IP_VS_DBG(7, "setting the maximum length of sync sending "
  3433. "message %d.\n", sync_send_mesg_maxlen);
  3434. } else if (sync_state == IP_VS_STATE_BACKUP) {
  3435. - if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL)
  3436. + if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
  3437. return -ENODEV;
  3438.  
  3439. sync_recv_mesg_maxlen = dev->mtu -
  3440. @@ -446,7 +539,7 @@
  3441. memset(&mreq, 0, sizeof(mreq));
  3442. memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
  3443.  
  3444. - if ((dev = __dev_get_by_name(ifname)) == NULL)
  3445. + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
  3446. return -ENODEV;
  3447. if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
  3448. return -EINVAL;
  3449. @@ -467,7 +560,7 @@
  3450. __be32 addr;
  3451. struct sockaddr_in sin;
  3452.  
  3453. - if ((dev = __dev_get_by_name(ifname)) == NULL)
  3454. + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
  3455. return -ENODEV;
  3456.  
  3457. addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
  3458. @@ -492,14 +585,17 @@
  3459. static struct socket * make_send_sock(void)
  3460. {
  3461. struct socket *sock;
  3462. + int result;
  3463.  
  3464. /* First create a socket */
  3465. - if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
  3466. + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
  3467. + if (result < 0) {
  3468. IP_VS_ERR("Error during creation of socket; terminating\n");
  3469. - return NULL;
  3470. + return ERR_PTR(result);
  3471. }
  3472.  
  3473. - if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) {
  3474. + result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
  3475. + if (result < 0) {
  3476. IP_VS_ERR("Error setting outbound mcast interface\n");
  3477. goto error;
  3478. }
  3479. @@ -507,14 +603,15 @@
  3480. set_mcast_loop(sock->sk, 0);
  3481. set_mcast_ttl(sock->sk, 1);
  3482.  
  3483. - if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) {
  3484. + result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
  3485. + if (result < 0) {
  3486. IP_VS_ERR("Error binding address of the mcast interface\n");
  3487. goto error;
  3488. }
  3489.  
  3490. - if (sock->ops->connect(sock,
  3491. - (struct sockaddr*)&mcast_addr,
  3492. - sizeof(struct sockaddr), 0) < 0) {
  3493. + result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
  3494. + sizeof(struct sockaddr), 0);
  3495. + if (result < 0) {
  3496. IP_VS_ERR("Error connecting to the multicast addr\n");
  3497. goto error;
  3498. }
  3499. @@ -523,7 +620,7 @@
  3500.  
  3501. error:
  3502. sock_release(sock);
  3503. - return NULL;
  3504. + return ERR_PTR(result);
  3505. }
  3506.  
  3507.  
  3508. @@ -533,27 +630,30 @@
  3509. static struct socket * make_receive_sock(void)
  3510. {
  3511. struct socket *sock;
  3512. + int result;
  3513.  
  3514. /* First create a socket */
  3515. - if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
  3516. + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
  3517. + if (result < 0) {
  3518. IP_VS_ERR("Error during creation of socket; terminating\n");
  3519. - return NULL;
  3520. + return ERR_PTR(result);
  3521. }
  3522.  
  3523. /* it is equivalent to the REUSEADDR option in user-space */
  3524. sock->sk->sk_reuse = 1;
  3525.  
  3526. - if (sock->ops->bind(sock,
  3527. - (struct sockaddr*)&mcast_addr,
  3528. - sizeof(struct sockaddr)) < 0) {
  3529. + result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
  3530. + sizeof(struct sockaddr));
  3531. + if (result < 0) {
  3532. IP_VS_ERR("Error binding to the multicast addr\n");
  3533. goto error;
  3534. }
  3535.  
  3536. /* join the multicast group */
  3537. - if (join_mcast_group(sock->sk,
  3538. - (struct in_addr*)&mcast_addr.sin_addr,
  3539. - ip_vs_backup_mcast_ifn) < 0) {
  3540. + result = join_mcast_group(sock->sk,
  3541. + (struct in_addr *) &mcast_addr.sin_addr,
  3542. + ip_vs_backup_mcast_ifn);
  3543. + if (result < 0) {
  3544. IP_VS_ERR("Error joining to the multicast group\n");
  3545. goto error;
  3546. }
  3547. @@ -562,7 +662,7 @@
  3548.  
  3549. error:
  3550. sock_release(sock);
  3551. - return NULL;
  3552. + return ERR_PTR(result);
  3553. }
  3554.  
  3555.  
  3556. @@ -620,44 +720,29 @@
  3557. }
  3558.  
  3559.  
  3560. -static DECLARE_WAIT_QUEUE_HEAD(sync_wait);
  3561. -static pid_t sync_master_pid = 0;
  3562. -static pid_t sync_backup_pid = 0;
  3563. -
  3564. -static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);
  3565. -static int stop_master_sync = 0;
  3566. -static int stop_backup_sync = 0;
  3567. -
  3568. -static void sync_master_loop(void)
  3569. +static int sync_thread_master(void *data)
  3570. {
  3571. - struct socket *sock;
  3572. + struct ip_vs_sync_thread_data *tinfo = data;
  3573. struct ip_vs_sync_buff *sb;
  3574.  
  3575. - /* create the sending multicast socket */
  3576. - sock = make_send_sock();
  3577. - if (!sock)
  3578. - return;
  3579. -
  3580. IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
  3581. "syncid = %d\n",
  3582. ip_vs_master_mcast_ifn, ip_vs_master_syncid);
  3583.  
  3584. - for (;;) {
  3585. - while ((sb=sb_dequeue())) {
  3586. - ip_vs_send_sync_msg(sock, sb->mesg);
  3587. + while (!kthread_should_stop()) {
  3588. + while ((sb = sb_dequeue())) {
  3589. + ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
  3590. ip_vs_sync_buff_release(sb);
  3591. }
  3592.  
  3593. /* check if entries stay in curr_sb for 2 seconds */
  3594. - if ((sb = get_curr_sync_buff(2*HZ))) {
  3595. - ip_vs_send_sync_msg(sock, sb->mesg);
  3596. + sb = get_curr_sync_buff(2 * HZ);
  3597. + if (sb) {
  3598. + ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
  3599. ip_vs_sync_buff_release(sb);
  3600. }
  3601.  
  3602. - if (stop_master_sync)
  3603. - break;
  3604. -
  3605. - msleep_interruptible(1000);
  3606. + schedule_timeout_interruptible(HZ);
  3607. }
  3608.  
  3609. /* clean up the sync_buff queue */
  3610. @@ -671,235 +756,175 @@
  3611. }
  3612.  
  3613. /* release the sending multicast socket */
  3614. - sock_release(sock);
  3615. + sock_release(tinfo->sock);
  3616. + kfree(tinfo);
  3617. +
  3618. + return 0;
  3619. }
  3620.  
  3621.  
  3622. -static void sync_backup_loop(void)
  3623. +static int sync_thread_backup(void *data)
  3624. {
  3625. - struct socket *sock;
  3626. - char *buf;
  3627. + struct ip_vs_sync_thread_data *tinfo = data;
  3628. int len;
  3629.  
  3630. - if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) {
  3631. - IP_VS_ERR("sync_backup_loop: kmalloc error\n");
  3632. - return;
  3633. - }
  3634. -
  3635. - /* create the receiving multicast socket */
  3636. - sock = make_receive_sock();
  3637. - if (!sock)
  3638. - goto out;
  3639. -
  3640. IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
  3641. "syncid = %d\n",
  3642. ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
  3643.  
  3644. - for (;;) {
  3645. - /* do you have data now? */
  3646. - while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) {
  3647. - if ((len =
  3648. - ip_vs_receive(sock, buf,
  3649. - sync_recv_mesg_maxlen)) <= 0) {
  3650. + while (!kthread_should_stop()) {
  3651. + wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
  3652. + !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
  3653. + || kthread_should_stop());
  3654. +
  3655. + /* do we have data now? */
  3656. + while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
  3657. + len = ip_vs_receive(tinfo->sock, tinfo->buf,
  3658. + sync_recv_mesg_maxlen);
  3659. + if (len <= 0) {
  3660. IP_VS_ERR("receiving message error\n");
  3661. break;
  3662. }
  3663. - /* disable bottom half, because it accessed the data
  3664. +
  3665. + /* disable bottom half, because it accesses the data
  3666. shared by softirq while getting/creating conns */
  3667. local_bh_disable();
  3668. - ip_vs_process_message(buf, len);
  3669. + ip_vs_process_message(tinfo->buf, len);
  3670. local_bh_enable();
  3671. }
  3672. -
  3673. - if (stop_backup_sync)
  3674. - break;
  3675. -
  3676. - msleep_interruptible(1000);
  3677. }
  3678.  
  3679. /* release the sending multicast socket */
  3680. - sock_release(sock);
  3681. + sock_release(tinfo->sock);
  3682. + kfree(tinfo->buf);
  3683. + kfree(tinfo);
  3684.  
  3685. - out:
  3686. - kfree(buf);
  3687. + return 0;
  3688. }
  3689.  
  3690.  
  3691. -static void set_sync_pid(int sync_state, pid_t sync_pid)
  3692. -{
  3693. - if (sync_state == IP_VS_STATE_MASTER)
  3694. - sync_master_pid = sync_pid;
  3695. - else if (sync_state == IP_VS_STATE_BACKUP)
  3696. - sync_backup_pid = sync_pid;
  3697. -}
  3698. -
  3699. -static void set_stop_sync(int sync_state, int set)
  3700. +int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
  3701. {
  3702. - if (sync_state == IP_VS_STATE_MASTER)
  3703. - stop_master_sync = set;
  3704. - else if (sync_state == IP_VS_STATE_BACKUP)
  3705. - stop_backup_sync = set;
  3706. - else {
  3707. - stop_master_sync = set;
  3708. - stop_backup_sync = set;
  3709. - }
  3710. -}
  3711. + struct ip_vs_sync_thread_data *tinfo;
  3712. + struct task_struct **realtask, *task;
  3713. + struct socket *sock;
  3714. + char *name, *buf = NULL;
  3715. + int (*threadfn)(void *data);
  3716. + int result = -ENOMEM;
  3717.  
  3718. -static int sync_thread(void *startup)
  3719. -{
  3720. - DECLARE_WAITQUEUE(wait, current);
  3721. - mm_segment_t oldmm;
  3722. - int state;
  3723. - const char *name;
  3724. + IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
  3725. + IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
  3726. + sizeof(struct ip_vs_sync_conn));
  3727.  
  3728. - /* increase the module use count */
  3729. - ip_vs_use_count_inc();
  3730. + if (state == IP_VS_STATE_MASTER) {
  3731. + if (sync_master_thread)
  3732. + return -EEXIST;
  3733.  
  3734. - if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) {
  3735. - state = IP_VS_STATE_MASTER;
  3736. + strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
  3737. + sizeof(ip_vs_master_mcast_ifn));
  3738. + ip_vs_master_syncid = syncid;
  3739. + realtask = &sync_master_thread;
  3740. name = "ipvs_syncmaster";
  3741. - } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) {
  3742. - state = IP_VS_STATE_BACKUP;
  3743. + threadfn = sync_thread_master;
  3744. + sock = make_send_sock();
  3745. + } else if (state == IP_VS_STATE_BACKUP) {
  3746. + if (sync_backup_thread)
  3747. + return -EEXIST;
  3748. +
  3749. + strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
  3750. + sizeof(ip_vs_backup_mcast_ifn));
  3751. + ip_vs_backup_syncid = syncid;
  3752. + realtask = &sync_backup_thread;
  3753. name = "ipvs_syncbackup";
  3754. + threadfn = sync_thread_backup;
  3755. + sock = make_receive_sock();
  3756. } else {
  3757. - IP_VS_BUG();
  3758. - ip_vs_use_count_dec();
  3759. return -EINVAL;
  3760. }
  3761.  
  3762. - daemonize(name);
  3763. -
  3764. - oldmm = get_fs();
  3765. - set_fs(KERNEL_DS);
  3766. -
  3767. - /* Block all signals */
  3768. - spin_lock_irq(&current->sighand->siglock);
  3769. - siginitsetinv(&current->blocked, 0);
  3770. - recalc_sigpending();
  3771. - spin_unlock_irq(&current->sighand->siglock);
  3772. + if (IS_ERR(sock)) {
  3773. + result = PTR_ERR(sock);
  3774. + goto out;
  3775. + }
  3776.  
  3777. - /* set the maximum length of sync message */
  3778. set_sync_mesg_maxlen(state);
  3779. + if (state == IP_VS_STATE_BACKUP) {
  3780. + buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
  3781. + if (!buf)
  3782. + goto outsocket;
  3783. + }
  3784.  
  3785. - /* set up multicast address */
  3786. - mcast_addr.sin_family = AF_INET;
  3787. - mcast_addr.sin_port = htons(IP_VS_SYNC_PORT);
  3788. - mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP);
  3789. -
  3790. - add_wait_queue(&sync_wait, &wait);
  3791. -
  3792. - set_sync_pid(state, current->pid);
  3793. - complete((struct completion *)startup);
  3794. -
  3795. - /* processing master/backup loop here */
  3796. - if (state == IP_VS_STATE_MASTER)
  3797. - sync_master_loop();
  3798. - else if (state == IP_VS_STATE_BACKUP)
  3799. - sync_backup_loop();
  3800. - else IP_VS_BUG();
  3801. -
  3802. - remove_wait_queue(&sync_wait, &wait);
  3803. -
  3804. - /* thread exits */
  3805. - set_sync_pid(state, 0);
  3806. - IP_VS_INFO("sync thread stopped!\n");
  3807. -
  3808. - set_fs(oldmm);
  3809. -
  3810. - /* decrease the module use count */
  3811. - ip_vs_use_count_dec();
  3812. -
  3813. - set_stop_sync(state, 0);
  3814. - wake_up(&stop_sync_wait);
  3815. + tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
  3816. + if (!tinfo)
  3817. + goto outbuf;
  3818.  
  3819. - return 0;
  3820. -}
  3821. + tinfo->sock = sock;
  3822. + tinfo->buf = buf;
  3823.  
  3824. + task = kthread_run(threadfn, tinfo, name);
  3825. + if (IS_ERR(task)) {
  3826. + result = PTR_ERR(task);
  3827. + goto outtinfo;
  3828. + }
  3829.  
  3830. -static int fork_sync_thread(void *startup)
  3831. -{
  3832. - pid_t pid;
  3833. + /* mark as active */
  3834. + *realtask = task;
  3835. + ip_vs_sync_state |= state;
  3836.  
  3837. - /* fork the sync thread here, then the parent process of the
  3838. - sync thread is the init process after this thread exits. */
  3839. - repeat:
  3840. - if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
  3841. - IP_VS_ERR("could not create sync_thread due to %d... "
  3842. - "retrying.\n", pid);
  3843. - msleep_interruptible(1000);
  3844. - goto repeat;
  3845. - }
  3846. + /* increase the module use count */
  3847. + ip_vs_use_count_inc();
  3848.  
  3849. return 0;
  3850. +
  3851. +outtinfo:
  3852. + kfree(tinfo);
  3853. +outbuf:
  3854. + kfree(buf);
  3855. +outsocket:
  3856. + sock_release(sock);
  3857. +out:
  3858. + return result;
  3859. }
  3860.  
  3861.  
  3862. -int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
  3863. +int stop_sync_thread(int state)
  3864. {
  3865. - DECLARE_COMPLETION_ONSTACK(startup);
  3866. - pid_t pid;
  3867. + IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
  3868.  
  3869. - if ((state == IP_VS_STATE_MASTER && sync_master_pid) ||
  3870. - (state == IP_VS_STATE_BACKUP && sync_backup_pid))
  3871. - return -EEXIST;
  3872. + if (state == IP_VS_STATE_MASTER) {
  3873. + if (!sync_master_thread)
  3874. + return -ESRCH;
  3875.  
  3876. - IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
  3877. - IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
  3878. - sizeof(struct ip_vs_sync_conn));
  3879. + IP_VS_INFO("stopping master sync thread %d ...\n",
  3880. + task_pid_nr(sync_master_thread));
  3881.  
  3882. - ip_vs_sync_state |= state;
  3883. - if (state == IP_VS_STATE_MASTER) {
  3884. - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
  3885. - sizeof(ip_vs_master_mcast_ifn));
  3886. - ip_vs_master_syncid = syncid;
  3887. + /*
  3888. + * The lock synchronizes with sb_queue_tail(), so that we don't
  3889. + * add sync buffers to the queue, when we are already in
  3890. + * progress of stopping the master sync daemon.
  3891. + */
  3892. +
  3893. + spin_lock_bh(&ip_vs_sync_lock);
  3894. + ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
  3895. + spin_unlock_bh(&ip_vs_sync_lock);
  3896. + kthread_stop(sync_master_thread);
  3897. + sync_master_thread = NULL;
  3898. + } else if (state == IP_VS_STATE_BACKUP) {
  3899. + if (!sync_backup_thread)
  3900. + return -ESRCH;
  3901. +
  3902. + IP_VS_INFO("stopping backup sync thread %d ...\n",
  3903. + task_pid_nr(sync_backup_thread));
  3904. +
  3905. + ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
  3906. + kthread_stop(sync_backup_thread);
  3907. + sync_backup_thread = NULL;
  3908. } else {
  3909. - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
  3910. - sizeof(ip_vs_backup_mcast_ifn));
  3911. - ip_vs_backup_syncid = syncid;
  3912. - }
  3913. -
  3914. - repeat:
  3915. - if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) {
  3916. - IP_VS_ERR("could not create fork_sync_thread due to %d... "
  3917. - "retrying.\n", pid);
  3918. - msleep_interruptible(1000);
  3919. - goto repeat;
  3920. + return -EINVAL;
  3921. }
  3922.  
  3923. - wait_for_completion(&startup);
  3924. -
  3925. - return 0;
  3926. -}
  3927. -
  3928. -
  3929. -int stop_sync_thread(int state)
  3930. -{
  3931. - DECLARE_WAITQUEUE(wait, current);
  3932. -
  3933. - if ((state == IP_VS_STATE_MASTER && !sync_master_pid) ||
  3934. - (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
  3935. - return -ESRCH;
  3936. -
  3937. - IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
  3938. - IP_VS_INFO("stopping sync thread %d ...\n",
  3939. - (state == IP_VS_STATE_MASTER) ?
  3940. - sync_master_pid : sync_backup_pid);
  3941. -
  3942. - __set_current_state(TASK_UNINTERRUPTIBLE);
  3943. - add_wait_queue(&stop_sync_wait, &wait);
  3944. - set_stop_sync(state, 1);
  3945. - ip_vs_sync_state -= state;
  3946. - wake_up(&sync_wait);
  3947. - schedule();
  3948. - __set_current_state(TASK_RUNNING);
  3949. - remove_wait_queue(&stop_sync_wait, &wait);
  3950. -
  3951. - /* Note: no need to reap the sync thread, because its parent
  3952. - process is the init process */
  3953. -
  3954. - if ((state == IP_VS_STATE_MASTER && stop_master_sync) ||
  3955. - (state == IP_VS_STATE_BACKUP && stop_backup_sync))
  3956. - IP_VS_BUG();
  3957. + /* decrease the module use count */
  3958. + ip_vs_use_count_dec();
  3959.  
  3960. return 0;
  3961. }
  3962. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wlc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wlc.c
  3963. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wlc.c 2009-02-16 11:57:22.000000000 -0400
  3964. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wlc.c 2009-02-16 12:56:22.000000000 -0400
  3965. @@ -1,8 +1,6 @@
  3966. /*
  3967. * IPVS: Weighted Least-Connection Scheduling module
  3968. *
  3969. - * Version: $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $
  3970. - *
  3971. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  3972. * Peter Kese <peter.kese@ijs.si>
  3973. *
  3974. @@ -128,6 +126,7 @@
  3975. .name = "wlc",
  3976. .refcnt = ATOMIC_INIT(0),
  3977. .module = THIS_MODULE,
  3978. + .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
  3979. .init_service = ip_vs_wlc_init_svc,
  3980. .done_service = ip_vs_wlc_done_svc,
  3981. .update_service = ip_vs_wlc_update_svc,
  3982. @@ -137,7 +136,6 @@
  3983.  
  3984. static int __init ip_vs_wlc_init(void)
  3985. {
  3986. - INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list);
  3987. return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
  3988. }
  3989.  
  3990. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wrr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wrr.c
  3991. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wrr.c 2009-02-16 11:57:22.000000000 -0400
  3992. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wrr.c 2009-02-16 12:56:22.000000000 -0400
  3993. @@ -1,8 +1,6 @@
  3994. /*
  3995. * IPVS: Weighted Round-Robin Scheduling module
  3996. *
  3997. - * Version: $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $
  3998. - *
  3999. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  4000. *
  4001. * This program is free software; you can redistribute it and/or
  4002. @@ -22,6 +20,7 @@
  4003.  
  4004. #include <linux/module.h>
  4005. #include <linux/kernel.h>
  4006. +#include <linux/net.h>
  4007.  
  4008. #include <net/ip_vs.h>
  4009.  
  4010. @@ -169,7 +168,7 @@
  4011. */
  4012. if (mark->cw == 0) {
  4013. mark->cl = &svc->destinations;
  4014. - IP_VS_INFO("ip_vs_wrr_schedule(): "
  4015. + IP_VS_ERR_RL("ip_vs_wrr_schedule(): "
  4016. "no available servers\n");
  4017. dest = NULL;
  4018. goto out;
  4019. @@ -213,6 +212,7 @@
  4020. .name = "wrr",
  4021. .refcnt = ATOMIC_INIT(0),
  4022. .module = THIS_MODULE,
  4023. + .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
  4024. .init_service = ip_vs_wrr_init_svc,
  4025. .done_service = ip_vs_wrr_done_svc,
  4026. .update_service = ip_vs_wrr_update_svc,
  4027. @@ -221,7 +221,6 @@
  4028.  
  4029. static int __init ip_vs_wrr_init(void)
  4030. {
  4031. - INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list);
  4032. return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ;
  4033. }
  4034.  
  4035. diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_xmit.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_xmit.c
  4036. --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_xmit.c 2009-02-16 11:57:22.000000000 -0400
  4037. +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_xmit.c 2009-02-16 12:56:22.000000000 -0400
  4038. @@ -1,8 +1,6 @@
  4039. /*
  4040. * ip_vs_xmit.c: various packet transmitters for IPVS
  4041. *
  4042. - * Version: $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $
  4043. - *
  4044. * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
  4045. * Julian Anastasov <ja@ssi.bg>
  4046. *
  4047. @@ -16,8 +14,8 @@
  4048. */
  4049.  
  4050. #include <linux/kernel.h>
  4051. -#include <linux/ip.h>
  4052. #include <linux/tcp.h> /* for tcphdr */
  4053. +#include <net/ip.h>
  4054. #include <net/tcp.h> /* for csum_tcpudp_magic */
  4055. #include <net/udp.h>
  4056. #include <net/icmp.h> /* for icmp_send */
  4057. @@ -59,7 +57,7 @@
  4058. return dst;
  4059. }
  4060.  
  4061. -static inline struct rtable *
  4062. +static struct rtable *
  4063. __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
  4064. {
  4065. struct rtable *rt; /* Route to the other host */
  4066. @@ -78,7 +76,7 @@
  4067. .tos = rtos, } },
  4068. };
  4069.  
  4070. - if (ip_route_output_key(&rt, &fl)) {
  4071. + if (ip_route_output_key(&init_net, &rt, &fl)) {
  4072. spin_unlock(&dest->dst_lock);
  4073. IP_VS_DBG_RL("ip_route_output error, "
  4074. "dest: %u.%u.%u.%u\n",
  4075. @@ -101,7 +99,7 @@
  4076. .tos = rtos, } },
  4077. };
  4078.  
  4079. - if (ip_route_output_key(&rt, &fl)) {
  4080. + if (ip_route_output_key(&init_net, &rt, &fl)) {
  4081. IP_VS_DBG_RL("ip_route_output error, dest: "
  4082. "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
  4083. return NULL;
  4084. @@ -128,8 +126,8 @@
  4085. #define IP_VS_XMIT(skb, rt) \
  4086. do { \
  4087. (skb)->ipvs_property = 1; \
  4088. - (skb)->ip_summed = CHECKSUM_NONE; \
  4089. - NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \
  4090. + skb_forward_csum(skb); \
  4091. + NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \
  4092. (rt)->u.dst.dev, dst_output); \
  4093. } while (0)
  4094.  
  4095. @@ -156,7 +154,7 @@
  4096. struct ip_vs_protocol *pp)
  4097. {
  4098. struct rtable *rt; /* Route to the other host */
  4099. - struct iphdr *iph = skb->nh.iph;
  4100. + struct iphdr *iph = ip_hdr(skb);
  4101. u8 tos = iph->tos;
  4102. int mtu;
  4103. struct flowi fl = {
  4104. @@ -170,7 +168,7 @@
  4105.  
  4106. EnterFunction(10);
  4107.  
  4108. - if (ip_route_output_key(&rt, &fl)) {
  4109. + if (ip_route_output_key(&init_net, &rt, &fl)) {
  4110. IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
  4111. "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
  4112. goto tx_error_icmp;
  4113. @@ -178,7 +176,7 @@
  4114.  
  4115. /* MTU checking */
  4116. mtu = dst_mtu(&rt->u.dst);
  4117. - if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
  4118. + if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
  4119. ip_rt_put(rt);
  4120. icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
  4121. IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
  4122. @@ -193,7 +191,7 @@
  4123. ip_rt_put(rt);
  4124. return NF_STOLEN;
  4125. }
  4126. - ip_send_check(skb->nh.iph);
  4127. + ip_send_check(ip_hdr(skb));
  4128.  
  4129. /* drop old route */
  4130. dst_release(skb->dst);
  4131. @@ -226,7 +224,7 @@
  4132. {
  4133. struct rtable *rt; /* Route to the other host */
  4134. int mtu;
  4135. - struct iphdr *iph = skb->nh.iph;
  4136. + struct iphdr *iph = ip_hdr(skb);
  4137.  
  4138. EnterFunction(10);
  4139.  
  4140. @@ -245,7 +243,7 @@
  4141.  
  4142. /* MTU checking */
  4143. mtu = dst_mtu(&rt->u.dst);
  4144. - if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
  4145. + if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
  4146. ip_rt_put(rt);
  4147. icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
  4148. IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
  4149. @@ -253,7 +251,7 @@
  4150. }
  4151.  
  4152. /* copy-on-write the packet before mangling it */
  4153. - if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr)))
  4154. + if (!skb_make_writable(skb, sizeof(struct iphdr)))
  4155. goto tx_error_put;
  4156.  
  4157. if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
  4158. @@ -264,10 +262,10 @@
  4159. skb->dst = &rt->u.dst;
  4160.  
  4161. /* mangle the packet */
  4162. - if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
  4163. + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
  4164. goto tx_error;
  4165. - skb->nh.iph->daddr = cp->daddr;
  4166. - ip_send_check(skb->nh.iph);
  4167. + ip_hdr(skb)->daddr = cp->daddr;
  4168. + ip_send_check(ip_hdr(skb));
  4169.  
  4170. IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
  4171.  
  4172. @@ -320,19 +318,20 @@
  4173. {
  4174. struct rtable *rt; /* Route to the other host */
  4175. struct net_device *tdev; /* Device to other host */
  4176. - struct iphdr *old_iph = skb->nh.iph;
  4177. + struct iphdr *old_iph = ip_hdr(skb);
  4178. u8 tos = old_iph->tos;
  4179. __be16 df = old_iph->frag_off;
  4180. + sk_buff_data_t old_transport_header = skb->transport_header;
  4181. struct iphdr *iph; /* Our new IP header */
  4182. - int max_headroom; /* The extra header space needed */
  4183. + unsigned int max_headroom; /* The extra header space needed */
  4184. int mtu;
  4185.  
  4186. EnterFunction(10);
  4187.  
  4188. - if (skb->protocol != __constant_htons(ETH_P_IP)) {
  4189. + if (skb->protocol != htons(ETH_P_IP)) {
  4190. IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
  4191. "ETH_P_IP: %d, skb protocol: %d\n",
  4192. - __constant_htons(ETH_P_IP), skb->protocol);
  4193. + htons(ETH_P_IP), skb->protocol);
  4194. goto tx_error;
  4195. }
  4196.  
  4197. @@ -350,9 +349,9 @@
  4198. if (skb->dst)
  4199. skb->dst->ops->update_pmtu(skb->dst, mtu);
  4200.  
  4201. - df |= (old_iph->frag_off&__constant_htons(IP_DF));
  4202. + df |= (old_iph->frag_off & htons(IP_DF));
  4203.  
  4204. - if ((old_iph->frag_off&__constant_htons(IP_DF))
  4205. + if ((old_iph->frag_off & htons(IP_DF))
  4206. && mtu < ntohs(old_iph->tot_len)) {
  4207. icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
  4208. ip_rt_put(rt);
  4209. @@ -377,15 +376,16 @@
  4210. }
  4211. kfree_skb(skb);
  4212. skb = new_skb;
  4213. - old_iph = skb->nh.iph;
  4214. + old_iph = ip_hdr(skb);
  4215. }
  4216.  
  4217. - skb->h.raw = (void *) old_iph;
  4218. + skb->transport_header = old_transport_header;
  4219.  
  4220. /* fix old IP header checksum */
  4221. ip_send_check(old_iph);
  4222.  
  4223. - skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
  4224. + skb_push(skb, sizeof(struct iphdr));
  4225. + skb_reset_network_header(skb);
  4226. memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
  4227.  
  4228. /* drop old route */
  4229. @@ -395,7 +395,7 @@
  4230. /*
  4231. * Push down and install the IPIP header.
  4232. */
  4233. - iph = skb->nh.iph;
  4234. + iph = ip_hdr(skb);
  4235. iph->version = 4;
  4236. iph->ihl = sizeof(struct iphdr)>>2;
  4237. iph->frag_off = df;
  4238. @@ -404,14 +404,12 @@
  4239. iph->daddr = rt->rt_dst;
  4240. iph->saddr = rt->rt_src;
  4241. iph->ttl = old_iph->ttl;
  4242. - iph->tot_len = htons(skb->len);
  4243. ip_select_ident(iph, &rt->u.dst, NULL);
  4244. - ip_send_check(iph);
  4245.  
  4246. /* Another hack: avoid icmp_send in ip_fragment */
  4247. skb->local_df = 1;
  4248.  
  4249. - IP_VS_XMIT(skb, rt);
  4250. + ip_local_out(skb);
  4251.  
  4252. LeaveFunction(10);
  4253.  
  4254. @@ -435,7 +433,7 @@
  4255. struct ip_vs_protocol *pp)
  4256. {
  4257. struct rtable *rt; /* Route to the other host */
  4258. - struct iphdr *iph = skb->nh.iph;
  4259. + struct iphdr *iph = ip_hdr(skb);
  4260. int mtu;
  4261.  
  4262. EnterFunction(10);
  4263. @@ -445,7 +443,7 @@
  4264.  
  4265. /* MTU checking */
  4266. mtu = dst_mtu(&rt->u.dst);
  4267. - if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
  4268. + if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
  4269. icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
  4270. ip_rt_put(rt);
  4271. IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
  4272. @@ -460,7 +458,7 @@
  4273. ip_rt_put(rt);
  4274. return NF_STOLEN;
  4275. }
  4276. - ip_send_check(skb->nh.iph);
  4277. + ip_send_check(ip_hdr(skb));
  4278.  
  4279. /* drop old route */
  4280. dst_release(skb->dst);
  4281. @@ -514,12 +512,12 @@
  4282. * mangle and send the packet here (only for VS/NAT)
  4283. */
  4284.  
  4285. - if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos))))
  4286. + if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
  4287. goto tx_error_icmp;
  4288.  
  4289. /* MTU checking */
  4290. mtu = dst_mtu(&rt->u.dst);
  4291. - if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) {
  4292. + if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
  4293. ip_rt_put(rt);
  4294. icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
  4295. IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
  4296. @@ -527,7 +525,7 @@
  4297. }
  4298.  
  4299. /* copy-on-write the packet before mangling it */
  4300. - if (!ip_vs_make_skb_writable(&skb, offset))
  4301. + if (!skb_make_writable(skb, offset))
  4302. goto tx_error_put;
  4303.  
  4304. if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
Add Comment
Please, Sign In to add comment