Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- diff -u linux-2.6.20.y/net/ipv4/ipvs/Kconfig linux-2.6.27.y/net/ipv4/ipvs/Kconfig
- --- linux-2.6.20.y/net/ipv4/ipvs/Kconfig 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/Kconfig 2009-02-16 12:56:22.000000000 -0400
- @@ -1,10 +1,7 @@
- #
- # IP Virtual Server configuration
- #
- -menu "IP: Virtual Server Configuration"
- - depends on NETFILTER
- -
- -config IP_VS
- +menuconfig IP_VS
- tristate "IP virtual server support (EXPERIMENTAL)"
- depends on NETFILTER
- ---help---
- @@ -25,9 +22,10 @@
- If you want to compile it in kernel, say Y. To compile it as a
- module, choose M here. If unsure, say N.
- +if IP_VS
- +
- config IP_VS_DEBUG
- bool "IP virtual server debugging"
- - depends on IP_VS
- ---help---
- Say Y here if you want to get additional messages useful in
- debugging the IP virtual server code. You can change the debug
- @@ -35,7 +33,6 @@
- config IP_VS_TAB_BITS
- int "IPVS connection table size (the Nth power of 2)"
- - depends on IP_VS
- default "12"
- ---help---
- The IPVS connection hash table uses the chaining scheme to handle
- @@ -61,42 +58,35 @@
- needed for your box.
- comment "IPVS transport protocol load balancing support"
- - depends on IP_VS
- config IP_VS_PROTO_TCP
- bool "TCP load balancing support"
- - depends on IP_VS
- ---help---
- This option enables support for load balancing TCP transport
- protocol. Say Y if unsure.
- config IP_VS_PROTO_UDP
- bool "UDP load balancing support"
- - depends on IP_VS
- ---help---
- This option enables support for load balancing UDP transport
- protocol. Say Y if unsure.
- config IP_VS_PROTO_ESP
- bool "ESP load balancing support"
- - depends on IP_VS
- ---help---
- This option enables support for load balancing ESP (Encapsulation
- Security Payload) transport protocol. Say Y if unsure.
- config IP_VS_PROTO_AH
- bool "AH load balancing support"
- - depends on IP_VS
- ---help---
- This option enables support for load balancing AH (Authentication
- Header) transport protocol. Say Y if unsure.
- comment "IPVS scheduler"
- - depends on IP_VS
- config IP_VS_RR
- tristate "round-robin scheduling"
- - depends on IP_VS
- ---help---
- The robin-robin scheduling algorithm simply directs network
- connections to different real servers in a round-robin manner.
- @@ -106,7 +96,6 @@
- config IP_VS_WRR
- tristate "weighted round-robin scheduling"
- - depends on IP_VS
- ---help---
- The weighted robin-robin scheduling algorithm directs network
- connections to different real servers based on server weights
- @@ -120,7 +109,6 @@
- config IP_VS_LC
- tristate "least-connection scheduling"
- - depends on IP_VS
- ---help---
- The least-connection scheduling algorithm directs network
- connections to the server with the least number of active
- @@ -131,7 +119,6 @@
- config IP_VS_WLC
- tristate "weighted least-connection scheduling"
- - depends on IP_VS
- ---help---
- The weighted least-connection scheduling algorithm directs network
- connections to the server with the least active connections
- @@ -142,7 +129,6 @@
- config IP_VS_LBLC
- tristate "locality-based least-connection scheduling"
- - depends on IP_VS
- ---help---
- The locality-based least-connection scheduling algorithm is for
- destination IP load balancing. It is usually used in cache cluster.
- @@ -157,7 +143,6 @@
- config IP_VS_LBLCR
- tristate "locality-based least-connection with replication scheduling"
- - depends on IP_VS
- ---help---
- The locality-based least-connection with replication scheduling
- algorithm is also for destination IP load balancing. It is
- @@ -176,7 +161,6 @@
- config IP_VS_DH
- tristate "destination hashing scheduling"
- - depends on IP_VS
- ---help---
- The destination hashing scheduling algorithm assigns network
- connections to the servers through looking up a statically assigned
- @@ -187,7 +171,6 @@
- config IP_VS_SH
- tristate "source hashing scheduling"
- - depends on IP_VS
- ---help---
- The source hashing scheduling algorithm assigns network
- connections to the servers through looking up a statically assigned
- @@ -198,7 +181,6 @@
- config IP_VS_SED
- tristate "shortest expected delay scheduling"
- - depends on IP_VS
- ---help---
- The shortest expected delay scheduling algorithm assigns network
- connections to the server with the shortest expected delay. The
- @@ -212,7 +194,6 @@
- config IP_VS_NQ
- tristate "never queue scheduling"
- - depends on IP_VS
- ---help---
- The never queue scheduling algorithm adopts a two-speed model.
- When there is an idle server available, the job will be sent to
- @@ -225,11 +206,10 @@
- module, choose M here. If unsure, say N.
- comment 'IPVS application helper'
- - depends on IP_VS
- config IP_VS_FTP
- tristate "FTP protocol helper"
- - depends on IP_VS && IP_VS_PROTO_TCP
- + depends on IP_VS_PROTO_TCP
- ---help---
- FTP is a protocol that transfers IP address and/or port number in
- the payload. In the virtual server via Network Address Translation,
- @@ -241,4 +221,4 @@
- If you want to compile it in kernel, say Y. To compile it as a
- module, choose M here. If unsure, say N.
- -endmenu
- +endif # IP_VS
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_app.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_app.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_app.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_app.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * ip_vs_app.c: Application module support for IPVS
- *
- - * Version: $Id: ip_vs_app.c,v 1.17 2003/03/22 06:31:21 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * This program is free software; you can redistribute it and/or
- @@ -25,6 +23,8 @@
- #include <linux/skbuff.h>
- #include <linux/in.h>
- #include <linux/ip.h>
- +#include <linux/netfilter.h>
- +#include <net/net_namespace.h>
- #include <net/protocol.h>
- #include <net/tcp.h>
- #include <asm/system.h>
- @@ -49,18 +49,13 @@
- */
- static inline int ip_vs_app_get(struct ip_vs_app *app)
- {
- - /* test and get the module atomically */
- - if (app->module)
- - return try_module_get(app->module);
- - else
- - return 1;
- + return try_module_get(app->module);
- }
- static inline void ip_vs_app_put(struct ip_vs_app *app)
- {
- - if (app->module)
- - module_put(app->module);
- + module_put(app->module);
- }
- @@ -327,18 +322,18 @@
- spin_unlock(&cp->lock);
- }
- -static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb,
- +static inline int app_tcp_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb,
- struct ip_vs_app *app)
- {
- int diff;
- - unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
- + const unsigned int tcp_offset = ip_hdrlen(skb);
- struct tcphdr *th;
- __u32 seq;
- - if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
- + if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
- return 0;
- - th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
- + th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
- /*
- * Remember seq number in case this pkt gets resized
- @@ -359,7 +354,7 @@
- if (app->pkt_out == NULL)
- return 1;
- - if (!app->pkt_out(app, cp, pskb, &diff))
- + if (!app->pkt_out(app, cp, skb, &diff))
- return 0;
- /*
- @@ -377,7 +372,7 @@
- * called by ipvs packet handler, assumes previously checked cp!=NULL
- * returns false if it can't handle packet (oom)
- */
- -int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff **pskb)
- +int ip_vs_app_pkt_out(struct ip_vs_conn *cp, struct sk_buff *skb)
- {
- struct ip_vs_app *app;
- @@ -390,7 +385,7 @@
- /* TCP is complicated */
- if (cp->protocol == IPPROTO_TCP)
- - return app_tcp_pkt_out(cp, pskb, app);
- + return app_tcp_pkt_out(cp, skb, app);
- /*
- * Call private output hook function
- @@ -398,22 +393,22 @@
- if (app->pkt_out == NULL)
- return 1;
- - return app->pkt_out(app, cp, pskb, NULL);
- + return app->pkt_out(app, cp, skb, NULL);
- }
- -static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb,
- +static inline int app_tcp_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb,
- struct ip_vs_app *app)
- {
- int diff;
- - unsigned int tcp_offset = (*pskb)->nh.iph->ihl*4;
- + const unsigned int tcp_offset = ip_hdrlen(skb);
- struct tcphdr *th;
- __u32 seq;
- - if (!ip_vs_make_skb_writable(pskb, tcp_offset + sizeof(*th)))
- + if (!skb_make_writable(skb, tcp_offset + sizeof(*th)))
- return 0;
- - th = (struct tcphdr *)((*pskb)->nh.raw + tcp_offset);
- + th = (struct tcphdr *)(skb_network_header(skb) + tcp_offset);
- /*
- * Remember seq number in case this pkt gets resized
- @@ -434,7 +429,7 @@
- if (app->pkt_in == NULL)
- return 1;
- - if (!app->pkt_in(app, cp, pskb, &diff))
- + if (!app->pkt_in(app, cp, skb, &diff))
- return 0;
- /*
- @@ -452,7 +447,7 @@
- * called by ipvs packet handler, assumes previously checked cp!=NULL.
- * returns false if can't handle packet (oom).
- */
- -int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff **pskb)
- +int ip_vs_app_pkt_in(struct ip_vs_conn *cp, struct sk_buff *skb)
- {
- struct ip_vs_app *app;
- @@ -465,7 +460,7 @@
- /* TCP is complicated */
- if (cp->protocol == IPPROTO_TCP)
- - return app_tcp_pkt_in(cp, pskb, app);
- + return app_tcp_pkt_in(cp, skb, app);
- /*
- * Call private input hook function
- @@ -473,7 +468,7 @@
- if (app->pkt_in == NULL)
- return 1;
- - return app->pkt_in(app, cp, pskb, NULL);
- + return app->pkt_in(app, cp, skb, NULL);
- }
- @@ -549,7 +544,7 @@
- return 0;
- }
- -static struct seq_operations ip_vs_app_seq_ops = {
- +static const struct seq_operations ip_vs_app_seq_ops = {
- .start = ip_vs_app_seq_start,
- .next = ip_vs_app_seq_next,
- .stop = ip_vs_app_seq_stop,
- @@ -561,7 +556,7 @@
- return seq_open(file, &ip_vs_app_seq_ops);
- }
- -static struct file_operations ip_vs_app_fops = {
- +static const struct file_operations ip_vs_app_fops = {
- .owner = THIS_MODULE,
- .open = ip_vs_app_open,
- .read = seq_read,
- @@ -577,7 +572,6 @@
- int ip_vs_skb_replace(struct sk_buff *skb, gfp_t pri,
- char *o_buf, int o_len, char *n_buf, int n_len)
- {
- - struct iphdr *iph;
- int diff;
- int o_offset;
- int o_left;
- @@ -603,27 +597,26 @@
- skb_put(skb, diff);
- memmove(skb->data + o_offset + n_len,
- skb->data + o_offset + o_len, o_left);
- - memcpy(skb->data + o_offset, n_buf, n_len);
- + skb_copy_to_linear_data_offset(skb, o_offset, n_buf, n_len);
- }
- /* must update the iph total length here */
- - iph = skb->nh.iph;
- - iph->tot_len = htons(skb->len);
- + ip_hdr(skb)->tot_len = htons(skb->len);
- LeaveFunction(9);
- return 0;
- }
- -int ip_vs_app_init(void)
- +int __init ip_vs_app_init(void)
- {
- /* we will replace it with proc_net_ipvs_create() soon */
- - proc_net_fops_create("ip_vs_app", 0, &ip_vs_app_fops);
- + proc_net_fops_create(&init_net, "ip_vs_app", 0, &ip_vs_app_fops);
- return 0;
- }
- void ip_vs_app_cleanup(void)
- {
- - proc_net_remove("ip_vs_app");
- + proc_net_remove(&init_net, "ip_vs_app");
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_conn.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_conn.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_conn.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_conn.c 2009-02-16 12:56:22.000000000 -0400
- @@ -5,8 +5,6 @@
- * high-performance and highly available server based on a
- * cluster of servers.
- *
- - * Version: $Id: ip_vs_conn.c,v 1.31 2003/04/18 09:03:16 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- * Peter Kese <peter.kese@ijs.si>
- * Julian Anastasov <ja@ssi.bg>
- @@ -35,6 +33,7 @@
- #include <linux/jhash.h>
- #include <linux/random.h>
- +#include <net/net_namespace.h>
- #include <net/ip_vs.h>
- @@ -392,7 +391,15 @@
- atomic_inc(&dest->refcnt);
- /* Bind with the destination and its corresponding transmitter */
- - cp->flags |= atomic_read(&dest->conn_flags);
- + if ((cp->flags & IP_VS_CONN_F_SYNC) &&
- + (!(cp->flags & IP_VS_CONN_F_TEMPLATE)))
- + /* if the connection is not a template and is created
- + * by sync, preserve the activity flag.
- + */
- + cp->flags |= atomic_read(&dest->conn_flags) &
- + (~IP_VS_CONN_F_INACTIVE);
- + else
- + cp->flags |= atomic_read(&dest->conn_flags);
- cp->dest = dest;
- IP_VS_DBG(7, "Bind-dest %s c:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
- @@ -411,7 +418,11 @@
- /* It is a normal connection, so increase the inactive
- connection counter because it is in TCP SYNRECV
- state (inactive) or other protocol inacive state */
- - atomic_inc(&dest->inactconns);
- + if ((cp->flags & IP_VS_CONN_F_SYNC) &&
- + (!(cp->flags & IP_VS_CONN_F_INACTIVE)))
- + atomic_inc(&dest->activeconns);
- + else
- + atomic_inc(&dest->inactconns);
- } else {
- /* It is a persistent connection/template, so increase
- the peristent connection counter */
- @@ -425,6 +436,24 @@
- /*
- + * Check if there is a destination for the connection, if so
- + * bind the connection to the destination.
- + */
- +struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)
- +{
- + struct ip_vs_dest *dest;
- +
- + if ((cp) && (!cp->dest)) {
- + dest = ip_vs_find_dest(cp->daddr, cp->dport,
- + cp->vaddr, cp->vport, cp->protocol);
- + ip_vs_bind_dest(cp, dest);
- + return dest;
- + } else
- + return NULL;
- +}
- +
- +
- +/*
- * Unbind a connection entry with its VS destination
- * Called by the ip_vs_conn_expire function.
- */
- @@ -494,8 +523,8 @@
- * Checking the dest server status.
- */
- if ((dest == NULL) ||
- - !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
- - (sysctl_ip_vs_expire_quiescent_template &&
- + !(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
- + (sysctl_ip_vs_expire_quiescent_template &&
- (atomic_read(&dest->weight) == 0))) {
- IP_VS_DBG(9, "check_template: dest not available for "
- "protocol %s s:%u.%u.%u.%u:%d v:%u.%u.%u.%u:%d "
- @@ -603,17 +632,14 @@
- struct ip_vs_conn *cp;
- struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
- - cp = kmem_cache_alloc(ip_vs_conn_cachep, GFP_ATOMIC);
- + cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC);
- if (cp == NULL) {
- IP_VS_ERR_RL("ip_vs_conn_new: no memory available.\n");
- return NULL;
- }
- - memset(cp, 0, sizeof(*cp));
- INIT_LIST_HEAD(&cp->c_list);
- - init_timer(&cp->timer);
- - cp->timer.data = (unsigned long)cp;
- - cp->timer.function = ip_vs_conn_expire;
- + setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp);
- cp->protocol = proto;
- cp->caddr = caddr;
- cp->cport = cport;
- @@ -667,7 +693,7 @@
- {
- int idx;
- struct ip_vs_conn *cp;
- -
- +
- for(idx = 0; idx < IP_VS_CONN_TAB_SIZE; idx++) {
- ct_read_lock_bh(idx);
- list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
- @@ -695,7 +721,7 @@
- int idx;
- ++*pos;
- - if (v == SEQ_START_TOKEN)
- + if (v == SEQ_START_TOKEN)
- return ip_vs_conn_array(seq, 0);
- /* more on same hash chain? */
- @@ -710,7 +736,7 @@
- list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) {
- seq->private = &ip_vs_conn_tab[idx];
- return cp;
- - }
- + }
- ct_read_unlock_bh(idx);
- }
- seq->private = NULL;
- @@ -746,7 +772,7 @@
- return 0;
- }
- -static struct seq_operations ip_vs_conn_seq_ops = {
- +static const struct seq_operations ip_vs_conn_seq_ops = {
- .start = ip_vs_conn_seq_start,
- .next = ip_vs_conn_seq_next,
- .stop = ip_vs_conn_seq_stop,
- @@ -758,13 +784,64 @@
- return seq_open(file, &ip_vs_conn_seq_ops);
- }
- -static struct file_operations ip_vs_conn_fops = {
- +static const struct file_operations ip_vs_conn_fops = {
- .owner = THIS_MODULE,
- .open = ip_vs_conn_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release,
- };
- +
- +static const char *ip_vs_origin_name(unsigned flags)
- +{
- + if (flags & IP_VS_CONN_F_SYNC)
- + return "SYNC";
- + else
- + return "LOCAL";
- +}
- +
- +static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v)
- +{
- +
- + if (v == SEQ_START_TOKEN)
- + seq_puts(seq,
- + "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n");
- + else {
- + const struct ip_vs_conn *cp = v;
- +
- + seq_printf(seq,
- + "%-3s %08X %04X %08X %04X %08X %04X %-11s %-6s %7lu\n",
- + ip_vs_proto_name(cp->protocol),
- + ntohl(cp->caddr), ntohs(cp->cport),
- + ntohl(cp->vaddr), ntohs(cp->vport),
- + ntohl(cp->daddr), ntohs(cp->dport),
- + ip_vs_state_name(cp->protocol, cp->state),
- + ip_vs_origin_name(cp->flags),
- + (cp->timer.expires-jiffies)/HZ);
- + }
- + return 0;
- +}
- +
- +static const struct seq_operations ip_vs_conn_sync_seq_ops = {
- + .start = ip_vs_conn_seq_start,
- + .next = ip_vs_conn_seq_next,
- + .stop = ip_vs_conn_seq_stop,
- + .show = ip_vs_conn_sync_seq_show,
- +};
- +
- +static int ip_vs_conn_sync_open(struct inode *inode, struct file *file)
- +{
- + return seq_open(file, &ip_vs_conn_sync_seq_ops);
- +}
- +
- +static const struct file_operations ip_vs_conn_sync_fops = {
- + .owner = THIS_MODULE,
- + .open = ip_vs_conn_sync_open,
- + .read = seq_read,
- + .llseek = seq_lseek,
- + .release = seq_release,
- +};
- +
- #endif
- @@ -888,7 +965,7 @@
- }
- -int ip_vs_conn_init(void)
- +int __init ip_vs_conn_init(void)
- {
- int idx;
- @@ -902,7 +979,7 @@
- /* Allocate ip_vs_conn slab cache */
- ip_vs_conn_cachep = kmem_cache_create("ip_vs_conn",
- sizeof(struct ip_vs_conn), 0,
- - SLAB_HWCACHE_ALIGN, NULL, NULL);
- + SLAB_HWCACHE_ALIGN, NULL);
- if (!ip_vs_conn_cachep) {
- vfree(ip_vs_conn_tab);
- return -ENOMEM;
- @@ -923,7 +1000,8 @@
- rwlock_init(&__ip_vs_conntbl_lock_array[idx].l);
- }
- - proc_net_fops_create("ip_vs_conn", 0, &ip_vs_conn_fops);
- + proc_net_fops_create(&init_net, "ip_vs_conn", 0, &ip_vs_conn_fops);
- + proc_net_fops_create(&init_net, "ip_vs_conn_sync", 0, &ip_vs_conn_sync_fops);
- /* calculate the random value for connection hash */
- get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd));
- @@ -939,6 +1017,7 @@
- /* Release the empty cache */
- kmem_cache_destroy(ip_vs_conn_cachep);
- - proc_net_remove("ip_vs_conn");
- + proc_net_remove(&init_net, "ip_vs_conn");
- + proc_net_remove(&init_net, "ip_vs_conn_sync");
- vfree(ip_vs_conn_tab);
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_core.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_core.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_core.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_core.c 2009-02-16 12:56:22.000000000 -0400
- @@ -5,8 +5,6 @@
- * high-performance and highly available server based on a
- * cluster of servers.
- *
- - * Version: $Id: ip_vs_core.c,v 1.34 2003/05/10 03:05:23 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- * Peter Kese <peter.kese@ijs.si>
- * Julian Anastasov <ja@ssi.bg>
- @@ -58,7 +56,6 @@
- #ifdef CONFIG_IP_VS_DEBUG
- EXPORT_SYMBOL(ip_vs_get_debug_level);
- #endif
- -EXPORT_SYMBOL(ip_vs_make_skb_writable);
- /* ID used in ICMP lookups */
- @@ -163,42 +160,6 @@
- }
- -int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len)
- -{
- - struct sk_buff *skb = *pskb;
- -
- - /* skb is already used, better copy skb and its payload */
- - if (unlikely(skb_shared(skb) || skb->sk))
- - goto copy_skb;
- -
- - /* skb data is already used, copy it */
- - if (unlikely(skb_cloned(skb)))
- - goto copy_data;
- -
- - return pskb_may_pull(skb, writable_len);
- -
- - copy_data:
- - if (unlikely(writable_len > skb->len))
- - return 0;
- - return !pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
- -
- - copy_skb:
- - if (unlikely(writable_len > skb->len))
- - return 0;
- - skb = skb_copy(skb, GFP_ATOMIC);
- - if (!skb)
- - return 0;
- - BUG_ON(skb_is_nonlinear(skb));
- -
- - /* Rest of kernel will get very unhappy if we pass it a
- - suddenly-orphaned skbuff */
- - if ((*pskb)->sk)
- - skb_set_owner_w(skb, (*pskb)->sk);
- - kfree_skb(*pskb);
- - *pskb = skb;
- - return 1;
- -}
- -
- /*
- * IPVS persistent scheduling function
- * It creates a connection entry according to its template if exists,
- @@ -212,7 +173,7 @@
- __be16 ports[2])
- {
- struct ip_vs_conn *cp = NULL;
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- struct ip_vs_dest *dest;
- struct ip_vs_conn *ct;
- __be16 dport; /* destination port to forward */
- @@ -381,7 +342,7 @@
- ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
- {
- struct ip_vs_conn *cp = NULL;
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- struct ip_vs_dest *dest;
- __be16 _ports[2], *pptr;
- @@ -447,7 +408,7 @@
- struct ip_vs_protocol *pp)
- {
- __be16 _ports[2], *pptr;
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- pptr = skb_header_pointer(skb, iph->ihl*4,
- sizeof(_ports), _ports);
- @@ -460,7 +421,7 @@
- and the destination is RTN_UNICAST (and not local), then create
- a cache_bypass connection entry */
- if (sysctl_ip_vs_cache_bypass && svc->fwmark
- - && (inet_addr_type(iph->daddr) == RTN_UNICAST)) {
- + && (inet_addr_type(&init_net, iph->daddr) == RTN_UNICAST)) {
- int ret, cs;
- struct ip_vs_conn *cp;
- @@ -518,19 +479,19 @@
- /*
- - * It is hooked before NF_IP_PRI_NAT_SRC at the NF_IP_POST_ROUTING
- + * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING
- * chain, and is used for VS/NAT.
- * It detects packets for VS/NAT connections and sends the packets
- * immediately. This can avoid that iptable_nat mangles the packets
- * for VS/NAT.
- */
- static unsigned int ip_vs_post_routing(unsigned int hooknum,
- - struct sk_buff **pskb,
- + struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
- - if (!((*pskb)->ipvs_property))
- + if (!skb->ipvs_property)
- return NF_ACCEPT;
- /* The packet was sent from IPVS, exit this chain */
- return NF_STOP;
- @@ -541,13 +502,14 @@
- return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
- }
- -static inline struct sk_buff *
- -ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
- +static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
- {
- - skb = ip_defrag(skb, user);
- - if (skb)
- - ip_send_check(skb->nh.iph);
- - return skb;
- + int err = ip_defrag(skb, user);
- +
- + if (!err)
- + ip_send_check(ip_hdr(skb));
- +
- + return err;
- }
- /*
- @@ -557,9 +519,10 @@
- void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
- struct ip_vs_conn *cp, int inout)
- {
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- unsigned int icmp_offset = iph->ihl*4;
- - struct icmphdr *icmph = (struct icmphdr *)(skb->nh.raw + icmp_offset);
- + struct icmphdr *icmph = (struct icmphdr *)(skb_network_header(skb) +
- + icmp_offset);
- struct iphdr *ciph = (struct iphdr *)(icmph + 1);
- if (inout) {
- @@ -604,9 +567,8 @@
- * Currently handles error types - unreachable, quench, ttl exceeded.
- * (Only used in VS/NAT)
- */
- -static int ip_vs_out_icmp(struct sk_buff **pskb, int *related)
- +static int ip_vs_out_icmp(struct sk_buff *skb, int *related)
- {
- - struct sk_buff *skb = *pskb;
- struct iphdr *iph;
- struct icmphdr _icmph, *ic;
- struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
- @@ -617,14 +579,12 @@
- *related = 1;
- /* reassemble IP fragments */
- - if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
- - skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
- - if (!skb)
- + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
- return NF_STOLEN;
- - *pskb = skb;
- }
- - iph = skb->nh.iph;
- + iph = ip_hdr(skb);
- offset = ihl = iph->ihl * 4;
- ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
- if (ic == NULL)
- @@ -659,7 +619,7 @@
- return NF_ACCEPT;
- /* Is the embedded protocol header present? */
- - if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
- + if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
- pp->dont_defrag))
- return NF_ACCEPT;
- @@ -675,13 +635,12 @@
- verdict = NF_DROP;
- if (IP_VS_FWD_METHOD(cp) != 0) {
- - IP_VS_ERR("shouldn't reach here, because the box is on the"
- + IP_VS_ERR("shouldn't reach here, because the box is on the "
- "half connection in the tun/dr module.\n");
- }
- /* Ensure the checksum is correct */
- - if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
- - ip_vs_checksum_complete(skb, ihl)) {
- + if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
- /* Failed checksum! */
- IP_VS_DBG(1, "Forward ICMP: failed checksum from %d.%d.%d.%d!\n",
- NIPQUAD(iph->saddr));
- @@ -690,9 +649,8 @@
- if (IPPROTO_TCP == cih->protocol || IPPROTO_UDP == cih->protocol)
- offset += 2 * sizeof(__u16);
- - if (!ip_vs_make_skb_writable(pskb, offset))
- + if (!skb_make_writable(skb, offset))
- goto out;
- - skb = *pskb;
- ip_vs_nat_icmp(skb, pp, cp, 1);
- @@ -712,24 +670,22 @@
- {
- struct tcphdr _tcph, *th;
- - th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
- - sizeof(_tcph), &_tcph);
- + th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
- if (th == NULL)
- return 0;
- return th->rst;
- }
- /*
- - * It is hooked at the NF_IP_FORWARD chain, used only for VS/NAT.
- + * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT.
- * Check if outgoing packet belongs to the established ip_vs_conn,
- * rewrite addresses of the packet and send it on its way...
- */
- static unsigned int
- -ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
- +ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
- - struct sk_buff *skb = *pskb;
- struct iphdr *iph;
- struct ip_vs_protocol *pp;
- struct ip_vs_conn *cp;
- @@ -740,14 +696,13 @@
- if (skb->ipvs_property)
- return NF_ACCEPT;
- - iph = skb->nh.iph;
- + iph = ip_hdr(skb);
- if (unlikely(iph->protocol == IPPROTO_ICMP)) {
- - int related, verdict = ip_vs_out_icmp(pskb, &related);
- + int related, verdict = ip_vs_out_icmp(skb, &related);
- if (related)
- return verdict;
- - skb = *pskb;
- - iph = skb->nh.iph;
- + iph = ip_hdr(skb);
- }
- pp = ip_vs_proto_get(iph->protocol);
- @@ -755,13 +710,11 @@
- return NF_ACCEPT;
- /* reassemble IP fragments */
- - if (unlikely(iph->frag_off & __constant_htons(IP_MF|IP_OFFSET) &&
- + if (unlikely(iph->frag_off & htons(IP_MF|IP_OFFSET) &&
- !pp->dont_defrag)) {
- - skb = ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT);
- - if (!skb)
- + if (ip_vs_gather_frags(skb, IP_DEFRAG_VS_OUT))
- return NF_STOLEN;
- - iph = skb->nh.iph;
- - *pskb = skb;
- + iph = ip_hdr(skb);
- }
- ihl = iph->ihl << 2;
- @@ -803,25 +756,23 @@
- IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
- - if (!ip_vs_make_skb_writable(pskb, ihl))
- + if (!skb_make_writable(skb, ihl))
- goto drop;
- /* mangle the packet */
- - if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
- + if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
- + goto drop;
- + ip_hdr(skb)->saddr = cp->vaddr;
- + ip_send_check(ip_hdr(skb));
- +
- + /* For policy routing, packets originating from this
- + * machine itself may be routed differently to packets
- + * passing through. We want this packet to be routed as
- + * if it came from this machine itself. So re-compute
- + * the routing information.
- + */
- + if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
- goto drop;
- - skb = *pskb;
- - skb->nh.iph->saddr = cp->vaddr;
- - ip_send_check(skb->nh.iph);
- -
- - /* For policy routing, packets originating from this
- - * machine itself may be routed differently to packets
- - * passing through. We want this packet to be routed as
- - * if it came from this machine itself. So re-compute
- - * the routing information.
- - */
- - if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
- - goto drop;
- - skb = *pskb;
- IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
- @@ -836,7 +787,7 @@
- drop:
- ip_vs_conn_put(cp);
- - kfree_skb(*pskb);
- + kfree_skb(skb);
- return NF_STOLEN;
- }
- @@ -847,10 +798,9 @@
- * forward to the right destination host if relevant.
- * Currently handles error types - unreachable, quench, ttl exceeded.
- */
- -static int
- -ip_vs_in_icmp(struct sk_buff **pskb, int *related, unsigned int hooknum)
- +static int
- +ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum)
- {
- - struct sk_buff *skb = *pskb;
- struct iphdr *iph;
- struct icmphdr _icmph, *ic;
- struct iphdr _ciph, *cih; /* The ip header contained within the ICMP */
- @@ -861,16 +811,13 @@
- *related = 1;
- /* reassemble IP fragments */
- - if (skb->nh.iph->frag_off & __constant_htons(IP_MF|IP_OFFSET)) {
- - skb = ip_vs_gather_frags(skb,
- - hooknum == NF_IP_LOCAL_IN ?
- - IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD);
- - if (!skb)
- + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- + if (ip_vs_gather_frags(skb, hooknum == NF_INET_LOCAL_IN ?
- + IP_DEFRAG_VS_IN : IP_DEFRAG_VS_FWD))
- return NF_STOLEN;
- - *pskb = skb;
- }
- - iph = skb->nh.iph;
- + iph = ip_hdr(skb);
- offset = ihl = iph->ihl * 4;
- ic = skb_header_pointer(skb, offset, sizeof(_icmph), &_icmph);
- if (ic == NULL)
- @@ -905,7 +852,7 @@
- return NF_ACCEPT;
- /* Is the embedded protocol header present? */
- - if (unlikely(cih->frag_off & __constant_htons(IP_OFFSET) &&
- + if (unlikely(cih->frag_off & htons(IP_OFFSET) &&
- pp->dont_defrag))
- return NF_ACCEPT;
- @@ -921,8 +868,7 @@
- verdict = NF_DROP;
- /* Ensure the checksum is correct */
- - if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
- - ip_vs_checksum_complete(skb, ihl)) {
- + if (!skb_csum_unnecessary(skb) && ip_vs_checksum_complete(skb, ihl)) {
- /* Failed checksum! */
- IP_VS_DBG(1, "Incoming ICMP: failed checksum from %d.%d.%d.%d!\n",
- NIPQUAD(iph->saddr));
- @@ -947,11 +893,10 @@
- * and send it on its way...
- */
- static unsigned int
- -ip_vs_in(unsigned int hooknum, struct sk_buff **pskb,
- +ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
- - struct sk_buff *skb = *pskb;
- struct iphdr *iph;
- struct ip_vs_protocol *pp;
- struct ip_vs_conn *cp;
- @@ -963,22 +908,21 @@
- * ... don't know why 1st test DOES NOT include 2nd (?)
- */
- if (unlikely(skb->pkt_type != PACKET_HOST
- - || skb->dev == &loopback_dev || skb->sk)) {
- + || skb->dev->flags & IFF_LOOPBACK || skb->sk)) {
- IP_VS_DBG(12, "packet type=%d proto=%d daddr=%d.%d.%d.%d ignored\n",
- skb->pkt_type,
- - skb->nh.iph->protocol,
- - NIPQUAD(skb->nh.iph->daddr));
- + ip_hdr(skb)->protocol,
- + NIPQUAD(ip_hdr(skb)->daddr));
- return NF_ACCEPT;
- }
- - iph = skb->nh.iph;
- + iph = ip_hdr(skb);
- if (unlikely(iph->protocol == IPPROTO_ICMP)) {
- - int related, verdict = ip_vs_in_icmp(pskb, &related, hooknum);
- + int related, verdict = ip_vs_in_icmp(skb, &related, hooknum);
- if (related)
- return verdict;
- - skb = *pskb;
- - iph = skb->nh.iph;
- + iph = ip_hdr(skb);
- }
- /* Protocol supported? */
- @@ -1033,15 +977,24 @@
- ret = NF_ACCEPT;
- }
- - /* increase its packet counter and check if it is needed
- - to be synchronized */
- + /* Increase its packet counter and check if it is needed
- + * to be synchronized
- + *
- + * Sync connection if it is about to close to
- + * encourage the standby servers to update the connections timeout
- + */
- atomic_inc(&cp->in_pkts);
- if ((ip_vs_sync_state & IP_VS_STATE_MASTER) &&
- - (cp->protocol != IPPROTO_TCP ||
- - cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- - (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
- - == sysctl_ip_vs_sync_threshold[0]))
- + (((cp->protocol != IPPROTO_TCP ||
- + cp->state == IP_VS_TCP_S_ESTABLISHED) &&
- + (atomic_read(&cp->in_pkts) % sysctl_ip_vs_sync_threshold[1]
- + == sysctl_ip_vs_sync_threshold[0])) ||
- + ((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
- + ((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
- + (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
- + (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
- ip_vs_sync_conn(cp);
- + cp->old_state = cp->state;
- ip_vs_conn_put(cp);
- return ret;
- @@ -1049,65 +1002,64 @@
- /*
- - * It is hooked at the NF_IP_FORWARD chain, in order to catch ICMP
- + * It is hooked at the NF_INET_FORWARD chain, in order to catch ICMP
- * related packets destined for 0.0.0.0/0.
- * When fwmark-based virtual service is used, such as transparent
- * cache cluster, TCP packets can be marked and routed to ip_vs_in,
- * but ICMP destined for 0.0.0.0/0 cannot not be easily marked and
- - * sent to ip_vs_in_icmp. So, catch them at the NF_IP_FORWARD chain
- + * sent to ip_vs_in_icmp. So, catch them at the NF_INET_FORWARD chain
- * and send them to ip_vs_in_icmp.
- */
- static unsigned int
- -ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff **pskb,
- +ip_vs_forward_icmp(unsigned int hooknum, struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out,
- int (*okfn)(struct sk_buff *))
- {
- int r;
- - if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP)
- + if (ip_hdr(skb)->protocol != IPPROTO_ICMP)
- return NF_ACCEPT;
- - return ip_vs_in_icmp(pskb, &r, hooknum);
- + return ip_vs_in_icmp(skb, &r, hooknum);
- }
- -/* After packet filtering, forward packet through VS/DR, VS/TUN,
- - or VS/NAT(change destination), so that filtering rules can be
- - applied to IPVS. */
- -static struct nf_hook_ops ip_vs_in_ops = {
- - .hook = ip_vs_in,
- - .owner = THIS_MODULE,
- - .pf = PF_INET,
- - .hooknum = NF_IP_LOCAL_IN,
- - .priority = 100,
- -};
- -
- -/* After packet filtering, change source only for VS/NAT */
- -static struct nf_hook_ops ip_vs_out_ops = {
- - .hook = ip_vs_out,
- - .owner = THIS_MODULE,
- - .pf = PF_INET,
- - .hooknum = NF_IP_FORWARD,
- - .priority = 100,
- -};
- -
- -/* After packet filtering (but before ip_vs_out_icmp), catch icmp
- - destined for 0.0.0.0/0, which is for incoming IPVS connections */
- -static struct nf_hook_ops ip_vs_forward_icmp_ops = {
- - .hook = ip_vs_forward_icmp,
- - .owner = THIS_MODULE,
- - .pf = PF_INET,
- - .hooknum = NF_IP_FORWARD,
- - .priority = 99,
- -};
- -
- -/* Before the netfilter connection tracking, exit from POST_ROUTING */
- -static struct nf_hook_ops ip_vs_post_routing_ops = {
- - .hook = ip_vs_post_routing,
- - .owner = THIS_MODULE,
- - .pf = PF_INET,
- - .hooknum = NF_IP_POST_ROUTING,
- - .priority = NF_IP_PRI_NAT_SRC-1,
- +static struct nf_hook_ops ip_vs_ops[] __read_mostly = {
- + /* After packet filtering, forward packet through VS/DR, VS/TUN,
- + * or VS/NAT(change destination), so that filtering rules can be
- + * applied to IPVS. */
- + {
- + .hook = ip_vs_in,
- + .owner = THIS_MODULE,
- + .pf = PF_INET,
- + .hooknum = NF_INET_LOCAL_IN,
- + .priority = 100,
- + },
- + /* After packet filtering, change source only for VS/NAT */
- + {
- + .hook = ip_vs_out,
- + .owner = THIS_MODULE,
- + .pf = PF_INET,
- + .hooknum = NF_INET_FORWARD,
- + .priority = 100,
- + },
- + /* After packet filtering (but before ip_vs_out_icmp), catch icmp
- + * destined for 0.0.0.0/0, which is for incoming IPVS connections */
- + {
- + .hook = ip_vs_forward_icmp,
- + .owner = THIS_MODULE,
- + .pf = PF_INET,
- + .hooknum = NF_INET_FORWARD,
- + .priority = 99,
- + },
- + /* Before the netfilter connection tracking, exit from POST_ROUTING */
- + {
- + .hook = ip_vs_post_routing,
- + .owner = THIS_MODULE,
- + .pf = PF_INET,
- + .hooknum = NF_INET_POST_ROUTING,
- + .priority = NF_IP_PRI_NAT_SRC-1,
- + },
- };
- @@ -1138,37 +1090,15 @@
- goto cleanup_app;
- }
- - ret = nf_register_hook(&ip_vs_in_ops);
- + ret = nf_register_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
- if (ret < 0) {
- - IP_VS_ERR("can't register in hook.\n");
- + IP_VS_ERR("can't register hooks.\n");
- goto cleanup_conn;
- }
- - ret = nf_register_hook(&ip_vs_out_ops);
- - if (ret < 0) {
- - IP_VS_ERR("can't register out hook.\n");
- - goto cleanup_inops;
- - }
- - ret = nf_register_hook(&ip_vs_post_routing_ops);
- - if (ret < 0) {
- - IP_VS_ERR("can't register post_routing hook.\n");
- - goto cleanup_outops;
- - }
- - ret = nf_register_hook(&ip_vs_forward_icmp_ops);
- - if (ret < 0) {
- - IP_VS_ERR("can't register forward_icmp hook.\n");
- - goto cleanup_postroutingops;
- - }
- -
- IP_VS_INFO("ipvs loaded.\n");
- return ret;
- - cleanup_postroutingops:
- - nf_unregister_hook(&ip_vs_post_routing_ops);
- - cleanup_outops:
- - nf_unregister_hook(&ip_vs_out_ops);
- - cleanup_inops:
- - nf_unregister_hook(&ip_vs_in_ops);
- cleanup_conn:
- ip_vs_conn_cleanup();
- cleanup_app:
- @@ -1182,10 +1112,7 @@
- static void __exit ip_vs_cleanup(void)
- {
- - nf_unregister_hook(&ip_vs_forward_icmp_ops);
- - nf_unregister_hook(&ip_vs_post_routing_ops);
- - nf_unregister_hook(&ip_vs_out_ops);
- - nf_unregister_hook(&ip_vs_in_ops);
- + nf_unregister_hooks(ip_vs_ops, ARRAY_SIZE(ip_vs_ops));
- ip_vs_conn_cleanup();
- ip_vs_app_cleanup();
- ip_vs_protocol_cleanup();
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ctl.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ctl.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ctl.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ctl.c 2009-02-16 12:56:22.000000000 -0400
- @@ -5,8 +5,6 @@
- * high-performance and highly available server based on a
- * cluster of servers.
- *
- - * Version: $Id: ip_vs_ctl.c,v 1.36 2003/06/08 09:31:19 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- * Peter Kese <peter.kese@ijs.si>
- * Julian Anastasov <ja@ssi.bg>
- @@ -29,13 +27,13 @@
- #include <linux/proc_fs.h>
- #include <linux/workqueue.h>
- #include <linux/swap.h>
- -#include <linux/proc_fs.h>
- #include <linux/seq_file.h>
- #include <linux/netfilter.h>
- #include <linux/netfilter_ipv4.h>
- #include <linux/mutex.h>
- +#include <net/net_namespace.h>
- #include <net/ip.h>
- #include <net/route.h>
- #include <net/sock.h>
- @@ -579,6 +577,31 @@
- return NULL;
- }
- +/*
- + * Find destination by {daddr,dport,vaddr,protocol}
- + * Created to be used in ip_vs_process_message() in
- + * the backup synchronization daemon. It finds the
- + * destination to be bound to the received connection
- + * on the backup.
- + *
- + * ip_vs_lookup_real_service() looked promising, but
- + * does not seem to work as expected.
- + */
- +struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
- + __be32 vaddr, __be16 vport, __u16 protocol)
- +{
- + struct ip_vs_dest *dest;
- + struct ip_vs_service *svc;
- +
- + svc = ip_vs_service_get(0, protocol, vaddr, vport);
- + if (!svc)
- + return NULL;
- + dest = ip_vs_lookup_dest(svc, daddr, dport);
- + if (dest)
- + atomic_inc(&dest->refcnt);
- + ip_vs_service_put(svc);
- + return dest;
- +}
- /*
- * Lookup dest by {svc,addr,port} in the destination trash.
- @@ -660,9 +683,22 @@
- ip_vs_zero_stats(struct ip_vs_stats *stats)
- {
- spin_lock_bh(&stats->lock);
- - memset(stats, 0, (char *)&stats->lock - (char *)stats);
- - spin_unlock_bh(&stats->lock);
- +
- + stats->conns = 0;
- + stats->inpkts = 0;
- + stats->outpkts = 0;
- + stats->inbytes = 0;
- + stats->outbytes = 0;
- +
- + stats->cps = 0;
- + stats->inpps = 0;
- + stats->outpps = 0;
- + stats->inbps = 0;
- + stats->outbps = 0;
- +
- ip_vs_zero_estimator(stats);
- +
- + spin_unlock_bh(&stats->lock);
- }
- /*
- @@ -679,7 +715,7 @@
- conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
- /* check if local node and update the flags */
- - if (inet_addr_type(udest->addr) == RTN_LOCAL) {
- + if (inet_addr_type(&init_net, udest->addr) == RTN_LOCAL) {
- conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
- | IP_VS_CONN_F_LOCALNODE;
- }
- @@ -731,7 +767,7 @@
- EnterFunction(2);
- - atype = inet_addr_type(udest->addr);
- + atype = inet_addr_type(&init_net, udest->addr);
- if (atype != RTN_LOCAL && atype != RTN_UNICAST)
- return -EINVAL;
- @@ -909,7 +945,7 @@
- write_lock_bh(&__ip_vs_svc_lock);
- /* Wait until all other svc users go away */
- - while (atomic_read(&svc->usecnt) > 1) {};
- + IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
- /* call the update_service, because server weight may be changed */
- svc->scheduler->update_service(svc);
- @@ -1399,7 +1435,6 @@
- static struct ctl_table vs_vars[] = {
- {
- - .ctl_name = NET_IPV4_VS_AMEMTHRESH,
- .procname = "amemthresh",
- .data = &sysctl_ip_vs_amemthresh,
- .maxlen = sizeof(int),
- @@ -1408,7 +1443,6 @@
- },
- #ifdef CONFIG_IP_VS_DEBUG
- {
- - .ctl_name = NET_IPV4_VS_DEBUG_LEVEL,
- .procname = "debug_level",
- .data = &sysctl_ip_vs_debug_level,
- .maxlen = sizeof(int),
- @@ -1417,7 +1451,6 @@
- },
- #endif
- {
- - .ctl_name = NET_IPV4_VS_AMDROPRATE,
- .procname = "am_droprate",
- .data = &sysctl_ip_vs_am_droprate,
- .maxlen = sizeof(int),
- @@ -1425,7 +1458,6 @@
- .proc_handler = &proc_dointvec,
- },
- {
- - .ctl_name = NET_IPV4_VS_DROP_ENTRY,
- .procname = "drop_entry",
- .data = &sysctl_ip_vs_drop_entry,
- .maxlen = sizeof(int),
- @@ -1433,7 +1465,6 @@
- .proc_handler = &proc_do_defense_mode,
- },
- {
- - .ctl_name = NET_IPV4_VS_DROP_PACKET,
- .procname = "drop_packet",
- .data = &sysctl_ip_vs_drop_packet,
- .maxlen = sizeof(int),
- @@ -1441,7 +1472,6 @@
- .proc_handler = &proc_do_defense_mode,
- },
- {
- - .ctl_name = NET_IPV4_VS_SECURE_TCP,
- .procname = "secure_tcp",
- .data = &sysctl_ip_vs_secure_tcp,
- .maxlen = sizeof(int),
- @@ -1450,7 +1480,6 @@
- },
- #if 0
- {
- - .ctl_name = NET_IPV4_VS_TO_ES,
- .procname = "timeout_established",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
- .maxlen = sizeof(int),
- @@ -1458,7 +1487,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_SS,
- .procname = "timeout_synsent",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
- .maxlen = sizeof(int),
- @@ -1466,7 +1494,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_SR,
- .procname = "timeout_synrecv",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
- .maxlen = sizeof(int),
- @@ -1474,7 +1501,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_FW,
- .procname = "timeout_finwait",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
- .maxlen = sizeof(int),
- @@ -1482,7 +1508,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_TW,
- .procname = "timeout_timewait",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
- .maxlen = sizeof(int),
- @@ -1490,7 +1515,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_CL,
- .procname = "timeout_close",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
- .maxlen = sizeof(int),
- @@ -1498,7 +1522,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_CW,
- .procname = "timeout_closewait",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
- .maxlen = sizeof(int),
- @@ -1506,7 +1529,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_LA,
- .procname = "timeout_lastack",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
- .maxlen = sizeof(int),
- @@ -1514,7 +1536,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_LI,
- .procname = "timeout_listen",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
- .maxlen = sizeof(int),
- @@ -1522,7 +1543,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_SA,
- .procname = "timeout_synack",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
- .maxlen = sizeof(int),
- @@ -1530,7 +1550,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_UDP,
- .procname = "timeout_udp",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
- .maxlen = sizeof(int),
- @@ -1538,7 +1557,6 @@
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- - .ctl_name = NET_IPV4_VS_TO_ICMP,
- .procname = "timeout_icmp",
- .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
- .maxlen = sizeof(int),
- @@ -1547,7 +1565,6 @@
- },
- #endif
- {
- - .ctl_name = NET_IPV4_VS_CACHE_BYPASS,
- .procname = "cache_bypass",
- .data = &sysctl_ip_vs_cache_bypass,
- .maxlen = sizeof(int),
- @@ -1555,7 +1572,6 @@
- .proc_handler = &proc_dointvec,
- },
- {
- - .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN,
- .procname = "expire_nodest_conn",
- .data = &sysctl_ip_vs_expire_nodest_conn,
- .maxlen = sizeof(int),
- @@ -1563,7 +1579,6 @@
- .proc_handler = &proc_dointvec,
- },
- {
- - .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE,
- .procname = "expire_quiescent_template",
- .data = &sysctl_ip_vs_expire_quiescent_template,
- .maxlen = sizeof(int),
- @@ -1571,7 +1586,6 @@
- .proc_handler = &proc_dointvec,
- },
- {
- - .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD,
- .procname = "sync_threshold",
- .data = &sysctl_ip_vs_sync_threshold,
- .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
- @@ -1579,7 +1593,6 @@
- .proc_handler = &proc_do_sync_threshold,
- },
- {
- - .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND,
- .procname = "nat_icmp_send",
- .data = &sysctl_ip_vs_nat_icmp_send,
- .maxlen = sizeof(int),
- @@ -1589,35 +1602,13 @@
- { .ctl_name = 0 }
- };
- -static ctl_table vs_table[] = {
- - {
- - .ctl_name = NET_IPV4_VS,
- - .procname = "vs",
- - .mode = 0555,
- - .child = vs_vars
- - },
- - { .ctl_name = 0 }
- -};
- -
- -static ctl_table ipvs_ipv4_table[] = {
- - {
- - .ctl_name = NET_IPV4,
- - .procname = "ipv4",
- - .mode = 0555,
- - .child = vs_table,
- - },
- - { .ctl_name = 0 }
- -};
- -
- -static ctl_table vs_root_table[] = {
- - {
- - .ctl_name = CTL_NET,
- - .procname = "net",
- - .mode = 0555,
- - .child = ipvs_ipv4_table,
- - },
- - { .ctl_name = 0 }
- +const struct ctl_path net_vs_ctl_path[] = {
- + { .procname = "net", .ctl_name = CTL_NET, },
- + { .procname = "ipv4", .ctl_name = NET_IPV4, },
- + { .procname = "vs", },
- + { }
- };
- +EXPORT_SYMBOL_GPL(net_vs_ctl_path);
- static struct ctl_table_header * sysctl_header;
- @@ -1783,7 +1774,7 @@
- return 0;
- }
- -static struct seq_operations ip_vs_info_seq_ops = {
- +static const struct seq_operations ip_vs_info_seq_ops = {
- .start = ip_vs_info_seq_start,
- .next = ip_vs_info_seq_next,
- .stop = ip_vs_info_seq_stop,
- @@ -1792,27 +1783,11 @@
- static int ip_vs_info_open(struct inode *inode, struct file *file)
- {
- - struct seq_file *seq;
- - int rc = -ENOMEM;
- - struct ip_vs_iter *s = kzalloc(sizeof(*s), GFP_KERNEL);
- -
- - if (!s)
- - goto out;
- -
- - rc = seq_open(file, &ip_vs_info_seq_ops);
- - if (rc)
- - goto out_kfree;
- -
- - seq = file->private_data;
- - seq->private = s;
- -out:
- - return rc;
- -out_kfree:
- - kfree(s);
- - goto out;
- + return seq_open_private(file, &ip_vs_info_seq_ops,
- + sizeof(struct ip_vs_iter));
- }
- -static struct file_operations ip_vs_info_fops = {
- +static const struct file_operations ip_vs_info_fops = {
- .owner = THIS_MODULE,
- .open = ip_vs_info_open,
- .read = seq_read,
- @@ -1822,7 +1797,9 @@
- #endif
- -struct ip_vs_stats ip_vs_stats;
- +struct ip_vs_stats ip_vs_stats = {
- + .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
- +};
- #ifdef CONFIG_PROC_FS
- static int ip_vs_stats_show(struct seq_file *seq, void *v)
- @@ -1859,7 +1836,7 @@
- return single_open(file, ip_vs_stats_show, NULL);
- }
- -static struct file_operations ip_vs_stats_fops = {
- +static const struct file_operations ip_vs_stats_fops = {
- .owner = THIS_MODULE,
- .open = ip_vs_stats_seq_open,
- .read = seq_read,
- @@ -2340,10 +2317,11 @@
- .get_optmin = IP_VS_BASE_CTL,
- .get_optmax = IP_VS_SO_GET_MAX+1,
- .get = do_ip_vs_get_ctl,
- + .owner = THIS_MODULE,
- };
- -int ip_vs_control_init(void)
- +int __init ip_vs_control_init(void)
- {
- int ret;
- int idx;
- @@ -2356,10 +2334,10 @@
- return ret;
- }
- - proc_net_fops_create("ip_vs", 0, &ip_vs_info_fops);
- - proc_net_fops_create("ip_vs_stats",0, &ip_vs_stats_fops);
- + proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
- + proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
- - sysctl_header = register_sysctl_table(vs_root_table, 0);
- + sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
- /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
- for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
- @@ -2370,8 +2348,6 @@
- INIT_LIST_HEAD(&ip_vs_rtable[idx]);
- }
- - memset(&ip_vs_stats, 0, sizeof(ip_vs_stats));
- - spin_lock_init(&ip_vs_stats.lock);
- ip_vs_new_estimator(&ip_vs_stats);
- /* Hook the defense timer */
- @@ -2387,10 +2363,11 @@
- EnterFunction(2);
- ip_vs_trash_cleanup();
- cancel_rearming_delayed_work(&defense_work);
- + cancel_work_sync(&defense_work.work);
- ip_vs_kill_estimator(&ip_vs_stats);
- unregister_sysctl_table(sysctl_header);
- - proc_net_remove("ip_vs_stats");
- - proc_net_remove("ip_vs");
- + proc_net_remove(&init_net, "ip_vs_stats");
- + proc_net_remove(&init_net, "ip_vs");
- nf_unregister_sockopt(&ip_vs_sockopts);
- LeaveFunction(2);
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_dh.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_dh.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_dh.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_dh.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * IPVS: Destination Hashing scheduling module
- *
- - * Version: $Id: ip_vs_dh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@gnuchina.org>
- *
- * Inspired by the consistent hashing scheduler patch from
- @@ -204,7 +202,7 @@
- {
- struct ip_vs_dest *dest;
- struct ip_vs_dh_bucket *tbl;
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- IP_VS_DBG(6, "ip_vs_dh_schedule(): Scheduling...\n");
- @@ -235,6 +233,7 @@
- .name = "dh",
- .refcnt = ATOMIC_INIT(0),
- .module = THIS_MODULE,
- + .n_list = LIST_HEAD_INIT(ip_vs_dh_scheduler.n_list),
- .init_service = ip_vs_dh_init_svc,
- .done_service = ip_vs_dh_done_svc,
- .update_service = ip_vs_dh_update_svc,
- @@ -244,7 +243,6 @@
- static int __init ip_vs_dh_init(void)
- {
- - INIT_LIST_HEAD(&ip_vs_dh_scheduler.n_list);
- return register_ip_vs_scheduler(&ip_vs_dh_scheduler);
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_est.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_est.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_est.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_est.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * ip_vs_est.c: simple rate estimator for IPVS
- *
- - * Version: $Id: ip_vs_est.c,v 1.4 2002/11/30 01:50:35 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * This program is free software; you can redistribute it and/or
- @@ -18,6 +16,8 @@
- #include <linux/slab.h>
- #include <linux/types.h>
- #include <linux/interrupt.h>
- +#include <linux/sysctl.h>
- +#include <linux/list.h>
- #include <net/ip_vs.h>
- @@ -45,28 +45,11 @@
- */
- -struct ip_vs_estimator
- -{
- - struct ip_vs_estimator *next;
- - struct ip_vs_stats *stats;
- +static void estimation_timer(unsigned long arg);
- - u32 last_conns;
- - u32 last_inpkts;
- - u32 last_outpkts;
- - u64 last_inbytes;
- - u64 last_outbytes;
- -
- - u32 cps;
- - u32 inpps;
- - u32 outpps;
- - u32 inbps;
- - u32 outbps;
- -};
- -
- -
- -static struct ip_vs_estimator *est_list = NULL;
- -static DEFINE_RWLOCK(est_lock);
- -static struct timer_list est_timer;
- +static LIST_HEAD(est_list);
- +static DEFINE_SPINLOCK(est_lock);
- +static DEFINE_TIMER(est_timer, estimation_timer, 0, 0);
- static void estimation_timer(unsigned long arg)
- {
- @@ -77,9 +60,9 @@
- u64 n_inbytes, n_outbytes;
- u32 rate;
- - read_lock(&est_lock);
- - for (e = est_list; e; e = e->next) {
- - s = e->stats;
- + spin_lock(&est_lock);
- + list_for_each_entry(e, &est_list, list) {
- + s = container_of(e, struct ip_vs_stats, est);
- spin_lock(&s->lock);
- n_conns = s->conns;
- @@ -115,19 +98,16 @@
- s->outbps = (e->outbps+0xF)>>5;
- spin_unlock(&s->lock);
- }
- - read_unlock(&est_lock);
- + spin_unlock(&est_lock);
- mod_timer(&est_timer, jiffies + 2*HZ);
- }
- -int ip_vs_new_estimator(struct ip_vs_stats *stats)
- +void ip_vs_new_estimator(struct ip_vs_stats *stats)
- {
- - struct ip_vs_estimator *est;
- + struct ip_vs_estimator *est = &stats->est;
- - est = kzalloc(sizeof(*est), GFP_KERNEL);
- - if (est == NULL)
- - return -ENOMEM;
- + INIT_LIST_HEAD(&est->list);
- - est->stats = stats;
- est->last_conns = stats->conns;
- est->cps = stats->cps<<10;
- @@ -143,60 +123,40 @@
- est->last_outbytes = stats->outbytes;
- est->outbps = stats->outbps<<5;
- - write_lock_bh(&est_lock);
- - est->next = est_list;
- - if (est->next == NULL) {
- - init_timer(&est_timer);
- - est_timer.expires = jiffies + 2*HZ;
- - est_timer.function = estimation_timer;
- - add_timer(&est_timer);
- - }
- - est_list = est;
- - write_unlock_bh(&est_lock);
- - return 0;
- + spin_lock_bh(&est_lock);
- + if (list_empty(&est_list))
- + mod_timer(&est_timer, jiffies + 2 * HZ);
- + list_add(&est->list, &est_list);
- + spin_unlock_bh(&est_lock);
- }
- void ip_vs_kill_estimator(struct ip_vs_stats *stats)
- {
- - struct ip_vs_estimator *est, **pest;
- - int killed = 0;
- + struct ip_vs_estimator *est = &stats->est;
- - write_lock_bh(&est_lock);
- - pest = &est_list;
- - while ((est=*pest) != NULL) {
- - if (est->stats != stats) {
- - pest = &est->next;
- - continue;
- - }
- - *pest = est->next;
- - kfree(est);
- - killed++;
- + spin_lock_bh(&est_lock);
- + list_del(&est->list);
- + while (list_empty(&est_list) && try_to_del_timer_sync(&est_timer) < 0) {
- + spin_unlock_bh(&est_lock);
- + cpu_relax();
- + spin_lock_bh(&est_lock);
- }
- - if (killed && est_list == NULL)
- - del_timer_sync(&est_timer);
- - write_unlock_bh(&est_lock);
- + spin_unlock_bh(&est_lock);
- }
- void ip_vs_zero_estimator(struct ip_vs_stats *stats)
- {
- - struct ip_vs_estimator *e;
- + struct ip_vs_estimator *est = &stats->est;
- - write_lock_bh(&est_lock);
- - for (e = est_list; e; e = e->next) {
- - if (e->stats != stats)
- - continue;
- -
- - /* set counters zero */
- - e->last_conns = 0;
- - e->last_inpkts = 0;
- - e->last_outpkts = 0;
- - e->last_inbytes = 0;
- - e->last_outbytes = 0;
- - e->cps = 0;
- - e->inpps = 0;
- - e->outpps = 0;
- - e->inbps = 0;
- - e->outbps = 0;
- - }
- - write_unlock_bh(&est_lock);
- + /* set counters zero, caller must hold the stats->lock lock */
- + est->last_inbytes = 0;
- + est->last_outbytes = 0;
- + est->last_conns = 0;
- + est->last_inpkts = 0;
- + est->last_outpkts = 0;
- + est->cps = 0;
- + est->inpps = 0;
- + est->outpps = 0;
- + est->inbps = 0;
- + est->outbps = 0;
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ftp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ftp.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_ftp.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_ftp.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * ip_vs_ftp.c: IPVS ftp application module
- *
- - * Version: $Id: ip_vs_ftp.c,v 1.13 2002/09/15 08:14:08 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * Changes:
- @@ -30,6 +28,7 @@
- #include <linux/skbuff.h>
- #include <linux/in.h>
- #include <linux/ip.h>
- +#include <linux/netfilter.h>
- #include <net/protocol.h>
- #include <net/tcp.h>
- #include <asm/unaligned.h>
- @@ -135,7 +134,7 @@
- * xxx,xxx,xxx,xxx is the server address, ppp,ppp is the server port number.
- */
- static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp,
- - struct sk_buff **pskb, int *diff)
- + struct sk_buff *skb, int *diff)
- {
- struct iphdr *iph;
- struct tcphdr *th;
- @@ -155,14 +154,14 @@
- return 1;
- /* Linear packets are much easier to deal with. */
- - if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
- + if (!skb_make_writable(skb, skb->len))
- return 0;
- if (cp->app_data == &ip_vs_ftp_pasv) {
- - iph = (*pskb)->nh.iph;
- + iph = ip_hdr(skb);
- th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
- data = (char *)th + (th->doff << 2);
- - data_limit = (*pskb)->tail;
- + data_limit = skb_tail_pointer(skb);
- if (ip_vs_ftp_get_addrport(data, data_limit,
- SERVER_STRING,
- @@ -213,7 +212,7 @@
- memcpy(start, buf, buf_len);
- ret = 1;
- } else {
- - ret = !ip_vs_skb_replace(*pskb, GFP_ATOMIC, start,
- + ret = !ip_vs_skb_replace(skb, GFP_ATOMIC, start,
- end-start, buf, buf_len);
- }
- @@ -238,7 +237,7 @@
- * the client.
- */
- static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp,
- - struct sk_buff **pskb, int *diff)
- + struct sk_buff *skb, int *diff)
- {
- struct iphdr *iph;
- struct tcphdr *th;
- @@ -256,20 +255,20 @@
- return 1;
- /* Linear packets are much easier to deal with. */
- - if (!ip_vs_make_skb_writable(pskb, (*pskb)->len))
- + if (!skb_make_writable(skb, skb->len))
- return 0;
- /*
- * Detecting whether it is passive
- */
- - iph = (*pskb)->nh.iph;
- + iph = ip_hdr(skb);
- th = (struct tcphdr *)&(((char *)iph)[iph->ihl*4]);
- /* Since there may be OPTIONS in the TCP packet and the HLEN is
- the length of the header in 32-bit multiples, it is accurate
- to calculate data address by th+HLEN*4 */
- data = data_start = (char *)th + (th->doff << 2);
- - data_limit = (*pskb)->tail;
- + data_limit = skb_tail_pointer(skb);
- while (data <= data_limit - 6) {
- if (strnicmp(data, "PASV\r\n", 6) == 0) {
- @@ -370,7 +369,7 @@
- if (ret)
- break;
- IP_VS_INFO("%s: loaded support on port[%d] = %d\n",
- - app->name, i, ports[i]);
- + app->name, i, ports[i]);
- }
- if (ret)
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblc.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblc.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblc.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * IPVS: Locality-Based Least-Connection scheduling module
- *
- - * Version: $Id: ip_vs_lblc.c,v 1.10 2002/09/15 08:14:08 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@gnuchina.org>
- *
- * This program is free software; you can redistribute it and/or
- @@ -114,46 +112,15 @@
- static ctl_table vs_vars_table[] = {
- {
- - .ctl_name = NET_IPV4_VS_LBLC_EXPIRE,
- .procname = "lblc_expiration",
- .data = &sysctl_ip_vs_lblc_expiration,
- .maxlen = sizeof(int),
- - .mode = 0644,
- + .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- { .ctl_name = 0 }
- };
- -static ctl_table vs_table[] = {
- - {
- - .ctl_name = NET_IPV4_VS,
- - .procname = "vs",
- - .mode = 0555,
- - .child = vs_vars_table
- - },
- - { .ctl_name = 0 }
- -};
- -
- -static ctl_table ipvs_ipv4_table[] = {
- - {
- - .ctl_name = NET_IPV4,
- - .procname = "ipv4",
- - .mode = 0555,
- - .child = vs_table
- - },
- - { .ctl_name = 0 }
- -};
- -
- -static ctl_table lblc_root_table[] = {
- - {
- - .ctl_name = CTL_NET,
- - .procname = "net",
- - .mode = 0555,
- - .child = ipvs_ipv4_table
- - },
- - { .ctl_name = 0 }
- -};
- -
- static struct ctl_table_header * sysctl_header;
- /*
- @@ -288,7 +255,7 @@
- write_lock(&tbl->lock);
- list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
- - if (time_before(now,
- + if (time_before(now,
- en->lastuse + sysctl_ip_vs_lblc_expiration))
- continue;
- @@ -393,9 +360,8 @@
- /*
- * Hook periodic timer for garbage collection
- */
- - init_timer(&tbl->periodic_timer);
- - tbl->periodic_timer.data = (unsigned long)tbl;
- - tbl->periodic_timer.function = ip_vs_lblc_check_expire;
- + setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire,
- + (unsigned long)tbl);
- tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
- add_timer(&tbl->periodic_timer);
- @@ -521,7 +487,7 @@
- struct ip_vs_dest *dest;
- struct ip_vs_lblc_table *tbl;
- struct ip_vs_lblc_entry *en;
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- IP_VS_DBG(6, "ip_vs_lblc_schedule(): Scheduling...\n");
- @@ -573,6 +539,7 @@
- .name = "lblc",
- .refcnt = ATOMIC_INIT(0),
- .module = THIS_MODULE,
- + .n_list = LIST_HEAD_INIT(ip_vs_lblc_scheduler.n_list),
- .init_service = ip_vs_lblc_init_svc,
- .done_service = ip_vs_lblc_done_svc,
- .update_service = ip_vs_lblc_update_svc,
- @@ -582,9 +549,13 @@
- static int __init ip_vs_lblc_init(void)
- {
- - INIT_LIST_HEAD(&ip_vs_lblc_scheduler.n_list);
- - sysctl_header = register_sysctl_table(lblc_root_table, 0);
- - return register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
- + int ret;
- +
- + sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
- + ret = register_ip_vs_scheduler(&ip_vs_lblc_scheduler);
- + if (ret)
- + unregister_sysctl_table(sysctl_header);
- + return ret;
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblcr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblcr.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lblcr.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lblcr.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * IPVS: Locality-Based Least-Connection with Replication scheduler
- *
- - * Version: $Id: ip_vs_lblcr.c,v 1.11 2002/09/15 08:14:08 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@gnuchina.org>
- *
- * This program is free software; you can redistribute it and/or
- @@ -48,8 +46,7 @@
- /* for sysctl */
- #include <linux/fs.h>
- #include <linux/sysctl.h>
- -/* for proc_net_create/proc_net_remove */
- -#include <linux/proc_fs.h>
- +#include <net/net_namespace.h>
- #include <net/ip_vs.h>
- @@ -303,46 +300,15 @@
- static ctl_table vs_vars_table[] = {
- {
- - .ctl_name = NET_IPV4_VS_LBLCR_EXPIRE,
- .procname = "lblcr_expiration",
- .data = &sysctl_ip_vs_lblcr_expiration,
- .maxlen = sizeof(int),
- - .mode = 0644,
- + .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- { .ctl_name = 0 }
- };
- -static ctl_table vs_table[] = {
- - {
- - .ctl_name = NET_IPV4_VS,
- - .procname = "vs",
- - .mode = 0555,
- - .child = vs_vars_table
- - },
- - { .ctl_name = 0 }
- -};
- -
- -static ctl_table ipvs_ipv4_table[] = {
- - {
- - .ctl_name = NET_IPV4,
- - .procname = "ipv4",
- - .mode = 0555,
- - .child = vs_table
- - },
- - { .ctl_name = 0 }
- -};
- -
- -static ctl_table lblcr_root_table[] = {
- - {
- - .ctl_name = CTL_NET,
- - .procname = "net",
- - .mode = 0555,
- - .child = ipvs_ipv4_table
- - },
- - { .ctl_name = 0 }
- -};
- -
- static struct ctl_table_header * sysctl_header;
- /*
- @@ -546,71 +512,6 @@
- mod_timer(&tbl->periodic_timer, jiffies+CHECK_EXPIRE_INTERVAL);
- }
- -
- -#ifdef CONFIG_IP_VS_LBLCR_DEBUG
- -static struct ip_vs_lblcr_table *lblcr_table_list;
- -
- -/*
- - * /proc/net/ip_vs_lblcr to display the mappings of
- - * destination IP address <==> its serverSet
- - */
- -static int
- -ip_vs_lblcr_getinfo(char *buffer, char **start, off_t offset, int length)
- -{
- - off_t pos=0, begin;
- - int len=0, size;
- - struct ip_vs_lblcr_table *tbl;
- - unsigned long now = jiffies;
- - int i;
- - struct ip_vs_lblcr_entry *en;
- -
- - tbl = lblcr_table_list;
- -
- - size = sprintf(buffer, "LastTime Dest IP address Server set\n");
- - pos += size;
- - len += size;
- -
- - for (i=0; i<IP_VS_LBLCR_TAB_SIZE; i++) {
- - read_lock_bh(&tbl->lock);
- - list_for_each_entry(en, &tbl->bucket[i], list) {
- - char tbuf[16];
- - struct ip_vs_dest_list *d;
- -
- - sprintf(tbuf, "%u.%u.%u.%u", NIPQUAD(en->addr));
- - size = sprintf(buffer+len, "%8lu %-16s ",
- - now-en->lastuse, tbuf);
- -
- - read_lock(&en->set.lock);
- - for (d=en->set.list; d!=NULL; d=d->next) {
- - size += sprintf(buffer+len+size,
- - "%u.%u.%u.%u ",
- - NIPQUAD(d->dest->addr));
- - }
- - read_unlock(&en->set.lock);
- - size += sprintf(buffer+len+size, "\n");
- - len += size;
- - pos += size;
- - if (pos <= offset)
- - len=0;
- - if (pos >= offset+length) {
- - read_unlock_bh(&tbl->lock);
- - goto done;
- - }
- - }
- - read_unlock_bh(&tbl->lock);
- - }
- -
- - done:
- - begin = len - (pos - offset);
- - *start = buffer + begin;
- - len -= begin;
- - if(len>length)
- - len = length;
- - return len;
- -}
- -#endif
- -
- -
- static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc)
- {
- int i;
- @@ -643,15 +544,11 @@
- /*
- * Hook periodic timer for garbage collection
- */
- - init_timer(&tbl->periodic_timer);
- - tbl->periodic_timer.data = (unsigned long)tbl;
- - tbl->periodic_timer.function = ip_vs_lblcr_check_expire;
- + setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire,
- + (unsigned long)tbl);
- tbl->periodic_timer.expires = jiffies+CHECK_EXPIRE_INTERVAL;
- add_timer(&tbl->periodic_timer);
- -#ifdef CONFIG_IP_VS_LBLCR_DEBUG
- - lblcr_table_list = tbl;
- -#endif
- return 0;
- }
- @@ -775,7 +672,7 @@
- struct ip_vs_dest *dest;
- struct ip_vs_lblcr_table *tbl;
- struct ip_vs_lblcr_entry *en;
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- IP_VS_DBG(6, "ip_vs_lblcr_schedule(): Scheduling...\n");
- @@ -831,6 +728,7 @@
- .name = "lblcr",
- .refcnt = ATOMIC_INIT(0),
- .module = THIS_MODULE,
- + .n_list = LIST_HEAD_INIT(ip_vs_lblcr_scheduler.n_list),
- .init_service = ip_vs_lblcr_init_svc,
- .done_service = ip_vs_lblcr_done_svc,
- .update_service = ip_vs_lblcr_update_svc,
- @@ -840,20 +738,18 @@
- static int __init ip_vs_lblcr_init(void)
- {
- - INIT_LIST_HEAD(&ip_vs_lblcr_scheduler.n_list);
- - sysctl_header = register_sysctl_table(lblcr_root_table, 0);
- -#ifdef CONFIG_IP_VS_LBLCR_DEBUG
- - proc_net_create("ip_vs_lblcr", 0, ip_vs_lblcr_getinfo);
- -#endif
- - return register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
- + int ret;
- +
- + sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars_table);
- + ret = register_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
- + if (ret)
- + unregister_sysctl_table(sysctl_header);
- + return ret;
- }
- static void __exit ip_vs_lblcr_cleanup(void)
- {
- -#ifdef CONFIG_IP_VS_LBLCR_DEBUG
- - proc_net_remove("ip_vs_lblcr");
- -#endif
- unregister_sysctl_table(sysctl_header);
- unregister_ip_vs_scheduler(&ip_vs_lblcr_scheduler);
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lc.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_lc.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_lc.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * IPVS: Least-Connection Scheduling module
- *
- - * Version: $Id: ip_vs_lc.c,v 1.10 2003/04/18 09:03:16 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * This program is free software; you can redistribute it and/or
- @@ -100,6 +98,7 @@
- .name = "lc",
- .refcnt = ATOMIC_INIT(0),
- .module = THIS_MODULE,
- + .n_list = LIST_HEAD_INIT(ip_vs_lc_scheduler.n_list),
- .init_service = ip_vs_lc_init_svc,
- .done_service = ip_vs_lc_done_svc,
- .update_service = ip_vs_lc_update_svc,
- @@ -109,7 +108,6 @@
- static int __init ip_vs_lc_init(void)
- {
- - INIT_LIST_HEAD(&ip_vs_lc_scheduler.n_list);
- return register_ip_vs_scheduler(&ip_vs_lc_scheduler) ;
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_nq.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_nq.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_nq.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_nq.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * IPVS: Never Queue scheduling module
- *
- - * Version: $Id: ip_vs_nq.c,v 1.2 2003/06/08 09:31:19 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * This program is free software; you can redistribute it and/or
- @@ -138,6 +136,7 @@
- .name = "nq",
- .refcnt = ATOMIC_INIT(0),
- .module = THIS_MODULE,
- + .n_list = LIST_HEAD_INIT(ip_vs_nq_scheduler.n_list),
- .init_service = ip_vs_nq_init_svc,
- .done_service = ip_vs_nq_done_svc,
- .update_service = ip_vs_nq_update_svc,
- @@ -147,7 +146,6 @@
- static int __init ip_vs_nq_init(void)
- {
- - INIT_LIST_HEAD(&ip_vs_nq_scheduler.n_list);
- return register_ip_vs_scheduler(&ip_vs_nq_scheduler);
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * ip_vs_proto.c: transport protocol load balancing support for IPVS
- *
- - * Version: $Id: ip_vs_proto.c,v 1.2 2003/04/18 09:03:16 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- * Julian Anastasov <ja@ssi.bg>
- *
- @@ -45,7 +43,7 @@
- /*
- * register an ipvs protocol
- */
- -static int register_ip_vs_protocol(struct ip_vs_protocol *pp)
- +static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp)
- {
- unsigned hash = IP_VS_PROTO_HASH(pp->protocol);
- @@ -148,7 +146,7 @@
- struct ip_vs_protocol *pp = ip_vs_proto_get(proto);
- if (pp == NULL || pp->state_name == NULL)
- - return "ERR!";
- + return (IPPROTO_IP == proto) ? "NONE" : "ERR!";
- return pp->state_name(state);
- }
- @@ -165,7 +163,7 @@
- ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
- if (ih == NULL)
- sprintf(buf, "%s TRUNCATED", pp->name);
- - else if (ih->frag_off & __constant_htons(IP_OFFSET))
- + else if (ih->frag_off & htons(IP_OFFSET))
- sprintf(buf, "%s %u.%u.%u.%u->%u.%u.%u.%u frag",
- pp->name, NIPQUAD(ih->saddr),
- NIPQUAD(ih->daddr));
- @@ -192,7 +190,7 @@
- }
- -int ip_vs_protocol_init(void)
- +int __init ip_vs_protocol_init(void)
- {
- char protocols[64];
- #define REGISTER_PROTOCOL(p) \
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_ah.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_ah.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_ah.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_ah.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * ip_vs_proto_ah.c: AH IPSec load balancing support for IPVS
- *
- - * Version: $Id: ip_vs_proto_ah.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
- - *
- * Authors: Julian Anastasov <ja@ssi.bg>, February 2002
- * Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- @@ -52,15 +50,15 @@
- if (likely(!inverse)) {
- cp = ip_vs_conn_in_get(IPPROTO_UDP,
- iph->saddr,
- - __constant_htons(PORT_ISAKMP),
- + htons(PORT_ISAKMP),
- iph->daddr,
- - __constant_htons(PORT_ISAKMP));
- + htons(PORT_ISAKMP));
- } else {
- cp = ip_vs_conn_in_get(IPPROTO_UDP,
- iph->daddr,
- - __constant_htons(PORT_ISAKMP),
- + htons(PORT_ISAKMP),
- iph->saddr,
- - __constant_htons(PORT_ISAKMP));
- + htons(PORT_ISAKMP));
- }
- if (!cp) {
- @@ -89,15 +87,15 @@
- if (likely(!inverse)) {
- cp = ip_vs_conn_out_get(IPPROTO_UDP,
- iph->saddr,
- - __constant_htons(PORT_ISAKMP),
- + htons(PORT_ISAKMP),
- iph->daddr,
- - __constant_htons(PORT_ISAKMP));
- + htons(PORT_ISAKMP));
- } else {
- cp = ip_vs_conn_out_get(IPPROTO_UDP,
- iph->daddr,
- - __constant_htons(PORT_ISAKMP),
- + htons(PORT_ISAKMP),
- iph->saddr,
- - __constant_htons(PORT_ISAKMP));
- + htons(PORT_ISAKMP));
- }
- if (!cp) {
- @@ -160,6 +158,7 @@
- struct ip_vs_protocol ip_vs_protocol_ah = {
- .name = "AH",
- .protocol = IPPROTO_AH,
- + .num_states = 1,
- .dont_defrag = 1,
- .init = ah_init,
- .exit = ah_exit,
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_esp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_esp.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_esp.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_esp.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * ip_vs_proto_esp.c: ESP IPSec load balancing support for IPVS
- *
- - * Version: $Id: ip_vs_proto_esp.c,v 1.1 2003/07/04 15:04:37 wensong Exp $
- - *
- * Authors: Julian Anastasov <ja@ssi.bg>, February 2002
- * Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- @@ -52,15 +50,15 @@
- if (likely(!inverse)) {
- cp = ip_vs_conn_in_get(IPPROTO_UDP,
- iph->saddr,
- - __constant_htons(PORT_ISAKMP),
- + htons(PORT_ISAKMP),
- iph->daddr,
- - __constant_htons(PORT_ISAKMP));
- + htons(PORT_ISAKMP));
- } else {
- cp = ip_vs_conn_in_get(IPPROTO_UDP,
- iph->daddr,
- - __constant_htons(PORT_ISAKMP),
- + htons(PORT_ISAKMP),
- iph->saddr,
- - __constant_htons(PORT_ISAKMP));
- + htons(PORT_ISAKMP));
- }
- if (!cp) {
- @@ -89,15 +87,15 @@
- if (likely(!inverse)) {
- cp = ip_vs_conn_out_get(IPPROTO_UDP,
- iph->saddr,
- - __constant_htons(PORT_ISAKMP),
- + htons(PORT_ISAKMP),
- iph->daddr,
- - __constant_htons(PORT_ISAKMP));
- + htons(PORT_ISAKMP));
- } else {
- cp = ip_vs_conn_out_get(IPPROTO_UDP,
- iph->daddr,
- - __constant_htons(PORT_ISAKMP),
- + htons(PORT_ISAKMP),
- iph->saddr,
- - __constant_htons(PORT_ISAKMP));
- + htons(PORT_ISAKMP));
- }
- if (!cp) {
- @@ -159,6 +157,7 @@
- struct ip_vs_protocol ip_vs_protocol_esp = {
- .name = "ESP",
- .protocol = IPPROTO_ESP,
- + .num_states = 1,
- .dont_defrag = 1,
- .init = esp_init,
- .exit = esp_exit,
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_tcp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_tcp.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_tcp.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_tcp.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * ip_vs_proto_tcp.c: TCP load balancing support for IPVS
- *
- - * Version: $Id: ip_vs_proto_tcp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- * Julian Anastasov <ja@ssi.bg>
- *
- @@ -20,6 +18,7 @@
- #include <linux/tcp.h> /* for tcphdr */
- #include <net/ip.h>
- #include <net/tcp.h> /* for csum_tcpudp_magic */
- +#include <linux/netfilter.h>
- #include <linux/netfilter_ipv4.h>
- #include <net/ip_vs.h>
- @@ -76,16 +75,15 @@
- struct ip_vs_service *svc;
- struct tcphdr _tcph, *th;
- - th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- - sizeof(_tcph), &_tcph);
- + th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
- if (th == NULL) {
- *verdict = NF_DROP;
- return 0;
- }
- if (th->syn &&
- - (svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
- - skb->nh.iph->daddr, th->dest))) {
- + (svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
- + ip_hdr(skb)->daddr, th->dest))) {
- if (ip_vs_todrop()) {
- /*
- * It seems that we are very loaded.
- @@ -123,27 +121,27 @@
- static int
- -tcp_snat_handler(struct sk_buff **pskb,
- +tcp_snat_handler(struct sk_buff *skb,
- struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
- {
- struct tcphdr *tcph;
- - unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
- + const unsigned int tcphoff = ip_hdrlen(skb);
- /* csum_check requires unshared skb */
- - if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
- + if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
- return 0;
- if (unlikely(cp->app != NULL)) {
- /* Some checks before mangling */
- - if (pp->csum_check && !pp->csum_check(*pskb, pp))
- + if (pp->csum_check && !pp->csum_check(skb, pp))
- return 0;
- /* Call application helper if needed */
- - if (!ip_vs_app_pkt_out(cp, pskb))
- + if (!ip_vs_app_pkt_out(cp, skb))
- return 0;
- }
- - tcph = (void *)(*pskb)->nh.iph + tcphoff;
- + tcph = (void *)ip_hdr(skb) + tcphoff;
- tcph->source = cp->vport;
- /* Adjust TCP checksums */
- @@ -151,17 +149,15 @@
- /* Only port and addr are changed, do fast csum update */
- tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr,
- cp->dport, cp->vport);
- - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- - (*pskb)->ip_summed = CHECKSUM_NONE;
- + if (skb->ip_summed == CHECKSUM_COMPLETE)
- + skb->ip_summed = CHECKSUM_NONE;
- } else {
- /* full checksum calculation */
- tcph->check = 0;
- - (*pskb)->csum = skb_checksum(*pskb, tcphoff,
- - (*pskb)->len - tcphoff, 0);
- + skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
- tcph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
- - (*pskb)->len - tcphoff,
- - cp->protocol,
- - (*pskb)->csum);
- + skb->len - tcphoff,
- + cp->protocol, skb->csum);
- IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
- pp->name, tcph->check,
- (char*)&(tcph->check) - (char*)tcph);
- @@ -171,30 +167,30 @@
- static int
- -tcp_dnat_handler(struct sk_buff **pskb,
- +tcp_dnat_handler(struct sk_buff *skb,
- struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
- {
- struct tcphdr *tcph;
- - unsigned int tcphoff = (*pskb)->nh.iph->ihl * 4;
- + const unsigned int tcphoff = ip_hdrlen(skb);
- /* csum_check requires unshared skb */
- - if (!ip_vs_make_skb_writable(pskb, tcphoff+sizeof(*tcph)))
- + if (!skb_make_writable(skb, tcphoff+sizeof(*tcph)))
- return 0;
- if (unlikely(cp->app != NULL)) {
- /* Some checks before mangling */
- - if (pp->csum_check && !pp->csum_check(*pskb, pp))
- + if (pp->csum_check && !pp->csum_check(skb, pp))
- return 0;
- /*
- * Attempt ip_vs_app call.
- * It will fix ip_vs_conn and iph ack_seq stuff
- */
- - if (!ip_vs_app_pkt_in(cp, pskb))
- + if (!ip_vs_app_pkt_in(cp, skb))
- return 0;
- }
- - tcph = (void *)(*pskb)->nh.iph + tcphoff;
- + tcph = (void *)ip_hdr(skb) + tcphoff;
- tcph->dest = cp->dport;
- /*
- @@ -204,18 +200,16 @@
- /* Only port and addr are changed, do fast csum update */
- tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr,
- cp->vport, cp->dport);
- - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- - (*pskb)->ip_summed = CHECKSUM_NONE;
- + if (skb->ip_summed == CHECKSUM_COMPLETE)
- + skb->ip_summed = CHECKSUM_NONE;
- } else {
- /* full checksum calculation */
- tcph->check = 0;
- - (*pskb)->csum = skb_checksum(*pskb, tcphoff,
- - (*pskb)->len - tcphoff, 0);
- + skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
- tcph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
- - (*pskb)->len - tcphoff,
- - cp->protocol,
- - (*pskb)->csum);
- - (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
- + skb->len - tcphoff,
- + cp->protocol, skb->csum);
- + skb->ip_summed = CHECKSUM_UNNECESSARY;
- }
- return 1;
- }
- @@ -224,15 +218,15 @@
- static int
- tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
- {
- - unsigned int tcphoff = skb->nh.iph->ihl*4;
- + const unsigned int tcphoff = ip_hdrlen(skb);
- switch (skb->ip_summed) {
- case CHECKSUM_NONE:
- skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0);
- case CHECKSUM_COMPLETE:
- - if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr,
- + if (csum_tcpudp_magic(ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
- skb->len - tcphoff,
- - skb->nh.iph->protocol, skb->csum)) {
- + ip_hdr(skb)->protocol, skb->csum)) {
- IP_VS_DBG_RL_PKT(0, pp, skb, 0,
- "Failed checksum for");
- return 0;
- @@ -467,8 +461,7 @@
- {
- struct tcphdr _tcph, *th;
- - th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- - sizeof(_tcph), &_tcph);
- + th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
- if (th == NULL)
- return 0;
- @@ -555,7 +548,7 @@
- IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
- "%u.%u.%u.%u:%u to app %s on port %u\n",
- - __FUNCTION__,
- + __func__,
- NIPQUAD(cp->caddr), ntohs(cp->cport),
- NIPQUAD(cp->vaddr), ntohs(cp->vport),
- inc->name, ntohs(inc->port));
- @@ -599,6 +592,7 @@
- struct ip_vs_protocol ip_vs_protocol_tcp = {
- .name = "TCP",
- .protocol = IPPROTO_TCP,
- + .num_states = IP_VS_TCP_S_LAST,
- .dont_defrag = 0,
- .appcnt = ATOMIC_INIT(0),
- .init = ip_vs_tcp_init,
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_udp.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_udp.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_proto_udp.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_proto_udp.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * ip_vs_proto_udp.c: UDP load balancing support for IPVS
- *
- - * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- * Julian Anastasov <ja@ssi.bg>
- *
- @@ -18,11 +16,12 @@
- #include <linux/in.h>
- #include <linux/ip.h>
- #include <linux/kernel.h>
- +#include <linux/netfilter.h>
- #include <linux/netfilter_ipv4.h>
- #include <linux/udp.h>
- #include <net/ip_vs.h>
- -
- +#include <net/ip.h>
- static struct ip_vs_conn *
- udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp,
- @@ -56,7 +55,7 @@
- struct ip_vs_conn *cp;
- __be16 _ports[2], *pptr;
- - pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- + pptr = skb_header_pointer(skb, ip_hdrlen(skb),
- sizeof(_ports), _ports);
- if (pptr == NULL)
- return NULL;
- @@ -82,15 +81,15 @@
- struct ip_vs_service *svc;
- struct udphdr _udph, *uh;
- - uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
- + uh = skb_header_pointer(skb, ip_hdrlen(skb),
- sizeof(_udph), &_udph);
- if (uh == NULL) {
- *verdict = NF_DROP;
- return 0;
- }
- - if ((svc = ip_vs_service_get(skb->mark, skb->nh.iph->protocol,
- - skb->nh.iph->daddr, uh->dest))) {
- + if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol,
- + ip_hdr(skb)->daddr, uh->dest))) {
- if (ip_vs_todrop()) {
- /*
- * It seems that we are very loaded.
- @@ -129,29 +128,29 @@
- }
- static int
- -udp_snat_handler(struct sk_buff **pskb,
- +udp_snat_handler(struct sk_buff *skb,
- struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
- {
- struct udphdr *udph;
- - unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
- + const unsigned int udphoff = ip_hdrlen(skb);
- /* csum_check requires unshared skb */
- - if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
- + if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
- return 0;
- if (unlikely(cp->app != NULL)) {
- /* Some checks before mangling */
- - if (pp->csum_check && !pp->csum_check(*pskb, pp))
- + if (pp->csum_check && !pp->csum_check(skb, pp))
- return 0;
- /*
- * Call application helper if needed
- */
- - if (!ip_vs_app_pkt_out(cp, pskb))
- + if (!ip_vs_app_pkt_out(cp, skb))
- return 0;
- }
- - udph = (void *)(*pskb)->nh.iph + udphoff;
- + udph = (void *)ip_hdr(skb) + udphoff;
- udph->source = cp->vport;
- /*
- @@ -161,17 +160,15 @@
- /* Only port and addr are changed, do fast csum update */
- udp_fast_csum_update(udph, cp->daddr, cp->vaddr,
- cp->dport, cp->vport);
- - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- - (*pskb)->ip_summed = CHECKSUM_NONE;
- + if (skb->ip_summed == CHECKSUM_COMPLETE)
- + skb->ip_summed = CHECKSUM_NONE;
- } else {
- /* full checksum calculation */
- udph->check = 0;
- - (*pskb)->csum = skb_checksum(*pskb, udphoff,
- - (*pskb)->len - udphoff, 0);
- + skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
- udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr,
- - (*pskb)->len - udphoff,
- - cp->protocol,
- - (*pskb)->csum);
- + skb->len - udphoff,
- + cp->protocol, skb->csum);
- if (udph->check == 0)
- udph->check = CSUM_MANGLED_0;
- IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n",
- @@ -183,30 +180,30 @@
- static int
- -udp_dnat_handler(struct sk_buff **pskb,
- +udp_dnat_handler(struct sk_buff *skb,
- struct ip_vs_protocol *pp, struct ip_vs_conn *cp)
- {
- struct udphdr *udph;
- - unsigned int udphoff = (*pskb)->nh.iph->ihl * 4;
- + unsigned int udphoff = ip_hdrlen(skb);
- /* csum_check requires unshared skb */
- - if (!ip_vs_make_skb_writable(pskb, udphoff+sizeof(*udph)))
- + if (!skb_make_writable(skb, udphoff+sizeof(*udph)))
- return 0;
- if (unlikely(cp->app != NULL)) {
- /* Some checks before mangling */
- - if (pp->csum_check && !pp->csum_check(*pskb, pp))
- + if (pp->csum_check && !pp->csum_check(skb, pp))
- return 0;
- /*
- * Attempt ip_vs_app call.
- * It will fix ip_vs_conn
- */
- - if (!ip_vs_app_pkt_in(cp, pskb))
- + if (!ip_vs_app_pkt_in(cp, skb))
- return 0;
- }
- - udph = (void *)(*pskb)->nh.iph + udphoff;
- + udph = (void *)ip_hdr(skb) + udphoff;
- udph->dest = cp->dport;
- /*
- @@ -216,20 +213,18 @@
- /* Only port and addr are changed, do fast csum update */
- udp_fast_csum_update(udph, cp->vaddr, cp->daddr,
- cp->vport, cp->dport);
- - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE)
- - (*pskb)->ip_summed = CHECKSUM_NONE;
- + if (skb->ip_summed == CHECKSUM_COMPLETE)
- + skb->ip_summed = CHECKSUM_NONE;
- } else {
- /* full checksum calculation */
- udph->check = 0;
- - (*pskb)->csum = skb_checksum(*pskb, udphoff,
- - (*pskb)->len - udphoff, 0);
- + skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0);
- udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr,
- - (*pskb)->len - udphoff,
- - cp->protocol,
- - (*pskb)->csum);
- + skb->len - udphoff,
- + cp->protocol, skb->csum);
- if (udph->check == 0)
- udph->check = CSUM_MANGLED_0;
- - (*pskb)->ip_summed = CHECKSUM_UNNECESSARY;
- + skb->ip_summed = CHECKSUM_UNNECESSARY;
- }
- return 1;
- }
- @@ -239,7 +234,7 @@
- udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp)
- {
- struct udphdr _udph, *uh;
- - unsigned int udphoff = skb->nh.iph->ihl*4;
- + const unsigned int udphoff = ip_hdrlen(skb);
- uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph);
- if (uh == NULL)
- @@ -251,10 +246,10 @@
- skb->csum = skb_checksum(skb, udphoff,
- skb->len - udphoff, 0);
- case CHECKSUM_COMPLETE:
- - if (csum_tcpudp_magic(skb->nh.iph->saddr,
- - skb->nh.iph->daddr,
- + if (csum_tcpudp_magic(ip_hdr(skb)->saddr,
- + ip_hdr(skb)->daddr,
- skb->len - udphoff,
- - skb->nh.iph->protocol,
- + ip_hdr(skb)->protocol,
- skb->csum)) {
- IP_VS_DBG_RL_PKT(0, pp, skb, 0,
- "Failed checksum for");
- @@ -347,7 +342,7 @@
- IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->"
- "%u.%u.%u.%u:%u to app %s on port %u\n",
- - __FUNCTION__,
- + __func__,
- NIPQUAD(cp->caddr), ntohs(cp->cport),
- NIPQUAD(cp->vaddr), ntohs(cp->vport),
- inc->name, ntohs(inc->port));
- @@ -412,6 +407,7 @@
- struct ip_vs_protocol ip_vs_protocol_udp = {
- .name = "UDP",
- .protocol = IPPROTO_UDP,
- + .num_states = IP_VS_UDP_S_LAST,
- .dont_defrag = 0,
- .init = udp_init,
- .exit = udp_exit,
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_rr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_rr.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_rr.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_rr.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * IPVS: Round-Robin Scheduling module
- *
- - * Version: $Id: ip_vs_rr.c,v 1.9 2002/09/15 08:14:08 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- * Peter Kese <peter.kese@ijs.si>
- *
- @@ -68,7 +66,7 @@
- q = q->next;
- continue;
- }
- -
- +
- dest = list_entry(q, struct ip_vs_dest, n_list);
- if (!(dest->flags & IP_VS_DEST_F_OVERLOAD) &&
- atomic_read(&dest->weight) > 0)
- @@ -96,6 +94,7 @@
- .name = "rr", /* name */
- .refcnt = ATOMIC_INIT(0),
- .module = THIS_MODULE,
- + .n_list = LIST_HEAD_INIT(ip_vs_rr_scheduler.n_list),
- .init_service = ip_vs_rr_init_svc,
- .done_service = ip_vs_rr_done_svc,
- .update_service = ip_vs_rr_update_svc,
- @@ -104,7 +103,6 @@
- static int __init ip_vs_rr_init(void)
- {
- - INIT_LIST_HEAD(&ip_vs_rr_scheduler.n_list);
- return register_ip_vs_scheduler(&ip_vs_rr_scheduler);
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sched.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sched.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sched.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sched.c 2009-02-16 12:56:22.000000000 -0400
- @@ -5,8 +5,6 @@
- * high-performance and highly available server based on a
- * cluster of servers.
- *
- - * Version: $Id: ip_vs_sched.c,v 1.13 2003/05/10 03:05:23 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- * Peter Kese <peter.kese@ijs.si>
- *
- @@ -20,11 +18,11 @@
- */
- #include <linux/module.h>
- -#include <linux/sched.h>
- #include <linux/spinlock.h>
- #include <linux/interrupt.h>
- #include <asm/string.h>
- #include <linux/kmod.h>
- +#include <linux/sysctl.h>
- #include <net/ip_vs.h>
- @@ -184,22 +182,9 @@
- /* increase the module use count */
- ip_vs_use_count_inc();
- - /*
- - * Make sure that the scheduler with this name doesn't exist
- - * in the scheduler list.
- - */
- - sched = ip_vs_sched_getbyname(scheduler->name);
- - if (sched) {
- - ip_vs_scheduler_put(sched);
- - ip_vs_use_count_dec();
- - IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
- - "already existed in the system\n", scheduler->name);
- - return -EINVAL;
- - }
- -
- write_lock_bh(&__ip_vs_sched_lock);
- - if (scheduler->n_list.next != &scheduler->n_list) {
- + if (!list_empty(&scheduler->n_list)) {
- write_unlock_bh(&__ip_vs_sched_lock);
- ip_vs_use_count_dec();
- IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
- @@ -208,6 +193,20 @@
- }
- /*
- + * Make sure that the scheduler with this name doesn't exist
- + * in the scheduler list.
- + */
- + list_for_each_entry(sched, &ip_vs_schedulers, n_list) {
- + if (strcmp(scheduler->name, sched->name) == 0) {
- + write_unlock_bh(&__ip_vs_sched_lock);
- + ip_vs_use_count_dec();
- + IP_VS_ERR("register_ip_vs_scheduler(): [%s] scheduler "
- + "already existed in the system\n",
- + scheduler->name);
- + return -EINVAL;
- + }
- + }
- + /*
- * Add it into the d-linked scheduler list
- */
- list_add(&scheduler->n_list, &ip_vs_schedulers);
- @@ -230,7 +229,7 @@
- }
- write_lock_bh(&__ip_vs_sched_lock);
- - if (scheduler->n_list.next == &scheduler->n_list) {
- + if (list_empty(&scheduler->n_list)) {
- write_unlock_bh(&__ip_vs_sched_lock);
- IP_VS_ERR("unregister_ip_vs_scheduler(): [%s] scheduler "
- "is not in the list. failed\n", scheduler->name);
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sed.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sed.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sed.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sed.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * IPVS: Shortest Expected Delay scheduling module
- *
- - * Version: $Id: ip_vs_sed.c,v 1.1 2003/05/10 03:06:08 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * This program is free software; you can redistribute it and/or
- @@ -18,7 +16,7 @@
- * The SED algorithm attempts to minimize each job's expected delay until
- * completion. The expected delay that the job will experience is
- * (Ci + 1) / Ui if sent to the ith server, in which Ci is the number of
- - * jobs on the the ith server and Ui is the fixed service rate (weight) of
- + * jobs on the ith server and Ui is the fixed service rate (weight) of
- * the ith server. The SED algorithm adopts a greedy policy that each does
- * what is in its own best interest, i.e. to join the queue which would
- * minimize its expected delay of completion.
- @@ -140,6 +138,7 @@
- .name = "sed",
- .refcnt = ATOMIC_INIT(0),
- .module = THIS_MODULE,
- + .n_list = LIST_HEAD_INIT(ip_vs_sed_scheduler.n_list),
- .init_service = ip_vs_sed_init_svc,
- .done_service = ip_vs_sed_done_svc,
- .update_service = ip_vs_sed_update_svc,
- @@ -149,7 +148,6 @@
- static int __init ip_vs_sed_init(void)
- {
- - INIT_LIST_HEAD(&ip_vs_sed_scheduler.n_list);
- return register_ip_vs_scheduler(&ip_vs_sed_scheduler);
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sh.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sh.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sh.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sh.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * IPVS: Source Hashing scheduling module
- *
- - * Version: $Id: ip_vs_sh.c,v 1.5 2002/09/15 08:14:08 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@gnuchina.org>
- *
- * This program is free software; you can redistribute it and/or
- @@ -201,7 +199,7 @@
- {
- struct ip_vs_dest *dest;
- struct ip_vs_sh_bucket *tbl;
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- IP_VS_DBG(6, "ip_vs_sh_schedule(): Scheduling...\n");
- @@ -232,6 +230,7 @@
- .name = "sh",
- .refcnt = ATOMIC_INIT(0),
- .module = THIS_MODULE,
- + .n_list = LIST_HEAD_INIT(ip_vs_sh_scheduler.n_list),
- .init_service = ip_vs_sh_init_svc,
- .done_service = ip_vs_sh_done_svc,
- .update_service = ip_vs_sh_update_svc,
- @@ -241,7 +240,6 @@
- static int __init ip_vs_sh_init(void)
- {
- - INIT_LIST_HEAD(&ip_vs_sh_scheduler.n_list);
- return register_ip_vs_scheduler(&ip_vs_sh_scheduler);
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sync.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sync.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_sync.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_sync.c 2009-02-16 12:56:22.000000000 -0400
- @@ -5,8 +5,6 @@
- * high-performance and highly available server based on a
- * cluster of servers.
- *
- - * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * ip_vs_sync: sync connection info from master load balancer to backups
- @@ -29,10 +27,12 @@
- #include <linux/in.h>
- #include <linux/igmp.h> /* for ip_mc_join_group */
- #include <linux/udp.h>
- +#include <linux/err.h>
- +#include <linux/kthread.h>
- +#include <linux/wait.h>
- #include <net/ip.h>
- #include <net/sock.h>
- -#include <asm/uaccess.h> /* for get_fs and set_fs */
- #include <net/ip_vs.h>
- @@ -67,7 +67,11 @@
- struct ip_vs_seq out_seq; /* outgoing seq. struct */
- };
- -#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ)
- +struct ip_vs_sync_thread_data {
- + struct socket *sock;
- + char *buf;
- +};
- +
- #define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn))
- #define FULL_CONN_SIZE \
- (sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options))
- @@ -136,18 +140,19 @@
- char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
- char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
- -/* multicast addr */
- -static struct sockaddr_in mcast_addr;
- +/* sync daemon tasks */
- +static struct task_struct *sync_master_thread;
- +static struct task_struct *sync_backup_thread;
- +/* multicast addr */
- +static struct sockaddr_in mcast_addr = {
- + .sin_family = AF_INET,
- + .sin_port = __constant_htons(IP_VS_SYNC_PORT),
- + .sin_addr.s_addr = __constant_htonl(IP_VS_SYNC_GROUP),
- +};
- -static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
- -{
- - spin_lock(&ip_vs_sync_lock);
- - list_add_tail(&sb->list, &ip_vs_sync_queue);
- - spin_unlock(&ip_vs_sync_lock);
- -}
- -static inline struct ip_vs_sync_buff * sb_dequeue(void)
- +static inline struct ip_vs_sync_buff *sb_dequeue(void)
- {
- struct ip_vs_sync_buff *sb;
- @@ -191,6 +196,16 @@
- kfree(sb);
- }
- +static inline void sb_queue_tail(struct ip_vs_sync_buff *sb)
- +{
- + spin_lock(&ip_vs_sync_lock);
- + if (ip_vs_sync_state & IP_VS_STATE_MASTER)
- + list_add_tail(&sb->list, &ip_vs_sync_queue);
- + else
- + ip_vs_sync_buff_release(sb);
- + spin_unlock(&ip_vs_sync_lock);
- +}
- +
- /*
- * Get the current sync buffer if it has been created for more
- * than the specified time or the specified time is zero.
- @@ -279,14 +294,21 @@
- struct ip_vs_sync_conn *s;
- struct ip_vs_sync_conn_options *opt;
- struct ip_vs_conn *cp;
- + struct ip_vs_protocol *pp;
- + struct ip_vs_dest *dest;
- char *p;
- int i;
- + if (buflen < sizeof(struct ip_vs_sync_mesg)) {
- + IP_VS_ERR_RL("sync message header too short\n");
- + return;
- + }
- +
- /* Convert size back to host byte order */
- m->size = ntohs(m->size);
- if (buflen != m->size) {
- - IP_VS_ERR("bogus message\n");
- + IP_VS_ERR_RL("bogus sync message size\n");
- return;
- }
- @@ -299,10 +321,50 @@
- p = (char *)buffer + sizeof(struct ip_vs_sync_mesg);
- for (i=0; i<m->nr_conns; i++) {
- - unsigned flags;
- + unsigned flags, state;
- +
- + if (p + SIMPLE_CONN_SIZE > buffer+buflen) {
- + IP_VS_ERR_RL("bogus conn in sync message\n");
- + return;
- + }
- + s = (struct ip_vs_sync_conn *) p;
- + flags = ntohs(s->flags) | IP_VS_CONN_F_SYNC;
- + flags &= ~IP_VS_CONN_F_HASHED;
- + if (flags & IP_VS_CONN_F_SEQ_MASK) {
- + opt = (struct ip_vs_sync_conn_options *)&s[1];
- + p += FULL_CONN_SIZE;
- + if (p > buffer+buflen) {
- + IP_VS_ERR_RL("bogus conn options in sync message\n");
- + return;
- + }
- + } else {
- + opt = NULL;
- + p += SIMPLE_CONN_SIZE;
- + }
- +
- + state = ntohs(s->state);
- + if (!(flags & IP_VS_CONN_F_TEMPLATE)) {
- + pp = ip_vs_proto_get(s->protocol);
- + if (!pp) {
- + IP_VS_ERR_RL("Unsupported protocol %u in sync msg\n",
- + s->protocol);
- + continue;
- + }
- + if (state >= pp->num_states) {
- + IP_VS_DBG(2, "Invalid %s state %u in sync msg\n",
- + pp->name, state);
- + continue;
- + }
- + } else {
- + /* protocol in templates is not used for state/timeout */
- + pp = NULL;
- + if (state > 0) {
- + IP_VS_DBG(2, "Invalid template state %u in sync msg\n",
- + state);
- + state = 0;
- + }
- + }
- - s = (struct ip_vs_sync_conn *)p;
- - flags = ntohs(s->flags);
- if (!(flags & IP_VS_CONN_F_TEMPLATE))
- cp = ip_vs_conn_in_get(s->protocol,
- s->caddr, s->cport,
- @@ -312,38 +374,69 @@
- s->caddr, s->cport,
- s->vaddr, s->vport);
- if (!cp) {
- + /*
- + * Find the appropriate destination for the connection.
- + * If it is not found the connection will remain unbound
- + * but still handled.
- + */
- + dest = ip_vs_find_dest(s->daddr, s->dport,
- + s->vaddr, s->vport,
- + s->protocol);
- + /* Set the appropriate activity flag */
- + if (s->protocol == IPPROTO_TCP) {
- + if (state != IP_VS_TCP_S_ESTABLISHED)
- + flags |= IP_VS_CONN_F_INACTIVE;
- + else
- + flags &= ~IP_VS_CONN_F_INACTIVE;
- + }
- cp = ip_vs_conn_new(s->protocol,
- s->caddr, s->cport,
- s->vaddr, s->vport,
- s->daddr, s->dport,
- - flags, NULL);
- + flags, dest);
- + if (dest)
- + atomic_dec(&dest->refcnt);
- if (!cp) {
- IP_VS_ERR("ip_vs_conn_new failed\n");
- return;
- }
- - cp->state = ntohs(s->state);
- } else if (!cp->dest) {
- - /* it is an entry created by the synchronization */
- - cp->state = ntohs(s->state);
- - cp->flags = flags | IP_VS_CONN_F_HASHED;
- - } /* Note that we don't touch its state and flags
- - if it is a normal entry. */
- + dest = ip_vs_try_bind_dest(cp);
- + if (dest)
- + atomic_dec(&dest->refcnt);
- + } else if ((cp->dest) && (cp->protocol == IPPROTO_TCP) &&
- + (cp->state != state)) {
- + /* update active/inactive flag for the connection */
- + dest = cp->dest;
- + if (!(cp->flags & IP_VS_CONN_F_INACTIVE) &&
- + (state != IP_VS_TCP_S_ESTABLISHED)) {
- + atomic_dec(&dest->activeconns);
- + atomic_inc(&dest->inactconns);
- + cp->flags |= IP_VS_CONN_F_INACTIVE;
- + } else if ((cp->flags & IP_VS_CONN_F_INACTIVE) &&
- + (state == IP_VS_TCP_S_ESTABLISHED)) {
- + atomic_inc(&dest->activeconns);
- + atomic_dec(&dest->inactconns);
- + cp->flags &= ~IP_VS_CONN_F_INACTIVE;
- + }
- + }
- - if (flags & IP_VS_CONN_F_SEQ_MASK) {
- - opt = (struct ip_vs_sync_conn_options *)&s[1];
- + if (opt)
- memcpy(&cp->in_seq, opt, sizeof(*opt));
- - p += FULL_CONN_SIZE;
- - } else
- - p += SIMPLE_CONN_SIZE;
- -
- atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]);
- - cp->timeout = IP_VS_SYNC_CONN_TIMEOUT;
- + cp->state = state;
- + cp->old_state = cp->state;
- + /*
- + * We can not recover the right timeout for templates
- + * in all cases, we can not find the right fwmark
- + * virtual service. If needed, we can do it for
- + * non-fwmark persistent services.
- + */
- + if (!(flags & IP_VS_CONN_F_TEMPLATE) && pp->timeout_table)
- + cp->timeout = pp->timeout_table[state];
- + else
- + cp->timeout = (3*60*HZ);
- ip_vs_conn_put(cp);
- -
- - if (p > buffer+buflen) {
- - IP_VS_ERR("bogus message\n");
- - return;
- - }
- }
- }
- @@ -382,7 +475,7 @@
- struct net_device *dev;
- struct inet_sock *inet = inet_sk(sk);
- - if ((dev = __dev_get_by_name(ifname)) == NULL)
- + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
- return -ENODEV;
- if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
- @@ -407,7 +500,7 @@
- int num;
- if (sync_state == IP_VS_STATE_MASTER) {
- - if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL)
- + if ((dev = __dev_get_by_name(&init_net, ip_vs_master_mcast_ifn)) == NULL)
- return -ENODEV;
- num = (dev->mtu - sizeof(struct iphdr) -
- @@ -418,7 +511,7 @@
- IP_VS_DBG(7, "setting the maximum length of sync sending "
- "message %d.\n", sync_send_mesg_maxlen);
- } else if (sync_state == IP_VS_STATE_BACKUP) {
- - if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL)
- + if ((dev = __dev_get_by_name(&init_net, ip_vs_backup_mcast_ifn)) == NULL)
- return -ENODEV;
- sync_recv_mesg_maxlen = dev->mtu -
- @@ -446,7 +539,7 @@
- memset(&mreq, 0, sizeof(mreq));
- memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr));
- - if ((dev = __dev_get_by_name(ifname)) == NULL)
- + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
- return -ENODEV;
- if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if)
- return -EINVAL;
- @@ -467,7 +560,7 @@
- __be32 addr;
- struct sockaddr_in sin;
- - if ((dev = __dev_get_by_name(ifname)) == NULL)
- + if ((dev = __dev_get_by_name(&init_net, ifname)) == NULL)
- return -ENODEV;
- addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
- @@ -492,14 +585,17 @@
- static struct socket * make_send_sock(void)
- {
- struct socket *sock;
- + int result;
- /* First create a socket */
- - if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
- + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
- + if (result < 0) {
- IP_VS_ERR("Error during creation of socket; terminating\n");
- - return NULL;
- + return ERR_PTR(result);
- }
- - if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) {
- + result = set_mcast_if(sock->sk, ip_vs_master_mcast_ifn);
- + if (result < 0) {
- IP_VS_ERR("Error setting outbound mcast interface\n");
- goto error;
- }
- @@ -507,14 +603,15 @@
- set_mcast_loop(sock->sk, 0);
- set_mcast_ttl(sock->sk, 1);
- - if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) {
- + result = bind_mcastif_addr(sock, ip_vs_master_mcast_ifn);
- + if (result < 0) {
- IP_VS_ERR("Error binding address of the mcast interface\n");
- goto error;
- }
- - if (sock->ops->connect(sock,
- - (struct sockaddr*)&mcast_addr,
- - sizeof(struct sockaddr), 0) < 0) {
- + result = sock->ops->connect(sock, (struct sockaddr *) &mcast_addr,
- + sizeof(struct sockaddr), 0);
- + if (result < 0) {
- IP_VS_ERR("Error connecting to the multicast addr\n");
- goto error;
- }
- @@ -523,7 +620,7 @@
- error:
- sock_release(sock);
- - return NULL;
- + return ERR_PTR(result);
- }
- @@ -533,27 +630,30 @@
- static struct socket * make_receive_sock(void)
- {
- struct socket *sock;
- + int result;
- /* First create a socket */
- - if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) {
- + result = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
- + if (result < 0) {
- IP_VS_ERR("Error during creation of socket; terminating\n");
- - return NULL;
- + return ERR_PTR(result);
- }
- /* it is equivalent to the REUSEADDR option in user-space */
- sock->sk->sk_reuse = 1;
- - if (sock->ops->bind(sock,
- - (struct sockaddr*)&mcast_addr,
- - sizeof(struct sockaddr)) < 0) {
- + result = sock->ops->bind(sock, (struct sockaddr *) &mcast_addr,
- + sizeof(struct sockaddr));
- + if (result < 0) {
- IP_VS_ERR("Error binding to the multicast addr\n");
- goto error;
- }
- /* join the multicast group */
- - if (join_mcast_group(sock->sk,
- - (struct in_addr*)&mcast_addr.sin_addr,
- - ip_vs_backup_mcast_ifn) < 0) {
- + result = join_mcast_group(sock->sk,
- + (struct in_addr *) &mcast_addr.sin_addr,
- + ip_vs_backup_mcast_ifn);
- + if (result < 0) {
- IP_VS_ERR("Error joining to the multicast group\n");
- goto error;
- }
- @@ -562,7 +662,7 @@
- error:
- sock_release(sock);
- - return NULL;
- + return ERR_PTR(result);
- }
- @@ -620,44 +720,29 @@
- }
- -static DECLARE_WAIT_QUEUE_HEAD(sync_wait);
- -static pid_t sync_master_pid = 0;
- -static pid_t sync_backup_pid = 0;
- -
- -static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait);
- -static int stop_master_sync = 0;
- -static int stop_backup_sync = 0;
- -
- -static void sync_master_loop(void)
- +static int sync_thread_master(void *data)
- {
- - struct socket *sock;
- + struct ip_vs_sync_thread_data *tinfo = data;
- struct ip_vs_sync_buff *sb;
- - /* create the sending multicast socket */
- - sock = make_send_sock();
- - if (!sock)
- - return;
- -
- IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, "
- "syncid = %d\n",
- ip_vs_master_mcast_ifn, ip_vs_master_syncid);
- - for (;;) {
- - while ((sb=sb_dequeue())) {
- - ip_vs_send_sync_msg(sock, sb->mesg);
- + while (!kthread_should_stop()) {
- + while ((sb = sb_dequeue())) {
- + ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
- ip_vs_sync_buff_release(sb);
- }
- /* check if entries stay in curr_sb for 2 seconds */
- - if ((sb = get_curr_sync_buff(2*HZ))) {
- - ip_vs_send_sync_msg(sock, sb->mesg);
- + sb = get_curr_sync_buff(2 * HZ);
- + if (sb) {
- + ip_vs_send_sync_msg(tinfo->sock, sb->mesg);
- ip_vs_sync_buff_release(sb);
- }
- - if (stop_master_sync)
- - break;
- -
- - msleep_interruptible(1000);
- + schedule_timeout_interruptible(HZ);
- }
- /* clean up the sync_buff queue */
- @@ -671,235 +756,175 @@
- }
- /* release the sending multicast socket */
- - sock_release(sock);
- + sock_release(tinfo->sock);
- + kfree(tinfo);
- +
- + return 0;
- }
- -static void sync_backup_loop(void)
- +static int sync_thread_backup(void *data)
- {
- - struct socket *sock;
- - char *buf;
- + struct ip_vs_sync_thread_data *tinfo = data;
- int len;
- - if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) {
- - IP_VS_ERR("sync_backup_loop: kmalloc error\n");
- - return;
- - }
- -
- - /* create the receiving multicast socket */
- - sock = make_receive_sock();
- - if (!sock)
- - goto out;
- -
- IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, "
- "syncid = %d\n",
- ip_vs_backup_mcast_ifn, ip_vs_backup_syncid);
- - for (;;) {
- - /* do you have data now? */
- - while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) {
- - if ((len =
- - ip_vs_receive(sock, buf,
- - sync_recv_mesg_maxlen)) <= 0) {
- + while (!kthread_should_stop()) {
- + wait_event_interruptible(*tinfo->sock->sk->sk_sleep,
- + !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue)
- + || kthread_should_stop());
- +
- + /* do we have data now? */
- + while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) {
- + len = ip_vs_receive(tinfo->sock, tinfo->buf,
- + sync_recv_mesg_maxlen);
- + if (len <= 0) {
- IP_VS_ERR("receiving message error\n");
- break;
- }
- - /* disable bottom half, because it accessed the data
- +
- + /* disable bottom half, because it accesses the data
- shared by softirq while getting/creating conns */
- local_bh_disable();
- - ip_vs_process_message(buf, len);
- + ip_vs_process_message(tinfo->buf, len);
- local_bh_enable();
- }
- -
- - if (stop_backup_sync)
- - break;
- -
- - msleep_interruptible(1000);
- }
- /* release the sending multicast socket */
- - sock_release(sock);
- + sock_release(tinfo->sock);
- + kfree(tinfo->buf);
- + kfree(tinfo);
- - out:
- - kfree(buf);
- + return 0;
- }
- -static void set_sync_pid(int sync_state, pid_t sync_pid)
- -{
- - if (sync_state == IP_VS_STATE_MASTER)
- - sync_master_pid = sync_pid;
- - else if (sync_state == IP_VS_STATE_BACKUP)
- - sync_backup_pid = sync_pid;
- -}
- -
- -static void set_stop_sync(int sync_state, int set)
- +int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
- {
- - if (sync_state == IP_VS_STATE_MASTER)
- - stop_master_sync = set;
- - else if (sync_state == IP_VS_STATE_BACKUP)
- - stop_backup_sync = set;
- - else {
- - stop_master_sync = set;
- - stop_backup_sync = set;
- - }
- -}
- + struct ip_vs_sync_thread_data *tinfo;
- + struct task_struct **realtask, *task;
- + struct socket *sock;
- + char *name, *buf = NULL;
- + int (*threadfn)(void *data);
- + int result = -ENOMEM;
- -static int sync_thread(void *startup)
- -{
- - DECLARE_WAITQUEUE(wait, current);
- - mm_segment_t oldmm;
- - int state;
- - const char *name;
- + IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
- + IP_VS_DBG(7, "Each ip_vs_sync_conn entry needs %Zd bytes\n",
- + sizeof(struct ip_vs_sync_conn));
- - /* increase the module use count */
- - ip_vs_use_count_inc();
- + if (state == IP_VS_STATE_MASTER) {
- + if (sync_master_thread)
- + return -EEXIST;
- - if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) {
- - state = IP_VS_STATE_MASTER;
- + strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
- + sizeof(ip_vs_master_mcast_ifn));
- + ip_vs_master_syncid = syncid;
- + realtask = &sync_master_thread;
- name = "ipvs_syncmaster";
- - } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) {
- - state = IP_VS_STATE_BACKUP;
- + threadfn = sync_thread_master;
- + sock = make_send_sock();
- + } else if (state == IP_VS_STATE_BACKUP) {
- + if (sync_backup_thread)
- + return -EEXIST;
- +
- + strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
- + sizeof(ip_vs_backup_mcast_ifn));
- + ip_vs_backup_syncid = syncid;
- + realtask = &sync_backup_thread;
- name = "ipvs_syncbackup";
- + threadfn = sync_thread_backup;
- + sock = make_receive_sock();
- } else {
- - IP_VS_BUG();
- - ip_vs_use_count_dec();
- return -EINVAL;
- }
- - daemonize(name);
- -
- - oldmm = get_fs();
- - set_fs(KERNEL_DS);
- -
- - /* Block all signals */
- - spin_lock_irq(&current->sighand->siglock);
- - siginitsetinv(&current->blocked, 0);
- - recalc_sigpending();
- - spin_unlock_irq(&current->sighand->siglock);
- + if (IS_ERR(sock)) {
- + result = PTR_ERR(sock);
- + goto out;
- + }
- - /* set the maximum length of sync message */
- set_sync_mesg_maxlen(state);
- + if (state == IP_VS_STATE_BACKUP) {
- + buf = kmalloc(sync_recv_mesg_maxlen, GFP_KERNEL);
- + if (!buf)
- + goto outsocket;
- + }
- - /* set up multicast address */
- - mcast_addr.sin_family = AF_INET;
- - mcast_addr.sin_port = htons(IP_VS_SYNC_PORT);
- - mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP);
- -
- - add_wait_queue(&sync_wait, &wait);
- -
- - set_sync_pid(state, current->pid);
- - complete((struct completion *)startup);
- -
- - /* processing master/backup loop here */
- - if (state == IP_VS_STATE_MASTER)
- - sync_master_loop();
- - else if (state == IP_VS_STATE_BACKUP)
- - sync_backup_loop();
- - else IP_VS_BUG();
- -
- - remove_wait_queue(&sync_wait, &wait);
- -
- - /* thread exits */
- - set_sync_pid(state, 0);
- - IP_VS_INFO("sync thread stopped!\n");
- -
- - set_fs(oldmm);
- -
- - /* decrease the module use count */
- - ip_vs_use_count_dec();
- -
- - set_stop_sync(state, 0);
- - wake_up(&stop_sync_wait);
- + tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
- + if (!tinfo)
- + goto outbuf;
- - return 0;
- -}
- + tinfo->sock = sock;
- + tinfo->buf = buf;
- + task = kthread_run(threadfn, tinfo, name);
- + if (IS_ERR(task)) {
- + result = PTR_ERR(task);
- + goto outtinfo;
- + }
- -static int fork_sync_thread(void *startup)
- -{
- - pid_t pid;
- + /* mark as active */
- + *realtask = task;
- + ip_vs_sync_state |= state;
- - /* fork the sync thread here, then the parent process of the
- - sync thread is the init process after this thread exits. */
- - repeat:
- - if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) {
- - IP_VS_ERR("could not create sync_thread due to %d... "
- - "retrying.\n", pid);
- - msleep_interruptible(1000);
- - goto repeat;
- - }
- + /* increase the module use count */
- + ip_vs_use_count_inc();
- return 0;
- +
- +outtinfo:
- + kfree(tinfo);
- +outbuf:
- + kfree(buf);
- +outsocket:
- + sock_release(sock);
- +out:
- + return result;
- }
- -int start_sync_thread(int state, char *mcast_ifn, __u8 syncid)
- +int stop_sync_thread(int state)
- {
- - DECLARE_COMPLETION_ONSTACK(startup);
- - pid_t pid;
- + IP_VS_DBG(7, "%s: pid %d\n", __func__, task_pid_nr(current));
- - if ((state == IP_VS_STATE_MASTER && sync_master_pid) ||
- - (state == IP_VS_STATE_BACKUP && sync_backup_pid))
- - return -EEXIST;
- + if (state == IP_VS_STATE_MASTER) {
- + if (!sync_master_thread)
- + return -ESRCH;
- - IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
- - IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n",
- - sizeof(struct ip_vs_sync_conn));
- + IP_VS_INFO("stopping master sync thread %d ...\n",
- + task_pid_nr(sync_master_thread));
- - ip_vs_sync_state |= state;
- - if (state == IP_VS_STATE_MASTER) {
- - strlcpy(ip_vs_master_mcast_ifn, mcast_ifn,
- - sizeof(ip_vs_master_mcast_ifn));
- - ip_vs_master_syncid = syncid;
- + /*
- + * The lock synchronizes with sb_queue_tail(), so that we don't
- + * add sync buffers to the queue, when we are already in
- + * progress of stopping the master sync daemon.
- + */
- +
- + spin_lock_bh(&ip_vs_sync_lock);
- + ip_vs_sync_state &= ~IP_VS_STATE_MASTER;
- + spin_unlock_bh(&ip_vs_sync_lock);
- + kthread_stop(sync_master_thread);
- + sync_master_thread = NULL;
- + } else if (state == IP_VS_STATE_BACKUP) {
- + if (!sync_backup_thread)
- + return -ESRCH;
- +
- + IP_VS_INFO("stopping backup sync thread %d ...\n",
- + task_pid_nr(sync_backup_thread));
- +
- + ip_vs_sync_state &= ~IP_VS_STATE_BACKUP;
- + kthread_stop(sync_backup_thread);
- + sync_backup_thread = NULL;
- } else {
- - strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn,
- - sizeof(ip_vs_backup_mcast_ifn));
- - ip_vs_backup_syncid = syncid;
- - }
- -
- - repeat:
- - if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) {
- - IP_VS_ERR("could not create fork_sync_thread due to %d... "
- - "retrying.\n", pid);
- - msleep_interruptible(1000);
- - goto repeat;
- + return -EINVAL;
- }
- - wait_for_completion(&startup);
- -
- - return 0;
- -}
- -
- -
- -int stop_sync_thread(int state)
- -{
- - DECLARE_WAITQUEUE(wait, current);
- -
- - if ((state == IP_VS_STATE_MASTER && !sync_master_pid) ||
- - (state == IP_VS_STATE_BACKUP && !sync_backup_pid))
- - return -ESRCH;
- -
- - IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid);
- - IP_VS_INFO("stopping sync thread %d ...\n",
- - (state == IP_VS_STATE_MASTER) ?
- - sync_master_pid : sync_backup_pid);
- -
- - __set_current_state(TASK_UNINTERRUPTIBLE);
- - add_wait_queue(&stop_sync_wait, &wait);
- - set_stop_sync(state, 1);
- - ip_vs_sync_state -= state;
- - wake_up(&sync_wait);
- - schedule();
- - __set_current_state(TASK_RUNNING);
- - remove_wait_queue(&stop_sync_wait, &wait);
- -
- - /* Note: no need to reap the sync thread, because its parent
- - process is the init process */
- -
- - if ((state == IP_VS_STATE_MASTER && stop_master_sync) ||
- - (state == IP_VS_STATE_BACKUP && stop_backup_sync))
- - IP_VS_BUG();
- + /* decrease the module use count */
- + ip_vs_use_count_dec();
- return 0;
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wlc.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wlc.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wlc.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wlc.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * IPVS: Weighted Least-Connection Scheduling module
- *
- - * Version: $Id: ip_vs_wlc.c,v 1.13 2003/04/18 09:03:16 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- * Peter Kese <peter.kese@ijs.si>
- *
- @@ -128,6 +126,7 @@
- .name = "wlc",
- .refcnt = ATOMIC_INIT(0),
- .module = THIS_MODULE,
- + .n_list = LIST_HEAD_INIT(ip_vs_wlc_scheduler.n_list),
- .init_service = ip_vs_wlc_init_svc,
- .done_service = ip_vs_wlc_done_svc,
- .update_service = ip_vs_wlc_update_svc,
- @@ -137,7 +136,6 @@
- static int __init ip_vs_wlc_init(void)
- {
- - INIT_LIST_HEAD(&ip_vs_wlc_scheduler.n_list);
- return register_ip_vs_scheduler(&ip_vs_wlc_scheduler);
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wrr.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wrr.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_wrr.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_wrr.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * IPVS: Weighted Round-Robin Scheduling module
- *
- - * Version: $Id: ip_vs_wrr.c,v 1.12 2002/09/15 08:14:08 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- *
- * This program is free software; you can redistribute it and/or
- @@ -22,6 +20,7 @@
- #include <linux/module.h>
- #include <linux/kernel.h>
- +#include <linux/net.h>
- #include <net/ip_vs.h>
- @@ -169,7 +168,7 @@
- */
- if (mark->cw == 0) {
- mark->cl = &svc->destinations;
- - IP_VS_INFO("ip_vs_wrr_schedule(): "
- + IP_VS_ERR_RL("ip_vs_wrr_schedule(): "
- "no available servers\n");
- dest = NULL;
- goto out;
- @@ -213,6 +212,7 @@
- .name = "wrr",
- .refcnt = ATOMIC_INIT(0),
- .module = THIS_MODULE,
- + .n_list = LIST_HEAD_INIT(ip_vs_wrr_scheduler.n_list),
- .init_service = ip_vs_wrr_init_svc,
- .done_service = ip_vs_wrr_done_svc,
- .update_service = ip_vs_wrr_update_svc,
- @@ -221,7 +221,6 @@
- static int __init ip_vs_wrr_init(void)
- {
- - INIT_LIST_HEAD(&ip_vs_wrr_scheduler.n_list);
- return register_ip_vs_scheduler(&ip_vs_wrr_scheduler) ;
- }
- diff -u linux-2.6.20.y/net/ipv4/ipvs/ip_vs_xmit.c linux-2.6.27.y/net/ipv4/ipvs/ip_vs_xmit.c
- --- linux-2.6.20.y/net/ipv4/ipvs/ip_vs_xmit.c 2009-02-16 11:57:22.000000000 -0400
- +++ linux-2.6.27.y/net/ipv4/ipvs/ip_vs_xmit.c 2009-02-16 12:56:22.000000000 -0400
- @@ -1,8 +1,6 @@
- /*
- * ip_vs_xmit.c: various packet transmitters for IPVS
- *
- - * Version: $Id: ip_vs_xmit.c,v 1.2 2002/11/30 01:50:35 wensong Exp $
- - *
- * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
- * Julian Anastasov <ja@ssi.bg>
- *
- @@ -16,8 +14,8 @@
- */
- #include <linux/kernel.h>
- -#include <linux/ip.h>
- #include <linux/tcp.h> /* for tcphdr */
- +#include <net/ip.h>
- #include <net/tcp.h> /* for csum_tcpudp_magic */
- #include <net/udp.h>
- #include <net/icmp.h> /* for icmp_send */
- @@ -59,7 +57,7 @@
- return dst;
- }
- -static inline struct rtable *
- +static struct rtable *
- __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos)
- {
- struct rtable *rt; /* Route to the other host */
- @@ -78,7 +76,7 @@
- .tos = rtos, } },
- };
- - if (ip_route_output_key(&rt, &fl)) {
- + if (ip_route_output_key(&init_net, &rt, &fl)) {
- spin_unlock(&dest->dst_lock);
- IP_VS_DBG_RL("ip_route_output error, "
- "dest: %u.%u.%u.%u\n",
- @@ -101,7 +99,7 @@
- .tos = rtos, } },
- };
- - if (ip_route_output_key(&rt, &fl)) {
- + if (ip_route_output_key(&init_net, &rt, &fl)) {
- IP_VS_DBG_RL("ip_route_output error, dest: "
- "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
- return NULL;
- @@ -128,8 +126,8 @@
- #define IP_VS_XMIT(skb, rt) \
- do { \
- (skb)->ipvs_property = 1; \
- - (skb)->ip_summed = CHECKSUM_NONE; \
- - NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \
- + skb_forward_csum(skb); \
- + NF_HOOK(PF_INET, NF_INET_LOCAL_OUT, (skb), NULL, \
- (rt)->u.dst.dev, dst_output); \
- } while (0)
- @@ -156,7 +154,7 @@
- struct ip_vs_protocol *pp)
- {
- struct rtable *rt; /* Route to the other host */
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- u8 tos = iph->tos;
- int mtu;
- struct flowi fl = {
- @@ -170,7 +168,7 @@
- EnterFunction(10);
- - if (ip_route_output_key(&rt, &fl)) {
- + if (ip_route_output_key(&init_net, &rt, &fl)) {
- IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
- "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
- goto tx_error_icmp;
- @@ -178,7 +176,7 @@
- /* MTU checking */
- mtu = dst_mtu(&rt->u.dst);
- - if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
- + if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
- ip_rt_put(rt);
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL("ip_vs_bypass_xmit(): frag needed\n");
- @@ -193,7 +191,7 @@
- ip_rt_put(rt);
- return NF_STOLEN;
- }
- - ip_send_check(skb->nh.iph);
- + ip_send_check(ip_hdr(skb));
- /* drop old route */
- dst_release(skb->dst);
- @@ -226,7 +224,7 @@
- {
- struct rtable *rt; /* Route to the other host */
- int mtu;
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- EnterFunction(10);
- @@ -245,7 +243,7 @@
- /* MTU checking */
- mtu = dst_mtu(&rt->u.dst);
- - if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
- + if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) {
- ip_rt_put(rt);
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for");
- @@ -253,7 +251,7 @@
- }
- /* copy-on-write the packet before mangling it */
- - if (!ip_vs_make_skb_writable(&skb, sizeof(struct iphdr)))
- + if (!skb_make_writable(skb, sizeof(struct iphdr)))
- goto tx_error_put;
- if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
- @@ -264,10 +262,10 @@
- skb->dst = &rt->u.dst;
- /* mangle the packet */
- - if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
- + if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
- goto tx_error;
- - skb->nh.iph->daddr = cp->daddr;
- - ip_send_check(skb->nh.iph);
- + ip_hdr(skb)->daddr = cp->daddr;
- + ip_send_check(ip_hdr(skb));
- IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT");
- @@ -320,19 +318,20 @@
- {
- struct rtable *rt; /* Route to the other host */
- struct net_device *tdev; /* Device to other host */
- - struct iphdr *old_iph = skb->nh.iph;
- + struct iphdr *old_iph = ip_hdr(skb);
- u8 tos = old_iph->tos;
- __be16 df = old_iph->frag_off;
- + sk_buff_data_t old_transport_header = skb->transport_header;
- struct iphdr *iph; /* Our new IP header */
- - int max_headroom; /* The extra header space needed */
- + unsigned int max_headroom; /* The extra header space needed */
- int mtu;
- EnterFunction(10);
- - if (skb->protocol != __constant_htons(ETH_P_IP)) {
- + if (skb->protocol != htons(ETH_P_IP)) {
- IP_VS_DBG_RL("ip_vs_tunnel_xmit(): protocol error, "
- "ETH_P_IP: %d, skb protocol: %d\n",
- - __constant_htons(ETH_P_IP), skb->protocol);
- + htons(ETH_P_IP), skb->protocol);
- goto tx_error;
- }
- @@ -350,9 +349,9 @@
- if (skb->dst)
- skb->dst->ops->update_pmtu(skb->dst, mtu);
- - df |= (old_iph->frag_off&__constant_htons(IP_DF));
- + df |= (old_iph->frag_off & htons(IP_DF));
- - if ((old_iph->frag_off&__constant_htons(IP_DF))
- + if ((old_iph->frag_off & htons(IP_DF))
- && mtu < ntohs(old_iph->tot_len)) {
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- ip_rt_put(rt);
- @@ -377,15 +376,16 @@
- }
- kfree_skb(skb);
- skb = new_skb;
- - old_iph = skb->nh.iph;
- + old_iph = ip_hdr(skb);
- }
- - skb->h.raw = (void *) old_iph;
- + skb->transport_header = old_transport_header;
- /* fix old IP header checksum */
- ip_send_check(old_iph);
- - skb->nh.raw = skb_push(skb, sizeof(struct iphdr));
- + skb_push(skb, sizeof(struct iphdr));
- + skb_reset_network_header(skb);
- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
- /* drop old route */
- @@ -395,7 +395,7 @@
- /*
- * Push down and install the IPIP header.
- */
- - iph = skb->nh.iph;
- + iph = ip_hdr(skb);
- iph->version = 4;
- iph->ihl = sizeof(struct iphdr)>>2;
- iph->frag_off = df;
- @@ -404,14 +404,12 @@
- iph->daddr = rt->rt_dst;
- iph->saddr = rt->rt_src;
- iph->ttl = old_iph->ttl;
- - iph->tot_len = htons(skb->len);
- ip_select_ident(iph, &rt->u.dst, NULL);
- - ip_send_check(iph);
- /* Another hack: avoid icmp_send in ip_fragment */
- skb->local_df = 1;
- - IP_VS_XMIT(skb, rt);
- + ip_local_out(skb);
- LeaveFunction(10);
- @@ -435,7 +433,7 @@
- struct ip_vs_protocol *pp)
- {
- struct rtable *rt; /* Route to the other host */
- - struct iphdr *iph = skb->nh.iph;
- + struct iphdr *iph = ip_hdr(skb);
- int mtu;
- EnterFunction(10);
- @@ -445,7 +443,7 @@
- /* MTU checking */
- mtu = dst_mtu(&rt->u.dst);
- - if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
- + if ((iph->frag_off & htons(IP_DF)) && skb->len > mtu) {
- icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
- ip_rt_put(rt);
- IP_VS_DBG_RL("ip_vs_dr_xmit(): frag needed\n");
- @@ -460,7 +458,7 @@
- ip_rt_put(rt);
- return NF_STOLEN;
- }
- - ip_send_check(skb->nh.iph);
- + ip_send_check(ip_hdr(skb));
- /* drop old route */
- dst_release(skb->dst);
- @@ -514,12 +512,12 @@
- * mangle and send the packet here (only for VS/NAT)
- */
- - if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(skb->nh.iph->tos))))
- + if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos))))
- goto tx_error_icmp;
- /* MTU checking */
- mtu = dst_mtu(&rt->u.dst);
- - if ((skb->len > mtu) && (skb->nh.iph->frag_off&__constant_htons(IP_DF))) {
- + if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) {
- ip_rt_put(rt);
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
- IP_VS_DBG_RL("ip_vs_in_icmp(): frag needed\n");
- @@ -527,7 +525,7 @@
- }
- /* copy-on-write the packet before mangling it */
- - if (!ip_vs_make_skb_writable(&skb, offset))
- + if (!skb_make_writable(skb, offset))
- goto tx_error_put;
- if (skb_cow(skb, rt->u.dst.dev->hard_header_len))
Add Comment
Please, Sign In to add comment