首页 > 代码库 > dctcp-2.6.26-rev1.1.0.patch

dctcp-2.6.26-rev1.1.0.patch

dctcp-2.6.26-rev1.1.0.patch

  1 diff -Naur linux-2.6.26/include/linux/sysctl.h linux-2.6.26-dctcp-rev1.1.0/include/linux/sysctl.h  2 --- linux-2.6.26/include/linux/sysctl.h    2008-07-13 14:51:29.000000000 -0700  3 +++ linux-2.6.26-dctcp-rev1.1.0/include/linux/sysctl.h    2011-10-07 14:41:50.000000000 -0700  4 @@ -435,6 +435,9 @@  5      NET_TCP_ALLOWED_CONG_CONTROL=123,  6      NET_TCP_MAX_SSTHRESH=124,  7      NET_TCP_FRTO_RESPONSE=125,  8 +    NET_TCP_DELAYED_ACK=126,  9 +    NET_TCP_DCTCP_ENABLE=127, 10 +    NET_TCP_DCTCP_SHIFT_G=128, 11  }; 12   13  enum { 14 diff -Naur linux-2.6.26/include/linux/tcp.h linux-2.6.26-dctcp-rev1.1.0/include/linux/tcp.h 15 --- linux-2.6.26/include/linux/tcp.h    2008-07-13 14:51:29.000000000 -0700 16 +++ linux-2.6.26-dctcp-rev1.1.0/include/linux/tcp.h    2011-10-07 14:53:45.000000000 -0700 17 @@ -405,6 +405,15 @@ 18  /* TCP MD5 Signagure Option information */ 19      struct tcp_md5sig_info    *md5sig_info; 20  #endif 21 + 22 +/* DCTCP Specific Parameters */ 23 +     u32    acked_bytes_ecn; 24 +     u32    acked_bytes_total; 25 +     u32    prior_rcv_nxt; 26 +     u32    dctcp_alpha; 27 +     u32    next_seq; 28 +     u32    ce_state;    /* 0: last pkt was non-ce , 1: last pkt was ce */ 29 +     u32    delayed_ack_reserved; 30  }; 31   32  static inline struct tcp_sock *tcp_sk(const struct sock *sk) 33 diff -Naur linux-2.6.26/include/net/tcp.h linux-2.6.26-dctcp-rev1.1.0/include/net/tcp.h 34 --- linux-2.6.26/include/net/tcp.h    2008-07-13 14:51:29.000000000 -0700 35 +++ linux-2.6.26-dctcp-rev1.1.0/include/net/tcp.h    2011-10-07 14:41:50.000000000 -0700 36 @@ -214,6 +214,9 @@ 37  extern int sysctl_tcp_fack; 38  extern int sysctl_tcp_reordering; 39  extern int sysctl_tcp_ecn; 40 +extern int sysctl_tcp_delayed_ack; 41 +extern int sysctl_tcp_dctcp_enable; 42 +extern int sysctl_tcp_dctcp_shift_g; 43  extern int sysctl_tcp_dsack; 44  extern int sysctl_tcp_mem[3]; 45  extern int sysctl_tcp_wmem[3]; 46 diff -Naur linux-2.6.26/kernel/sysctl_check.c 
linux-2.6.26-dctcp-rev1.1.0/kernel/sysctl_check.c 47 --- linux-2.6.26/kernel/sysctl_check.c    2008-07-13 14:51:29.000000000 -0700 48 +++ linux-2.6.26-dctcp-rev1.1.0/kernel/sysctl_check.c    2011-10-07 14:41:50.000000000 -0700 49 @@ -353,6 +353,9 @@ 50      { NET_TCP_FACK,                "tcp_fack" }, 51      { NET_TCP_REORDERING,            "tcp_reordering" }, 52      { NET_TCP_ECN,                "tcp_ecn" }, 53 +    { NET_TCP_DELAYED_ACK,                  "tcp_delayed_ack" }, 54 +    { NET_TCP_DCTCP_ENABLE,                 "tcp_dctcp_enable" }, 55 +        { NET_TCP_DCTCP_SHIFT_G,                "tcp_dctcp_shift_g" }, 56      { NET_TCP_DSACK,            "tcp_dsack" }, 57      { NET_TCP_MEM,                "tcp_mem" }, 58      { NET_TCP_WMEM,                "tcp_wmem" }, 59 diff -Naur linux-2.6.26/net/ipv4/sysctl_net_ipv4.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/sysctl_net_ipv4.c 60 --- linux-2.6.26/net/ipv4/sysctl_net_ipv4.c    2008-07-13 14:51:29.000000000 -0700 61 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/sysctl_net_ipv4.c    2011-10-07 14:41:50.000000000 -0700 62 @@ -506,6 +506,30 @@ 63          .proc_handler    = &proc_dointvec 64      }, 65      { 66 +        .ctl_name    = NET_TCP_DELAYED_ACK, 67 +        .procname    = "tcp_delayed_ack", 68 +        .data        = &sysctl_tcp_delayed_ack, 69 +        .maxlen        = sizeof(int), 70 +        .mode        = 0644, 71 +        .proc_handler    = &proc_dointvec 72 +    }, 73 +    { 74 +        .ctl_name    = NET_TCP_DCTCP_ENABLE, 75 +        .procname    = "tcp_dctcp_enable", 76 +        .data        = &sysctl_tcp_dctcp_enable, 77 +        .maxlen        = sizeof(int), 78 +        .mode        = 0644, 79 +        .proc_handler    = &proc_dointvec 80 +    }, 81 +    { 82 +        .ctl_name    = NET_TCP_DCTCP_SHIFT_G, 83 +        .procname    = "tcp_dctcp_shift_g", 84 +        .data        = &sysctl_tcp_dctcp_shift_g, 85 +        .maxlen       
 = sizeof(int), 86 +        .mode        = 0644, 87 +        .proc_handler    = &proc_dointvec 88 +    }, 89 +    { 90          .ctl_name    = NET_TCP_DSACK, 91          .procname    = "tcp_dsack", 92          .data        = &sysctl_tcp_dsack, 93 diff -Naur linux-2.6.26/net/ipv4/tcp_input.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_input.c 94 --- linux-2.6.26/net/ipv4/tcp_input.c    2008-07-13 14:51:29.000000000 -0700 95 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_input.c    2011-10-07 14:53:21.000000000 -0700 96 @@ -79,6 +79,9 @@ 97  int sysctl_tcp_fack __read_mostly = 1; 98  int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; 99  int sysctl_tcp_ecn __read_mostly;100 +int sysctl_tcp_delayed_ack __read_mostly = 1;101 +int sysctl_tcp_dctcp_enable __read_mostly;102 +int sysctl_tcp_dctcp_shift_g  __read_mostly = 5; /* g=1/2^5 */103  int sysctl_tcp_dsack __read_mostly = 1;104  int sysctl_tcp_app_win __read_mostly = 31;105  int sysctl_tcp_adv_win_scale __read_mostly = 2;106 @@ -212,16 +215,68 @@107      tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;108  }109  110 -static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)111 +static inline void TCP_ECN_dctcp_check_ce(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)112  {113      if (tp->ecn_flags & TCP_ECN_OK) {114 -        if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))115 -            tp->ecn_flags |= TCP_ECN_DEMAND_CWR;116 -        /* Funny extension: if ECT is not set on a segment,117 -         * it is surely retransmit. It is not in ECN RFC,118 -         * but Linux follows this rule. 
*/119 -        else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))120 -            tcp_enter_quickack_mode((struct sock *)tp);121 +      u32 temp_rcv_nxt;122 +123 +      if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) {124 +125 +        /* rcv_nxt is already update in previous process (tcp_rcv_established) */126 +127 +        if(sysctl_tcp_dctcp_enable) {128 +129 +          /* state has changed from CE=0 to CE=1 && delayed ack has not sent yet */130 +          if(tp->ce_state == 0 && tp->delayed_ack_reserved) {131 +132 +        /* save current rcv_nxt */133 +        temp_rcv_nxt = tp->rcv_nxt;134 +        /* generate previous ack with CE=0 */135 +        tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;136 +        tp->rcv_nxt = tp->prior_rcv_nxt;137 +        tcp_send_ack(sk);138 +        /* recover current rcv_nxt */139 +        tp->rcv_nxt = temp_rcv_nxt;140 +          }141 +          142 +          tp->ce_state = 1;143 +        }144 +145 +        tp->ecn_flags |= TCP_ECN_DEMAND_CWR;146 +147 +148 +        /* Funny extension: if ECT is not set on a segment,149 +         * it is surely retransmit. It is not in ECN RFC,150 +         * but Linux follows this rule. 
*/151 +      } else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) {152 +        tcp_enter_quickack_mode((struct sock *)tp);153 +      }else {154 +        /* It has ECT but it doesn't have CE */155 +        156 +        if(sysctl_tcp_dctcp_enable) {157 +          158 +          if(tp->ce_state != 0 && tp->delayed_ack_reserved) {159 +        160 +        /* save current rcv_nxt */161 +        temp_rcv_nxt = tp->rcv_nxt;162 +        /* generate previous ack with CE=1 */163 +        tp->ecn_flags |= TCP_ECN_DEMAND_CWR;164 +        tp->rcv_nxt = tp->prior_rcv_nxt;165 +        tcp_send_ack(sk);166 +        /* recover current rcv_nxt */167 +        tp->rcv_nxt = temp_rcv_nxt;168 +          }169 +170 +          tp->ce_state = 0;171 +172 +          /* deassert only when DCTCP is enabled */173 +          tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;174 +        }175 +176 +      }177 +        178 +      /* set current rcv_nxt to prior_rcv_nxt */179 +      tp->prior_rcv_nxt = tp->rcv_nxt;180      }181  }182  183 @@ -572,6 +627,8 @@184           */185          tcp_incr_quickack(sk);186          icsk->icsk_ack.ato = TCP_ATO_MIN;187 +188 +        tp->ce_state = 0;189      } else {190          int m = now - icsk->icsk_ack.lrcvtime;191  192 @@ -592,7 +649,7 @@193      }194      icsk->icsk_ack.lrcvtime = now;195  196 -    TCP_ECN_check_ce(tp, skb);197 +    TCP_ECN_dctcp_check_ce(sk, tp, skb);198  199      if (skb->len >= 128)200          tcp_grow_window(sk, skb);201 @@ -836,19 +893,54 @@202      struct tcp_sock *tp = tcp_sk(sk);203      const struct inet_connection_sock *icsk = inet_csk(sk);204  205 +    __u32 ssthresh_old; 206 +    __u32 cwnd_old;207 +    __u32 cwnd_new;208 +209      tp->prior_ssthresh = 0;210      tp->bytes_acked = 0;211      if (icsk->icsk_ca_state < TCP_CA_CWR) {212          tp->undo_marker = 0;213 -        if (set_ssthresh)214 -            tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);215 -        tp->snd_cwnd = min(tp->snd_cwnd,216 -                   
tcp_packets_in_flight(tp) + 1U);217 +218 +        if(!sysctl_tcp_dctcp_enable) {219 +220 +          if (set_ssthresh)221 +            tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);222 +223 +          tp->snd_cwnd = min(tp->snd_cwnd,224 +                     tcp_packets_in_flight(tp) + 1U);225 +          226 +        }else {227 +228 +          cwnd_new = max (tp->snd_cwnd - ((tp->snd_cwnd * tp->dctcp_alpha)>>11) , 2U);229 +230 +          if(set_ssthresh) {231 +            232 +            ssthresh_old = tp->snd_ssthresh;233 +            tp->snd_ssthresh =  cwnd_new;234 +            235 +            /* printk("%llu alpha= %d ssth old= %d new= %d\n", */236 +            /*                ktime_to_us(ktime_get_real()), */237 +            /*                tp->dctcp_alpha, */238 +            /*                ssthresh_old, */239 +            /*                tp->snd_ssthresh); */240 +          }241 +          242 +          cwnd_old = tp->snd_cwnd;243 +          tp->snd_cwnd = cwnd_new;244 +          245 +          /* printk("%llu alpha= %d cwnd old= %d new= %d\n", */246 +          /*              ktime_to_us(ktime_get_real()), */247 +          /*              tp->dctcp_alpha, */248 +          /*              cwnd_old, */249 +          /*              tp->snd_cwnd); */250 +        }251 +        252          tp->snd_cwnd_cnt = 0;253          tp->high_seq = tp->snd_nxt;254          tp->snd_cwnd_stamp = tcp_time_stamp;255          TCP_ECN_queue_cwr(tp);256 -257 +        258          tcp_set_ca_state(sk, TCP_CA_CWR);259      }260  }261 @@ -2513,7 +2605,8 @@262          tcp_try_keep_open(sk);263          tcp_moderate_cwnd(tp);264      } else {265 -        tcp_cwnd_down(sk, flag);266 +      if(!sysctl_tcp_dctcp_enable)267 +        tcp_cwnd_down(sk, flag);268      }269  }270  271 @@ -3216,6 +3309,9 @@272      int prior_packets;273      int frto_cwnd = 0;274  275 +    __u32 alpha_old;276 +    __u32 acked_bytes;277 +278      /* If the ack is newer than sent or older than 
previous acks279       * then we can probably ignore it.280       */281 @@ -3269,6 +3365,45 @@282          tcp_ca_event(sk, CA_EVENT_SLOW_ACK);283      }284  285 +286 +    /* START: DCTCP Processing */287 +288 +    /* calc acked bytes */289 +    if(after(ack,prior_snd_una)) {290 +      acked_bytes = ack - prior_snd_una;291 +    } else {292 +      acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;293 +    }294 +    295 +    if(flag & FLAG_ECE) 296 +      tp->acked_bytes_ecn += acked_bytes;297 +298 +    tp->acked_bytes_total += acked_bytes;299 +300 +    /* Expired RTT */301 +        if (!before(tp->snd_una,tp->next_seq)) {302 +303 +      /* For avoiding denominator == 1 */304 +      if(tp->acked_bytes_total == 0) tp->acked_bytes_total = 1;305 +306 +          alpha_old = tp->dctcp_alpha; 307 +308 +      /* alpha = (1-g) * alpha + g * F */309 +      tp->dctcp_alpha = alpha_old - (alpha_old >> sysctl_tcp_dctcp_shift_g)310 +        + (tp->acked_bytes_ecn << (10 - sysctl_tcp_dctcp_shift_g)) / tp->acked_bytes_total;  311 +      312 +      if(tp->dctcp_alpha > 1024) tp->dctcp_alpha = 1024; /* round to 0-1024 */313 +314 +          /* printk("bytes_ecn= %d total= %d alpha: old= %d new= %d\n", */315 +      /*          tp->acked_bytes_ecn, tp->acked_bytes_total, alpha_old, tp->dctcp_alpha); */316 +      317 +      tp->acked_bytes_ecn = 0;318 +      tp->acked_bytes_total = 0;319 +      tp->next_seq = tp->snd_nxt;320 +        }321 +322 +    /* END: DCTCP Processing */323 +324      /* We passed data and got it acked, remove any soft error325       * log. Something worked...326       */327 @@ -4014,7 +4149,7 @@328          goto queue_and_out;329      }330  331 -    TCP_ECN_check_ce(tp, skb);332 +    TCP_ECN_dctcp_check_ce(sk, tp, skb);333  334      if (tcp_try_rmem_schedule(sk, skb->truesize))335          goto drop;336 @@ -4421,6 +4556,8 @@337           && __tcp_select_window(sk) >= tp->rcv_wnd) ||338          /* We ACK each frame or... 
*/339          tcp_in_quickack_mode(sk) ||340 +        /* Delayed ACK is disabled or ... */341 +        sysctl_tcp_delayed_ack == 0 ||342          /* We have out of order data. */343          (ofo_possible && skb_peek(&tp->out_of_order_queue))) {344          /* Then ack it now */345 @@ -5419,6 +5556,9 @@346  }347  348  EXPORT_SYMBOL(sysctl_tcp_ecn);349 +EXPORT_SYMBOL(sysctl_tcp_delayed_ack);350 +EXPORT_SYMBOL(sysctl_tcp_dctcp_enable);351 +EXPORT_SYMBOL(sysctl_tcp_dctcp_shift_g);352  EXPORT_SYMBOL(sysctl_tcp_reordering);353  EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);354  EXPORT_SYMBOL(tcp_parse_options);355 diff -Naur linux-2.6.26/net/ipv4/tcp_minisocks.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_minisocks.c356 --- linux-2.6.26/net/ipv4/tcp_minisocks.c    2008-07-13 14:51:29.000000000 -0700357 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_minisocks.c    2011-10-07 15:03:45.000000000 -0700358 @@ -398,6 +398,11 @@359          newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;360          newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;361  362 +        /* Initialize DCTCP internal parameters */363 +        newtp->next_seq = newtp->snd_nxt; 364 +        newtp->acked_bytes_ecn = 0;365 +        newtp->acked_bytes_total = 0;366 +367          tcp_prequeue_init(newtp);368  369          tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);370 diff -Naur linux-2.6.26/net/ipv4/tcp_output.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_output.c371 --- linux-2.6.26/net/ipv4/tcp_output.c    2008-07-13 14:51:29.000000000 -0700372 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_output.c    2011-10-07 14:41:50.000000000 -0700373 @@ -290,7 +290,7 @@374      struct tcp_sock *tp = tcp_sk(sk);375  376      tp->ecn_flags = 0;377 -    if (sysctl_tcp_ecn) {378 +    if (sysctl_tcp_ecn || sysctl_tcp_dctcp_enable) {379          TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;380          tp->ecn_flags = TCP_ECN_OK;381      }382 @@ -600,6 +600,10 @@383      
    TCP_ECN_send(sk, skb, tcp_header_size);384      }385  386 +    /* In DCTCP, Assert ECT bit to all packets*/387 +    if(sysctl_tcp_dctcp_enable)388 +      INET_ECN_xmit(sk);389 +390  #ifdef CONFIG_TCP_MD5SIG391      /* Calculate the MD5 hash, as we have all we need now */392      if (md5) {393 @@ -2352,6 +2356,11 @@394      tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);395      TCP_ECN_send_syn(sk, buff);396  397 +    /* Initialize DCTCP internal parameters */398 +    tp->next_seq = tp->snd_nxt; 399 +    tp->acked_bytes_ecn = 0;400 +    tp->acked_bytes_total = 0;401 +402      /* Send it off. */403      TCP_SKB_CB(buff)->when = tcp_time_stamp;404      tp->retrans_stamp = TCP_SKB_CB(buff)->when;405 @@ -2385,6 +2394,10 @@406      int ato = icsk->icsk_ack.ato;407      unsigned long timeout;408  409 +    /* Delayed ACK reserved flag for DCTCP */410 +    struct tcp_sock *tp = tcp_sk(sk);411 +    tp->delayed_ack_reserved = 1;412 +413      if (ato > TCP_DELACK_MIN) {414          const struct tcp_sock *tp = tcp_sk(sk);415          int max_ato = HZ / 2;416 @@ -2436,6 +2449,10 @@417  {418      struct sk_buff *buff;419  420 +    /* Delayed ACK reserved flag for DCTCP */421 +    struct tcp_sock *tp = tcp_sk(sk);422 +    tp->delayed_ack_reserved = 0;423 +424      /* If we have been reset, we may not send again. */425      if (sk->sk_state == TCP_CLOSE)426          return;

https://github.com/myasuda/DCTCP-Linux/blob/master/dctcp-2.6.26-rev1.1.0.patch

dctcp-2.6.26-rev1.1.0.patch