首页 > 代码库 > dctcp-2.6.26-rev1.1.0.patch
dctcp-2.6.26-rev1.1.0.patch
dctcp-2.6.26-rev1.1.0.patch
1 diff -Naur linux-2.6.26/include/linux/sysctl.h linux-2.6.26-dctcp-rev1.1.0/include/linux/sysctl.h 2 --- linux-2.6.26/include/linux/sysctl.h 2008-07-13 14:51:29.000000000 -0700 3 +++ linux-2.6.26-dctcp-rev1.1.0/include/linux/sysctl.h 2011-10-07 14:41:50.000000000 -0700 4 @@ -435,6 +435,9 @@ 5 NET_TCP_ALLOWED_CONG_CONTROL=123, 6 NET_TCP_MAX_SSTHRESH=124, 7 NET_TCP_FRTO_RESPONSE=125, 8 + NET_TCP_DELAYED_ACK=126, 9 + NET_TCP_DCTCP_ENABLE=127, 10 + NET_TCP_DCTCP_SHIFT_G=128, 11 }; 12 13 enum { 14 diff -Naur linux-2.6.26/include/linux/tcp.h linux-2.6.26-dctcp-rev1.1.0/include/linux/tcp.h 15 --- linux-2.6.26/include/linux/tcp.h 2008-07-13 14:51:29.000000000 -0700 16 +++ linux-2.6.26-dctcp-rev1.1.0/include/linux/tcp.h 2011-10-07 14:53:45.000000000 -0700 17 @@ -405,6 +405,15 @@ 18 /* TCP MD5 Signagure Option information */ 19 struct tcp_md5sig_info *md5sig_info; 20 #endif 21 + 22 +/* DCTCP Specific Parameters */ 23 + u32 acked_bytes_ecn; 24 + u32 acked_bytes_total; 25 + u32 prior_rcv_nxt; 26 + u32 dctcp_alpha; 27 + u32 next_seq; 28 + u32 ce_state; /* 0: last pkt was non-ce , 1: last pkt was ce */ 29 + u32 delayed_ack_reserved; 30 }; 31 32 static inline struct tcp_sock *tcp_sk(const struct sock *sk) 33 diff -Naur linux-2.6.26/include/net/tcp.h linux-2.6.26-dctcp-rev1.1.0/include/net/tcp.h 34 --- linux-2.6.26/include/net/tcp.h 2008-07-13 14:51:29.000000000 -0700 35 +++ linux-2.6.26-dctcp-rev1.1.0/include/net/tcp.h 2011-10-07 14:41:50.000000000 -0700 36 @@ -214,6 +214,9 @@ 37 extern int sysctl_tcp_fack; 38 extern int sysctl_tcp_reordering; 39 extern int sysctl_tcp_ecn; 40 +extern int sysctl_tcp_delayed_ack; 41 +extern int sysctl_tcp_dctcp_enable; 42 +extern int sysctl_tcp_dctcp_shift_g; 43 extern int sysctl_tcp_dsack; 44 extern int sysctl_tcp_mem[3]; 45 extern int sysctl_tcp_wmem[3]; 46 diff -Naur linux-2.6.26/kernel/sysctl_check.c linux-2.6.26-dctcp-rev1.1.0/kernel/sysctl_check.c 47 --- linux-2.6.26/kernel/sysctl_check.c 2008-07-13 14:51:29.000000000 -0700 48 +++ 
linux-2.6.26-dctcp-rev1.1.0/kernel/sysctl_check.c 2011-10-07 14:41:50.000000000 -0700 49 @@ -353,6 +353,9 @@ 50 { NET_TCP_FACK, "tcp_fack" }, 51 { NET_TCP_REORDERING, "tcp_reordering" }, 52 { NET_TCP_ECN, "tcp_ecn" }, 53 + { NET_TCP_DELAYED_ACK, "tcp_delayed_ack" }, 54 + { NET_TCP_DCTCP_ENABLE, "tcp_dctcp_enable" }, 55 + { NET_TCP_DCTCP_SHIFT_G, "tcp_dctcp_shift_g" }, 56 { NET_TCP_DSACK, "tcp_dsack" }, 57 { NET_TCP_MEM, "tcp_mem" }, 58 { NET_TCP_WMEM, "tcp_wmem" }, 59 diff -Naur linux-2.6.26/net/ipv4/sysctl_net_ipv4.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/sysctl_net_ipv4.c 60 --- linux-2.6.26/net/ipv4/sysctl_net_ipv4.c 2008-07-13 14:51:29.000000000 -0700 61 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/sysctl_net_ipv4.c 2011-10-07 14:41:50.000000000 -0700 62 @@ -506,6 +506,30 @@ 63 .proc_handler = &proc_dointvec 64 }, 65 { 66 + .ctl_name = NET_TCP_DELAYED_ACK, 67 + .procname = "tcp_delayed_ack", 68 + .data = &sysctl_tcp_delayed_ack, 69 + .maxlen = sizeof(int), 70 + .mode = 0644, 71 + .proc_handler = &proc_dointvec 72 + }, 73 + { 74 + .ctl_name = NET_TCP_DCTCP_ENABLE, 75 + .procname = "tcp_dctcp_enable", 76 + .data = &sysctl_tcp_dctcp_enable, 77 + .maxlen = sizeof(int), 78 + .mode = 0644, 79 + .proc_handler = &proc_dointvec 80 + }, 81 + { 82 + .ctl_name = NET_TCP_DCTCP_SHIFT_G, 83 + .procname = "tcp_dctcp_shift_g", 84 + .data = &sysctl_tcp_dctcp_shift_g, 85 + .maxlen = sizeof(int), 86 + .mode = 0644, 87 + .proc_handler = &proc_dointvec 88 + }, 89 + { 90 .ctl_name = NET_TCP_DSACK, 91 .procname = "tcp_dsack", 92 .data = &sysctl_tcp_dsack, 93 diff -Naur linux-2.6.26/net/ipv4/tcp_input.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_input.c 94 --- linux-2.6.26/net/ipv4/tcp_input.c 2008-07-13 14:51:29.000000000 -0700 95 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_input.c 2011-10-07 14:53:21.000000000 -0700 96 @@ -79,6 +79,9 @@ 97 int sysctl_tcp_fack __read_mostly = 1; 98 int 
sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; 99 int sysctl_tcp_ecn __read_mostly;100 +int sysctl_tcp_delayed_ack __read_mostly = 1;101 +int sysctl_tcp_dctcp_enable __read_mostly;102 +int sysctl_tcp_dctcp_shift_g __read_mostly = 5; /* g=1/2^5 */103 int sysctl_tcp_dsack __read_mostly = 1;104 int sysctl_tcp_app_win __read_mostly = 31;105 int sysctl_tcp_adv_win_scale __read_mostly = 2;106 @@ -212,16 +215,68 @@107 tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;108 }109 110 -static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)111 +static inline void TCP_ECN_dctcp_check_ce(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb)112 {113 if (tp->ecn_flags & TCP_ECN_OK) {114 - if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))115 - tp->ecn_flags |= TCP_ECN_DEMAND_CWR;116 - /* Funny extension: if ECT is not set on a segment,117 - * it is surely retransmit. It is not in ECN RFC,118 - * but Linux follows this rule. */119 - else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags)))120 - tcp_enter_quickack_mode((struct sock *)tp);121 + u32 temp_rcv_nxt;122 +123 + if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags)) {124 +125 + /* rcv_nxt is already update in previous process (tcp_rcv_established) */126 +127 + if(sysctl_tcp_dctcp_enable) {128 +129 + /* state has changed from CE=0 to CE=1 && delayed ack has not sent yet */130 + if(tp->ce_state == 0 && tp->delayed_ack_reserved) {131 +132 + /* save current rcv_nxt */133 + temp_rcv_nxt = tp->rcv_nxt;134 + /* generate previous ack with CE=0 */135 + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;136 + tp->rcv_nxt = tp->prior_rcv_nxt;137 + tcp_send_ack(sk);138 + /* recover current rcv_nxt */139 + tp->rcv_nxt = temp_rcv_nxt;140 + }141 + 142 + tp->ce_state = 1;143 + }144 +145 + tp->ecn_flags |= TCP_ECN_DEMAND_CWR;146 +147 +148 + /* Funny extension: if ECT is not set on a segment,149 + * it is surely retransmit. It is not in ECN RFC,150 + * but Linux follows this rule. 
*/151 + } else if (INET_ECN_is_not_ect((TCP_SKB_CB(skb)->flags))) {152 + tcp_enter_quickack_mode((struct sock *)tp);153 + }else {154 + /* It has ECT but it doesn't have CE */155 + 156 + if(sysctl_tcp_dctcp_enable) {157 + 158 + if(tp->ce_state != 0 && tp->delayed_ack_reserved) {159 + 160 + /* save current rcv_nxt */161 + temp_rcv_nxt = tp->rcv_nxt;162 + /* generate previous ack with CE=1 */163 + tp->ecn_flags |= TCP_ECN_DEMAND_CWR;164 + tp->rcv_nxt = tp->prior_rcv_nxt;165 + tcp_send_ack(sk);166 + /* recover current rcv_nxt */167 + tp->rcv_nxt = temp_rcv_nxt;168 + }169 +170 + tp->ce_state = 0;171 +172 + /* deassert only when DCTCP is enabled */173 + tp->ecn_flags &= ~TCP_ECN_DEMAND_CWR;174 + }175 +176 + }177 + 178 + /* set current rcv_nxt to prior_rcv_nxt */179 + tp->prior_rcv_nxt = tp->rcv_nxt;180 }181 }182 183 @@ -572,6 +627,8 @@184 */185 tcp_incr_quickack(sk);186 icsk->icsk_ack.ato = TCP_ATO_MIN;187 +188 + tp->ce_state = 0;189 } else {190 int m = now - icsk->icsk_ack.lrcvtime;191 192 @@ -592,7 +649,7 @@193 }194 icsk->icsk_ack.lrcvtime = now;195 196 - TCP_ECN_check_ce(tp, skb);197 + TCP_ECN_dctcp_check_ce(sk, tp, skb);198 199 if (skb->len >= 128)200 tcp_grow_window(sk, skb);201 @@ -836,19 +893,54 @@202 struct tcp_sock *tp = tcp_sk(sk);203 const struct inet_connection_sock *icsk = inet_csk(sk);204 205 + __u32 ssthresh_old; 206 + __u32 cwnd_old;207 + __u32 cwnd_new;208 +209 tp->prior_ssthresh = 0;210 tp->bytes_acked = 0;211 if (icsk->icsk_ca_state < TCP_CA_CWR) {212 tp->undo_marker = 0;213 - if (set_ssthresh)214 - tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);215 - tp->snd_cwnd = min(tp->snd_cwnd,216 - tcp_packets_in_flight(tp) + 1U);217 +218 + if(!sysctl_tcp_dctcp_enable) {219 +220 + if (set_ssthresh)221 + tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);222 +223 + tp->snd_cwnd = min(tp->snd_cwnd,224 + tcp_packets_in_flight(tp) + 1U);225 + 226 + }else {227 +228 + cwnd_new = max (tp->snd_cwnd - ((tp->snd_cwnd * tp->dctcp_alpha)>>11) , 2U);229 +230 + 
if(set_ssthresh) {231 + 232 + ssthresh_old = tp->snd_ssthresh;233 + tp->snd_ssthresh = cwnd_new;234 + 235 + /* printk("%llu alpha= %d ssth old= %d new= %d\n", */236 + /* ktime_to_us(ktime_get_real()), */237 + /* tp->dctcp_alpha, */238 + /* ssthresh_old, */239 + /* tp->snd_ssthresh); */240 + }241 + 242 + cwnd_old = tp->snd_cwnd;243 + tp->snd_cwnd = cwnd_new;244 + 245 + /* printk("%llu alpha= %d cwnd old= %d new= %d\n", */246 + /* ktime_to_us(ktime_get_real()), */247 + /* tp->dctcp_alpha, */248 + /* cwnd_old, */249 + /* tp->snd_cwnd); */250 + }251 + 252 tp->snd_cwnd_cnt = 0;253 tp->high_seq = tp->snd_nxt;254 tp->snd_cwnd_stamp = tcp_time_stamp;255 TCP_ECN_queue_cwr(tp);256 -257 + 258 tcp_set_ca_state(sk, TCP_CA_CWR);259 }260 }261 @@ -2513,7 +2605,8 @@262 tcp_try_keep_open(sk);263 tcp_moderate_cwnd(tp);264 } else {265 - tcp_cwnd_down(sk, flag);266 + if(!sysctl_tcp_dctcp_enable)267 + tcp_cwnd_down(sk, flag);268 }269 }270 271 @@ -3216,6 +3309,9 @@272 int prior_packets;273 int frto_cwnd = 0;274 275 + __u32 alpha_old;276 + __u32 acked_bytes;277 +278 /* If the ack is newer than sent or older than previous acks279 * then we can probably ignore it.280 */281 @@ -3269,6 +3365,45 @@282 tcp_ca_event(sk, CA_EVENT_SLOW_ACK);283 }284 285 +286 + /* START: DCTCP Processing */287 +288 + /* calc acked bytes */289 + if(after(ack,prior_snd_una)) {290 + acked_bytes = ack - prior_snd_una;291 + } else {292 + acked_bytes = inet_csk(sk)->icsk_ack.rcv_mss;293 + }294 + 295 + if(flag & FLAG_ECE) 296 + tp->acked_bytes_ecn += acked_bytes;297 +298 + tp->acked_bytes_total += acked_bytes;299 +300 + /* Expired RTT */301 + if (!before(tp->snd_una,tp->next_seq)) {302 +303 + /* For avoiding denominator == 0 */304 + if(tp->acked_bytes_total == 0) tp->acked_bytes_total = 1;305 +306 + alpha_old = tp->dctcp_alpha; 307 +308 + /* alpha = (1-g) * alpha + g * F */309 + tp->dctcp_alpha = alpha_old - (alpha_old >> sysctl_tcp_dctcp_shift_g)310 + + (tp->acked_bytes_ecn << (10 - sysctl_tcp_dctcp_shift_g)) / 
tp->acked_bytes_total; 311 + 312 + if(tp->dctcp_alpha > 1024) tp->dctcp_alpha = 1024; /* round to 0-1024 */313 +314 + /* printk("bytes_ecn= %d total= %d alpha: old= %d new= %d\n", */315 + /* tp->acked_bytes_ecn, tp->acked_bytes_total, alpha_old, tp->dctcp_alpha); */316 + 317 + tp->acked_bytes_ecn = 0;318 + tp->acked_bytes_total = 0;319 + tp->next_seq = tp->snd_nxt;320 + }321 +322 + /* END: DCTCP Processing */323 +324 /* We passed data and got it acked, remove any soft error325 * log. Something worked...326 */327 @@ -4014,7 +4149,7 @@328 goto queue_and_out;329 }330 331 - TCP_ECN_check_ce(tp, skb);332 + TCP_ECN_dctcp_check_ce(sk, tp, skb);333 334 if (tcp_try_rmem_schedule(sk, skb->truesize))335 goto drop;336 @@ -4421,6 +4556,8 @@337 && __tcp_select_window(sk) >= tp->rcv_wnd) ||338 /* We ACK each frame or... */339 tcp_in_quickack_mode(sk) ||340 + /* Delayed ACK is disabled or ... */341 + sysctl_tcp_delayed_ack == 0 ||342 /* We have out of order data. */343 (ofo_possible && skb_peek(&tp->out_of_order_queue))) {344 /* Then ack it now */345 @@ -5419,6 +5556,9 @@346 }347 348 EXPORT_SYMBOL(sysctl_tcp_ecn);349 +EXPORT_SYMBOL(sysctl_tcp_delayed_ack);350 +EXPORT_SYMBOL(sysctl_tcp_dctcp_enable);351 +EXPORT_SYMBOL(sysctl_tcp_dctcp_shift_g);352 EXPORT_SYMBOL(sysctl_tcp_reordering);353 EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);354 EXPORT_SYMBOL(tcp_parse_options);355 diff -Naur linux-2.6.26/net/ipv4/tcp_minisocks.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_minisocks.c356 --- linux-2.6.26/net/ipv4/tcp_minisocks.c 2008-07-13 14:51:29.000000000 -0700357 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_minisocks.c 2011-10-07 15:03:45.000000000 -0700358 @@ -398,6 +398,11 @@359 newtp->rcv_wup = newtp->copied_seq = newtp->rcv_nxt = treq->rcv_isn + 1;360 newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = treq->snt_isn + 1;361 362 + /* Initialize DCTCP internal parameters */363 + newtp->next_seq = newtp->snd_nxt; 364 + newtp->acked_bytes_ecn = 0;365 + newtp->acked_bytes_total = 0;366 +367 
tcp_prequeue_init(newtp);368 369 tcp_init_wl(newtp, treq->snt_isn, treq->rcv_isn);370 diff -Naur linux-2.6.26/net/ipv4/tcp_output.c linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_output.c371 --- linux-2.6.26/net/ipv4/tcp_output.c 2008-07-13 14:51:29.000000000 -0700372 +++ linux-2.6.26-dctcp-rev1.1.0/net/ipv4/tcp_output.c 2011-10-07 14:41:50.000000000 -0700373 @@ -290,7 +290,7 @@374 struct tcp_sock *tp = tcp_sk(sk);375 376 tp->ecn_flags = 0;377 - if (sysctl_tcp_ecn) {378 + if (sysctl_tcp_ecn || sysctl_tcp_dctcp_enable) {379 TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_ECE | TCPCB_FLAG_CWR;380 tp->ecn_flags = TCP_ECN_OK;381 }382 @@ -600,6 +600,10 @@383 TCP_ECN_send(sk, skb, tcp_header_size);384 }385 386 + /* In DCTCP, Assert ECT bit to all packets*/387 + if(sysctl_tcp_dctcp_enable)388 + INET_ECN_xmit(sk);389 +390 #ifdef CONFIG_TCP_MD5SIG391 /* Calculate the MD5 hash, as we have all we need now */392 if (md5) {393 @@ -2352,6 +2356,11 @@394 tcp_init_nondata_skb(buff, tp->write_seq++, TCPCB_FLAG_SYN);395 TCP_ECN_send_syn(sk, buff);396 397 + /* Initialize DCTCP internal parameters */398 + tp->next_seq = tp->snd_nxt; 399 + tp->acked_bytes_ecn = 0;400 + tp->acked_bytes_total = 0;401 +402 /* Send it off. */403 TCP_SKB_CB(buff)->when = tcp_time_stamp;404 tp->retrans_stamp = TCP_SKB_CB(buff)->when;405 @@ -2385,6 +2394,10 @@406 int ato = icsk->icsk_ack.ato;407 unsigned long timeout;408 409 + /* Delayed ACK reserved flag for DCTCP */410 + struct tcp_sock *tp = tcp_sk(sk);411 + tp->delayed_ack_reserved = 1;412 +413 if (ato > TCP_DELACK_MIN) {414 const struct tcp_sock *tp = tcp_sk(sk);415 int max_ato = HZ / 2;416 @@ -2436,6 +2449,10 @@417 {418 struct sk_buff *buff;419 420 + /* Delayed ACK reserved flag for DCTCP */421 + struct tcp_sock *tp = tcp_sk(sk);422 + tp->delayed_ack_reserved = 0;423 +424 /* If we have been reset, we may not send again. */425 if (sk->sk_state == TCP_CLOSE)426 return;
https://github.com/myasuda/DCTCP-Linux/blob/master/dctcp-2.6.26-rev1.1.0.patch
dctcp-2.6.26-rev1.1.0.patch
声明:以上内容来自用户投稿及互联网公开渠道收集整理发布,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任,若内容有误或涉及侵权可进行投诉: 投诉/举报 工作人员会在5个工作日内联系你,一经查实,本站将立刻删除涉嫌侵权内容。