首页 > 代码库 > TCP内核源码分析笔记

TCP内核源码分析笔记

Table of Contents

  • 1 tcp_v4_connect()
  • 2 sys_accept()
    • 2.1 tcp_accept()
  • 3 三次握手
    • 3.1 客户端发送SYN段
    • 3.2 服务端接收到SYN段后,发送SYN/ACK处理
    • 3.3 客户端回复确认ACK段
      • 3.3.1 tcp_rcv_synsent_state_process()
    • 3.4 服务端收到ACK段
  • 4 数据传输

tcp_v4_connect()

  • 描述: 建立与服务器连接,发送SYN段
  • 返回值: 0或错误码
  • 代码关键路径:
     1:  int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
     2:  {
     3:      .....      
     4:      /* 设置目的地址和目标端口 */
     5:      inet->dport = usin->sin_port;
     6:      inet->daddr = daddr;
     7:      ....     
     8:      /* 初始化MSS上限 */
     9:      tp->rx_opt.mss_clamp = 536;
    10:  
    11:      /* Socket identity is still unknown (sport may be zero).
    12:       * However we set state to SYN-SENT and not releasing socket
    13:       * lock select source port, enter ourselves into the hash tables and
    14:       * complete initialization after this.
    15:       */
    16:      tcp_set_state(sk, TCP_SYN_SENT);/* 设置状态 */
    17:      err = tcp_v4_hash_connect(sk);/* 将传输控制添加到ehash散列表中,并动态分配端口 */
    18:      if (err)
    19:          goto failure;
    20:      ....
    21:      if (!tp->write_seq)/* 还未计算初始序号 */
    22:          /* 根据双方地址、端口计算初始序号 */
    23:          tp->write_seq = secure_tcp_sequence_number(inet->saddr,
    24:                                 inet->daddr,
    25:                                 inet->sport,
    26:                                 usin->sin_port);
    27:  
    28:      /* 根据初始序号和当前时间,随机算一个初始id */
    29:      inet->id = tp->write_seq ^ jiffies;
    30:  
    31:      /* 发送SYN段 */
    32:      err = tcp_connect(sk);
    33:      rt = NULL;
    34:      if (err)
    35:          goto failure;
    36:  
    37:      return 0;
    38:  }
    


sys_accept()

  • 描述: 调用tcp_accept(), 并把它返回的newsk进行连接描述符分配后返回给用户空间。
  • 返回值: 连接描述符
  • 代码关键路径:
     1:  asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen)
     2:  {
     3:      struct socket *sock, *newsock;
     4:      .....     
     5:      sock = sockfd_lookup(fd, &err);/* 获得侦听端口的socket */
     6:      .....    
     7:      if (!(newsock = sock_alloc()))/* 分配一个新的套接口,用来处理与客户端的连接 */ 
     8:      .....     
     9:      /* 调用传输层的accept,对TCP来说,是inet_accept */
    10:      err = sock->ops->accept(sock, newsock, sock->file->f_flags);
    11:      ....    
    12:      if (upeer_sockaddr) {/* 调用者需要获取对方套接口地址和端口 */
    13:          /* 调用传输层回调获得对方的地址和端口 */
    14:          if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
    15:          }
    16:          /* 成功后复制到用户态 */
    17:          err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
    18:      }
    19:      .....     
    20:      if ((err = sock_map_fd(newsock)) < 0)/* 为新连接分配文件描述符 */
    21:  
    22:      return err;
    23:  }
    


tcp_accept()

[注]: 在内核2.6.32以后对应函数为inet_csk_accept().

  • 描述: 在规定的超时时间内等待tcp_sock->accept_queue队列变为非空(队列非空代表有新的连接进入),然后从队首取出一个已完成的连接请求,返回其对应的newsk.
  • 返回值: (struct sock *)newsk;
  • 代码关键路径:
     1:  struct sock *tcp_accept(struct sock *sk, int flags, int *err)
     2:  {
     3:      ....
     4:      /* Find already established connection */
     5:      if (!tp->accept_queue) {/* accept队列为空,说明还没有收到新连接 */
     6:          long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);/* 如果套口是非阻塞的,或者在一定时间内没有新连接,则返回 */
     7:  
     8:          if (!timeo)/* 超时时间到,没有新连接,退出 */
     9:              goto out;
    10:  
    11:          /* 运行到这里,说明accept队列为空且允许阻塞,则睡眠等待新连接(新的传输控制块)到来 */
    12:          error = wait_for_connect(sk, timeo);
    13:          if (error)
    14:              goto out;
    15:      }
    16:  
    17:      req = tp->accept_queue;
    18:      if ((tp->accept_queue = req->dl_next) == NULL)
    19:          tp->accept_queue_tail = NULL;
    20:  
    21:      newsk = req->sk;
    22:      sk_acceptq_removed(sk);
    23:      tcp_openreq_fastfree(req);
    24:      ....
    25:  
    26:      return newsk;
    27:  }
    


三次握手

客户端发送SYN段

  • 由tcp_v4_connect()->tcp_connect()->tcp_transmit_skb()发送;在调用tcp_connect()之前,tcp_v4_connect()已将套接口状态置为TCP_SYN_SENT.
  • 代码关键路径:
     1:  /* 构造并发送SYN段 */
     2:  int tcp_connect(struct sock *sk)
     3:  {
     4:      struct tcp_sock *tp = tcp_sk(sk);
     5:      struct sk_buff *buff;
     6:  
     7:      tcp_connect_init(sk);/* 初始化传输控制块中与连接相关的成员 */
     8:  
     9:      /* 为SYN段分配报文并进行初始化 */
    10:      buff = alloc_skb(MAX_TCP_HEADER + 15, sk->sk_allocation);
    11:      if (unlikely(buff == NULL))
    12:          return -ENOBUFS;
    13:  
    14:      /* Reserve space for headers. */
    15:      skb_reserve(buff, MAX_TCP_HEADER);
    16:  
    17:      TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
    18:      TCP_ECN_send_syn(sk, tp, buff);
    19:      TCP_SKB_CB(buff)->sacked = 0;
    20:      skb_shinfo(buff)->tso_segs = 1;
    21:      skb_shinfo(buff)->tso_size = 0;
    22:      buff->csum = 0;
    23:      TCP_SKB_CB(buff)->seq = tp->write_seq++;
    24:      TCP_SKB_CB(buff)->end_seq = tp->write_seq;
    25:      tp->snd_nxt = tp->write_seq;
    26:      tp->pushed_seq = tp->write_seq;
    27:      tcp_ca_init(tp);
    28:  
    29:      /* Send it off. */
    30:      TCP_SKB_CB(buff)->when = tcp_time_stamp;
    31:      tp->retrans_stamp = TCP_SKB_CB(buff)->when;
    32:  
    33:      /* 将报文添加到发送队列上 */
    34:      __skb_queue_tail(&sk->sk_write_queue, buff);
    35:      sk_charge_skb(sk, buff);
    36:      tp->packets_out += tcp_skb_pcount(buff);
    37:      /* 发送SYN段 */
    38:      tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
    39:      TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
    40:  
    41:      /* Timer for repeating the SYN until an answer. */
    42:      /* 启动重传定时器 */
    43:      tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
    44:      return 0;
    45:  }
    46:  
    


服务端接收到SYN段后,发送SYN/ACK处理

  • 由tcp_v4_do_rcv()->tcp_rcv_state_process()->tcp_v4_conn_request()->tcp_v4_send_synack().
  • tcp_v4_send_synack()
    • tcp_make_synack(sk, dst, req); * 根据路由、传输控制块、连接请求块中的信息构建SYN+ACK段 *
    • ip_build_and_send_pkt(); * 生成IP数据报并发送出去 *



      http://images.cnitblog.com/blog/479389/201408/051135590845524.jpg

      图: 服务端接收到SYN段后,发送SYN/ACK处理流程。



    • 代码关键路径:
       1:  /* 向客户端发送SYN+ACK报文 */
       2:  static int tcp_v4_send_synack(struct sock *sk, struct open_request *req,
       3:                    struct dst_entry *dst)
       4:  {
       5:      int err = -1;
       6:      struct sk_buff * skb;
       7:  
       8:      /* First, grab a route. */
       9:      /* 查找到客户端的路由 */
      10:      if (!dst && (dst = tcp_v4_route_req(sk, req)) == NULL)
      11:          goto out;
      12:  
      13:      /* 根据路由、传输控制块、连接请求块中的信息构建SYN+ACK段 */
      14:      skb = tcp_make_synack(sk, dst, req);
      15:  
      16:      if (skb) {/* 生成SYN+ACK段成功 */
      17:          struct tcphdr *th = skb->h.th;
      18:  
      19:          /* 生成校验码 */
      20:          th->check = tcp_v4_check(th, skb->len,
      21:                       req->af.v4_req.loc_addr,
      22:                       req->af.v4_req.rmt_addr,
      23:                       csum_partial((char *)th, skb->len,
      24:                                skb->csum));
      25:  
      26:          /* 生成IP数据报并发送出去 */
      27:          err = ip_build_and_send_pkt(skb, sk, req->af.v4_req.loc_addr,
      28:                          req->af.v4_req.rmt_addr,
      29:                          req->af.v4_req.opt);
      30:          if (err == NET_XMIT_CN)
      31:              err = 0;
      32:      }
      33:  
      34:  out:
      35:      dst_release(dst);
      36:      return err;
      37:  }
      38:  
      


客户端回复确认ACK段

  • 由tcp_v4_do_rcv()->tcp_rcv_state_process().当前客户端处于TCP_SYN_SENT状态。
  • tcp_rcv_synsent_state_process(); * tcp_rcv_synsent_state_process处理SYN_SENT状态下接收到的TCP段 *
    • tcp_ack(); * 处理接收到的ack报文 *
    • tcp_send_ack(); * 在主动连接时,向服务器端发送ACK完成连接,并更新窗口 *
      • alloc_skb(); * 构造ack段 *
      • tcp_transmit_skb(); * 将ack段发出 *
    • tcp_urg(sk, skb, th); * 处理完第二次握手后,还需要处理带外数据 *
    • tcp_data_snd_check(sk); * 检测是否有数据需要发送 *
      • 检查sk->sk_send_head队列上是否有待发送的数据。
      • tcp_write_xmit(); * 将TCP发送队列上的段发送出去 *
  • 代码关键路径:

tcp_rcv_synsent_state_process()

  1:  /* 在SYN_SENT状态下处理接收到的段,但是不处理带外数据 */
  2:  static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
  3:                     struct tcphdr *th, unsigned len)
  4:  {
  5:    struct tcp_sock *tp = tcp_sk(sk);
  6:    int saved_clamp = tp->rx_opt.mss_clamp;
  7:  
  8:    /* 解析TCP选项并保存到传输控制块中 */
  9:    tcp_parse_options(skb, &tp->rx_opt, 0);
 10:  
 11:    if (th->ack) {/* 处理ACK标志 */
 12:        /* rfc793:
 13:         * "If the state is SYN-SENT then
 14:         *    first check the ACK bit
 15:         *      If the ACK bit is set
 16:         *    If SEG.ACK =< ISS, or SEG.ACK > SND.NXT, send
 17:         *        a reset (unless the RST bit is set, if so drop
 18:         *        the segment and return)"
 19:         *
 20:         *  We do not send data with SYN, so that RFC-correct
 21:         *  test reduces to:
 22:         */
 23:        if (TCP_SKB_CB(skb)->ack_seq != tp->snd_nxt)
 24:            goto reset_and_undo;
 25:  
 26:        if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
 27:            !between(tp->rx_opt.rcv_tsecr, tp->retrans_stamp,
 28:                 tcp_time_stamp)) {
 29:            NET_INC_STATS_BH(LINUX_MIB_PAWSACTIVEREJECTED);
 30:            goto reset_and_undo;
 31:        }
 32:  
 33:        /* Now ACK is acceptable.
 34:         *
 35:         * "If the RST bit is set
 36:         *    If the ACK was acceptable then signal the user "error:
 37:         *    connection reset", drop the segment, enter CLOSED state,
 38:         *    delete TCB, and return."
 39:         */
 40:  
 41:        if (th->rst) {/* 收到ACK+RST段,需要tcp_reset设置错误码,并关闭套接口 */
 42:            tcp_reset(sk);
 43:            goto discard;
 44:        }
 45:  
 46:        /* rfc793:
 47:         *   "fifth, if neither of the SYN or RST bits is set then
 48:         *    drop the segment and return."
 49:         *
 50:         *    See note below!
 51:         *                                        --ANK(990513)
 52:         */
 53:        if (!th->syn)/* 在SYN_SENT状态下接收到的段必须存在SYN标志,否则说明接收到的段无效,丢弃该段 */
 54:            goto discard_and_undo;
 55:  
 56:        /* rfc793:
 57:         *   "If the SYN bit is on ...
 58:         *    are acceptable then ...
 59:         *    (our SYN has been ACKed), change the connection
 60:         *    state to ESTABLISHED..."
 61:         */
 62:  
 63:        /* 从首部标志中获取显式拥塞通知的特性 */
 64:        TCP_ECN_rcv_synack(tp, th);
 65:        if (tp->ecn_flags&TCP_ECN_OK)/* 如果支持ECN,则设置标志 */
 66:            sk->sk_no_largesend = 1;
 67:  
 68:        /* 设置与窗口相关的成员变量 */
 69:        tp->snd_wl1 = TCP_SKB_CB(skb)->seq;
 70:        tcp_ack(sk, skb, FLAG_SLOWPATH);
 71:  
 72:        /* Ok.. it's good. Set up sequence numbers and
 73:         * move to established.
 74:         */
 75:        tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
 76:        tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
 77:  
 78:        /* RFC1323: The window in SYN & SYN/ACK segments is
 79:         * never scaled.
 80:         */
 81:        tp->snd_wnd = ntohs(th->window);
 82:        tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq, TCP_SKB_CB(skb)->seq);
 83:  
 84:        if (!tp->rx_opt.wscale_ok) {
 85:            tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
 86:            tp->window_clamp = min(tp->window_clamp, 65535U);
 87:        }
 88:  
 89:        if (tp->rx_opt.saw_tstamp) {/* 根据是否支持时间戳选项来设置传输控制块的相关字段 */
 90:            tp->rx_opt.tstamp_ok       = 1;
 91:            tp->tcp_header_len =
 92:                sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
 93:            tp->advmss      -= TCPOLEN_TSTAMP_ALIGNED;
 94:            tcp_store_ts_recent(tp);
 95:        } else {
 96:            tp->tcp_header_len = sizeof(struct tcphdr);
 97:        }
 98:  
 99:        /* 初始化PMTU、MSS等成员变量 */
100:        if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
101:            tp->rx_opt.sack_ok |= 2;
102:  
103:        tcp_sync_mss(sk, tp->pmtu_cookie);
104:        tcp_initialize_rcv_mss(sk);
105:  
106:        /* Remember, tcp_poll() does not lock socket!
107:         * Change state from SYN-SENT only after copied_seq
108:         * is initialized. */
109:        tp->copied_seq = tp->rcv_nxt;
110:        mb();
111:        tcp_set_state(sk, TCP_ESTABLISHED);
112:  
113:        /* Make sure socket is routed, for correct metrics.  */
114:        tp->af_specific->rebuild_header(sk);
115:  
116:        tcp_init_metrics(sk);
117:  
118:        /* Prevent spurious tcp_cwnd_restart() on first data
119:         * packet.
120:         */
121:        tp->lsndtime = tcp_time_stamp;
122:  
123:        tcp_init_buffer_space(sk);
124:  
125:        /* 如果启用了连接保活,则启用连接保活定时器 */
126:        if (sock_flag(sk, SOCK_KEEPOPEN))
127:            tcp_reset_keepalive_timer(sk, keepalive_time_when(tp));
128:  
129:        if (!tp->rx_opt.snd_wscale)/* 首部预测 */
130:            __tcp_fast_path_on(tp, tp->snd_wnd);
131:        else
132:            tp->pred_flags = 0;
133:  
134:        if (!sock_flag(sk, SOCK_DEAD)) {/* 如果套口不处于SOCK_DEAD状态,则唤醒等待该套接口的进程 */
135:            sk->sk_state_change(sk);
136:            sk_wake_async(sk, 0, POLL_OUT);
137:        }
138:  
139:        /* 连接建立完成,根据情况进入延时确认模式 */
140:        if (sk->sk_write_pending || tp->defer_accept || tp->ack.pingpong) {
141:            /* Save one ACK. Data will be ready after
142:             * several ticks, if write_pending is set.
143:             *
144:             * It may be deleted, but with this feature tcpdumps
145:             * look so _wonderfully_ clever, that I was not able
146:             * to stand against the temptation 8)     --ANK
147:             */
148:            tcp_schedule_ack(tp);
149:            tp->ack.lrcvtime = tcp_time_stamp;
150:            tp->ack.ato  = TCP_ATO_MIN;
151:            tcp_incr_quickack(tp);
152:            tcp_enter_quickack_mode(tp);
153:            tcp_reset_xmit_timer(sk, TCP_TIME_DACK, TCP_DELACK_MAX);
154:  
155:  discard:
156:            __kfree_skb(skb);
157:            return 0;
158:        } else {/* 不需要延时确认,立即发送ACK段 */
159:            tcp_send_ack(sk);
160:        }
161:        return -1;
162:    }
163:  
164:    /* No ACK in the segment */
165:  
166:    if (th->rst) {/* 收到不带ACK的RST段,直接丢弃该段 */
167:        /* rfc793:
168:         * "If the RST bit is set
169:         *
170:         *      Otherwise (no ACK) drop the segment and return."
171:         */
172:  
173:        goto discard_and_undo;
174:    }
175:  
176:    /* PAWS check. */
177:    /* PAWS检测失败,同样丢弃该段 */
178:    if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0))
179:        goto discard_and_undo;
180:  
181:    /* 在SYN_SENT状态下收到了SYN段并且没有ACK,说明是两端同时打开 */
182:    if (th->syn) {
183:        /* We see SYN without ACK. It is attempt of
184:         * simultaneous connect with crossed SYNs.
185:         * Particularly, it can be connect to self.
186:         */
187:        tcp_set_state(sk, TCP_SYN_RECV);/* 设置状态为TCP_SYN_RECV */
188:  
189:        if (tp->rx_opt.saw_tstamp) {/* 设置时间戳相关的字段 */
190:            tp->rx_opt.tstamp_ok = 1;
191:            tcp_store_ts_recent(tp);
192:            tp->tcp_header_len =
193:                sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
194:        } else {
195:            tp->tcp_header_len = sizeof(struct tcphdr);
196:        }
197:  
198:        /* 初始化窗口相关的成员变量 */
199:        tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
200:        tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1;
201:  
202:        /* RFC1323: The window in SYN & SYN/ACK segments is
203:         * never scaled.
204:         */
205:        tp->snd_wnd    = ntohs(th->window);
206:        tp->snd_wl1    = TCP_SKB_CB(skb)->seq;
207:        tp->max_window = tp->snd_wnd;
208:  
209:        TCP_ECN_rcv_syn(tp, th);/* 从首部标志中获取显式拥塞通知的特性。 */
210:        if (tp->ecn_flags&TCP_ECN_OK)
211:            sk->sk_no_largesend = 1;
212:  
213:        /* 初始化MSS相关的成员变量 */
214:        tcp_sync_mss(sk, tp->pmtu_cookie);
215:        tcp_initialize_rcv_mss(sk);
216:  
217:        /* 向对端发送SYN+ACK段,并丢弃接收到的SYN段 */
218:        tcp_send_synack(sk);
219:  #if 0
220:        /* Note, we could accept data and URG from this segment.
221:         * There are no obstacles to make this.
222:         *
223:         * However, if we ignore data in ACKless segments sometimes,
224:         * we have no reasons to accept it sometimes.
225:         * Also, seems the code doing it in step6 of tcp_rcv_state_process
226:         * is not flawless. So, discard packet for sanity.
227:         * Uncomment this return to process the data.
228:         */
229:        return -1;
230:  #else
231:        goto discard;
232:  #endif
233:    }
234:    /* "fifth, if neither of the SYN or RST bits is set then
235:     * drop the segment and return."
236:     */
237:  
238:  discard_and_undo:
239:    tcp_clear_options(&tp->rx_opt);
240:    tp->rx_opt.mss_clamp = saved_clamp;
241:    goto discard;
242:  
243:  reset_and_undo:
244:    tcp_clear_options(&tp->rx_opt);
245:    tp->rx_opt.mss_clamp = saved_clamp;
246:    return 1;
247:  }
248:  


服务端收到ACK段

  • 由tcp_v4_do_rcv()->tcp_rcv_state_process()处理。服务端当前处于TCP_SYN_RECV状态,收到合法的ACK段后变为TCP_ESTABLISHED状态。
  • 代码关键路径:
      1:  /* 除了ESTABLISHED和TIME_WAIT状态外,其他状态下的TCP段处理都由本函数实现 */ 
      2:  int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
      3:                struct tcphdr *th, unsigned len)
      4:  {
      5:      struct tcp_sock *tp = tcp_sk(sk);
      6:      int queued = 0;
      7:  
      8:      tp->rx_opt.saw_tstamp = 0;
      9:  
     10:      switch (sk->sk_state) {
     11:      .....
     12:      /* SYN_RECV状态的处理 */
     13:      if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&/* 解析TCP选项,如果首部中存在时间戳选项 */
     14:          tcp_paws_discard(tp, skb)) {/* PAWS检测失败,则丢弃报文 */
     15:          if (!th->rst) {/* 如果不是RST段 */
      16:              /* 发送重复ACK(dupack)给对端,说明接收到的TCP段已经处理过 */
     17:              NET_INC_STATS_BH(LINUX_MIB_PAWSESTABREJECTED);
     18:              tcp_send_dupack(sk, skb);
     19:              goto discard;
     20:          }
     21:          /* Reset is accepted even if it did not pass PAWS. */
     22:      }
     23:  
     24:      /* step 1: check sequence number */
     25:      if (!tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq)) {/* TCP段序号无效 */
     26:          if (!th->rst)/* 如果TCP段无RST标志,则发送DACK给对方 */
     27:              tcp_send_dupack(sk, skb);
     28:          goto discard;
     29:      }
     30:  
     31:      /* step 2: check RST bit */
     32:      if(th->rst) {/* 如果有RST标志,则重置连接 */
     33:          tcp_reset(sk);
     34:          goto discard;
     35:      }
     36:  
     37:      /* 如果有必要,则更新时间戳 */
     38:      tcp_replace_ts_recent(tp, TCP_SKB_CB(skb)->seq);
     39:  
     40:      /* step 3: check security and precedence [ignored] */
     41:  
     42:      /*  step 4:
     43:       *
     44:       *  Check for a SYN in window.
     45:       */
     46:      if (th->syn && !before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {/* 如果有SYN标志并且序号在接收窗口内 */
     47:          NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN);
     48:          tcp_reset(sk);/* 复位连接 */
     49:          return 1;
     50:      }
     51:  
     52:      /* step 5: check the ACK field */
     53:      if (th->ack) {/* 如果有ACK标志 */
     54:          /* 检查ACK是否为正常的第三次握手 */
     55:          int acceptable = tcp_ack(sk, skb, FLAG_SLOWPATH);
     56:  
     57:          switch(sk->sk_state) {
     58:          case TCP_SYN_RECV:
     59:              if (acceptable) {
     60:                  tp->copied_seq = tp->rcv_nxt;
     61:                  mb();
     62:                  /* 正常的第三次握手,设置连接状态为TCP_ESTABLISHED */
     63:                  tcp_set_state(sk, TCP_ESTABLISHED);
     64:                  sk->sk_state_change(sk);
     65:  
     66:                  /* Note, that this wakeup is only for marginal
     67:                   * crossed SYN case. Passively open sockets
     68:                   * are not waked up, because sk->sk_sleep ==
     69:                   * NULL and sk->sk_socket == NULL.
     70:                   */
     71:                  if (sk->sk_socket) {/* 状态已经正常,唤醒那些等待的线程 */
     72:                      sk_wake_async(sk,0,POLL_OUT);
     73:                  }
     74:  
     75:                  /* 初始化传输控制块,如果存在时间戳选项,同时平滑RTT为0,则需计算重传超时时间 */
     76:                  tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
     77:                  tp->snd_wnd = ntohs(th->window) <<
     78:                            tp->rx_opt.snd_wscale;
     79:                  tcp_init_wl(tp, TCP_SKB_CB(skb)->ack_seq,
     80:                          TCP_SKB_CB(skb)->seq);
     81:  
     82:                  /* tcp_ack considers this ACK as duplicate
     83:                   * and does not calculate rtt.
     84:                   * Fix it at least with timestamps.
     85:                   */
     86:                  if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
     87:                      !tp->srtt)
     88:                      tcp_ack_saw_tstamp(tp, 0);
     89:  
     90:                  if (tp->rx_opt.tstamp_ok)
     91:                      tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
     92:  
     93:                  /* Make sure socket is routed, for
     94:                   * correct metrics.
     95:                   */
     96:                  /* 建立路由,初始化拥塞控制模块 */
     97:                  tp->af_specific->rebuild_header(sk);
     98:  
     99:                  tcp_init_metrics(sk);
    100:  
    101:                  /* Prevent spurious tcp_cwnd_restart() on
    102:                   * first data packet.
    103:                   */
    104:                  tp->lsndtime = tcp_time_stamp;/* 更新最近一次发送数据包的时间 */
    105:  
    106:                  tcp_initialize_rcv_mss(sk);
    107:                  tcp_init_buffer_space(sk);
    108:                  tcp_fast_path_on(tp);/* 计算有关TCP首部预测的标志 */
    109:              } else {
    110:                  return 1;
    111:              }
    112:              break;
    113:          .....
    114:          }
    115:      } else
    116:          goto discard;
    117:      .....
    118:  
    119:      /* step 6: check the URG bit */
    120:      tcp_urg(sk, skb, th);/* 检测带外数据位 */
    121:  
    122:      /* tcp_data could move socket to TIME-WAIT */
    123:      if (sk->sk_state != TCP_CLOSE) {/* 如果tcp_data需要发送数据和ACK则在这里处理 */
    124:          tcp_data_snd_check(sk);
    125:          tcp_ack_snd_check(sk);
    126:      }
    127:  
    128:      if (!queued) { /* 如果段没有加入队列,或者前面的流程需要释放报文,则释放它 */
    129:  discard:
    130:          __kfree_skb(skb);
    131:      }
    132:      return 0;
    133:  }
    


数据传输

  • 由tcp_v4_do_rcv()->tcp_rcv_established().当前服务端处于TCP_ESTABLISHED状态。
  • 代码关键路径: