首页 > 代码库 > ipsec outbound

ipsec outbound

对于输出的报文,主要做如下操作:

1. 调用ip_route_output_flow路由查找

2. 在ip_route_output_flow里面会调用xfrm_lookup进行ipsec policy查找

3. 如果policy的action为XFRM_POLICY_ALLOW,则先调用xfrm_find_bundle查找已有的bundle,找不到时再调用xfrm_bundle_create创建dst链表

4. 应用链表中的dst->output函数进行ipsec加密或发包

例如需要分别对报文进行IP COMP, ESP, AH变换的链表如图所示

image

image

下面主要分析bundle的创建过程,skb->dst在__xfrm4_bundle_create前后变化如下:

 

传输模式:

image

隧道模式:

图片中黄色的为原始报文的路由缓存,蓝色的为隧道外层的路由缓存

image

 

/* Allocate chain of dst_entry‘s, attach known xfrm‘s, calculate
 * all the metrics... Shortly, bundle a bundle.
 */

static int
__xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
              struct flowi *fl, struct dst_entry **dst_p)
{
    struct dst_entry *dst, *dst_prev;
    struct rtable *rt0 = (struct rtable*)(*dst_p);
    struct rtable *rt = rt0;
    struct flowi fl_tunnel = {
        .nl_u = {
            .ip4_u = {
                .saddr = fl->fl4_src,
                .daddr = fl->fl4_dst,
                .tos = fl->fl4_tos
            }
        }
    };
    int i;
    int err;
    int header_len = 0;
    int trailer_len = 0;

    dst = dst_prev = NULL;
    dst_hold(&rt->u.dst);

    /* 每个xfrm生成一个dst_entry,child指向下一次变换或最外层报文的实际路由
     * 一个报文最多只能有一次隧道变换,因此rt最多只有两个,一个是隧道内报文的路由,一个是隧道报文的路由
     */
    for (i = 0; i < nx; i++) {
        struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
        struct xfrm_dst *xdst;

        if (unlikely(dst1 == NULL)) {
            err = -ENOBUFS;
            dst_release(&rt->u.dst);
            goto error;
        }

        if (!dst)
            dst = dst1;
        else {
            dst_prev->child = dst1;
            dst1->flags |= DST_NOHASH;
            dst_clone(dst1);
        }

        xdst = (struct xfrm_dst *)dst1;
        xdst->route = &rt->u.dst;
        xdst->genid = xfrm[i]->genid;

        dst1->next = dst_prev;
        dst_prev = dst1;

        header_len += xfrm[i]->props.header_len;
        trailer_len += xfrm[i]->props.trailer_len;

        /* 隧道模式则查找外层ip包的路由 */
        if (xfrm[i]->props.mode == XFRM_MODE_TUNNEL) {
            unsigned short encap_family = xfrm[i]->props.family;
            switch(encap_family) {
            case AF_INET:
                fl_tunnel.fl4_dst = xfrm[i]->id.daddr.a4;
                fl_tunnel.fl4_src = xfrm[i]->props.saddr.a4;
                break;
#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
            case AF_INET6:
                ipv6_addr_copy(&fl_tunnel.fl6_dst, (struct in6_addr*)&xfrm[i]->id.daddr.a6);
                ipv6_addr_copy(&fl_tunnel.fl6_src, (struct in6_addr*)&xfrm[i]->props.saddr.a6);
                break;
#endif
            default:
                BUG_ON(1);
            }
            err = xfrm_dst_lookup((struct xfrm_dst **)&rt,
                          &fl_tunnel, encap_family);
            if (err)
                goto error;
        } else
            dst_hold(&rt->u.dst);
    }

    /* 最后一个child指向最外层报文的路由 */
    dst_prev->child = &rt->u.dst;
    dst->path = &rt->u.dst;

    *dst_p = dst;
    dst = dst_prev;

    dst_prev = *dst_p;
    i = 0;
    for (; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
        struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
        struct xfrm_state_afinfo *afinfo;
        x->u.rt.fl = *fl;

        dst_prev->xfrm = xfrm[i++];
        dst_prev->dev = rt->u.dst.dev;
        if (rt->u.dst.dev)
            dev_hold(rt->u.dst.dev);
        dst_prev->obsolete    = -1;
        dst_prev->flags           |= DST_HOST;
        dst_prev->lastuse    = jiffies;
        dst_prev->header_len    = header_len;
        dst_prev->nfheader_len    = 0;
        dst_prev->trailer_len    = trailer_len;
        memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics));

        /* Copy neighbout for reachability confirmation */
        dst_prev->neighbour    = neigh_clone(rt->u.dst.neighbour);
        dst_prev->input        = rt->u.dst.input;
        /* XXX: When IPv6 module can be unloaded, we should manage reference
         * to xfrm6_output in afinfo->output. Miyazawa
         * */
        afinfo = xfrm_state_get_afinfo(dst_prev->xfrm->props.family);
        if (!afinfo) {
            dst = *dst_p;
            err = -EAFNOSUPPORT;
            goto error;
        }
        /* xfrm的dst_entry->output函数初始化为xfrm4_output */
        dst_prev->output = afinfo->output;
        xfrm_state_put_afinfo(afinfo);
        if (dst_prev->xfrm->props.family == AF_INET && rt->peer)
            atomic_inc(&rt->peer->refcnt);
        x->u.rt.peer = rt->peer;
        /* Sheit... I remember I did this right. Apparently,
         * it was magically lost, so this code needs audit */
        x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
        x->u.rt.rt_type = rt->rt_type;
        x->u.rt.rt_src = http://www.mamicode.com/rt0->rt_src;
        x->u.rt.rt_dst = rt0->rt_dst;
        x->u.rt.rt_gateway = rt->rt_gateway;
        x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
        x->u.rt.idev = rt0->idev;
        in_dev_hold(rt0->idev);
        header_len -= x->u.dst.xfrm->props.header_len;
        trailer_len -= x->u.dst.xfrm->props.trailer_len;
    }

    xfrm_init_pmtu(dst);
    return 0;

error:
    if (dst)
        dst_free(dst);
    return err;
}

最后,xfrm4_output_finish2沿着dst_entry->child逐个前进,每次调用xfrm4_output_one执行一次xfrm变换,直到到达最后一个真正的路由dst_entry,这时调用dst_output进行路由输出。