#TCP你学得会# 之 TCP端口选择那些事儿-白红宇

#TCP你学得会# 之 TCP端口选择那些事儿

阅读量：5967 次

发布时间：2019-06-19

本文共 7030 字，大约阅读时间需要 23 分钟。

本文所讨论的内容基于Linux Kernel 3.13.0。

Linux内核中TCP连接的源端口选择是由inet_hash_connect()函数完成的：

/* * Bind a port for a connect operation and hash it. */int inet_hash_connect(struct inet_timewait_death_row *death_row,              struct sock *sk){    return __inet_hash_connect(death_row, sk, inet_sk_port_offset(sk),            __inet_check_established, __inet_hash_nolisten);}EXPORT_SYMBOL_GPL(inet_hash_connect);

具体工作由__inet_hash_connect()函数完成：

int __inet_hash_connect(struct inet_timewait_death_row *death_row,        struct sock *sk, u32 port_offset,        int (*check_established)(struct inet_timewait_death_row *,            struct sock *, __u16, struct inet_timewait_sock **),        int (*hash)(struct sock *sk, struct inet_timewait_sock *twp)){    struct inet_hashinfo *hinfo = death_row->hashinfo;    const unsigned short snum = inet_sk(sk)->inet_num;    struct inet_bind_hashbucket *head;    struct inet_bind_bucket *tb;    int ret;    struct net *net = sock_net(sk);    int twrefcnt = 1;    if (!snum) {        int i, remaining, low, high, port;        static u32 hint;        u32 offset = hint + port_offset;        struct inet_timewait_sock *tw = NULL;        inet_get_local_port_range(net, &low, &high);        remaining = (high - low) + 1;        local_bh_disable();        for (i = 1; i <= remaining; i++) {            port = low + (i + offset) % remaining;            if (inet_is_reserved_local_port(port))                continue;            ...        }        local_bh_enable();        return -EADDRNOTAVAIL;ok:        hint += i;        ...}

在__inet_hash_connect()函数中与端口选择相关的参数和变量有下面这几个：

- port_offset：传入的参数，由inet_sk_port_offset()函数计算得到，实际相当于一个随机因子；
- snum：源端口，如果没有进行过bind操作的话这个值为0；
- low：本地可选端口范围的最小值；
- high：本地可选端口范围的最大值；
- remaining：本地可选端口数，即 (high - low) + 1；
  （low 和 high 的具体取值可以通过/proc/sys/net/ipv4/ip_local_port_range查看）
- hint：静态变量，用于全局控制；
- offset：函数上次执行完毕后的hint值加上本次传入的随机因子port_offset，这个值基本确定了本次端口号的取值；
- port：待确定的端口号。

当我们确定了offset值之后，剩下的内容就比较好理解了，主要工作集中在一个for循环中，从offset之后的值开始逐个尝试，一般一次就能成功。如果该端口被预留，或者已经被占用且不可reuse，那么就尝试下一个。

下面就看看offset值是如何获得的：

u32 offset = hint + port_offset;

可以看到，本次端口选择与两个因素有关：

一个是静态变量hint，__inet_hash_connect()函数每成功选定一次端口，就会执行 hint += i，即hint值增加本次循环尝试的次数i；由于一般第一次尝试就能成功（i为1），所以hint值通常每次加一，用于全局控制；

另一个是port_offset，这是一个输入参数，实际是inet_sk_port_offset()函数的返回值；

static inline u32 inet_sk_port_offset(const struct sock *sk){    const struct inet_sock *inet = inet_sk(sk);    return secure_ipv4_port_ephemeral(inet->inet_rcv_saddr,                      inet->inet_daddr,                      inet->inet_dport);}u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport){    u32 hash[MD5_DIGEST_WORDS];    net_secret_init();    hash[0] = (__force u32)saddr;    hash[1] = (__force u32)daddr;    hash[2] = (__force u32)dport ^ net_secret[14];    hash[3] = net_secret[15];    md5_transform(hash, net_secret);    return hash[0];}EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);

可以看到，inet_sk_port_offset()的返回值是由源地址、目的地址、目的端口和随机因子（net_secret）经过MD5计算得到的。

下面我们就实际测试一下，这里需要使用SystemTap工具协助将__inet_hash_connect()函数中相关变量的值打印出来：

begin to probe
/*telnet 192.168.28.1 3次*/
snum: 0,  port_offset: 3837244845
i: 1,  hint: 13, port: 45898
snum: 0,  port_offset: 3837244845
i: 1,  hint: 14, port: 45899
snum: 0,  port_offset: 3837244845
i: 1,  hint: 15, port: 45900
/*telnet 192.168.28.11 2次*/
snum: 0,  port_offset: 918745431
i: 1,  hint: 16, port: 48163
snum: 0,  port_offset: 918745431
i: 1,  hint: 17, port: 48164
/*telnet 192.168.28.1 2次*/
snum: 0,  port_offset: 3837244845
i: 1,  hint: 18, port: 45903
snum: 0,  port_offset: 3837244845
i: 1,  hint: 19, port: 45904
/*telnet 192.168.28.111 2次*/
snum: 0,  port_offset: 1738081703
i: 1,  hint: 20, port: 34546
snum: 0,  port_offset: 1738081703
i: 1,  hint: 21, port: 34547
^Cend to probe

测试结果与前面的分析一致：hint值在每次调用后连续递增。对于不同的目的地址，计算得到的port_offset值不同，因此不同连接选择的源端口有一定的随机性；对于目的地址相同的连接，port_offset值相同，但由于有hint值的参与，前后两次选择的源端口也未必连续，需要看中间是否还有其他连接调用过__inet_hash_connect()函数。

下面我们就对net_secret_init()函数比较好奇了，随机因子到底是如何生成的呢：

#if IS_ENABLED(CONFIG_IPV6) || IS_ENABLED(CONFIG_INET)#define NET_SECRET_SIZE (MD5_MESSAGE_BYTES / 4)static u32 net_secret[NET_SECRET_SIZE] ____cacheline_aligned;static __always_inline void net_secret_init(void){    net_get_random_once(net_secret, sizeof(net_secret));}#endif        #define net_get_random_once(buf, nbytes)                \    ({                                \        bool ___ret = false;                    \        static bool ___done = false;                \        static struct static_key ___once_key =            \            STATIC_KEY_INIT_TRUE;                \        if (static_key_true(&___once_key))            \            ___ret = __net_get_random_once(buf,        \                               nbytes,        \                               &___done,    \                               &___once_key);    \        ___ret;                            \    })       bool __net_get_random_once(void *buf, int nbytes, bool *done,               struct static_key *once_key){    static DEFINE_SPINLOCK(lock);    unsigned long flags;    spin_lock_irqsave(&lock, flags);    if (*done) {        spin_unlock_irqrestore(&lock, flags);        return false;    }    get_random_bytes(buf, nbytes);    *done = true;    spin_unlock_irqrestore(&lock, flags);    __net_random_once_disable_jump(once_key);    return true;}EXPORT_SYMBOL(__net_get_random_once);

net_get_random_once是一个宏定义，其中___done 和 ___once_key都是静态变量。从函数实现可以看出，只有在第一次执行的时候（___done为false），才会调用get_random_bytes()获取随机数，随后就将___done置为true。所以在上述测试中，对于相同的源地址、目的地址和目的端口，获取的port_offset总是相同的，当然如果系统重启了那么肯定会有变化。

延伸：

从源码中可以看到，TCP的序列号也是通过类似的方法选择的：

__u32 secure_tcp_sequence_number(__be32 saddr, __be32 daddr,                 __be16 sport, __be16 dport){    u32 hash[MD5_DIGEST_WORDS];    net_secret_init();    hash[0] = (__force u32)saddr;    hash[1] = (__force u32)daddr;    hash[2] = ((__force u16)sport << 16) + (__force u16)dport;    hash[3] = net_secret[15];    md5_transform(hash, net_secret);    return seq_scale(hash[0]);}#ifdef CONFIG_INETstatic u32 seq_scale(u32 seq){    /*     *    As close as possible to RFC 793, which     *    suggests using a 250 kHz clock.     *    Further reading shows this assumes 2 Mb/s networks.     *    For 10 Mb/s Ethernet, a 1 MHz clock is appropriate.     *    For 10 Gb/s Ethernet, a 1 GHz clock should be ok, but     *    we also need to limit the resolution so that the u32 seq     *    overlaps less than one time per MSL (2 minutes).     *    Choosing a clock of 64 ns period is OK. (period of 274 s)     */    return seq + (ktime_to_ns(ktime_get_real()) >> 6);}#endif

由于secure_tcp_sequence_number()函数在返回时调用了seq_scale()，把时间因子也加了进来，所以即使是四元组相同的连接，前后两次选择的序列号也不会重复。

到这里，TCP连接源端口选择的内容就分析完了，下面附上测试中使用的SystemTap脚本：

#!/usr/bin/stap
probe begin
{
    log("begin to probe")
}
probe kernel.statement("__inet_hash_connect@inet_hashtables.c:491")
{
    printf("snum: %u,  port_offset: %u\n",$snum, $port_offset);
}
probe kernel.statement("__inet_hash_connect@inet_hashtables.c:503")
{
    printf("i: %u,  hint: %u, port: %u\n",$i, $hint, $port);
}
probe end
{
    log("end to probe")
}

清明小长假就要结束了，你的假期计划都完成了吗？：）

转载于:https://my.oschina.net/u/2310891/blog/652323

你可能感兴趣的文章