netfilter之filter

作者: 分享放大价值 | 来源:发表于2020-06-15 21:41 被阅读0次

netfilter之filter
iptables详解
netfilter之nat
iptables
Iptables 详解
初识jQuery之jQuery方法（二）
54 iptables
iptables系列二
Netfilter
NetFilter

filter 模块初始化

#filter 是以kernel module形式加载的，初始化函数为iptable_filter_init
/* Register iptable_filter_init as the initialisation function of this module. */
module_init(iptable_filter_init);

/*
 * Netfilter hook points for the inet protocol families.
 * The numeric value of each hook is also its bit position in a table's
 * valid_hooks mask and its index into per-hook arrays.
 */
enum nf_inet_hooks {
    NF_INET_PRE_ROUTING  = 0,
    NF_INET_LOCAL_IN     = 1,
    NF_INET_FORWARD      = 2,
    NF_INET_LOCAL_OUT    = 3,
    NF_INET_POST_ROUTING = 4,
    NF_INET_NUMHOOKS     = 5    /* number of hook points, not a hook itself */
};
#filter作用于IN, FORWARD和OUTPUT这三个hook点
/* Bitmask of the hook points the filter table attaches to: LOCAL_IN, FORWARD and LOCAL_OUT. */
#define FILTER_VALID_HOOKS \
    ((1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT))

/* Template describing the IPv4 "filter" table; used when building the initial ruleset and hooks. */
static const struct xt_table packet_filter = {
    .name        = "filter",
    .af          = NFPROTO_IPV4,
    .priority    = NF_IP_PRI_FILTER,    /* NF_IP_PRI_FILTER = 0 */
    .valid_hooks = FILTER_VALID_HOOKS,  /* binary 1110: LOCAL_IN, FORWARD, LOCAL_OUT */
    .me          = THIS_MODULE,
};

/* Per-network-namespace callbacks: set up / tear down the filter table of a namespace. */
static struct pernet_operations iptable_filter_net_ops = {
    .exit = iptable_filter_net_exit,
    .init = iptable_filter_net_init,
};
/*
 * Module initialisation.
 *
 * 1. Register the pernet subsystem. Per the original note, with multiple
 *    network namespaces ops->init(net) -- i.e. iptable_filter_net_init() --
 *    is invoked for every namespace, otherwise only for init_net.
 * 2. Link the filter hook function into nf_hooks[pf][hooknum] for each of
 *    the table's valid hooks.
 *
 * Returns the result of register_pernet_subsys() on success, or a negative
 * errno; the pernet registration is rolled back if the hooks cannot be linked.
 */
static int __init iptable_filter_init(void)
{
    int err = register_pernet_subsys(&iptable_filter_net_ops);

    if (err < 0)
        return err;

    /* Register hooks */
    filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
    if (!IS_ERR(filter_ops))
        return err;

    /* Hook registration failed: undo the pernet registration. */
    err = PTR_ERR(filter_ops);
    unregister_pernet_subsys(&iptable_filter_net_ops);
    return err;
}

1.注册filter表及生成默认规则
iptable_filter_net_init参数为net，所以会对所有network namespace执行此初始化函数

/*
 * Per-namespace initialisation: build the default filter ruleset and
 * register the table for @net.
 *
 * Returns 0 on success, -ENOMEM if the initial table cannot be allocated,
 * or the error encoded in the pointer returned by ipt_register_table().
 */
static int __net_init iptable_filter_net_init(struct net *net)
{
    struct ipt_standard *policies;
    struct ipt_replace *initial;

    /*
     * One standard entry per valid hook, each with verdict NF_ACCEPT and
     * no extension match, followed by the terminating error entry.
     */
    initial = ipt_alloc_initial_table(&packet_filter);
    if (!initial)
        return -ENOMEM;

    /*
     * Entry 1 is the FORWARD hook. "forward" is a module parameter
     * (true by default per the original note); loading the module with
     * forward=false makes the FORWARD policy DROP.
     */
    policies = (struct ipt_standard *)initial->entries;
    policies[1].target.verdict = forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;

    /*
     * Convert the blob into an xt_table_info, store it in the table's
     * private field and link the table into net->xt.tables[af].
     */
    net->ipv4.iptable_filter = ipt_register_table(net, &packet_filter, initial);
    kfree(initial);

    return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter);
}

1.1
通过宏 xt_alloc_initial_table 生成 ipt_replace

/*
 * Allocate the initial ipt_replace blob for @info by expanding
 * xt_alloc_initial_table() for the ipt/IPT type family.
 * The macro refers to the parameter by the name "info" and contains its
 * own "return NULL" for the allocation-failure case.
 */
void *ipt_alloc_initial_table(const struct xt_table *info)
{
    return xt_alloc_initial_table(ipt, IPT);
}
/*
 * xt_alloc_initial_table(type, typ2) - build a default <type>_replace blob.
 *
 * Must be expanded inside a function that has a variable named "info"
 * (the table template); on allocation failure the macro executes
 * "return NULL" in that enclosing function.
 *
 * Layout of the zeroed allocation: the <type>_replace header, one
 * <type>_standard entry per bit set in info->valid_hooks, then one
 * <type>_error terminator placed at an offset aligned for that struct.
 * For every valid hook, hook_entry[] and underflow[] both receive the byte
 * offset of that hook's single entry, which is initialised with verdict
 * NF_ACCEPT. The expression evaluates to the allocated blob.
 */
#define xt_alloc_initial_table(type, typ2) ({ \
    unsigned int hook_mask = info->valid_hooks; \
    unsigned int nhooks = hweight32(hook_mask); \
    unsigned int bytes = 0, hooknum = 0, i = 0; \
    struct { \
        struct type##_replace repl; \
        struct type##_standard entries[]; \
    } *tbl; \
    struct type##_error *term; \
    size_t term_offset = (offsetof(typeof(*tbl), entries[nhooks]) + \
        __alignof__(*term) - 1) & ~(__alignof__(*term) - 1); \
    tbl = kzalloc(term_offset + sizeof(*term), GFP_KERNEL); \
    if (tbl == NULL) \
        return NULL; \
    term = (struct type##_error *)&(((char *)tbl)[term_offset]); \
    strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); \
    *term = (struct type##_error)typ2##_ERROR_INIT;  \
    tbl->repl.valid_hooks = hook_mask; \
    tbl->repl.num_entries = nhooks + 1; \
    tbl->repl.size = nhooks * sizeof(struct type##_standard) + \
             sizeof(struct type##_error); \
    for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { \
        if (!(hook_mask & 1)) \
            continue; \
        tbl->repl.hook_entry[hooknum] = bytes; \
        tbl->repl.underflow[hooknum]  = bytes; \
        tbl->entries[i++] = (struct type##_standard) \
            typ2##_STANDARD_INIT(NF_ACCEPT); \
        bytes += sizeof(struct type##_standard); \
    } \
    tbl; \
})
/*
 * Initialiser for a struct ipt_entry with no matches: the target starts
 * right after the ipt_entry header and the next entry is __size bytes on.
 */
#define IPT_ENTRY_INIT(__size)                             \
{                                          \
    .target_offset  = sizeof(struct ipt_entry),                \
    .next_offset    = (__size),                        \
}
/*
 * Initialiser for the user-visible part of a target: records the target
 * name and its size rounded up with XT_ALIGN().
 */
#define XT_TARGET_INIT(__name, __size)                         \
{                                          \
    .target.u.user = {                             \
        .target_size    = XT_ALIGN(__size),                \
        .name       = __name,                      \
    },                                     \
}
/*
 * Initialiser for a struct ipt_standard (entry + standard target).
 * The verdict is stored encoded as -(__verdict) - 1, so built-in verdicts
 * are negative values in the rule blob.
 */
#define IPT_STANDARD_INIT(__verdict)                           \
{                                          \
    .entry      = IPT_ENTRY_INIT(sizeof(struct ipt_standard)),         \
    .target     = XT_TARGET_INIT(XT_STANDARD_TARGET,               \
                     sizeof(struct xt_standard_target)),   \
    .target.verdict = -(__verdict) - 1,                    \
}

ipt_replace的结构: ipt_replace本身+默认的四个entry

image.png

1.2

/*
 * Build the kernel-side table info from the ipt_replace blob @repl and
 * register a copy of @table in namespace @net.
 *
 * Returns the newly registered table, or an ERR_PTR(): -ENOMEM when the
 * table info cannot be allocated, otherwise the error reported by
 * translate_table() or xt_register_table(). The table info is freed on
 * every failure path after its allocation.
 */
struct xt_table *ipt_register_table(struct net *net,
                    const struct xt_table *table,
                    const struct ipt_replace *repl)
{
    int ret;
    struct xt_table_info *newinfo;
    /* Zeroed placeholder handed to xt_register_table() as the initial private info. */
    struct xt_table_info bootstrap = {0};
    void *loc_cpu_entry;
    struct xt_table *new_table;
    /*
     * Allocate the xt_table_info header (XT_TABLE_INFO_SZ bytes) plus a
     * rule area of repl->size bytes for each possible CPU.
     * #define XT_TABLE_INFO_SZ (offsetof(struct xt_table_info, entries) + nr_cpu_ids * sizeof(char *))
     */
    newinfo = xt_alloc_table_info(repl->size);
    if (!newinfo) {
        ret = -ENOMEM;
        goto out;
    }

    /* choose the copy on our node/cpu, but dont care about preemption */
    loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
    memcpy(loc_cpu_entry, repl->entries, repl->size);

    /*
     * Translate the contents of repl into newinfo. During translation the
     * entry sizes/offsets are validated and the check functions provided
     * by the matches and targets are invoked.
     */
    ret = translate_table(net, newinfo, loc_cpu_entry, repl);
    if (ret != 0)
        goto out_free;
    /*
     * Create a new xt_table as a copy of the template, store newinfo in
     * its private field and add the new table to the list
     * net->xt.tables[table->af].
     */
    new_table = xt_register_table(net, table, &bootstrap, newinfo);
    if (IS_ERR(new_table)) {
        ret = PTR_ERR(new_table);
        goto out_free;
    }

    return new_table;

out_free:
    xt_free_table_info(newinfo);
out:
    return ERR_PTR(ret);
}

1.2.1

/*
 * Allocate an xt_table_info together with one rule area of @size bytes
 * for every possible CPU, each on that CPU's memory node.
 *
 * Rule areas of at most one page come from kmalloc_node(), larger ones
 * from vmalloc_node(). Returns NULL if the request is implausibly large
 * or if any allocation fails (everything allocated so far is released
 * through xt_free_table_info()).
 */
struct xt_table_info *xt_alloc_table_info(unsigned int size)
{
    struct xt_table_info *info;
    int cpu;

    /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
    if ((SMP_ALIGN(size) >> PAGE_SHIFT) + 2 > totalram_pages)
        return NULL;

    info = kzalloc(XT_TABLE_INFO_SZ, GFP_KERNEL);
    if (info == NULL)
        return NULL;

    info->size = size;

    for_each_possible_cpu(cpu) {
        void *area;

        if (size > PAGE_SIZE)
            area = vmalloc_node(size, cpu_to_node(cpu));
        else
            area = kmalloc_node(size, GFP_KERNEL, cpu_to_node(cpu));

        info->entries[cpu] = area;
        if (!area) {
            xt_free_table_info(info);
            return NULL;
        }
    }

    return info;
}

1.2.2

/*
 * Checks and translates the user-supplied table segment (held in
 * newinfo).
 *
 * @entry0 is the rule blob to validate and @repl is the replace request it
 * came from. Returns 0 on success; -ENOMEM if the offsets array cannot be
 * allocated, -EINVAL on an entry-count mismatch or an unassigned
 * hook/underflow, -ELOOP if mark_source_chains() rejects the ruleset, or
 * the error returned by the per-entry checks.
 */
static int
translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
                const struct ipt_replace *repl)
{
    struct ipt_entry *iter;
    unsigned int *offsets;
    unsigned int i;
    int ret = 0;

    newinfo->size = repl->size;
    newinfo->number = repl->num_entries;

    /* Init all hooks to impossible value. */
    for (i = 0; i < NF_INET_NUMHOOKS; i++) {
        newinfo->hook_entry[i] = 0xFFFFFFFF;
        newinfo->underflow[i] = 0xFFFFFFFF;
    }

    duprintf("translate_table: size %u\n", newinfo->size);
    offsets = xt_alloc_entry_offsets(newinfo->number);
    if (!offsets)
        return -ENOMEM;
    i = 0;
    /* Walk through entries, checking offsets. */
    xt_entry_foreach(iter, entry0, newinfo->size) {
        ret = check_entry_size_and_hooks(iter, newinfo, entry0,
                         entry0 + repl->size,
                         repl->hook_entry,
                         repl->underflow,
                         repl->valid_hooks);
        if (ret != 0)
            goto out_free;
        if (i < repl->num_entries)
            offsets[i] = (void *)iter - entry0;
        ++i;
        /*
         * Count the entries whose target is the ERROR target. Per the
         * original note, every user-defined chain contributes one such
         * entry, so the count tracks the number of user-defined chains;
         * it is accumulated in newinfo->stacksize.
         */
        if (strcmp(ipt_get_target(iter)->u.user.name,
            XT_ERROR_TARGET) == 0)
            ++newinfo->stacksize;
    }

    ret = -EINVAL;
    if (i != repl->num_entries) {
        duprintf("translate_table: %u not %u entries\n",
             i, repl->num_entries);
        goto out_free;
    }

    /* Check hooks all assigned */
    for (i = 0; i < NF_INET_NUMHOOKS; i++) {
        /* Only hooks which are valid */
        if (!(repl->valid_hooks & (1 << i)))
            continue;
        if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
            duprintf("Invalid hook entry %u %u\n",
                 i, repl->hook_entry[i]);
            goto out_free;
        }
        if (newinfo->underflow[i] == 0xFFFFFFFF) {
            duprintf("Invalid underflow %u %u\n",
                 i, repl->underflow[i]);
            goto out_free;
        }
    }

    if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
        ret = -ELOOP;
        goto out_free;
    }
    /* offsets is not needed past this point; later exits must not free it again. */
    kvfree(offsets);

    /*
     * Walk every entry (each holds zero or more matches and one target).
     * Per the original note, find_check_entry() looks up each match by
     * m->u.user.name / m->u.user.revision in xt[af].match (a failed lookup
     * means the user supplied an invalid match), runs the match's check
     * function and stores it in m->u.kernel.match; the target is then
     * looked up by t->u.user.name / t->u.user.revision in xt[af].target
     * and stored in t->u.kernel.target so packet matching can use it
     * directly.
     */
    /* Finally, each sanity check must pass */
    i = 0;
    xt_entry_foreach(iter, entry0, newinfo->size) {
        ret = find_check_entry(iter, net, repl->name, repl->size);
        if (ret != 0)
            break;
        ++i;
    }

    if (ret != 0) {
        /* Undo only the first i entries, i.e. those that passed the check. */
        xt_entry_foreach(iter, entry0, newinfo->size) {
            if (i-- == 0)
                break;
            cleanup_entry(iter, net);
        }
        return ret;
    }

    /* And one copy for every other CPU */
    /* Replicate the validated entries into every other CPU's rule area. */
    for_each_possible_cpu(i) {
        if (newinfo->entries[i] && newinfo->entries[i] != entry0)
            memcpy(newinfo->entries[i], entry0, newinfo->size);
    }

    return ret;
 out_free:
    kvfree(offsets);
    return ret;
}

1.2.3


/*
 * Register a private copy of @input_table in namespace @net.
 *
 * @bootstrap is installed as the copy's initial private info and is then
 * replaced by @newinfo through xt_replace_table(). Returns the new table,
 * or an ERR_PTR(): -ENOMEM if the copy cannot be allocated, -EEXIST if a
 * table with the same name is already on the list, or the error that
 * xt_replace_table() stored in ret.
 */
struct xt_table *xt_register_table(struct net *net,
                   const struct xt_table *input_table,
                   struct xt_table_info *bootstrap,
                   struct xt_table_info *newinfo)
{
    int ret;
    struct xt_table_info *private;
    struct xt_table *t, *table;

    /* Don't add one object to multiple lists. */
    table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
    if (!table) {
        ret = -ENOMEM;
        goto out;
    }

    /* The per-family mutex covers the name lookup and the list insertion. */
    mutex_lock(&xt[table->af].mutex);
    /* Don't autoload: we'd eat our tail... */
    list_for_each_entry(t, &net->xt.tables[table->af], list) {
        if (strcmp(t->name, table->name) == 0) {
            ret = -EEXIST;
            goto unlock;
        }
    }

    /* Simplifies replace_table code. */
    table->private = bootstrap;

    if (!xt_replace_table(table, 0, newinfo, &ret))
        goto unlock;

    /* table->private is re-read here, after xt_replace_table() has run. */
    private = table->private;
    pr_debug("table->private->number = %u\n", private->number);

    /* save number of initial entries */
    private->initial_entries = private->number;

    list_add(&table->list, &net->xt.tables[table->af]);
    mutex_unlock(&xt[table->af].mutex);
    return table;

unlock:
    mutex_unlock(&xt[table->af].mutex);
    kfree(table);
out:
    return ERR_PTR(ret);
}

最终会把 xt_table 存储到net.ipv4.iptable_filter 和 net.xt.tables[IPV4]的链表中

image.png

entries[cpuid]是此table所有规则的首地址。
在每个hook点，只需要查询此hook对应的规则即可，如何做到呢？
答案是通过hook_entry[NF_INET_NUMHOOKS]，它存放的是每个hook相对于首地址的offset。对于filter表来说，prerouting和postrouting不生效，所以offset为ffffffff无效值。

注册hook函数

/*
 * xt_hook_link - create and register one nf_hook_ops per valid hook of a table
 * @table: table template; valid_hooks, me, af and priority are read from it
 * @fn:    hook function installed at every valid hook
 *
 * Returns the array of registered hook ops (one element per bit set in
 * table->valid_hooks), or an ERR_PTR(): -ENOMEM if the array cannot be
 * allocated, otherwise the error returned by nf_register_hooks(). The
 * array is freed on registration failure.
 */
struct nf_hook_ops *xt_hook_link(const struct xt_table *table, nf_hookfn *fn)
{
    unsigned int hook_mask = table->valid_hooks;
    uint8_t i, num_hooks = hweight32(hook_mask);
    uint8_t hooknum;
    struct nf_hook_ops *ops;
    int ret;

    /*
     * kcalloc() rather than kmalloc(): the array is zeroed, so fields this
     * function does not assign (e.g. the list node) do not start out as
     * heap garbage, and the count * size multiplication is overflow-checked.
     */
    ops = kcalloc(num_hooks, sizeof(*ops), GFP_KERNEL);
    if (ops == NULL)
        return ERR_PTR(-ENOMEM);

    /* Walk the mask bit by bit; bit position == hook number. */
    for (i = 0, hooknum = 0; i < num_hooks && hook_mask != 0;
         hook_mask >>= 1, ++hooknum) {
        if (!(hook_mask & 1))
            continue;
        ops[i].hook     = fn;
        ops[i].owner    = table->me;
        ops[i].pf       = table->af;
        ops[i].hooknum  = hooknum;
        ops[i].priority = table->priority;
        ++i;
    }

    ret = nf_register_hooks(ops, num_hooks);
    if (ret < 0) {
        kfree(ops);
        return ERR_PTR(ret);
    }

    return ops;
}

/*
 * Register the first @n hook ops of the array @reg, one by one.
 *
 * Returns 0 when all were registered. If registering an element fails,
 * the elements registered before it are unregistered again and that
 * error is returned.
 */
int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
{
    unsigned int done;
    int rc = 0;

    for (done = 0; done < n; done++) {
        rc = nf_register_hook(&reg[done]);
        if (!rc)
            continue;

        /* Roll back whatever was registered before the failure. */
        if (done > 0)
            nf_unregister_hooks(reg, done);
        return rc;
    }

    return rc;
}

/*
 * Insert @reg into the hook list nf_hooks[reg->pf][reg->hooknum] under
 * nf_hook_mutex. Always returns 0.
 */
int nf_register_hook(struct nf_hook_ops *reg)
{
    struct nf_hook_ops *elem;

    mutex_lock(&nf_hook_mutex);
    list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
        if (reg->priority < elem->priority)
            break;
    }
        /*
         * Insert just before the first element whose priority value is
         * greater than reg's (or at the tail if there is none), so the list
         * stays sorted by ascending priority value.
         */
    list_add_rcu(&reg->list, elem->list.prev);
    mutex_unlock(&nf_hook_mutex);
#ifdef HAVE_JUMP_LABEL
    static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
    return 0;
}

将nf_hook_ops注册到nf_hooks[reg->pf][reg->hooknum]上，filter生效的三个hook点使用同一个hook函数iptable_filter_hook。

image.png

报文匹配

数据包匹配时，根据pf和hook点找到对应的nf_hooks链表头，循环执行此链表上注册的hook函数，对于filter来说，hook函数为iptable_filter_hook。
在hook函数iptable_filter_hook中，调用ipt_do_table遍历filter规则，其参数net->ipv4.iptable_filter为filter的xt_table,存储了filter表相关的规则。

static unsigned int
iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
            const struct net_device *in, const struct net_device *out,
            int (*okfn)(struct sk_buff *))
{
    const struct net *net;

    if (ops->hooknum == NF_INET_LOCAL_OUT &&
        (skb->len < sizeof(struct iphdr) ||
         ip_hdrlen(skb) < sizeof(struct iphdr)))
        /* root is playing with raw sockets. */
        return NF_ACCEPT;

    net = dev_net((in != NULL) ? in : out);
    return ipt_do_table(skb, ops->hooknum, in, out,
                net->ipv4.iptable_filter);
}

标准target：包含内建动作(定义如下)和跳转到自定义链两种情况，此时t->u.kernel.target->target为空
/* Verdict codes; NF_MAX_VERDICT below is defined as the largest of them. */
#define NF_DROP 0
#define NF_ACCEPT 1
#define NF_STOLEN 2
#define NF_QUEUE 3
#define NF_REPEAT 4
/* RETURN is encoded as -NF_MAX_VERDICT - 1, i.e. -5 with the values above. */
#define RETURN IPT_RETURN
#define IPT_RETURN (-NF_MAX_VERDICT - 1)
#define NF_MAX_VERDICT NF_REPEAT

iptables -A INPUT -i eth0 -p udp --dport 137:138 -j ACCEPT --verdict为负值
iptables -A INPUT -p tcp --dport 22 -j testssh --跳转到自定义链 --verdict为正值

扩展target，此时t->u.kernel.target->target不为空
iptables -t nat -A POSTROUTING -s 192.168.10.10 -o eth1 -j SNAT --to-source 111.196.221.212

/*
 * Run @skb through the rules of @table that belong to hook @hook.
 *
 * Starting at the hook's first entry, each entry is tested with the
 * standard IP match and then its extension matches. On a full match the
 * entry's counters are bumped and its target is applied: a standard
 * target yields a built-in verdict, a RETURN, or a jump to another offset
 * in the table; an extension target is called and may return XT_CONTINUE
 * to move on to the next entry.
 *
 * Returns one of the generic firewall policies, like NF_ACCEPT.
 */
unsigned int
ipt_do_table(struct sk_buff *skb,
         unsigned int hook,
         const struct net_device *in,
         const struct net_device *out,
         struct xt_table *table)
{
    static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
    const struct iphdr *ip;
    /* Initializing verdict to NF_DROP keeps gcc happy. */
    unsigned int verdict = NF_DROP;
    const char *indev, *outdev;
    const void *table_base;
    struct ipt_entry *e, **jumpstack;
    unsigned int *stackptr, origptr, cpu;
    const struct xt_table_info *private;
    struct xt_action_param acpar;
    unsigned int addend;

    /* Initialization */
    ip = ip_hdr(skb);
    indev = in ? in->name : nulldevname;
    outdev = out ? out->name : nulldevname;
    /* We handle fragments by dealing with the first fragment as
     * if it was a normal packet.  All other fragments are treated
     * normally, except that they will NEVER match rules that ask
     * things we don't know, ie. tcp syn flag or ports).  If the
     * rule is also a fragment-specific rule, non-fragments won't
     * match it. */
    acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
    acpar.thoff   = ip_hdrlen(skb);
    acpar.hotdrop = false;
    acpar.in      = in;
    acpar.out     = out;
    acpar.family  = NFPROTO_IPV4;
    acpar.hooknum = hook;

    IP_NF_ASSERT(table->valid_hooks & (1 << hook));
    local_bh_disable();
    addend = xt_write_recseq_begin();
    private = table->private;
    cpu        = smp_processor_id();
    /*
     * Ensure we load private-> members after we've fetched the base
     * pointer.
     */
    smp_read_barrier_depends();
    /* Base address of this CPU's copy of the table's rules. */
    table_base = private->entries[cpu];
    jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
    stackptr   = per_cpu_ptr(private->stackptr, cpu);
    origptr    = *stackptr;
    /* First entry for this hook: table base plus the hook's entry offset. */
    e = get_entry(table_base, private->hook_entry[hook]);

    pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
         table->name, hook, origptr,
         get_entry(table_base, private->underflow[hook]));

    do {
        const struct xt_entry_target *t;
        const struct xt_entry_match *ematch;

        IP_NF_ASSERT(e);
        /* Standard match: source/destination IP, in/out interface, layer-4 protocol, etc. */
        if (!ip_packet_match(ip, indev, outdev,
            &e->ip, acpar.fragoff)) {
 no_match:
            /* This entry did not match: move on to the next entry. */
            e = ipt_next_entry(e);
            continue;
        }
         /*
          * The standard match succeeded; now run every extension match of
          * the entry. E.g. in
          *   iptables -A INPUT -p tcp --dport 22 -j testssh
          * "--dport 22" is an extension match.
          */
        xt_ematch_foreach(ematch, e) {
       /* Per the original note: for tcp, kernel.match is tcp_mt. */
            acpar.match     = ematch->u.kernel.match; 
      /* The extension's rule data lives in data (tcp_mt reads it as struct xt_tcp). */
            acpar.matchinfo = ematch->data;
      /* An extension match that returns false fails the entry: go to no_match. */
            if (!acpar.match->match(skb, &acpar))
                goto no_match;
        }
     /*
      * Standard and extension matches all succeeded: update the entry's
      * counters and apply its target. ipt_get_target() yields the entry's
      * single target (per the original note it is simply
      * e + e->target_offset), so no iteration is needed as for matches.
      */
        ADD_COUNTER(e->counters, skb->len, 1);
        t = ipt_get_target(e);
        IP_NF_ASSERT(t->u.kernel.target);

#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
        /* The packet is traced: log it */
        if (unlikely(skb->nf_trace))
            trace_packet(skb, hook, in, out,
                     table->name, private, e);
#endif
        /* Standard target? */
        /* A NULL target function means this is a standard target. */
        if (!t->u.kernel.target->target) {
            int v;
            /*
             * For a standard target a negative verdict is either a
             * built-in action (accept, drop, ...) or a RETURN from a
             * user-defined chain.
             */
            v = ((struct xt_standard_target *)t)->verdict;
            if (v < 0) {
                /* Pop from stack? */
                /* Not RETURN, so a built-in action: decode it and stop matching. */
                if (v != XT_RETURN) {
                    verdict = (unsigned int)(-v) - 1;
                    break;
                }
                if (*stackptr <= origptr) {
                    e = get_entry(table_base,
                        private->underflow[hook]);
                    pr_debug("Underflow (this is normal) "
                         "to %p\n", e);
                } else {
                   /*
                    * Returning from a user-defined chain: pop the saved
                    * position (the calling rule) and continue with the
                    * rule after it.
                    */
                    e = jumpstack[--*stackptr];
                    pr_debug("Pulled %p out from pos %u\n",
                         e, *stackptr);
                    e = ipt_next_entry(e);
                }
                continue;
            }
            /*
             * v is non-negative here: it is the offset of the entry to
             * jump to. Without IPT_F_GOTO (a -j jump) the current position
             * is pushed so matching can resume after this rule when the
             * chain returns. With IPT_F_GOTO (a -g jump) nothing is pushed,
             * so a later RETURN resumes at whatever was pushed earlier.
             * Nothing is pushed either when the destination is simply the
             * next entry.
             */
            if (table_base + v != ipt_next_entry(e) &&
                !(e->ip.flags & IPT_F_GOTO)) {
                     /* Jump stack is full (too many nested jumps): drop the packet. */
                if (*stackptr >= private->stacksize) {
                    verdict = NF_DROP;
                    break;
                }
                jumpstack[(*stackptr)++] = e;
                pr_debug("Pushed %p into pos %u\n",
                     e, *stackptr - 1);
            }

            e = get_entry(table_base, v);
            continue;
        }

        acpar.target   = t->u.kernel.target;
        acpar.targinfo = t->data;
          /* Run the extension target. */
        verdict = t->u.kernel.target->target(skb, &acpar);
        /* Target might have changed stuff. */
        ip = ip_hdr(skb);
        if (verdict == XT_CONTINUE)
            e = ipt_next_entry(e);
        else
            /* Verdict */
            break;
    } while (!acpar.hotdrop);
    pr_debug("Exiting %s; resetting sp from %u to %u\n",
         __func__, *stackptr, origptr);
    *stackptr = origptr;
    xt_write_recseq_end(addend);
    local_bh_enable();

#ifdef DEBUG_ALLOW_ALL
    return NF_ACCEPT;
#else
    if (acpar.hotdrop)
        return NF_DROP;
    else return verdict;
#endif
}

iptables操作

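添加新链，会默认生成两个rule：链首一个target为ERROR的rule(error字段为链名，用作链头标识)，链尾一个target为return的默认rule(用于返回调用链)。
entry172是添加新链test生成的默认return rule；entry173(error=`ERROR')是整个表末尾固定存在的error rule，test的链头rule位于entry171之前，下面的输出中没有列出。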
entry171是在新链上添加规则生成的，其target为DROP。
假如数据包匹配到了entry171则丢弃，否则匹配默认的entry172，其target为return，即返回上一个链继续执行。

#iptables -N test
#iptables -A test -p tcp -j DROP
#iptables -vvv -Ln
Entry 171 (66304):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 6
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=NF_DROP

Entry 172 (66456):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=RETURN

Entry 173 (66608):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `ERROR' [64]
error=`ERROR'

如果用户设置rule时，没有指定 -j或者-g，则verdict为下一个rule的偏移量。
entry175是添加新链test1生成的默认return rule，entry176(error=`ERROR')是整个表末尾的error rule，
entry174是在新链上添加规则生成的，没有指定target，默认为空，verdict=66936即下一个entry175。
假如数据包匹配到了entry174，只是增加计数，继续执行默认的entry175，其target为return，即返回上一个链继续执行

#iptables -N test1
#iptables -A test1 -p tcp
#iptables -vvv -Ln
Entry 174 (66784):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 6
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=66936

Entry 175 (66936):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=RETURN

Entry 176 (67088):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `ERROR' [64]
error=`ERROR'

添加rule时，-j和-g在代码上的区别是 -g 会设置 IPT_F_GOTO。
在代码中，如果没有此标志，会保存跳转时rule地址，当跳转到自定义链执行后返回时，如果target为return，会获取保存的rule的地址，获取调用链下一个rule继续匹配。
如果有此标志，就不会保存跳转时rule地址，也就不会返回调用链。还要分为两种情况：
a. 在内建chain上添加rule时使用 -g
iptables -N test
iptables -A INPUT -g test
走if分支，获取此chain默认rule的地址，执行默认的action
b. 在自定义链上添加rule时使用 -g
iptables -N test
iptables -N test1
iptables -A INPUT -j test
iptables -A test -g test1
走else分支，在跳转到test时，会保存chain INPUT的rule地址，再跳转到test1时，不会保存任何地址，当从test1返回时，取出保存的rule地址，获取INPUT chain上下一个rule继续匹配。

                if (*stackptr <= origptr) {
                    e = get_entry(table_base,
                        private->underflow[hook]);
                    pr_debug("Underflow (this is normal) "
                         "to %p\n", e);
                } else {
                    e = jumpstack[--*stackptr];
                    pr_debug("Pulled %p out from pos %u\n",
                         e, *stackptr);
                    e = ipt_next_entry(e);
                }

#iptables -N test
#iptables -N test1
#iptables -A test -p tcp -j test1
Entry 171 (66304):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 6
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=66784

Entry 172 (66456):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=RETURN

Entry 173 (66608):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `ERROR' [64]
error=`test1'

Entry 174 (66784):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=RETURN

Entry 175 (66936):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `ERROR' [64]
error=`ERROR'

#iptables -N test
#iptables -N test1
#iptables -A test -p tcp -g test1
Entry 171 (66304):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 6
Flags: 02
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=66784

Entry 172 (66456):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=RETURN

Entry 173 (66608):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `ERROR' [64]
error=`test1'

Entry 174 (66784):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=RETURN

Entry 175 (66936):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `ERROR' [64]
error=`ERROR'

iptables命令的flag-vvv的作用是显示每个rule的详细信息。
从下面代码可知，verbose初始值为0，只有当verbose大于1时才会调用dump_entries显示详细信息，而verbose的值根据-v选项而来，如果只有一个-v，verbose就是1，两个-v，verbose就是2，依此类推。
所以至少指定两个-v才可显示

iptables-1.8.1/iptables/iptables.c:
int do_command4(int argc, char *argv[], char **table,
        struct xtc_handle **handle, bool restore)
    int verbose = 0;
        case 'v':
            if (!verbose)
                set_option(&cs.options, OPT_VERBOSE,
                       &cs.fw.ip.invflags, cs.invert);
            verbose++;
            break;
    if (verbose > 1)
        dump_entries(*handle);

对于filter表来说，默认有三个chain: input, forward和output。
每个chain有一个默认的rule(大小为152字节)，这个rule不能被删除，永远是此chain上最后一个rule，如果前面的rule没匹配上，肯定会匹配到这个默认的rule，并执行它的target(内建action：drop，accept等)。
除了这三个默认的rule，最后还有一个处理error的rule。从下面的命令执行结果和源码部分可以证实。
另外从下面命令执行结果还有个重要信息hooks和Underflows，
他们分别用于标识此chain中用户设置的rule的头和尾，即第一个rule的头和最后一个rule的尾，这里的最后一个rule指的是用户设置的rule，不是最后面的默认rule。underflow减去hook=用户设置的所有rule的大小。
而且只显示内建chain的偏移，不显示用户自定义chain的偏移。
这三个chain和其默认rule是加载filter模块自动生成的，此时还没有用户设置rule，所以hook和underflow的值是相同的。
Hooks: pre/in/fwd/out/post = ffffffff/0/98/130/ffffffff
Underflows: pre/in/fwd/out/post = ffffffff/0/98/130/ffffffff
在此输出中，分别显示了pre/in/fwd/out/post这五个chain的偏移(98/130为十六进制)，其中ffffffff为无效值，因为filter表只应用在in/fwd/out这三个chain上。
in chain的起始偏移为0，因为in上没有用户设置的rule，所以结束偏移也为0，最后默认rule大小为152, in chain所有rule大小为152；
forward chain起始偏移为152，也没有用户设置的rule，所以结束偏移也为152，最后默认rule大小为152,forward chain所有rule大小为152;
out chain起始偏移为304,也没有用户设置的rule，所以结束偏移也为304，最后默认rule大小为152.

root@master:~# iptables -L -vvv
Chain INPUT (policy ACCEPT 162 packets, 16830 bytes)
 pkts bytes target     prot opt in     out     source               destination

Chain FORWARD (policy DROP 0 packets, 0 bytes)
 pkts bytes target     prot opt in     out     source               destination

Chain OUTPUT (policy ACCEPT 158 packets, 21254 bytes)
 pkts bytes target     prot opt in     out     source               destination
libiptc vlibxtables.so.12. 632 bytes.
Table `filter'
//此处的98/130等数字是十六进制的，换算成十进制为152/304
Hooks: pre/in/fwd/out/post = ffffffff/0/98/130/ffffffff
Underflows: pre/in/fwd/out/post = ffffffff/0/98/130/ffffffff
//input chain
Entry 0 (0):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 162 packets, 16830 bytes
Cache: 00000000
Target name: `' [40]
verdict=NF_ACCEPT
//forward chain
Entry 1 (152):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=NF_DROP
//output chain
Entry 2 (304):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 158 packets, 21254 bytes
Cache: 00000000
Target name: `' [40]
verdict=NF_ACCEPT

Entry 3 (456):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `ERROR' [64]
error=`ERROR'

iptables源码中可知
一个chain包含N个用户设置的rule和一个默认的rule，
c->head_offset是这个chain里的第一个rule的偏移，
c->foot_offset是这个chain里最后一个用户设置的rule结尾处的偏移，也就是最后那个默认rule的起始偏移，
最后一个默认rule大小为size = sizeof(STRUCT_ENTRY)+ ALIGN(sizeof(STRUCT_STANDARD_TARGET)) = 152
整个chain的大小为c->foot_offset-c->head_offset+size

iptables-1.8.1/libiptc/libiptc.c:
/*
 * put the pieces back together again
 *
 * First pass of table compilation: assign an offset and index to every
 * rule of every chain, then account for the single error rule that
 * terminates the whole ruleset.
 *
 * On success stores the total ruleset size in *size and returns the number
 * of entries; returns a negative value if a chain could not be processed.
 */
static int iptcc_compile_table_prep(struct xtc_handle *h, unsigned int *size)
{
    unsigned int total = 0, count = 0;
    struct chain_head *chain;

    /*
     * Every chain consists of its user-supplied rules followed by one
     * default rule; accumulate sizes and counts chain by chain.
     */
    list_for_each_entry(chain, &h->chains, list) {
        int rc = iptcc_compile_chain_offsets(h, chain, &total, &count);

        if (rc < 0)
            return rc;
    }

    /* Append one error rule at end of chain (the ruleset terminator). */
    count += 1;
    total += sizeof(STRUCT_ENTRY)
           + ALIGN(sizeof(struct xt_error_target));

    /* ruleset size is now in total */
    *size = total;
    return count;
}
/*
 * calculate offset and number for every rule in the cache
 *
 * A chain holds N user-supplied rules plus one closing default rule;
 * a chain that is not built-in additionally starts with a header entry
 * sized for an error target.
 *   c->head_offset - offset at which the chain starts
 *   c->foot_offset - offset just past the last user-supplied rule, i.e.
 *                    where the closing default rule is placed
 * The closing rule occupies
 *   sizeof(STRUCT_ENTRY) + ALIGN(sizeof(STRUCT_STANDARD_TARGET))
 * bytes (152 in the dumps discussed in this article), so the whole chain
 * spans c->foot_offset - c->head_offset plus that size.
 *
 * *offset and *num are running totals shared across chains. Returns 1.
 */
static int iptcc_compile_chain_offsets(struct xtc_handle *h, struct chain_head *c,
                       unsigned int *offset, unsigned int *num)
{
    struct rule_head *rule;
    const int user_defined = !iptcc_is_builtin(c);

    c->head_offset = *offset;
    DEBUGP("%s: chain_head %u, offset=%u\n", c->name, *num, *offset);

    if (user_defined) {
        /* Chain has header */
        *offset += sizeof(STRUCT_ENTRY)
                 + ALIGN(sizeof(struct xt_error_target));
        ++*num;
    }

    list_for_each_entry(rule, &c->rules, list) {
        DEBUGP("rule %u, offset=%u, index=%u\n", *num, *offset, *num);
        rule->index = *num;
        rule->offset = *offset;
        ++*num;
        *offset += rule->size;
    }

    DEBUGP("%s; chain_foot %u, offset=%u, index=%u\n", c->name, *num,
        *offset, *num);
    c->foot_index = *num;
    c->foot_offset = *offset;

    /* Account for the chain's closing standard-target rule. */
    ++*num;
    *offset += sizeof(STRUCT_ENTRY)
           + ALIGN(sizeof(STRUCT_STANDARD_TARGET));

    return 1;
}

下面添加一个用户自定义chain test，并在input chain设置一个rule，跳转到test chain。观察下偏移量的变化。
Hooks: pre/in/fwd/out/post = ffffffff/0/130/1c8/ffffffff
Underflows: pre/in/fwd/out/post = ffffffff/98/130/1c8/ffffffff
input chain: 起始偏移hook为0，因为添加了一个rule，大小为152，所以结束偏移underflow为152(十六进制98)，还有一个默认rule，大小为152，所以input chain大小为304(十六进制为130)。
forward chain：起始偏移hook为304(十六进制为130)，没有用户设置的rule，所以结束偏移underflow仍然为304，默认rule大小为152，所以forward chain大小为152.
output chain：起始偏移hook为456(十六进制为1c8)，没有用户设置的rule，所以结束偏移underflow仍然为456，默认rule大小为152，所以output chain大小为152.

#iptables -N test
#iptables -A INPUT -j test
root@master:~# iptables -L -vvv
Chain INPUT (policy ACCEPT 448 packets, 57326 bytes)
 pkts bytes target     prot opt in     out     source               destination
  448 57326 test       all  --  any    any     anywhere             anywhere

Chain FORWARD (policy DROP 0 packets, 0 bytes)
 pkts bytes target     prot opt in     out     source               destination

Chain OUTPUT (policy ACCEPT 446 packets, 61170 bytes)
 pkts bytes target     prot opt in     out     source               destination

Chain test (1 references)
 pkts bytes target     prot opt in     out     source               destination
libiptc vlibxtables.so.12. 1112 bytes.
Table `filter'
Hooks: pre/in/fwd/out/post = ffffffff/0/130/1c8/ffffffff
Underflows: pre/in/fwd/out/post = ffffffff/98/130/1c8/ffffffff
//input chain
//用户设置的rule
Entry 0 (0):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 448 packets, 57326 bytes
Cache: 00000000
Target name: `' [40]
verdict=784  //784为自定义chain
//默认rule 
Entry 1 (152):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 448 packets, 57326 bytes
Cache: 00000000
Target name: `' [40]
verdict=NF_ACCEPT
//forward chain
Entry 2 (304):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `' [40]
verdict=NF_DROP
//output chain
Entry 3 (456):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 446 packets, 61170 bytes
Cache: 00000000
Target name: `' [40]
verdict=NF_ACCEPT

Entry 4 (608):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `ERROR' [64]
error=`test'
//自定义chain
Entry 5 (784):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 448 packets, 57326 bytes
Cache: 00000000
Target name: `' [40]
verdict=RETURN

Entry 6 (936):
SRC IP: 0.0.0.0/0.0.0.0
DST IP: 0.0.0.0/0.0.0.0
Interface: `'/................to `'/................
Protocol: 0
Flags: 00
Invflags: 00
Counters: 0 packets, 0 bytes
Cache: 00000000
Target name: `ERROR' [64]
error=`ERROR'

netfilter之filter
filter 模块初始化 1.注册filter表及生成默认规则iptable_filter_net_init参数为...
iptables详解
netfilter 内核中的防火墙框架，承载并生效规则；4表5链； netfilter功能：。 filter 包过...
netfilter之nat
总结如下：a. original方向报文根据规则做了转换，那么reply方向一定是根据conntrack连接状态做...
iptables
iptables iptables 规则原理和组成NetFilter什么是NetFilter？NetFilter是...
Iptables 详解
一、Iptables说明 1.0 概述 netfilter/iptables : netfilter/iptabl...
初识jQuery之jQuery方法（二）
1.jQuery方法之filter，not，has filter : 过滤 not : filter的反义词 ha...
54 iptables
netfilter与iptables Netfilter是由Rusty Russell提出的Linux 2.4内核...
iptables系列二
iptables系列之基本应用及显式扩展 netfilter:Framework,TCP,内核中 iptables...
Netfilter
NetFilter
当reroutecheck 走到loopback后继续走完整个netfilter框架后，判断目的地是本地走直接将i...