Merge branch 'akpm' (patches from Andrew)
[linux] / net / netfilter / nf_conntrack_proto.c
1 // SPDX-License-Identifier: GPL-2.0
2
3 #include <linux/types.h>
4 #include <linux/netfilter.h>
5 #include <linux/module.h>
6 #include <linux/slab.h>
7 #include <linux/mutex.h>
8 #include <linux/vmalloc.h>
9 #include <linux/stddef.h>
10 #include <linux/err.h>
11 #include <linux/percpu.h>
12 #include <linux/notifier.h>
13 #include <linux/kernel.h>
14 #include <linux/netdevice.h>
15
16 #include <net/netfilter/nf_conntrack.h>
17 #include <net/netfilter/nf_conntrack_l4proto.h>
18 #include <net/netfilter/nf_conntrack_core.h>
19 #include <net/netfilter/nf_log.h>
20
21 #include <linux/ip.h>
22 #include <linux/icmp.h>
23 #include <linux/sysctl.h>
24 #include <net/route.h>
25 #include <net/ip.h>
26
27 #include <linux/netfilter_ipv4.h>
28 #include <linux/netfilter_ipv6.h>
29 #include <linux/netfilter_ipv6/ip6_tables.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_zones.h>
32 #include <net/netfilter/nf_conntrack_seqadj.h>
33 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
34 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
35 #include <net/netfilter/nf_nat_helper.h>
36 #include <net/netfilter/ipv4/nf_defrag_ipv4.h>
37 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
38
39 #include <linux/ipv6.h>
40 #include <linux/in6.h>
41 #include <net/ipv6.h>
42 #include <net/inet_frag.h>
43
44 extern unsigned int nf_conntrack_net_id;
45
46 static struct nf_conntrack_l4proto __rcu *nf_ct_protos[MAX_NF_CT_PROTO + 1] __read_mostly;
47
48 static DEFINE_MUTEX(nf_ct_proto_mutex);
49
50 #ifdef CONFIG_SYSCTL
51 static int
52 nf_ct_register_sysctl(struct net *net,
53                       struct ctl_table_header **header,
54                       const char *path,
55                       struct ctl_table *table)
56 {
57         if (*header == NULL) {
58                 *header = register_net_sysctl(net, path, table);
59                 if (*header == NULL)
60                         return -ENOMEM;
61         }
62
63         return 0;
64 }
65
66 static void
67 nf_ct_unregister_sysctl(struct ctl_table_header **header,
68                         struct ctl_table **table,
69                         unsigned int users)
70 {
71         if (users > 0)
72                 return;
73
74         unregister_net_sysctl_table(*header);
75         kfree(*table);
76         *header = NULL;
77         *table = NULL;
78 }
79
80 __printf(5, 6)
81 void nf_l4proto_log_invalid(const struct sk_buff *skb,
82                             struct net *net,
83                             u16 pf, u8 protonum,
84                             const char *fmt, ...)
85 {
86         struct va_format vaf;
87         va_list args;
88
89         if (net->ct.sysctl_log_invalid != protonum ||
90             net->ct.sysctl_log_invalid != IPPROTO_RAW)
91                 return;
92
93         va_start(args, fmt);
94         vaf.fmt = fmt;
95         vaf.va = &args;
96
97         nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
98                       "nf_ct_proto_%d: %pV ", protonum, &vaf);
99         va_end(args);
100 }
101 EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid);
102
103 __printf(3, 4)
104 void nf_ct_l4proto_log_invalid(const struct sk_buff *skb,
105                                const struct nf_conn *ct,
106                                const char *fmt, ...)
107 {
108         struct va_format vaf;
109         struct net *net;
110         va_list args;
111
112         net = nf_ct_net(ct);
113         if (likely(net->ct.sysctl_log_invalid == 0))
114                 return;
115
116         va_start(args, fmt);
117         vaf.fmt = fmt;
118         vaf.va = &args;
119
120         nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct),
121                                nf_ct_protonum(ct), "%pV", &vaf);
122         va_end(args);
123 }
124 EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid);
125 #endif
126
127 const struct nf_conntrack_l4proto *__nf_ct_l4proto_find(u8 l4proto)
128 {
129         if (unlikely(l4proto >= ARRAY_SIZE(nf_ct_protos)))
130                 return &nf_conntrack_l4proto_generic;
131
132         return rcu_dereference(nf_ct_protos[l4proto]);
133 }
134 EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find);
135
136 const struct nf_conntrack_l4proto *nf_ct_l4proto_find_get(u8 l4num)
137 {
138         const struct nf_conntrack_l4proto *p;
139
140         rcu_read_lock();
141         p = __nf_ct_l4proto_find(l4num);
142         if (!try_module_get(p->me))
143                 p = &nf_conntrack_l4proto_generic;
144         rcu_read_unlock();
145
146         return p;
147 }
148 EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get);
149
150 void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p)
151 {
152         module_put(p->me);
153 }
154 EXPORT_SYMBOL_GPL(nf_ct_l4proto_put);
155
156 static int kill_l4proto(struct nf_conn *i, void *data)
157 {
158         const struct nf_conntrack_l4proto *l4proto;
159         l4proto = data;
160         return nf_ct_protonum(i) == l4proto->l4proto;
161 }
162
163 static struct nf_proto_net *nf_ct_l4proto_net(struct net *net,
164                                 const struct nf_conntrack_l4proto *l4proto)
165 {
166         if (l4proto->get_net_proto) {
167                 /* statically built-in protocols use static per-net */
168                 return l4proto->get_net_proto(net);
169         } else if (l4proto->net_id) {
170                 /* ... and loadable protocols use dynamic per-net */
171                 return net_generic(net, *l4proto->net_id);
172         }
173         return NULL;
174 }
175
/*
 * Register the sysctl table of @pn, if it has one, under "net/netfilter"
 * in @net.
 *
 * When registration fails and @pn has no users, the table is freed and
 * the pointer cleared.  Returns 0 when there is nothing to register or
 * when CONFIG_SYSCTL is off, otherwise the result of
 * nf_ct_register_sysctl().
 */
static
int nf_ct_l4proto_register_sysctl(struct net *net,
				  struct nf_proto_net *pn)
{
#ifdef CONFIG_SYSCTL
	int err;

	if (!pn->ctl_table)
		return 0;

	err = nf_ct_register_sysctl(net,
				    &pn->ctl_table_header,
				    "net/netfilter",
				    pn->ctl_table);
	if (err < 0 && pn->users == 0) {
		kfree(pn->ctl_table);
		pn->ctl_table = NULL;
	}

	return err;
#else
	return 0;
#endif /* CONFIG_SYSCTL */
}
198
/*
 * Release the sysctl registration of @pn if one exists.  The actual
 * teardown only happens once pn->users is zero (see
 * nf_ct_unregister_sysctl()).  Does nothing without CONFIG_SYSCTL.
 */
static
void nf_ct_l4proto_unregister_sysctl(struct nf_proto_net *pn)
{
#ifdef CONFIG_SYSCTL
	if (!pn->ctl_table_header)
		return;

	nf_ct_unregister_sysctl(&pn->ctl_table_header,
				&pn->ctl_table,
				pn->users);
#endif /* CONFIG_SYSCTL */
}
209
210 /* FIXME: Allow NULL functions and sub in pointers to generic for
211    them. --RR */
212 int nf_ct_l4proto_register_one(const struct nf_conntrack_l4proto *l4proto)
213 {
214         int ret = 0;
215
216         if ((l4proto->to_nlattr && l4proto->nlattr_size == 0) ||
217             (l4proto->tuple_to_nlattr && !l4proto->nlattr_tuple_size))
218                 return -EINVAL;
219
220         mutex_lock(&nf_ct_proto_mutex);
221         if (rcu_dereference_protected(
222                         nf_ct_protos[l4proto->l4proto],
223                         lockdep_is_held(&nf_ct_proto_mutex)
224                         ) != &nf_conntrack_l4proto_generic) {
225                 ret = -EBUSY;
226                 goto out_unlock;
227         }
228
229         rcu_assign_pointer(nf_ct_protos[l4proto->l4proto], l4proto);
230 out_unlock:
231         mutex_unlock(&nf_ct_proto_mutex);
232         return ret;
233 }
234 EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one);
235
236 int nf_ct_l4proto_pernet_register_one(struct net *net,
237                                 const struct nf_conntrack_l4proto *l4proto)
238 {
239         int ret = 0;
240         struct nf_proto_net *pn = NULL;
241
242         if (l4proto->init_net) {
243                 ret = l4proto->init_net(net);
244                 if (ret < 0)
245                         goto out;
246         }
247
248         pn = nf_ct_l4proto_net(net, l4proto);
249         if (pn == NULL)
250                 goto out;
251
252         ret = nf_ct_l4proto_register_sysctl(net, pn);
253         if (ret < 0)
254                 goto out;
255
256         pn->users++;
257 out:
258         return ret;
259 }
260 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one);
261
262 static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
263
264 {
265         BUG_ON(l4proto->l4proto >= ARRAY_SIZE(nf_ct_protos));
266
267         BUG_ON(rcu_dereference_protected(
268                         nf_ct_protos[l4proto->l4proto],
269                         lockdep_is_held(&nf_ct_proto_mutex)
270                         ) != l4proto);
271         rcu_assign_pointer(nf_ct_protos[l4proto->l4proto],
272                            &nf_conntrack_l4proto_generic);
273 }
274
275 void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto)
276 {
277         mutex_lock(&nf_ct_proto_mutex);
278         __nf_ct_l4proto_unregister_one(l4proto);
279         mutex_unlock(&nf_ct_proto_mutex);
280
281         synchronize_net();
282         /* Remove all contrack entries for this protocol */
283         nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto);
284 }
285 EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one);
286
287 void nf_ct_l4proto_pernet_unregister_one(struct net *net,
288                                 const struct nf_conntrack_l4proto *l4proto)
289 {
290         struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto);
291
292         if (pn == NULL)
293                 return;
294
295         pn->users--;
296         nf_ct_l4proto_unregister_sysctl(pn);
297 }
298 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister_one);
299
300 static void
301 nf_ct_l4proto_unregister(const struct nf_conntrack_l4proto * const l4proto[],
302                          unsigned int num_proto)
303 {
304         int i;
305
306         mutex_lock(&nf_ct_proto_mutex);
307         for (i = 0; i < num_proto; i++)
308                 __nf_ct_l4proto_unregister_one(l4proto[i]);
309         mutex_unlock(&nf_ct_proto_mutex);
310
311         synchronize_net();
312
313         for (i = 0; i < num_proto; i++)
314                 nf_ct_iterate_destroy(kill_l4proto, (void *)l4proto[i]);
315 }
316
317 static int
318 nf_ct_l4proto_register(const struct nf_conntrack_l4proto * const l4proto[],
319                        unsigned int num_proto)
320 {
321         int ret = -EINVAL;
322         unsigned int i;
323
324         for (i = 0; i < num_proto; i++) {
325                 ret = nf_ct_l4proto_register_one(l4proto[i]);
326                 if (ret < 0)
327                         break;
328         }
329         if (i != num_proto) {
330                 pr_err("nf_conntrack: can't register l4 %d proto.\n",
331                        l4proto[i]->l4proto);
332                 nf_ct_l4proto_unregister(l4proto, i);
333         }
334         return ret;
335 }
336
337 int nf_ct_l4proto_pernet_register(struct net *net,
338                                   const struct nf_conntrack_l4proto *const l4proto[],
339                                   unsigned int num_proto)
340 {
341         int ret = -EINVAL;
342         unsigned int i;
343
344         for (i = 0; i < num_proto; i++) {
345                 ret = nf_ct_l4proto_pernet_register_one(net, l4proto[i]);
346                 if (ret < 0)
347                         break;
348         }
349         if (i != num_proto) {
350                 pr_err("nf_conntrack %d: pernet registration failed\n",
351                        l4proto[i]->l4proto);
352                 nf_ct_l4proto_pernet_unregister(net, l4proto, i);
353         }
354         return ret;
355 }
356 EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register);
357
/*
 * Undo the per-netns registration of the first @num_proto trackers of
 * @l4proto[] in @net, walking the array from the last entry to the
 * first.
 */
void nf_ct_l4proto_pernet_unregister(struct net *net,
				const struct nf_conntrack_l4proto *const l4proto[],
				unsigned int num_proto)
{
	unsigned int remaining;

	for (remaining = num_proto; remaining > 0; remaining--)
		nf_ct_l4proto_pernet_unregister_one(net,
						    l4proto[remaining - 1]);
}
EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_unregister);
366
367 static unsigned int ipv4_helper(void *priv,
368                                 struct sk_buff *skb,
369                                 const struct nf_hook_state *state)
370 {
371         struct nf_conn *ct;
372         enum ip_conntrack_info ctinfo;
373         const struct nf_conn_help *help;
374         const struct nf_conntrack_helper *helper;
375
376         /* This is where we call the helper: as the packet goes out. */
377         ct = nf_ct_get(skb, &ctinfo);
378         if (!ct || ctinfo == IP_CT_RELATED_REPLY)
379                 return NF_ACCEPT;
380
381         help = nfct_help(ct);
382         if (!help)
383                 return NF_ACCEPT;
384
385         /* rcu_read_lock()ed by nf_hook_thresh */
386         helper = rcu_dereference(help->helper);
387         if (!helper)
388                 return NF_ACCEPT;
389
390         return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
391                             ct, ctinfo);
392 }
393
394 static unsigned int ipv4_confirm(void *priv,
395                                  struct sk_buff *skb,
396                                  const struct nf_hook_state *state)
397 {
398         struct nf_conn *ct;
399         enum ip_conntrack_info ctinfo;
400
401         ct = nf_ct_get(skb, &ctinfo);
402         if (!ct || ctinfo == IP_CT_RELATED_REPLY)
403                 goto out;
404
405         /* adjust seqs for loopback traffic only in outgoing direction */
406         if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
407             !nf_is_loopback_packet(skb)) {
408                 if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
409                         NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
410                         return NF_DROP;
411                 }
412         }
413 out:
414         /* We've seen it coming out the other side: confirm it */
415         return nf_conntrack_confirm(skb);
416 }
417
/* IPv4 hook entry point: hands the packet straight to nf_conntrack_in(). */
static unsigned int ipv4_conntrack_in(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	unsigned int verdict = nf_conntrack_in(skb, state);

	return verdict;
}
424
425 static unsigned int ipv4_conntrack_local(void *priv,
426                                          struct sk_buff *skb,
427                                          const struct nf_hook_state *state)
428 {
429         if (ip_is_fragment(ip_hdr(skb))) { /* IP_NODEFRAG setsockopt set */
430                 enum ip_conntrack_info ctinfo;
431                 struct nf_conn *tmpl;
432
433                 tmpl = nf_ct_get(skb, &ctinfo);
434                 if (tmpl && nf_ct_is_template(tmpl)) {
435                         /* when skipping ct, clear templates to avoid fooling
436                          * later targets/matches
437                          */
438                         skb->_nfct = 0;
439                         nf_ct_put(tmpl);
440                 }
441                 return NF_ACCEPT;
442         }
443
444         return nf_conntrack_in(skb, state);
445 }
446
447 /* Connection tracking may drop packets, but never alters them, so
448  * make it the first hook.
449  */
450 static const struct nf_hook_ops ipv4_conntrack_ops[] = {
451         {
452                 .hook           = ipv4_conntrack_in,
453                 .pf             = NFPROTO_IPV4,
454                 .hooknum        = NF_INET_PRE_ROUTING,
455                 .priority       = NF_IP_PRI_CONNTRACK,
456         },
457         {
458                 .hook           = ipv4_conntrack_local,
459                 .pf             = NFPROTO_IPV4,
460                 .hooknum        = NF_INET_LOCAL_OUT,
461                 .priority       = NF_IP_PRI_CONNTRACK,
462         },
463         {
464                 .hook           = ipv4_helper,
465                 .pf             = NFPROTO_IPV4,
466                 .hooknum        = NF_INET_POST_ROUTING,
467                 .priority       = NF_IP_PRI_CONNTRACK_HELPER,
468         },
469         {
470                 .hook           = ipv4_confirm,
471                 .pf             = NFPROTO_IPV4,
472                 .hooknum        = NF_INET_POST_ROUTING,
473                 .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
474         },
475         {
476                 .hook           = ipv4_helper,
477                 .pf             = NFPROTO_IPV4,
478                 .hooknum        = NF_INET_LOCAL_IN,
479                 .priority       = NF_IP_PRI_CONNTRACK_HELPER,
480         },
481         {
482                 .hook           = ipv4_confirm,
483                 .pf             = NFPROTO_IPV4,
484                 .hooknum        = NF_INET_LOCAL_IN,
485                 .priority       = NF_IP_PRI_CONNTRACK_CONFIRM,
486         },
487 };
488
489 /* Fast function for those who don't want to parse /proc (and I don't
490  * blame them).
491  * Reversing the socket's dst/src point of view gives us the reply
492  * mapping.
493  */
494 static int
495 getorigdst(struct sock *sk, int optval, void __user *user, int *len)
496 {
497         const struct inet_sock *inet = inet_sk(sk);
498         const struct nf_conntrack_tuple_hash *h;
499         struct nf_conntrack_tuple tuple;
500
501         memset(&tuple, 0, sizeof(tuple));
502
503         lock_sock(sk);
504         tuple.src.u3.ip = inet->inet_rcv_saddr;
505         tuple.src.u.tcp.port = inet->inet_sport;
506         tuple.dst.u3.ip = inet->inet_daddr;
507         tuple.dst.u.tcp.port = inet->inet_dport;
508         tuple.src.l3num = PF_INET;
509         tuple.dst.protonum = sk->sk_protocol;
510         release_sock(sk);
511
512         /* We only do TCP and SCTP at the moment: is there a better way? */
513         if (tuple.dst.protonum != IPPROTO_TCP &&
514             tuple.dst.protonum != IPPROTO_SCTP) {
515                 pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
516                 return -ENOPROTOOPT;
517         }
518
519         if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
520                 pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
521                          *len, sizeof(struct sockaddr_in));
522                 return -EINVAL;
523         }
524
525         h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
526         if (h) {
527                 struct sockaddr_in sin;
528                 struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
529
530                 sin.sin_family = AF_INET;
531                 sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
532                         .tuple.dst.u.tcp.port;
533                 sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
534                         .tuple.dst.u3.ip;
535                 memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
536
537                 pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
538                          &sin.sin_addr.s_addr, ntohs(sin.sin_port));
539                 nf_ct_put(ct);
540                 if (copy_to_user(user, &sin, sizeof(sin)) != 0)
541                         return -EFAULT;
542                 else
543                         return 0;
544         }
545         pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
546                  &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
547                  &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
548         return -ENOENT;
549 }
550
551 static struct nf_sockopt_ops so_getorigdst = {
552         .pf             = PF_INET,
553         .get_optmin     = SO_ORIGINAL_DST,
554         .get_optmax     = SO_ORIGINAL_DST + 1,
555         .get            = getorigdst,
556         .owner          = THIS_MODULE,
557 };
558
559 #if IS_ENABLED(CONFIG_IPV6)
560 static int
561 ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
562 {
563         struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };
564         const struct ipv6_pinfo *inet6 = inet6_sk(sk);
565         const struct inet_sock *inet = inet_sk(sk);
566         const struct nf_conntrack_tuple_hash *h;
567         struct sockaddr_in6 sin6;
568         struct nf_conn *ct;
569         __be32 flow_label;
570         int bound_dev_if;
571
572         lock_sock(sk);
573         tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;
574         tuple.src.u.tcp.port = inet->inet_sport;
575         tuple.dst.u3.in6 = sk->sk_v6_daddr;
576         tuple.dst.u.tcp.port = inet->inet_dport;
577         tuple.dst.protonum = sk->sk_protocol;
578         bound_dev_if = sk->sk_bound_dev_if;
579         flow_label = inet6->flow_label;
580         release_sock(sk);
581
582         if (tuple.dst.protonum != IPPROTO_TCP &&
583             tuple.dst.protonum != IPPROTO_SCTP)
584                 return -ENOPROTOOPT;
585
586         if (*len < 0 || (unsigned int)*len < sizeof(sin6))
587                 return -EINVAL;
588
589         h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
590         if (!h) {
591                 pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
592                          &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
593                          &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
594                 return -ENOENT;
595         }
596
597         ct = nf_ct_tuplehash_to_ctrack(h);
598
599         sin6.sin6_family = AF_INET6;
600         sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port;
601         sin6.sin6_flowinfo = flow_label & IPV6_FLOWINFO_MASK;
602         memcpy(&sin6.sin6_addr,
603                &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6,
604                sizeof(sin6.sin6_addr));
605
606         nf_ct_put(ct);
607         sin6.sin6_scope_id = ipv6_iface_scope_id(&sin6.sin6_addr, bound_dev_if);
608         return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0;
609 }
610
611 static struct nf_sockopt_ops so_getorigdst6 = {
612         .pf             = NFPROTO_IPV6,
613         .get_optmin     = IP6T_SO_ORIGINAL_DST,
614         .get_optmax     = IP6T_SO_ORIGINAL_DST + 1,
615         .get            = ipv6_getorigdst,
616         .owner          = THIS_MODULE,
617 };
618
619 static unsigned int ipv6_confirm(void *priv,
620                                  struct sk_buff *skb,
621                                  const struct nf_hook_state *state)
622 {
623         struct nf_conn *ct;
624         enum ip_conntrack_info ctinfo;
625         unsigned char pnum = ipv6_hdr(skb)->nexthdr;
626         int protoff;
627         __be16 frag_off;
628
629         ct = nf_ct_get(skb, &ctinfo);
630         if (!ct || ctinfo == IP_CT_RELATED_REPLY)
631                 goto out;
632
633         protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum,
634                                    &frag_off);
635         if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
636                 pr_debug("proto header not found\n");
637                 goto out;
638         }
639
640         /* adjust seqs for loopback traffic only in outgoing direction */
641         if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
642             !nf_is_loopback_packet(skb)) {
643                 if (!nf_ct_seq_adjust(skb, ct, ctinfo, protoff)) {
644                         NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
645                         return NF_DROP;
646                 }
647         }
648 out:
649         /* We've seen it coming out the other side: confirm it */
650         return nf_conntrack_confirm(skb);
651 }
652
/* IPv6 hook entry point: hands the packet straight to nf_conntrack_in(). */
static unsigned int ipv6_conntrack_in(void *priv,
				      struct sk_buff *skb,
				      const struct nf_hook_state *state)
{
	unsigned int verdict = nf_conntrack_in(skb, state);

	return verdict;
}
659
/* IPv6 hook for locally generated packets: hands the packet to
 * nf_conntrack_in() with no extra handling.
 */
static unsigned int ipv6_conntrack_local(void *priv,
					 struct sk_buff *skb,
					 const struct nf_hook_state *state)
{
	unsigned int verdict = nf_conntrack_in(skb, state);

	return verdict;
}
666
667 static unsigned int ipv6_helper(void *priv,
668                                 struct sk_buff *skb,
669                                 const struct nf_hook_state *state)
670 {
671         struct nf_conn *ct;
672         const struct nf_conn_help *help;
673         const struct nf_conntrack_helper *helper;
674         enum ip_conntrack_info ctinfo;
675         __be16 frag_off;
676         int protoff;
677         u8 nexthdr;
678
679         /* This is where we call the helper: as the packet goes out. */
680         ct = nf_ct_get(skb, &ctinfo);
681         if (!ct || ctinfo == IP_CT_RELATED_REPLY)
682                 return NF_ACCEPT;
683
684         help = nfct_help(ct);
685         if (!help)
686                 return NF_ACCEPT;
687         /* rcu_read_lock()ed by nf_hook_thresh */
688         helper = rcu_dereference(help->helper);
689         if (!helper)
690                 return NF_ACCEPT;
691
692         nexthdr = ipv6_hdr(skb)->nexthdr;
693         protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
694                                    &frag_off);
695         if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
696                 pr_debug("proto header not found\n");
697                 return NF_ACCEPT;
698         }
699
700         return helper->help(skb, protoff, ct, ctinfo);
701 }
702
703 static const struct nf_hook_ops ipv6_conntrack_ops[] = {
704         {
705                 .hook           = ipv6_conntrack_in,
706                 .pf             = NFPROTO_IPV6,
707                 .hooknum        = NF_INET_PRE_ROUTING,
708                 .priority       = NF_IP6_PRI_CONNTRACK,
709         },
710         {
711                 .hook           = ipv6_conntrack_local,
712                 .pf             = NFPROTO_IPV6,
713                 .hooknum        = NF_INET_LOCAL_OUT,
714                 .priority       = NF_IP6_PRI_CONNTRACK,
715         },
716         {
717                 .hook           = ipv6_helper,
718                 .pf             = NFPROTO_IPV6,
719                 .hooknum        = NF_INET_POST_ROUTING,
720                 .priority       = NF_IP6_PRI_CONNTRACK_HELPER,
721         },
722         {
723                 .hook           = ipv6_confirm,
724                 .pf             = NFPROTO_IPV6,
725                 .hooknum        = NF_INET_POST_ROUTING,
726                 .priority       = NF_IP6_PRI_LAST,
727         },
728         {
729                 .hook           = ipv6_helper,
730                 .pf             = NFPROTO_IPV6,
731                 .hooknum        = NF_INET_LOCAL_IN,
732                 .priority       = NF_IP6_PRI_CONNTRACK_HELPER,
733         },
734         {
735                 .hook           = ipv6_confirm,
736                 .pf             = NFPROTO_IPV6,
737                 .hooknum        = NF_INET_LOCAL_IN,
738                 .priority       = NF_IP6_PRI_LAST - 1,
739         },
740 };
741 #endif
742
743 static int nf_ct_tcp_fixup(struct nf_conn *ct, void *_nfproto)
744 {
745         u8 nfproto = (unsigned long)_nfproto;
746
747         if (nf_ct_l3num(ct) != nfproto)
748                 return 0;
749
750         if (nf_ct_protonum(ct) == IPPROTO_TCP &&
751             ct->proto.tcp.state == TCP_CONNTRACK_ESTABLISHED) {
752                 ct->proto.tcp.seen[0].td_maxwin = 0;
753                 ct->proto.tcp.seen[1].td_maxwin = 0;
754         }
755
756         return 0;
757 }
758
759 static int nf_ct_netns_do_get(struct net *net, u8 nfproto)
760 {
761         struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
762         bool fixup_needed = false;
763         int err = 0;
764
765         mutex_lock(&nf_ct_proto_mutex);
766
767         switch (nfproto) {
768         case NFPROTO_IPV4:
769                 cnet->users4++;
770                 if (cnet->users4 > 1)
771                         goto out_unlock;
772                 err = nf_defrag_ipv4_enable(net);
773                 if (err) {
774                         cnet->users4 = 0;
775                         goto out_unlock;
776                 }
777
778                 err = nf_register_net_hooks(net, ipv4_conntrack_ops,
779                                             ARRAY_SIZE(ipv4_conntrack_ops));
780                 if (err)
781                         cnet->users4 = 0;
782                 else
783                         fixup_needed = true;
784                 break;
785 #if IS_ENABLED(CONFIG_IPV6)
786         case NFPROTO_IPV6:
787                 cnet->users6++;
788                 if (cnet->users6 > 1)
789                         goto out_unlock;
790                 err = nf_defrag_ipv6_enable(net);
791                 if (err < 0) {
792                         cnet->users6 = 0;
793                         goto out_unlock;
794                 }
795
796                 err = nf_register_net_hooks(net, ipv6_conntrack_ops,
797                                             ARRAY_SIZE(ipv6_conntrack_ops));
798                 if (err)
799                         cnet->users6 = 0;
800                 else
801                         fixup_needed = true;
802                 break;
803 #endif
804         default:
805                 err = -EPROTO;
806                 break;
807         }
808  out_unlock:
809         mutex_unlock(&nf_ct_proto_mutex);
810
811         if (fixup_needed)
812                 nf_ct_iterate_cleanup_net(net, nf_ct_tcp_fixup,
813                                           (void *)(unsigned long)nfproto, 0, 0);
814
815         return err;
816 }
817
818 static void nf_ct_netns_do_put(struct net *net, u8 nfproto)
819 {
820         struct nf_conntrack_net *cnet = net_generic(net, nf_conntrack_net_id);
821
822         mutex_lock(&nf_ct_proto_mutex);
823         switch (nfproto) {
824         case NFPROTO_IPV4:
825                 if (cnet->users4 && (--cnet->users4 == 0))
826                         nf_unregister_net_hooks(net, ipv4_conntrack_ops,
827                                                 ARRAY_SIZE(ipv4_conntrack_ops));
828                 break;
829 #if IS_ENABLED(CONFIG_IPV6)
830         case NFPROTO_IPV6:
831                 if (cnet->users6 && (--cnet->users6 == 0))
832                         nf_unregister_net_hooks(net, ipv6_conntrack_ops,
833                                                 ARRAY_SIZE(ipv6_conntrack_ops));
834                 break;
835 #endif
836         }
837
838         mutex_unlock(&nf_ct_proto_mutex);
839 }
840
841 int nf_ct_netns_get(struct net *net, u8 nfproto)
842 {
843         int err;
844
845         if (nfproto == NFPROTO_INET) {
846                 err = nf_ct_netns_do_get(net, NFPROTO_IPV4);
847                 if (err < 0)
848                         goto err1;
849                 err = nf_ct_netns_do_get(net, NFPROTO_IPV6);
850                 if (err < 0)
851                         goto err2;
852         } else {
853                 err = nf_ct_netns_do_get(net, nfproto);
854                 if (err < 0)
855                         goto err1;
856         }
857         return 0;
858
859 err2:
860         nf_ct_netns_put(net, NFPROTO_IPV4);
861 err1:
862         return err;
863 }
864 EXPORT_SYMBOL_GPL(nf_ct_netns_get);
865
866 void nf_ct_netns_put(struct net *net, uint8_t nfproto)
867 {
868         if (nfproto == NFPROTO_INET) {
869                 nf_ct_netns_do_put(net, NFPROTO_IPV4);
870                 nf_ct_netns_do_put(net, NFPROTO_IPV6);
871         } else {
872                 nf_ct_netns_do_put(net, nfproto);
873         }
874 }
875 EXPORT_SYMBOL_GPL(nf_ct_netns_put);
876
877 static const struct nf_conntrack_l4proto * const builtin_l4proto[] = {
878         &nf_conntrack_l4proto_tcp,
879         &nf_conntrack_l4proto_udp,
880         &nf_conntrack_l4proto_icmp,
881 #ifdef CONFIG_NF_CT_PROTO_DCCP
882         &nf_conntrack_l4proto_dccp,
883 #endif
884 #ifdef CONFIG_NF_CT_PROTO_SCTP
885         &nf_conntrack_l4proto_sctp,
886 #endif
887 #ifdef CONFIG_NF_CT_PROTO_UDPLITE
888         &nf_conntrack_l4proto_udplite,
889 #endif
890 #if IS_ENABLED(CONFIG_IPV6)
891         &nf_conntrack_l4proto_icmpv6,
892 #endif /* CONFIG_IPV6 */
893 };
894
895 int nf_conntrack_proto_init(void)
896 {
897         int ret = 0, i;
898
899         ret = nf_register_sockopt(&so_getorigdst);
900         if (ret < 0)
901                 return ret;
902
903 #if IS_ENABLED(CONFIG_IPV6)
904         ret = nf_register_sockopt(&so_getorigdst6);
905         if (ret < 0)
906                 goto cleanup_sockopt;
907 #endif
908
909         for (i = 0; i < ARRAY_SIZE(nf_ct_protos); i++)
910                 RCU_INIT_POINTER(nf_ct_protos[i],
911                                  &nf_conntrack_l4proto_generic);
912
913         ret = nf_ct_l4proto_register(builtin_l4proto,
914                                      ARRAY_SIZE(builtin_l4proto));
915         if (ret < 0)
916                 goto cleanup_sockopt2;
917
918         return ret;
919 cleanup_sockopt2:
920         nf_unregister_sockopt(&so_getorigdst);
921 #if IS_ENABLED(CONFIG_IPV6)
922 cleanup_sockopt:
923         nf_unregister_sockopt(&so_getorigdst6);
924 #endif
925         return ret;
926 }
927
928 void nf_conntrack_proto_fini(void)
929 {
930         nf_unregister_sockopt(&so_getorigdst);
931 #if IS_ENABLED(CONFIG_IPV6)
932         nf_unregister_sockopt(&so_getorigdst6);
933 #endif
934 }
935
936 int nf_conntrack_proto_pernet_init(struct net *net)
937 {
938         int err;
939         struct nf_proto_net *pn = nf_ct_l4proto_net(net,
940                                         &nf_conntrack_l4proto_generic);
941
942         err = nf_conntrack_l4proto_generic.init_net(net);
943         if (err < 0)
944                 return err;
945         err = nf_ct_l4proto_register_sysctl(net,
946                                             pn);
947         if (err < 0)
948                 return err;
949
950         err = nf_ct_l4proto_pernet_register(net, builtin_l4proto,
951                                             ARRAY_SIZE(builtin_l4proto));
952         if (err < 0) {
953                 nf_ct_l4proto_unregister_sysctl(pn);
954                 return err;
955         }
956
957         pn->users++;
958         return 0;
959 }
960
961 void nf_conntrack_proto_pernet_fini(struct net *net)
962 {
963         struct nf_proto_net *pn = nf_ct_l4proto_net(net,
964                                         &nf_conntrack_l4proto_generic);
965
966         nf_ct_l4proto_pernet_unregister(net, builtin_l4proto,
967                                         ARRAY_SIZE(builtin_l4proto));
968         pn->users--;
969         nf_ct_l4proto_unregister_sysctl(pn);
970 }
971
972
973 module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
974                   &nf_conntrack_htable_size, 0600);
975
976 MODULE_ALIAS("ip_conntrack");
977 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
978 MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6));
979 MODULE_LICENSE("GPL");