Merge branch 'for-4.19/cougar' into for-linus
[linux] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <linux/audit.h>
23 #include <linux/uaccess.h>
24 #include <linux/ktime.h>
25 #include <linux/slab.h>
26 #include <linux/interrupt.h>
27 #include <linux/kernel.h>
28
29 #include "xfrm_hash.h"
30
/*
 * Update-side dereference of one of the per-netns state hash table
 * pointers. The lockdep expression states that the caller is expected to
 * hold net->xfrm.xfrm_state_lock.
 */
31 #define xfrm_state_deref_prot(table, net) \
32         rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock))
33
/* Handler for xfrm_state_gc_work; the definition appears later in this file. */
34 static void xfrm_state_gc_task(struct work_struct *work);
35
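/* Each xfrm_state may be linked into three hash tables:
 *
 *  1. byspi: hashed by (daddr, spi, proto [ah/esp], family) to find an SA
 *     by SPI. (input, ctl)  Only states with a non-zero SPI are linked here.
 *  2. bydst: hashed by (daddr, saddr, reqid, family) to find what SAs exist
 *     for a given destination/tunnel endpoint. (output)
 *  3. bysrc: hashed by (daddr, saddr, family) to find an SA by its
 *     address pair.
 */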
42
/*
 * Upper bound on the number of hash buckets: xfrm_hash_grow_check() only
 * schedules a resize while state_hmask + 1 is below this value.
 */
43 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Sequence counter written around the table swap in xfrm_hash_resize(). */
44 static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation);
/* Slab cache that xfrm_state objects are allocated from and returned to. */
45 static struct kmem_cache *xfrm_state_cache __ro_after_init;
46
/*
 * Deferred destruction: __xfrm_state_destroy() queues a state on
 * xfrm_state_gc_list and schedules xfrm_state_gc_work, whose handler
 * xfrm_state_gc_task() frees everything queued so far.
 */
47 static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task);
48 static HLIST_HEAD(xfrm_state_gc_list);
49
50 static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x)
51 {
52         return refcount_inc_not_zero(&x->refcnt);
53 }
54
55 static inline unsigned int xfrm_dst_hash(struct net *net,
56                                          const xfrm_address_t *daddr,
57                                          const xfrm_address_t *saddr,
58                                          u32 reqid,
59                                          unsigned short family)
60 {
61         return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask);
62 }
63
64 static inline unsigned int xfrm_src_hash(struct net *net,
65                                          const xfrm_address_t *daddr,
66                                          const xfrm_address_t *saddr,
67                                          unsigned short family)
68 {
69         return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask);
70 }
71
72 static inline unsigned int
73 xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr,
74               __be32 spi, u8 proto, unsigned short family)
75 {
76         return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask);
77 }
78
79 static void xfrm_hash_transfer(struct hlist_head *list,
80                                struct hlist_head *ndsttable,
81                                struct hlist_head *nsrctable,
82                                struct hlist_head *nspitable,
83                                unsigned int nhashmask)
84 {
85         struct hlist_node *tmp;
86         struct xfrm_state *x;
87
88         hlist_for_each_entry_safe(x, tmp, list, bydst) {
89                 unsigned int h;
90
91                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
92                                     x->props.reqid, x->props.family,
93                                     nhashmask);
94                 hlist_add_head_rcu(&x->bydst, ndsttable + h);
95
96                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
97                                     x->props.family,
98                                     nhashmask);
99                 hlist_add_head_rcu(&x->bysrc, nsrctable + h);
100
101                 if (x->id.spi) {
102                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
103                                             x->id.proto, x->props.family,
104                                             nhashmask);
105                         hlist_add_head_rcu(&x->byspi, nspitable + h);
106                 }
107         }
108 }
109
110 static unsigned long xfrm_hash_new_size(unsigned int state_hmask)
111 {
112         return ((state_hmask + 1) << 1) * sizeof(struct hlist_head);
113 }
114
115 static void xfrm_hash_resize(struct work_struct *work)
116 {
117         struct net *net = container_of(work, struct net, xfrm.state_hash_work);
118         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
119         unsigned long nsize, osize;
120         unsigned int nhashmask, ohashmask;
121         int i;
122
123         nsize = xfrm_hash_new_size(net->xfrm.state_hmask);
124         ndst = xfrm_hash_alloc(nsize);
125         if (!ndst)
126                 return;
127         nsrc = xfrm_hash_alloc(nsize);
128         if (!nsrc) {
129                 xfrm_hash_free(ndst, nsize);
130                 return;
131         }
132         nspi = xfrm_hash_alloc(nsize);
133         if (!nspi) {
134                 xfrm_hash_free(ndst, nsize);
135                 xfrm_hash_free(nsrc, nsize);
136                 return;
137         }
138
139         spin_lock_bh(&net->xfrm.xfrm_state_lock);
140         write_seqcount_begin(&xfrm_state_hash_generation);
141
142         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
143         odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net);
144         for (i = net->xfrm.state_hmask; i >= 0; i--)
145                 xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask);
146
147         osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net);
148         ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net);
149         ohashmask = net->xfrm.state_hmask;
150
151         rcu_assign_pointer(net->xfrm.state_bydst, ndst);
152         rcu_assign_pointer(net->xfrm.state_bysrc, nsrc);
153         rcu_assign_pointer(net->xfrm.state_byspi, nspi);
154         net->xfrm.state_hmask = nhashmask;
155
156         write_seqcount_end(&xfrm_state_hash_generation);
157         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
158
159         osize = (ohashmask + 1) * sizeof(struct hlist_head);
160
161         synchronize_rcu();
162
163         xfrm_hash_free(odst, osize);
164         xfrm_hash_free(osrc, osize);
165         xfrm_hash_free(ospi, osize);
166 }
167
/*
 * NOTE(review): users of xfrm_state_afinfo_lock are not visible in this
 * part of the file; presumably it serializes updates to the
 * xfrm_state_afinfo[] table below - confirm against the register and
 * unregister helpers.
 */
168 static DEFINE_SPINLOCK(xfrm_state_afinfo_lock);
/* RCU-annotated table of per-family state operations, NPROTO entries. */
169 static struct xfrm_state_afinfo __rcu *xfrm_state_afinfo[NPROTO];
170
/*
 * Protects xfrm_state_gc_list: taken by __xfrm_state_destroy() when
 * queueing a state and by xfrm_state_gc_task() when detaching the list.
 */
171 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
172
/* Declared here because xfrm_timer_handler() calls it before its definition. */
173 int __xfrm_state_delete(struct xfrm_state *x);
174
/* Key manager entry points; not defined in the visible part of this file. */
175 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
176 bool km_is_alive(const struct km_event *c);
177 void km_state_expired(struct xfrm_state *x, int hard, u32 portid);
178
179 static DEFINE_SPINLOCK(xfrm_type_lock);
180 int xfrm_register_type(const struct xfrm_type *type, unsigned short family)
181 {
182         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
183         const struct xfrm_type **typemap;
184         int err = 0;
185
186         if (unlikely(afinfo == NULL))
187                 return -EAFNOSUPPORT;
188         typemap = afinfo->type_map;
189         spin_lock_bh(&xfrm_type_lock);
190
191         if (likely(typemap[type->proto] == NULL))
192                 typemap[type->proto] = type;
193         else
194                 err = -EEXIST;
195         spin_unlock_bh(&xfrm_type_lock);
196         rcu_read_unlock();
197         return err;
198 }
199 EXPORT_SYMBOL(xfrm_register_type);
200
201 int xfrm_unregister_type(const struct xfrm_type *type, unsigned short family)
202 {
203         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
204         const struct xfrm_type **typemap;
205         int err = 0;
206
207         if (unlikely(afinfo == NULL))
208                 return -EAFNOSUPPORT;
209         typemap = afinfo->type_map;
210         spin_lock_bh(&xfrm_type_lock);
211
212         if (unlikely(typemap[type->proto] != type))
213                 err = -ENOENT;
214         else
215                 typemap[type->proto] = NULL;
216         spin_unlock_bh(&xfrm_type_lock);
217         rcu_read_unlock();
218         return err;
219 }
220 EXPORT_SYMBOL(xfrm_unregister_type);
221
222 static const struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
223 {
224         struct xfrm_state_afinfo *afinfo;
225         const struct xfrm_type **typemap;
226         const struct xfrm_type *type;
227         int modload_attempted = 0;
228
229 retry:
230         afinfo = xfrm_state_get_afinfo(family);
231         if (unlikely(afinfo == NULL))
232                 return NULL;
233         typemap = afinfo->type_map;
234
235         type = READ_ONCE(typemap[proto]);
236         if (unlikely(type && !try_module_get(type->owner)))
237                 type = NULL;
238
239         rcu_read_unlock();
240
241         if (!type && !modload_attempted) {
242                 request_module("xfrm-type-%d-%d", family, proto);
243                 modload_attempted = 1;
244                 goto retry;
245         }
246
247         return type;
248 }
249
250 static void xfrm_put_type(const struct xfrm_type *type)
251 {
252         module_put(type->owner);
253 }
254
255 static DEFINE_SPINLOCK(xfrm_type_offload_lock);
256 int xfrm_register_type_offload(const struct xfrm_type_offload *type,
257                                unsigned short family)
258 {
259         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
260         const struct xfrm_type_offload **typemap;
261         int err = 0;
262
263         if (unlikely(afinfo == NULL))
264                 return -EAFNOSUPPORT;
265         typemap = afinfo->type_offload_map;
266         spin_lock_bh(&xfrm_type_offload_lock);
267
268         if (likely(typemap[type->proto] == NULL))
269                 typemap[type->proto] = type;
270         else
271                 err = -EEXIST;
272         spin_unlock_bh(&xfrm_type_offload_lock);
273         rcu_read_unlock();
274         return err;
275 }
276 EXPORT_SYMBOL(xfrm_register_type_offload);
277
278 int xfrm_unregister_type_offload(const struct xfrm_type_offload *type,
279                                  unsigned short family)
280 {
281         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
282         const struct xfrm_type_offload **typemap;
283         int err = 0;
284
285         if (unlikely(afinfo == NULL))
286                 return -EAFNOSUPPORT;
287         typemap = afinfo->type_offload_map;
288         spin_lock_bh(&xfrm_type_offload_lock);
289
290         if (unlikely(typemap[type->proto] != type))
291                 err = -ENOENT;
292         else
293                 typemap[type->proto] = NULL;
294         spin_unlock_bh(&xfrm_type_offload_lock);
295         rcu_read_unlock();
296         return err;
297 }
298 EXPORT_SYMBOL(xfrm_unregister_type_offload);
299
300 static const struct xfrm_type_offload *
301 xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load)
302 {
303         struct xfrm_state_afinfo *afinfo;
304         const struct xfrm_type_offload **typemap;
305         const struct xfrm_type_offload *type;
306
307 retry:
308         afinfo = xfrm_state_get_afinfo(family);
309         if (unlikely(afinfo == NULL))
310                 return NULL;
311         typemap = afinfo->type_offload_map;
312
313         type = typemap[proto];
314         if ((type && !try_module_get(type->owner)))
315                 type = NULL;
316
317         rcu_read_unlock();
318
319         if (!type && try_load) {
320                 request_module("xfrm-offload-%d-%d", family, proto);
321                 try_load = false;
322                 goto retry;
323         }
324
325         return type;
326 }
327
328 static void xfrm_put_type_offload(const struct xfrm_type_offload *type)
329 {
330         module_put(type->owner);
331 }
332
333 static DEFINE_SPINLOCK(xfrm_mode_lock);
334 int xfrm_register_mode(struct xfrm_mode *mode, int family)
335 {
336         struct xfrm_state_afinfo *afinfo;
337         struct xfrm_mode **modemap;
338         int err;
339
340         if (unlikely(mode->encap >= XFRM_MODE_MAX))
341                 return -EINVAL;
342
343         afinfo = xfrm_state_get_afinfo(family);
344         if (unlikely(afinfo == NULL))
345                 return -EAFNOSUPPORT;
346
347         err = -EEXIST;
348         modemap = afinfo->mode_map;
349         spin_lock_bh(&xfrm_mode_lock);
350         if (modemap[mode->encap])
351                 goto out;
352
353         err = -ENOENT;
354         if (!try_module_get(afinfo->owner))
355                 goto out;
356
357         mode->afinfo = afinfo;
358         modemap[mode->encap] = mode;
359         err = 0;
360
361 out:
362         spin_unlock_bh(&xfrm_mode_lock);
363         rcu_read_unlock();
364         return err;
365 }
366 EXPORT_SYMBOL(xfrm_register_mode);
367
368 int xfrm_unregister_mode(struct xfrm_mode *mode, int family)
369 {
370         struct xfrm_state_afinfo *afinfo;
371         struct xfrm_mode **modemap;
372         int err;
373
374         if (unlikely(mode->encap >= XFRM_MODE_MAX))
375                 return -EINVAL;
376
377         afinfo = xfrm_state_get_afinfo(family);
378         if (unlikely(afinfo == NULL))
379                 return -EAFNOSUPPORT;
380
381         err = -ENOENT;
382         modemap = afinfo->mode_map;
383         spin_lock_bh(&xfrm_mode_lock);
384         if (likely(modemap[mode->encap] == mode)) {
385                 modemap[mode->encap] = NULL;
386                 module_put(mode->afinfo->owner);
387                 err = 0;
388         }
389
390         spin_unlock_bh(&xfrm_mode_lock);
391         rcu_read_unlock();
392         return err;
393 }
394 EXPORT_SYMBOL(xfrm_unregister_mode);
395
396 static struct xfrm_mode *xfrm_get_mode(unsigned int encap, int family)
397 {
398         struct xfrm_state_afinfo *afinfo;
399         struct xfrm_mode *mode;
400         int modload_attempted = 0;
401
402         if (unlikely(encap >= XFRM_MODE_MAX))
403                 return NULL;
404
405 retry:
406         afinfo = xfrm_state_get_afinfo(family);
407         if (unlikely(afinfo == NULL))
408                 return NULL;
409
410         mode = READ_ONCE(afinfo->mode_map[encap]);
411         if (unlikely(mode && !try_module_get(mode->owner)))
412                 mode = NULL;
413
414         rcu_read_unlock();
415         if (!mode && !modload_attempted) {
416                 request_module("xfrm-mode-%d-%d", family, encap);
417                 modload_attempted = 1;
418                 goto retry;
419         }
420
421         return mode;
422 }
423
424 static void xfrm_put_mode(struct xfrm_mode *mode)
425 {
426         module_put(mode->owner);
427 }
428
429 static void xfrm_state_gc_destroy(struct xfrm_state *x)
430 {
431         tasklet_hrtimer_cancel(&x->mtimer);
432         del_timer_sync(&x->rtimer);
433         kfree(x->aead);
434         kfree(x->aalg);
435         kfree(x->ealg);
436         kfree(x->calg);
437         kfree(x->encap);
438         kfree(x->coaddr);
439         kfree(x->replay_esn);
440         kfree(x->preplay_esn);
441         if (x->inner_mode)
442                 xfrm_put_mode(x->inner_mode);
443         if (x->inner_mode_iaf)
444                 xfrm_put_mode(x->inner_mode_iaf);
445         if (x->outer_mode)
446                 xfrm_put_mode(x->outer_mode);
447         if (x->type_offload)
448                 xfrm_put_type_offload(x->type_offload);
449         if (x->type) {
450                 x->type->destructor(x);
451                 xfrm_put_type(x->type);
452         }
453         xfrm_dev_state_free(x);
454         security_xfrm_state_free(x);
455         kmem_cache_free(xfrm_state_cache, x);
456 }
457
458 static void xfrm_state_gc_task(struct work_struct *work)
459 {
460         struct xfrm_state *x;
461         struct hlist_node *tmp;
462         struct hlist_head gc_list;
463
464         spin_lock_bh(&xfrm_state_gc_lock);
465         hlist_move_list(&xfrm_state_gc_list, &gc_list);
466         spin_unlock_bh(&xfrm_state_gc_lock);
467
468         synchronize_rcu();
469
470         hlist_for_each_entry_safe(x, tmp, &gc_list, gclist)
471                 xfrm_state_gc_destroy(x);
472 }
473
474 static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
475 {
476         struct tasklet_hrtimer *thr = container_of(me, struct tasklet_hrtimer, timer);
477         struct xfrm_state *x = container_of(thr, struct xfrm_state, mtimer);
478         unsigned long now = get_seconds();
479         long next = LONG_MAX;
480         int warn = 0;
481         int err = 0;
482
483         spin_lock(&x->lock);
484         if (x->km.state == XFRM_STATE_DEAD)
485                 goto out;
486         if (x->km.state == XFRM_STATE_EXPIRED)
487                 goto expired;
488         if (x->lft.hard_add_expires_seconds) {
489                 long tmo = x->lft.hard_add_expires_seconds +
490                         x->curlft.add_time - now;
491                 if (tmo <= 0) {
492                         if (x->xflags & XFRM_SOFT_EXPIRE) {
493                                 /* enter hard expire without soft expire first?!
494                                  * setting a new date could trigger this.
495                                  * workaround: fix x->curflt.add_time by below:
496                                  */
497                                 x->curlft.add_time = now - x->saved_tmo - 1;
498                                 tmo = x->lft.hard_add_expires_seconds - x->saved_tmo;
499                         } else
500                                 goto expired;
501                 }
502                 if (tmo < next)
503                         next = tmo;
504         }
505         if (x->lft.hard_use_expires_seconds) {
506                 long tmo = x->lft.hard_use_expires_seconds +
507                         (x->curlft.use_time ? : now) - now;
508                 if (tmo <= 0)
509                         goto expired;
510                 if (tmo < next)
511                         next = tmo;
512         }
513         if (x->km.dying)
514                 goto resched;
515         if (x->lft.soft_add_expires_seconds) {
516                 long tmo = x->lft.soft_add_expires_seconds +
517                         x->curlft.add_time - now;
518                 if (tmo <= 0) {
519                         warn = 1;
520                         x->xflags &= ~XFRM_SOFT_EXPIRE;
521                 } else if (tmo < next) {
522                         next = tmo;
523                         x->xflags |= XFRM_SOFT_EXPIRE;
524                         x->saved_tmo = tmo;
525                 }
526         }
527         if (x->lft.soft_use_expires_seconds) {
528                 long tmo = x->lft.soft_use_expires_seconds +
529                         (x->curlft.use_time ? : now) - now;
530                 if (tmo <= 0)
531                         warn = 1;
532                 else if (tmo < next)
533                         next = tmo;
534         }
535
536         x->km.dying = warn;
537         if (warn)
538                 km_state_expired(x, 0, 0);
539 resched:
540         if (next != LONG_MAX) {
541                 tasklet_hrtimer_start(&x->mtimer, ktime_set(next, 0), HRTIMER_MODE_REL);
542         }
543
544         goto out;
545
546 expired:
547         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0)
548                 x->km.state = XFRM_STATE_EXPIRED;
549
550         err = __xfrm_state_delete(x);
551         if (!err)
552                 km_state_expired(x, 1, 0);
553
554         xfrm_audit_state_delete(x, err ? 0 : 1, true);
555
556 out:
557         spin_unlock(&x->lock);
558         return HRTIMER_NORESTART;
559 }
560
561 static void xfrm_replay_timer_handler(struct timer_list *t);
562
563 struct xfrm_state *xfrm_state_alloc(struct net *net)
564 {
565         struct xfrm_state *x;
566
567         x = kmem_cache_alloc(xfrm_state_cache, GFP_ATOMIC | __GFP_ZERO);
568
569         if (x) {
570                 write_pnet(&x->xs_net, net);
571                 refcount_set(&x->refcnt, 1);
572                 atomic_set(&x->tunnel_users, 0);
573                 INIT_LIST_HEAD(&x->km.all);
574                 INIT_HLIST_NODE(&x->bydst);
575                 INIT_HLIST_NODE(&x->bysrc);
576                 INIT_HLIST_NODE(&x->byspi);
577                 tasklet_hrtimer_init(&x->mtimer, xfrm_timer_handler,
578                                         CLOCK_BOOTTIME, HRTIMER_MODE_ABS);
579                 timer_setup(&x->rtimer, xfrm_replay_timer_handler, 0);
580                 x->curlft.add_time = get_seconds();
581                 x->lft.soft_byte_limit = XFRM_INF;
582                 x->lft.soft_packet_limit = XFRM_INF;
583                 x->lft.hard_byte_limit = XFRM_INF;
584                 x->lft.hard_packet_limit = XFRM_INF;
585                 x->replay_maxage = 0;
586                 x->replay_maxdiff = 0;
587                 x->inner_mode = NULL;
588                 x->inner_mode_iaf = NULL;
589                 spin_lock_init(&x->lock);
590         }
591         return x;
592 }
593 EXPORT_SYMBOL(xfrm_state_alloc);
594
595 void __xfrm_state_destroy(struct xfrm_state *x)
596 {
597         WARN_ON(x->km.state != XFRM_STATE_DEAD);
598
599         spin_lock_bh(&xfrm_state_gc_lock);
600         hlist_add_head(&x->gclist, &xfrm_state_gc_list);
601         spin_unlock_bh(&xfrm_state_gc_lock);
602         schedule_work(&xfrm_state_gc_work);
603 }
604 EXPORT_SYMBOL(__xfrm_state_destroy);
605
606 int __xfrm_state_delete(struct xfrm_state *x)
607 {
608         struct net *net = xs_net(x);
609         int err = -ESRCH;
610
611         if (x->km.state != XFRM_STATE_DEAD) {
612                 x->km.state = XFRM_STATE_DEAD;
613                 spin_lock(&net->xfrm.xfrm_state_lock);
614                 list_del(&x->km.all);
615                 hlist_del_rcu(&x->bydst);
616                 hlist_del_rcu(&x->bysrc);
617                 if (x->id.spi)
618                         hlist_del_rcu(&x->byspi);
619                 net->xfrm.state_num--;
620                 spin_unlock(&net->xfrm.xfrm_state_lock);
621
622                 xfrm_dev_state_delete(x);
623
624                 /* All xfrm_state objects are created by xfrm_state_alloc.
625                  * The xfrm_state_alloc call gives a reference, and that
626                  * is what we are dropping here.
627                  */
628                 xfrm_state_put(x);
629                 err = 0;
630         }
631
632         return err;
633 }
634 EXPORT_SYMBOL(__xfrm_state_delete);
635
636 int xfrm_state_delete(struct xfrm_state *x)
637 {
638         int err;
639
640         spin_lock_bh(&x->lock);
641         err = __xfrm_state_delete(x);
642         spin_unlock_bh(&x->lock);
643
644         return err;
645 }
646 EXPORT_SYMBOL(xfrm_state_delete);
647
648 #ifdef CONFIG_SECURITY_NETWORK_XFRM
649 static inline int
650 xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
651 {
652         int i, err = 0;
653
654         for (i = 0; i <= net->xfrm.state_hmask; i++) {
655                 struct xfrm_state *x;
656
657                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
658                         if (xfrm_id_proto_match(x->id.proto, proto) &&
659                            (err = security_xfrm_state_delete(x)) != 0) {
660                                 xfrm_audit_state_delete(x, 0, task_valid);
661                                 return err;
662                         }
663                 }
664         }
665
666         return err;
667 }
668
669 static inline int
670 xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
671 {
672         int i, err = 0;
673
674         for (i = 0; i <= net->xfrm.state_hmask; i++) {
675                 struct xfrm_state *x;
676                 struct xfrm_state_offload *xso;
677
678                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
679                         xso = &x->xso;
680
681                         if (xso->dev == dev &&
682                            (err = security_xfrm_state_delete(x)) != 0) {
683                                 xfrm_audit_state_delete(x, 0, task_valid);
684                                 return err;
685                         }
686                 }
687         }
688
689         return err;
690 }
691 #else
692 static inline int
693 xfrm_state_flush_secctx_check(struct net *net, u8 proto, bool task_valid)
694 {
695         return 0;
696 }
697
698 static inline int
699 xfrm_dev_state_flush_secctx_check(struct net *net, struct net_device *dev, bool task_valid)
700 {
701         return 0;
702 }
703 #endif
704
705 int xfrm_state_flush(struct net *net, u8 proto, bool task_valid)
706 {
707         int i, err = 0, cnt = 0;
708
709         spin_lock_bh(&net->xfrm.xfrm_state_lock);
710         err = xfrm_state_flush_secctx_check(net, proto, task_valid);
711         if (err)
712                 goto out;
713
714         err = -ESRCH;
715         for (i = 0; i <= net->xfrm.state_hmask; i++) {
716                 struct xfrm_state *x;
717 restart:
718                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
719                         if (!xfrm_state_kern(x) &&
720                             xfrm_id_proto_match(x->id.proto, proto)) {
721                                 xfrm_state_hold(x);
722                                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
723
724                                 err = xfrm_state_delete(x);
725                                 xfrm_audit_state_delete(x, err ? 0 : 1,
726                                                         task_valid);
727                                 xfrm_state_put(x);
728                                 if (!err)
729                                         cnt++;
730
731                                 spin_lock_bh(&net->xfrm.xfrm_state_lock);
732                                 goto restart;
733                         }
734                 }
735         }
736 out:
737         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
738         if (cnt) {
739                 err = 0;
740                 xfrm_policy_cache_flush();
741         }
742         return err;
743 }
744 EXPORT_SYMBOL(xfrm_state_flush);
745
746 int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid)
747 {
748         int i, err = 0, cnt = 0;
749
750         spin_lock_bh(&net->xfrm.xfrm_state_lock);
751         err = xfrm_dev_state_flush_secctx_check(net, dev, task_valid);
752         if (err)
753                 goto out;
754
755         err = -ESRCH;
756         for (i = 0; i <= net->xfrm.state_hmask; i++) {
757                 struct xfrm_state *x;
758                 struct xfrm_state_offload *xso;
759 restart:
760                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
761                         xso = &x->xso;
762
763                         if (!xfrm_state_kern(x) && xso->dev == dev) {
764                                 xfrm_state_hold(x);
765                                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
766
767                                 err = xfrm_state_delete(x);
768                                 xfrm_audit_state_delete(x, err ? 0 : 1,
769                                                         task_valid);
770                                 xfrm_state_put(x);
771                                 if (!err)
772                                         cnt++;
773
774                                 spin_lock_bh(&net->xfrm.xfrm_state_lock);
775                                 goto restart;
776                         }
777                 }
778         }
779         if (cnt)
780                 err = 0;
781
782 out:
783         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
784         return err;
785 }
786 EXPORT_SYMBOL(xfrm_dev_state_flush);
787
788 void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si)
789 {
790         spin_lock_bh(&net->xfrm.xfrm_state_lock);
791         si->sadcnt = net->xfrm.state_num;
792         si->sadhcnt = net->xfrm.state_hmask;
793         si->sadhmcnt = xfrm_state_hashmax;
794         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
795 }
796 EXPORT_SYMBOL(xfrm_sad_getinfo);
797
798 static void
799 xfrm_init_tempstate(struct xfrm_state *x, const struct flowi *fl,
800                     const struct xfrm_tmpl *tmpl,
801                     const xfrm_address_t *daddr, const xfrm_address_t *saddr,
802                     unsigned short family)
803 {
804         struct xfrm_state_afinfo *afinfo = xfrm_state_afinfo_get_rcu(family);
805
806         if (!afinfo)
807                 return;
808
809         afinfo->init_tempsel(&x->sel, fl);
810
811         if (family != tmpl->encap_family) {
812                 afinfo = xfrm_state_afinfo_get_rcu(tmpl->encap_family);
813                 if (!afinfo)
814                         return;
815         }
816         afinfo->init_temprop(x, tmpl, daddr, saddr);
817 }
818
819 static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
820                                               const xfrm_address_t *daddr,
821                                               __be32 spi, u8 proto,
822                                               unsigned short family)
823 {
824         unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family);
825         struct xfrm_state *x;
826
827         hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) {
828                 if (x->props.family != family ||
829                     x->id.spi       != spi ||
830                     x->id.proto     != proto ||
831                     !xfrm_addr_equal(&x->id.daddr, daddr, family))
832                         continue;
833
834                 if ((mark & x->mark.m) != x->mark.v)
835                         continue;
836                 if (!xfrm_state_hold_rcu(x))
837                         continue;
838                 return x;
839         }
840
841         return NULL;
842 }
843
844 static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
845                                                      const xfrm_address_t *daddr,
846                                                      const xfrm_address_t *saddr,
847                                                      u8 proto, unsigned short family)
848 {
849         unsigned int h = xfrm_src_hash(net, daddr, saddr, family);
850         struct xfrm_state *x;
851
852         hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) {
853                 if (x->props.family != family ||
854                     x->id.proto     != proto ||
855                     !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
856                     !xfrm_addr_equal(&x->props.saddr, saddr, family))
857                         continue;
858
859                 if ((mark & x->mark.m) != x->mark.v)
860                         continue;
861                 if (!xfrm_state_hold_rcu(x))
862                         continue;
863                 return x;
864         }
865
866         return NULL;
867 }
868
869 static inline struct xfrm_state *
870 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
871 {
872         struct net *net = xs_net(x);
873         u32 mark = x->mark.v & x->mark.m;
874
875         if (use_spi)
876                 return __xfrm_state_lookup(net, mark, &x->id.daddr,
877                                            x->id.spi, x->id.proto, family);
878         else
879                 return __xfrm_state_lookup_byaddr(net, mark,
880                                                   &x->id.daddr,
881                                                   &x->props.saddr,
882                                                   x->id.proto, family);
883 }
884
885 static void xfrm_hash_grow_check(struct net *net, int have_hash_collision)
886 {
887         if (have_hash_collision &&
888             (net->xfrm.state_hmask + 1) < xfrm_state_hashmax &&
889             net->xfrm.state_num > net->xfrm.state_hmask)
890                 schedule_work(&net->xfrm.state_hash_work);
891 }
892
893 static void xfrm_state_look_at(struct xfrm_policy *pol, struct xfrm_state *x,
894                                const struct flowi *fl, unsigned short family,
895                                struct xfrm_state **best, int *acq_in_progress,
896                                int *error)
897 {
898         /* Resolution logic:
899          * 1. There is a valid state with matching selector. Done.
900          * 2. Valid state with inappropriate selector. Skip.
901          *
902          * Entering area of "sysdeps".
903          *
904          * 3. If state is not valid, selector is temporary, it selects
905          *    only session which triggered previous resolution. Key
906          *    manager will do something to install a state with proper
907          *    selector.
908          */
909         if (x->km.state == XFRM_STATE_VALID) {
910                 if ((x->sel.family &&
911                      !xfrm_selector_match(&x->sel, fl, x->sel.family)) ||
912                     !security_xfrm_state_pol_flow_match(x, pol, fl))
913                         return;
914
915                 if (!*best ||
916                     (*best)->km.dying > x->km.dying ||
917                     ((*best)->km.dying == x->km.dying &&
918                      (*best)->curlft.add_time < x->curlft.add_time))
919                         *best = x;
920         } else if (x->km.state == XFRM_STATE_ACQ) {
921                 *acq_in_progress = 1;
922         } else if (x->km.state == XFRM_STATE_ERROR ||
923                    x->km.state == XFRM_STATE_EXPIRED) {
924                 if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
925                     security_xfrm_state_pol_flow_match(x, pol, fl))
926                         *error = -ESRCH;
927         }
928 }
929
930 struct xfrm_state *
931 xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
932                 const struct flowi *fl, struct xfrm_tmpl *tmpl,
933                 struct xfrm_policy *pol, int *err,
934                 unsigned short family)
935 {
936         static xfrm_address_t saddr_wildcard = { };
937         struct net *net = xp_net(pol);
938         unsigned int h, h_wildcard;
939         struct xfrm_state *x, *x0, *to_put;
940         int acquire_in_progress = 0;
941         int error = 0;
942         struct xfrm_state *best = NULL;
943         u32 mark = pol->mark.v & pol->mark.m;
944         unsigned short encap_family = tmpl->encap_family;
945         unsigned int sequence;
946         struct km_event c;
947
948         to_put = NULL;
949
950         sequence = read_seqcount_begin(&xfrm_state_hash_generation);
951
952         rcu_read_lock();
953         h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
954         hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
955                 if (x->props.family == encap_family &&
956                     x->props.reqid == tmpl->reqid &&
957                     (mark & x->mark.m) == x->mark.v &&
958                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
959                     xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
960                     tmpl->mode == x->props.mode &&
961                     tmpl->id.proto == x->id.proto &&
962                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
963                         xfrm_state_look_at(pol, x, fl, encap_family,
964                                            &best, &acquire_in_progress, &error);
965         }
966         if (best || acquire_in_progress)
967                 goto found;
968
969         h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family);
970         hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) {
971                 if (x->props.family == encap_family &&
972                     x->props.reqid == tmpl->reqid &&
973                     (mark & x->mark.m) == x->mark.v &&
974                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
975                     xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
976                     tmpl->mode == x->props.mode &&
977                     tmpl->id.proto == x->id.proto &&
978                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
979                         xfrm_state_look_at(pol, x, fl, encap_family,
980                                            &best, &acquire_in_progress, &error);
981         }
982
983 found:
984         x = best;
985         if (!x && !error && !acquire_in_progress) {
986                 if (tmpl->id.spi &&
987                     (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi,
988                                               tmpl->id.proto, encap_family)) != NULL) {
989                         to_put = x0;
990                         error = -EEXIST;
991                         goto out;
992                 }
993
994                 c.net = net;
995                 /* If the KMs have no listeners (yet...), avoid allocating an SA
996                  * for each and every packet - garbage collection might not
997                  * handle the flood.
998                  */
999                 if (!km_is_alive(&c)) {
1000                         error = -ESRCH;
1001                         goto out;
1002                 }
1003
1004                 x = xfrm_state_alloc(net);
1005                 if (x == NULL) {
1006                         error = -ENOMEM;
1007                         goto out;
1008                 }
1009                 /* Initialize temporary state matching only
1010                  * to current session. */
1011                 xfrm_init_tempstate(x, fl, tmpl, daddr, saddr, family);
1012                 memcpy(&x->mark, &pol->mark, sizeof(x->mark));
1013
1014                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->flowi_secid);
1015                 if (error) {
1016                         x->km.state = XFRM_STATE_DEAD;
1017                         to_put = x;
1018                         x = NULL;
1019                         goto out;
1020                 }
1021
1022                 if (km_query(x, tmpl, pol) == 0) {
1023                         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1024                         x->km.state = XFRM_STATE_ACQ;
1025                         list_add(&x->km.all, &net->xfrm.state_all);
1026                         hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1027                         h = xfrm_src_hash(net, daddr, saddr, encap_family);
1028                         hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1029                         if (x->id.spi) {
1030                                 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
1031                                 hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1032                         }
1033                         x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1034                         tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
1035                         net->xfrm.state_num++;
1036                         xfrm_hash_grow_check(net, x->bydst.next != NULL);
1037                         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1038                 } else {
1039                         x->km.state = XFRM_STATE_DEAD;
1040                         to_put = x;
1041                         x = NULL;
1042                         error = -ESRCH;
1043                 }
1044         }
1045 out:
1046         if (x) {
1047                 if (!xfrm_state_hold_rcu(x)) {
1048                         *err = -EAGAIN;
1049                         x = NULL;
1050                 }
1051         } else {
1052                 *err = acquire_in_progress ? -EAGAIN : error;
1053         }
1054         rcu_read_unlock();
1055         if (to_put)
1056                 xfrm_state_put(to_put);
1057
1058         if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) {
1059                 *err = -EAGAIN;
1060                 if (x) {
1061                         xfrm_state_put(x);
1062                         x = NULL;
1063                 }
1064         }
1065
1066         return x;
1067 }
1068
1069 struct xfrm_state *
1070 xfrm_stateonly_find(struct net *net, u32 mark,
1071                     xfrm_address_t *daddr, xfrm_address_t *saddr,
1072                     unsigned short family, u8 mode, u8 proto, u32 reqid)
1073 {
1074         unsigned int h;
1075         struct xfrm_state *rx = NULL, *x = NULL;
1076
1077         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1078         h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1079         hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1080                 if (x->props.family == family &&
1081                     x->props.reqid == reqid &&
1082                     (mark & x->mark.m) == x->mark.v &&
1083                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
1084                     xfrm_state_addr_check(x, daddr, saddr, family) &&
1085                     mode == x->props.mode &&
1086                     proto == x->id.proto &&
1087                     x->km.state == XFRM_STATE_VALID) {
1088                         rx = x;
1089                         break;
1090                 }
1091         }
1092
1093         if (rx)
1094                 xfrm_state_hold(rx);
1095         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1096
1097
1098         return rx;
1099 }
1100 EXPORT_SYMBOL(xfrm_stateonly_find);
1101
1102 struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
1103                                               unsigned short family)
1104 {
1105         struct xfrm_state *x;
1106         struct xfrm_state_walk *w;
1107
1108         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1109         list_for_each_entry(w, &net->xfrm.state_all, all) {
1110                 x = container_of(w, struct xfrm_state, km);
1111                 if (x->props.family != family ||
1112                         x->id.spi != spi)
1113                         continue;
1114
1115                 xfrm_state_hold(x);
1116                 spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1117                 return x;
1118         }
1119         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1120         return NULL;
1121 }
1122 EXPORT_SYMBOL(xfrm_state_lookup_byspi);
1123
1124 static void __xfrm_state_insert(struct xfrm_state *x)
1125 {
1126         struct net *net = xs_net(x);
1127         unsigned int h;
1128
1129         list_add(&x->km.all, &net->xfrm.state_all);
1130
1131         h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
1132                           x->props.reqid, x->props.family);
1133         hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1134
1135         h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family);
1136         hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1137
1138         if (x->id.spi) {
1139                 h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto,
1140                                   x->props.family);
1141
1142                 hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
1143         }
1144
1145         tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
1146         if (x->replay_maxage)
1147                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
1148
1149         net->xfrm.state_num++;
1150
1151         xfrm_hash_grow_check(net, x->bydst.next != NULL);
1152 }
1153
1154 /* net->xfrm.xfrm_state_lock is held */
1155 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
1156 {
1157         struct net *net = xs_net(xnew);
1158         unsigned short family = xnew->props.family;
1159         u32 reqid = xnew->props.reqid;
1160         struct xfrm_state *x;
1161         unsigned int h;
1162         u32 mark = xnew->mark.v & xnew->mark.m;
1163
1164         h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family);
1165         hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1166                 if (x->props.family     == family &&
1167                     x->props.reqid      == reqid &&
1168                     (mark & x->mark.m) == x->mark.v &&
1169                     xfrm_addr_equal(&x->id.daddr, &xnew->id.daddr, family) &&
1170                     xfrm_addr_equal(&x->props.saddr, &xnew->props.saddr, family))
1171                         x->genid++;
1172         }
1173 }
1174
1175 void xfrm_state_insert(struct xfrm_state *x)
1176 {
1177         struct net *net = xs_net(x);
1178
1179         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1180         __xfrm_state_bump_genids(x);
1181         __xfrm_state_insert(x);
1182         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1183 }
1184 EXPORT_SYMBOL(xfrm_state_insert);
1185
1186 /* net->xfrm.xfrm_state_lock is held */
1187 static struct xfrm_state *__find_acq_core(struct net *net,
1188                                           const struct xfrm_mark *m,
1189                                           unsigned short family, u8 mode,
1190                                           u32 reqid, u8 proto,
1191                                           const xfrm_address_t *daddr,
1192                                           const xfrm_address_t *saddr,
1193                                           int create)
1194 {
1195         unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family);
1196         struct xfrm_state *x;
1197         u32 mark = m->v & m->m;
1198
1199         hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1200                 if (x->props.reqid  != reqid ||
1201                     x->props.mode   != mode ||
1202                     x->props.family != family ||
1203                     x->km.state     != XFRM_STATE_ACQ ||
1204                     x->id.spi       != 0 ||
1205                     x->id.proto     != proto ||
1206                     (mark & x->mark.m) != x->mark.v ||
1207                     !xfrm_addr_equal(&x->id.daddr, daddr, family) ||
1208                     !xfrm_addr_equal(&x->props.saddr, saddr, family))
1209                         continue;
1210
1211                 xfrm_state_hold(x);
1212                 return x;
1213         }
1214
1215         if (!create)
1216                 return NULL;
1217
1218         x = xfrm_state_alloc(net);
1219         if (likely(x)) {
1220                 switch (family) {
1221                 case AF_INET:
1222                         x->sel.daddr.a4 = daddr->a4;
1223                         x->sel.saddr.a4 = saddr->a4;
1224                         x->sel.prefixlen_d = 32;
1225                         x->sel.prefixlen_s = 32;
1226                         x->props.saddr.a4 = saddr->a4;
1227                         x->id.daddr.a4 = daddr->a4;
1228                         break;
1229
1230                 case AF_INET6:
1231                         x->sel.daddr.in6 = daddr->in6;
1232                         x->sel.saddr.in6 = saddr->in6;
1233                         x->sel.prefixlen_d = 128;
1234                         x->sel.prefixlen_s = 128;
1235                         x->props.saddr.in6 = saddr->in6;
1236                         x->id.daddr.in6 = daddr->in6;
1237                         break;
1238                 }
1239
1240                 x->km.state = XFRM_STATE_ACQ;
1241                 x->id.proto = proto;
1242                 x->props.family = family;
1243                 x->props.mode = mode;
1244                 x->props.reqid = reqid;
1245                 x->mark.v = m->v;
1246                 x->mark.m = m->m;
1247                 x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires;
1248                 xfrm_state_hold(x);
1249                 tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL);
1250                 list_add(&x->km.all, &net->xfrm.state_all);
1251                 hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h);
1252                 h = xfrm_src_hash(net, daddr, saddr, family);
1253                 hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h);
1254
1255                 net->xfrm.state_num++;
1256
1257                 xfrm_hash_grow_check(net, x->bydst.next != NULL);
1258         }
1259
1260         return x;
1261 }
1262
1263 static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
1264
1265 int xfrm_state_add(struct xfrm_state *x)
1266 {
1267         struct net *net = xs_net(x);
1268         struct xfrm_state *x1, *to_put;
1269         int family;
1270         int err;
1271         u32 mark = x->mark.v & x->mark.m;
1272         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1273
1274         family = x->props.family;
1275
1276         to_put = NULL;
1277
1278         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1279
1280         x1 = __xfrm_state_locate(x, use_spi, family);
1281         if (x1) {
1282                 to_put = x1;
1283                 x1 = NULL;
1284                 err = -EEXIST;
1285                 goto out;
1286         }
1287
1288         if (use_spi && x->km.seq) {
1289                 x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq);
1290                 if (x1 && ((x1->id.proto != x->id.proto) ||
1291                     !xfrm_addr_equal(&x1->id.daddr, &x->id.daddr, family))) {
1292                         to_put = x1;
1293                         x1 = NULL;
1294                 }
1295         }
1296
1297         if (use_spi && !x1)
1298                 x1 = __find_acq_core(net, &x->mark, family, x->props.mode,
1299                                      x->props.reqid, x->id.proto,
1300                                      &x->id.daddr, &x->props.saddr, 0);
1301
1302         __xfrm_state_bump_genids(x);
1303         __xfrm_state_insert(x);
1304         err = 0;
1305
1306 out:
1307         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1308
1309         if (x1) {
1310                 xfrm_state_delete(x1);
1311                 xfrm_state_put(x1);
1312         }
1313
1314         if (to_put)
1315                 xfrm_state_put(to_put);
1316
1317         return err;
1318 }
1319 EXPORT_SYMBOL(xfrm_state_add);
1320
1321 #ifdef CONFIG_XFRM_MIGRATE
1322 static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig,
1323                                            struct xfrm_encap_tmpl *encap)
1324 {
1325         struct net *net = xs_net(orig);
1326         struct xfrm_state *x = xfrm_state_alloc(net);
1327         if (!x)
1328                 goto out;
1329
1330         memcpy(&x->id, &orig->id, sizeof(x->id));
1331         memcpy(&x->sel, &orig->sel, sizeof(x->sel));
1332         memcpy(&x->lft, &orig->lft, sizeof(x->lft));
1333         x->props.mode = orig->props.mode;
1334         x->props.replay_window = orig->props.replay_window;
1335         x->props.reqid = orig->props.reqid;
1336         x->props.family = orig->props.family;
1337         x->props.saddr = orig->props.saddr;
1338
1339         if (orig->aalg) {
1340                 x->aalg = xfrm_algo_auth_clone(orig->aalg);
1341                 if (!x->aalg)
1342                         goto error;
1343         }
1344         x->props.aalgo = orig->props.aalgo;
1345
1346         if (orig->aead) {
1347                 x->aead = xfrm_algo_aead_clone(orig->aead);
1348                 x->geniv = orig->geniv;
1349                 if (!x->aead)
1350                         goto error;
1351         }
1352         if (orig->ealg) {
1353                 x->ealg = xfrm_algo_clone(orig->ealg);
1354                 if (!x->ealg)
1355                         goto error;
1356         }
1357         x->props.ealgo = orig->props.ealgo;
1358
1359         if (orig->calg) {
1360                 x->calg = xfrm_algo_clone(orig->calg);
1361                 if (!x->calg)
1362                         goto error;
1363         }
1364         x->props.calgo = orig->props.calgo;
1365
1366         if (encap || orig->encap) {
1367                 if (encap)
1368                         x->encap = kmemdup(encap, sizeof(*x->encap),
1369                                         GFP_KERNEL);
1370                 else
1371                         x->encap = kmemdup(orig->encap, sizeof(*x->encap),
1372                                         GFP_KERNEL);
1373
1374                 if (!x->encap)
1375                         goto error;
1376         }
1377
1378         if (orig->coaddr) {
1379                 x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
1380                                     GFP_KERNEL);
1381                 if (!x->coaddr)
1382                         goto error;
1383         }
1384
1385         if (orig->replay_esn) {
1386                 if (xfrm_replay_clone(x, orig))
1387                         goto error;
1388         }
1389
1390         memcpy(&x->mark, &orig->mark, sizeof(x->mark));
1391
1392         if (xfrm_init_state(x) < 0)
1393                 goto error;
1394
1395         x->props.flags = orig->props.flags;
1396         x->props.extra_flags = orig->props.extra_flags;
1397
1398         x->tfcpad = orig->tfcpad;
1399         x->replay_maxdiff = orig->replay_maxdiff;
1400         x->replay_maxage = orig->replay_maxage;
1401         x->curlft.add_time = orig->curlft.add_time;
1402         x->km.state = orig->km.state;
1403         x->km.seq = orig->km.seq;
1404         x->replay = orig->replay;
1405         x->preplay = orig->preplay;
1406
1407         return x;
1408
1409  error:
1410         xfrm_state_put(x);
1411 out:
1412         return NULL;
1413 }
1414
1415 struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net)
1416 {
1417         unsigned int h;
1418         struct xfrm_state *x = NULL;
1419
1420         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1421
1422         if (m->reqid) {
1423                 h = xfrm_dst_hash(net, &m->old_daddr, &m->old_saddr,
1424                                   m->reqid, m->old_family);
1425                 hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) {
1426                         if (x->props.mode != m->mode ||
1427                             x->id.proto != m->proto)
1428                                 continue;
1429                         if (m->reqid && x->props.reqid != m->reqid)
1430                                 continue;
1431                         if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1432                                              m->old_family) ||
1433                             !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1434                                              m->old_family))
1435                                 continue;
1436                         xfrm_state_hold(x);
1437                         break;
1438                 }
1439         } else {
1440                 h = xfrm_src_hash(net, &m->old_daddr, &m->old_saddr,
1441                                   m->old_family);
1442                 hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) {
1443                         if (x->props.mode != m->mode ||
1444                             x->id.proto != m->proto)
1445                                 continue;
1446                         if (!xfrm_addr_equal(&x->id.daddr, &m->old_daddr,
1447                                              m->old_family) ||
1448                             !xfrm_addr_equal(&x->props.saddr, &m->old_saddr,
1449                                              m->old_family))
1450                                 continue;
1451                         xfrm_state_hold(x);
1452                         break;
1453                 }
1454         }
1455
1456         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1457
1458         return x;
1459 }
1460 EXPORT_SYMBOL(xfrm_migrate_state_find);
1461
1462 struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
1463                                       struct xfrm_migrate *m,
1464                                       struct xfrm_encap_tmpl *encap)
1465 {
1466         struct xfrm_state *xc;
1467
1468         xc = xfrm_state_clone(x, encap);
1469         if (!xc)
1470                 return NULL;
1471
1472         memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1473         memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1474
1475         /* add state */
1476         if (xfrm_addr_equal(&x->id.daddr, &m->new_daddr, m->new_family)) {
1477                 /* a care is needed when the destination address of the
1478                    state is to be updated as it is a part of triplet */
1479                 xfrm_state_insert(xc);
1480         } else {
1481                 if (xfrm_state_add(xc) < 0)
1482                         goto error;
1483         }
1484
1485         return xc;
1486 error:
1487         xfrm_state_put(xc);
1488         return NULL;
1489 }
1490 EXPORT_SYMBOL(xfrm_state_migrate);
1491 #endif
1492
1493 int xfrm_state_update(struct xfrm_state *x)
1494 {
1495         struct xfrm_state *x1, *to_put;
1496         int err;
1497         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1498         struct net *net = xs_net(x);
1499
1500         to_put = NULL;
1501
1502         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1503         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
1504
1505         err = -ESRCH;
1506         if (!x1)
1507                 goto out;
1508
1509         if (xfrm_state_kern(x1)) {
1510                 to_put = x1;
1511                 err = -EEXIST;
1512                 goto out;
1513         }
1514
1515         if (x1->km.state == XFRM_STATE_ACQ) {
1516                 __xfrm_state_insert(x);
1517                 x = NULL;
1518         }
1519         err = 0;
1520
1521 out:
1522         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1523
1524         if (to_put)
1525                 xfrm_state_put(to_put);
1526
1527         if (err)
1528                 return err;
1529
1530         if (!x) {
1531                 xfrm_state_delete(x1);
1532                 xfrm_state_put(x1);
1533                 return 0;
1534         }
1535
1536         err = -EINVAL;
1537         spin_lock_bh(&x1->lock);
1538         if (likely(x1->km.state == XFRM_STATE_VALID)) {
1539                 if (x->encap && x1->encap &&
1540                     x->encap->encap_type == x1->encap->encap_type)
1541                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1542                 else if (x->encap || x1->encap)
1543                         goto fail;
1544
1545                 if (x->coaddr && x1->coaddr) {
1546                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1547                 }
1548                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1549                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1550                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1551                 x1->km.dying = 0;
1552
1553                 tasklet_hrtimer_start(&x1->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL);
1554                 if (x1->curlft.use_time)
1555                         xfrm_state_check_expire(x1);
1556
1557                 err = 0;
1558                 x->km.state = XFRM_STATE_DEAD;
1559                 __xfrm_state_put(x);
1560         }
1561
1562 fail:
1563         spin_unlock_bh(&x1->lock);
1564
1565         xfrm_state_put(x1);
1566
1567         return err;
1568 }
1569 EXPORT_SYMBOL(xfrm_state_update);
1570
1571 int xfrm_state_check_expire(struct xfrm_state *x)
1572 {
1573         if (!x->curlft.use_time)
1574                 x->curlft.use_time = get_seconds();
1575
1576         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1577             x->curlft.packets >= x->lft.hard_packet_limit) {
1578                 x->km.state = XFRM_STATE_EXPIRED;
1579                 tasklet_hrtimer_start(&x->mtimer, 0, HRTIMER_MODE_REL);
1580                 return -EINVAL;
1581         }
1582
1583         if (!x->km.dying &&
1584             (x->curlft.bytes >= x->lft.soft_byte_limit ||
1585              x->curlft.packets >= x->lft.soft_packet_limit)) {
1586                 x->km.dying = 1;
1587                 km_state_expired(x, 0, 0);
1588         }
1589         return 0;
1590 }
1591 EXPORT_SYMBOL(xfrm_state_check_expire);
1592
1593 struct xfrm_state *
1594 xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi,
1595                   u8 proto, unsigned short family)
1596 {
1597         struct xfrm_state *x;
1598
1599         rcu_read_lock();
1600         x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
1601         rcu_read_unlock();
1602         return x;
1603 }
1604 EXPORT_SYMBOL(xfrm_state_lookup);
1605
1606 struct xfrm_state *
1607 xfrm_state_lookup_byaddr(struct net *net, u32 mark,
1608                          const xfrm_address_t *daddr, const xfrm_address_t *saddr,
1609                          u8 proto, unsigned short family)
1610 {
1611         struct xfrm_state *x;
1612
1613         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1614         x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family);
1615         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1616         return x;
1617 }
1618 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1619
1620 struct xfrm_state *
1621 xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid,
1622               u8 proto, const xfrm_address_t *daddr,
1623               const xfrm_address_t *saddr, int create, unsigned short family)
1624 {
1625         struct xfrm_state *x;
1626
1627         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1628         x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create);
1629         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1630
1631         return x;
1632 }
1633 EXPORT_SYMBOL(xfrm_find_acq);
1634
1635 #ifdef CONFIG_XFRM_SUB_POLICY
1636 int
1637 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1638                unsigned short family, struct net *net)
1639 {
1640         int i;
1641         int err = 0;
1642         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1643         if (!afinfo)
1644                 return -EAFNOSUPPORT;
1645
1646         spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
1647         if (afinfo->tmpl_sort)
1648                 err = afinfo->tmpl_sort(dst, src, n);
1649         else
1650                 for (i = 0; i < n; i++)
1651                         dst[i] = src[i];
1652         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1653         rcu_read_unlock();
1654         return err;
1655 }
1656 EXPORT_SYMBOL(xfrm_tmpl_sort);
1657
1658 int
1659 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1660                 unsigned short family)
1661 {
1662         int i;
1663         int err = 0;
1664         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1665         struct net *net = xs_net(*src);
1666
1667         if (!afinfo)
1668                 return -EAFNOSUPPORT;
1669
1670         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1671         if (afinfo->state_sort)
1672                 err = afinfo->state_sort(dst, src, n);
1673         else
1674                 for (i = 0; i < n; i++)
1675                         dst[i] = src[i];
1676         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1677         rcu_read_unlock();
1678         return err;
1679 }
1680 EXPORT_SYMBOL(xfrm_state_sort);
1681 #endif
1682
1683 /* Silly enough, but I'm lazy to build resolution list */
1684
1685 static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1686 {
1687         int i;
1688
1689         for (i = 0; i <= net->xfrm.state_hmask; i++) {
1690                 struct xfrm_state *x;
1691
1692                 hlist_for_each_entry(x, net->xfrm.state_bydst+i, bydst) {
1693                         if (x->km.seq == seq &&
1694                             (mark & x->mark.m) == x->mark.v &&
1695                             x->km.state == XFRM_STATE_ACQ) {
1696                                 xfrm_state_hold(x);
1697                                 return x;
1698                         }
1699                 }
1700         }
1701         return NULL;
1702 }
1703
1704 struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq)
1705 {
1706         struct xfrm_state *x;
1707
1708         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1709         x = __xfrm_find_acq_byseq(net, mark, seq);
1710         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1711         return x;
1712 }
1713 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1714
1715 u32 xfrm_get_acqseq(void)
1716 {
1717         u32 res;
1718         static atomic_t acqseq;
1719
1720         do {
1721                 res = atomic_inc_return(&acqseq);
1722         } while (!res);
1723
1724         return res;
1725 }
1726 EXPORT_SYMBOL(xfrm_get_acqseq);
1727
1728 int verify_spi_info(u8 proto, u32 min, u32 max)
1729 {
1730         switch (proto) {
1731         case IPPROTO_AH:
1732         case IPPROTO_ESP:
1733                 break;
1734
1735         case IPPROTO_COMP:
1736                 /* IPCOMP spi is 16-bits. */
1737                 if (max >= 0x10000)
1738                         return -EINVAL;
1739                 break;
1740
1741         default:
1742                 return -EINVAL;
1743         }
1744
1745         if (min > max)
1746                 return -EINVAL;
1747
1748         return 0;
1749 }
1750 EXPORT_SYMBOL(verify_spi_info);
1751
/*
 * Assign an SPI from the host-order range [@low, @high] to state @x and
 * link the state into the byspi hash table.
 *
 * Returns 0 when @x already had an SPI or one was assigned here, and
 * -ENOENT when @x is dead or no unused SPI was found.
 *
 * The whole operation runs under x->lock; the hash insertion additionally
 * takes the namespace's xfrm_state_lock.
 */
int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high)
{
        struct net *net = xs_net(x);
        unsigned int h;
        struct xfrm_state *x0;
        int err = -ENOENT;
        __be32 minspi = htonl(low);
        __be32 maxspi = htonl(high);
        u32 mark = x->mark.v & x->mark.m;

        spin_lock_bh(&x->lock);
        if (x->km.state == XFRM_STATE_DEAD)
                goto unlock;

        /* An SPI that is already set is reported as success. */
        err = 0;
        if (x->id.spi)
                goto unlock;

        err = -ENOENT;

        if (minspi == maxspi) {
                /* Single candidate: fail if some state already uses it. */
                x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family);
                if (x0) {
                        xfrm_state_put(x0);
                        goto unlock;
                }
                x->id.spi = minspi;
        } else {
                u32 spi = 0;
                /*
                 * Probe up to (high - low + 1) random values from the range
                 * and take the first one the lookup does not find.
                 * NOTE(review): in u32 arithmetic high-low+1 wraps to 0 for
                 * low == 0, high == 0xffffffff, in which case the loop body
                 * never runs and -ENOENT is returned.
                 */
                for (h = 0; h < high-low+1; h++) {
                        spi = low + prandom_u32()%(high-low+1);
                        x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family);
                        if (x0 == NULL) {
                                x->id.spi = htonl(spi);
                                break;
                        }
                        /* Drop the reference the lookup returned. */
                        xfrm_state_put(x0);
                }
        }
        if (x->id.spi) {
                /* An SPI was chosen: hash the state by SPI (h is reused as the bucket). */
                spin_lock_bh(&net->xfrm.xfrm_state_lock);
                h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family);
                hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h);
                spin_unlock_bh(&net->xfrm.xfrm_state_lock);

                err = 0;
        }

unlock:
        spin_unlock_bh(&x->lock);

        return err;
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1806
1807 static bool __xfrm_state_filter_match(struct xfrm_state *x,
1808                                       struct xfrm_address_filter *filter)
1809 {
1810         if (filter) {
1811                 if ((filter->family == AF_INET ||
1812                      filter->family == AF_INET6) &&
1813                     x->props.family != filter->family)
1814                         return false;
1815
1816                 return addr_match(&x->props.saddr, &filter->saddr,
1817                                   filter->splen) &&
1818                        addr_match(&x->id.daddr, &filter->daddr,
1819                                   filter->dplen);
1820         }
1821         return true;
1822 }
1823
/*
 * Resumable iteration over all states of @net, calling @func(state, seq,
 * @data) for every live state that matches walk->proto and walk->filter.
 *
 * @walk doubles as a cursor: when @func returns non-zero the walk entry is
 * parked in net->xfrm.state_all just before the current element, so the
 * next call resumes at that element.
 *
 * Returns 0 when the walk completed (or had already completed), -ENOENT
 * when the walk completed without @func ever succeeding (walk->seq still
 * 0), or the non-zero value returned by @func.
 */
int xfrm_state_walk(struct net *net, struct xfrm_state_walk *walk,
                    int (*func)(struct xfrm_state *, int, void*),
                    void *data)
{
        struct xfrm_state *state;
        struct xfrm_state_walk *x;
        int err = 0;

        /* Entries were already delivered and the cursor is unlinked: done. */
        if (walk->seq != 0 && list_empty(&walk->all))
                return 0;

        spin_lock_bh(&net->xfrm.xfrm_state_lock);
        /* Fresh walk starts at the list head, a parked one right after the cursor. */
        if (list_empty(&walk->all))
                x = list_first_entry(&net->xfrm.state_all, struct xfrm_state_walk, all);
        else
                x = list_first_entry(&walk->all, struct xfrm_state_walk, all);
        list_for_each_entry_from(x, &net->xfrm.state_all, all) {
                /*
                 * Skips dead states; parked walk cursors are initialised with
                 * XFRM_STATE_DEAD by xfrm_state_walk_init(), so they are
                 * skipped here as well.
                 */
                if (x->state == XFRM_STATE_DEAD)
                        continue;
                state = container_of(x, struct xfrm_state, km);
                if (!xfrm_id_proto_match(state->id.proto, walk->proto))
                        continue;
                if (!__xfrm_state_filter_match(state, walk->filter))
                        continue;
                err = func(state, walk->seq, data);
                if (err) {
                        /* Park the cursor just before x so the next call retries x. */
                        list_move_tail(&walk->all, &x->all);
                        goto out;
                }
                walk->seq++;
        }
        if (walk->seq == 0) {
                err = -ENOENT;
                goto out;
        }
        /* Finished: unlink the cursor so later calls return 0 immediately. */
        list_del_init(&walk->all);
out:
        spin_unlock_bh(&net->xfrm.xfrm_state_lock);
        return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1865
1866 void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto,
1867                           struct xfrm_address_filter *filter)
1868 {
1869         INIT_LIST_HEAD(&walk->all);
1870         walk->proto = proto;
1871         walk->state = XFRM_STATE_DEAD;
1872         walk->seq = 0;
1873         walk->filter = filter;
1874 }
1875 EXPORT_SYMBOL(xfrm_state_walk_init);
1876
1877 void xfrm_state_walk_done(struct xfrm_state_walk *walk, struct net *net)
1878 {
1879         kfree(walk->filter);
1880
1881         if (list_empty(&walk->all))
1882                 return;
1883
1884         spin_lock_bh(&net->xfrm.xfrm_state_lock);
1885         list_del(&walk->all);
1886         spin_unlock_bh(&net->xfrm.xfrm_state_lock);
1887 }
1888 EXPORT_SYMBOL(xfrm_state_walk_done);
1889
1890 static void xfrm_replay_timer_handler(struct timer_list *t)
1891 {
1892         struct xfrm_state *x = from_timer(x, t, rtimer);
1893
1894         spin_lock(&x->lock);
1895
1896         if (x->km.state == XFRM_STATE_VALID) {
1897                 if (xfrm_aevent_is_on(xs_net(x)))
1898                         x->repl->notify(x, XFRM_REPLAY_TIMEOUT);
1899                 else
1900                         x->xflags |= XFRM_TIME_DEFER;
1901         }
1902
1903         spin_unlock(&x->lock);
1904 }
1905
/* List of registered key managers (struct xfrm_mgr).  Readers in this file
 * traverse it with list_for_each_entry_rcu() under rcu_read_lock();
 * xfrm_register_km() and xfrm_unregister_km() modify it under xfrm_km_lock.
 */
static LIST_HEAD(xfrm_km_list);
1907
1908 void km_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c)
1909 {
1910         struct xfrm_mgr *km;
1911
1912         rcu_read_lock();
1913         list_for_each_entry_rcu(km, &xfrm_km_list, list)
1914                 if (km->notify_policy)
1915                         km->notify_policy(xp, dir, c);
1916         rcu_read_unlock();
1917 }
1918
1919 void km_state_notify(struct xfrm_state *x, const struct km_event *c)
1920 {
1921         struct xfrm_mgr *km;
1922         rcu_read_lock();
1923         list_for_each_entry_rcu(km, &xfrm_km_list, list)
1924                 if (km->notify)
1925                         km->notify(x, c);
1926         rcu_read_unlock();
1927 }
1928
1929 EXPORT_SYMBOL(km_policy_notify);
1930 EXPORT_SYMBOL(km_state_notify);
1931
1932 void km_state_expired(struct xfrm_state *x, int hard, u32 portid)
1933 {
1934         struct km_event c;
1935
1936         c.data.hard = hard;
1937         c.portid = portid;
1938         c.event = XFRM_MSG_EXPIRE;
1939         km_state_notify(x, &c);
1940 }
1941
1942 EXPORT_SYMBOL(km_state_expired);
1943 /*
1944  * We send to all registered managers regardless of failure
1945  * We are happy with one success
1946 */
1947 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1948 {
1949         int err = -EINVAL, acqret;
1950         struct xfrm_mgr *km;
1951
1952         rcu_read_lock();
1953         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1954                 acqret = km->acquire(x, t, pol);
1955                 if (!acqret)
1956                         err = acqret;
1957         }
1958         rcu_read_unlock();
1959         return err;
1960 }
1961 EXPORT_SYMBOL(km_query);
1962
1963 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1964 {
1965         int err = -EINVAL;
1966         struct xfrm_mgr *km;
1967
1968         rcu_read_lock();
1969         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
1970                 if (km->new_mapping)
1971                         err = km->new_mapping(x, ipaddr, sport);
1972                 if (!err)
1973                         break;
1974         }
1975         rcu_read_unlock();
1976         return err;
1977 }
1978 EXPORT_SYMBOL(km_new_mapping);
1979
1980 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid)
1981 {
1982         struct km_event c;
1983
1984         c.data.hard = hard;
1985         c.portid = portid;
1986         c.event = XFRM_MSG_POLEXPIRE;
1987         km_policy_notify(pol, dir, &c);
1988 }
1989 EXPORT_SYMBOL(km_policy_expired);
1990
1991 #ifdef CONFIG_XFRM_MIGRATE
1992 int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
1993                const struct xfrm_migrate *m, int num_migrate,
1994                const struct xfrm_kmaddress *k,
1995                const struct xfrm_encap_tmpl *encap)
1996 {
1997         int err = -EINVAL;
1998         int ret;
1999         struct xfrm_mgr *km;
2000
2001         rcu_read_lock();
2002         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2003                 if (km->migrate) {
2004                         ret = km->migrate(sel, dir, type, m, num_migrate, k,
2005                                           encap);
2006                         if (!ret)
2007                                 err = ret;
2008                 }
2009         }
2010         rcu_read_unlock();
2011         return err;
2012 }
2013 EXPORT_SYMBOL(km_migrate);
2014 #endif
2015
2016 int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
2017 {
2018         int err = -EINVAL;
2019         int ret;
2020         struct xfrm_mgr *km;
2021
2022         rcu_read_lock();
2023         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2024                 if (km->report) {
2025                         ret = km->report(net, proto, sel, addr);
2026                         if (!ret)
2027                                 err = ret;
2028                 }
2029         }
2030         rcu_read_unlock();
2031         return err;
2032 }
2033 EXPORT_SYMBOL(km_report);
2034
2035 bool km_is_alive(const struct km_event *c)
2036 {
2037         struct xfrm_mgr *km;
2038         bool is_alive = false;
2039
2040         rcu_read_lock();
2041         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2042                 if (km->is_alive && km->is_alive(c)) {
2043                         is_alive = true;
2044                         break;
2045                 }
2046         }
2047         rcu_read_unlock();
2048
2049         return is_alive;
2050 }
2051 EXPORT_SYMBOL(km_is_alive);
2052
2053 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
2054 {
2055         int err;
2056         u8 *data;
2057         struct xfrm_mgr *km;
2058         struct xfrm_policy *pol = NULL;
2059
2060 #ifdef CONFIG_COMPAT
2061         if (in_compat_syscall())
2062                 return -EOPNOTSUPP;
2063 #endif
2064
2065         if (!optval && !optlen) {
2066                 xfrm_sk_policy_insert(sk, XFRM_POLICY_IN, NULL);
2067                 xfrm_sk_policy_insert(sk, XFRM_POLICY_OUT, NULL);
2068                 __sk_dst_reset(sk);
2069                 return 0;
2070         }
2071
2072         if (optlen <= 0 || optlen > PAGE_SIZE)
2073                 return -EMSGSIZE;
2074
2075         data = memdup_user(optval, optlen);
2076         if (IS_ERR(data))
2077                 return PTR_ERR(data);
2078
2079         err = -EINVAL;
2080         rcu_read_lock();
2081         list_for_each_entry_rcu(km, &xfrm_km_list, list) {
2082                 pol = km->compile_policy(sk, optname, data,
2083                                          optlen, &err);
2084                 if (err >= 0)
2085                         break;
2086         }
2087         rcu_read_unlock();
2088
2089         if (err >= 0) {
2090                 xfrm_sk_policy_insert(sk, err, pol);
2091                 xfrm_pol_put(pol);
2092                 __sk_dst_reset(sk);
2093                 err = 0;
2094         }
2095
2096         kfree(data);
2097         return err;
2098 }
2099 EXPORT_SYMBOL(xfrm_user_policy);
2100
/* Held by xfrm_register_km() and xfrm_unregister_km() while they add to or
 * remove from xfrm_km_list.
 */
static DEFINE_SPINLOCK(xfrm_km_lock);
2102
2103 int xfrm_register_km(struct xfrm_mgr *km)
2104 {
2105         spin_lock_bh(&xfrm_km_lock);
2106         list_add_tail_rcu(&km->list, &xfrm_km_list);
2107         spin_unlock_bh(&xfrm_km_lock);
2108         return 0;
2109 }
2110 EXPORT_SYMBOL(xfrm_register_km);
2111
2112 int xfrm_unregister_km(struct xfrm_mgr *km)
2113 {
2114         spin_lock_bh(&xfrm_km_lock);
2115         list_del_rcu(&km->list);
2116         spin_unlock_bh(&xfrm_km_lock);
2117         synchronize_rcu();
2118         return 0;
2119 }
2120 EXPORT_SYMBOL(xfrm_unregister_km);
2121
2122 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
2123 {
2124         int err = 0;
2125
2126         if (WARN_ON(afinfo->family >= NPROTO))
2127                 return -EAFNOSUPPORT;
2128
2129         spin_lock_bh(&xfrm_state_afinfo_lock);
2130         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
2131                 err = -EEXIST;
2132         else
2133                 rcu_assign_pointer(xfrm_state_afinfo[afinfo->family], afinfo);
2134         spin_unlock_bh(&xfrm_state_afinfo_lock);
2135         return err;
2136 }
2137 EXPORT_SYMBOL(xfrm_state_register_afinfo);
2138
2139 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
2140 {
2141         int err = 0, family = afinfo->family;
2142
2143         if (WARN_ON(family >= NPROTO))
2144                 return -EAFNOSUPPORT;
2145
2146         spin_lock_bh(&xfrm_state_afinfo_lock);
2147         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
2148                 if (rcu_access_pointer(xfrm_state_afinfo[family]) != afinfo)
2149                         err = -EINVAL;
2150                 else
2151                         RCU_INIT_POINTER(xfrm_state_afinfo[afinfo->family], NULL);
2152         }
2153         spin_unlock_bh(&xfrm_state_afinfo_lock);
2154         synchronize_rcu();
2155         return err;
2156 }
2157 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
2158
2159 struct xfrm_state_afinfo *xfrm_state_afinfo_get_rcu(unsigned int family)
2160 {
2161         if (unlikely(family >= NPROTO))
2162                 return NULL;
2163
2164         return rcu_dereference(xfrm_state_afinfo[family]);
2165 }
2166
2167 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family)
2168 {
2169         struct xfrm_state_afinfo *afinfo;
2170         if (unlikely(family >= NPROTO))
2171                 return NULL;
2172         rcu_read_lock();
2173         afinfo = rcu_dereference(xfrm_state_afinfo[family]);
2174         if (unlikely(!afinfo))
2175                 rcu_read_unlock();
2176         return afinfo;
2177 }
2178
2179 void xfrm_flush_gc(void)
2180 {
2181         flush_work(&xfrm_state_gc_work);
2182 }
2183 EXPORT_SYMBOL(xfrm_flush_gc);
2184
2185 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
2186 void xfrm_state_delete_tunnel(struct xfrm_state *x)
2187 {
2188         if (x->tunnel) {
2189                 struct xfrm_state *t = x->tunnel;
2190
2191                 if (atomic_read(&t->tunnel_users) == 2)
2192                         xfrm_state_delete(t);
2193                 atomic_dec(&t->tunnel_users);
2194                 xfrm_state_put(t);
2195                 x->tunnel = NULL;
2196         }
2197 }
2198 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
2199
2200 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
2201 {
2202         const struct xfrm_type *type = READ_ONCE(x->type);
2203
2204         if (x->km.state == XFRM_STATE_VALID &&
2205             type && type->get_mtu)
2206                 return type->get_mtu(x, mtu);
2207
2208         return mtu - x->props.header_len;
2209 }
2210
/*
 * Set up the family-, mode- and type-specific parts of state @x.
 *
 * @init_replay: when true, xfrm_init_replay() is called as the last step.
 * @offload:     passed through to xfrm_get_type_offload().
 *
 * Returns 0 on success, -EAFNOSUPPORT when no afinfo is registered for the
 * state's family, -EPROTONOSUPPORT when a required mode or type cannot be
 * obtained (or the inner mode is not usable), or the error returned by the
 * afinfo init_flags hook, the type's init_state hook or xfrm_init_replay().
 *
 * NOTE(review): the error label only returns; modes/types already stored
 * in @x are not released here — presumably the caller's state teardown
 * handles that, confirm against the callers.
 */
int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload)
{
        struct xfrm_state_afinfo *afinfo;
        struct xfrm_mode *inner_mode;
        int family = x->props.family;
        int err;

        err = -EAFNOSUPPORT;
        /* A non-NULL afinfo is returned with rcu_read_lock() held. */
        afinfo = xfrm_state_get_afinfo(family);
        if (!afinfo)
                goto error;

        err = 0;
        if (afinfo->init_flags)
                err = afinfo->init_flags(x);

        /* Pairs with the lock taken inside xfrm_state_get_afinfo(). */
        rcu_read_unlock();

        if (err)
                goto error;

        err = -EPROTONOSUPPORT;

        if (x->sel.family != AF_UNSPEC) {
                /* Selector names a family: the inner mode comes from it. */
                inner_mode = xfrm_get_mode(x->props.mode, x->sel.family);
                if (inner_mode == NULL)
                        goto error;

                /* Inner and outer family may differ only for tunnel-flagged modes. */
                if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
                    family != x->sel.family) {
                        xfrm_put_mode(inner_mode);
                        goto error;
                }

                x->inner_mode = inner_mode;
        } else {
                struct xfrm_mode *inner_mode_iaf;
                int iafamily = AF_INET;

                /* No selector family: the mode must be tunnel-flagged. */
                inner_mode = xfrm_get_mode(x->props.mode, x->props.family);
                if (inner_mode == NULL)
                        goto error;

                if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) {
                        xfrm_put_mode(inner_mode);
                        goto error;
                }
                x->inner_mode = inner_mode;

                /* iafamily is the "other" family: AF_INET6 for AF_INET, else AF_INET. */
                if (x->props.family == AF_INET)
                        iafamily = AF_INET6;

                /* The other-family inner mode is optional; keep it only if tunnel-flagged. */
                inner_mode_iaf = xfrm_get_mode(x->props.mode, iafamily);
                if (inner_mode_iaf) {
                        if (inner_mode_iaf->flags & XFRM_MODE_FLAG_TUNNEL)
                                x->inner_mode_iaf = inner_mode_iaf;
                        else
                                xfrm_put_mode(inner_mode_iaf);
                }
        }

        x->type = xfrm_get_type(x->id.proto, family);
        if (x->type == NULL)
                goto error;

        /* The offload type's result is stored without a NULL check. */
        x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload);

        err = x->type->init_state(x);
        if (err)
                goto error;

        x->outer_mode = xfrm_get_mode(x->props.mode, family);
        if (x->outer_mode == NULL) {
                /* err is 0 here after init_state(), so set it again. */
                err = -EPROTONOSUPPORT;
                goto error;
        }

        if (init_replay) {
                err = xfrm_init_replay(x);
                if (err)
                        goto error;
        }

error:
        return err;
}

EXPORT_SYMBOL(__xfrm_init_state);
2299
2300 int xfrm_init_state(struct xfrm_state *x)
2301 {
2302         int err;
2303
2304         err = __xfrm_init_state(x, true, false);
2305         if (!err)
2306                 x->km.state = XFRM_STATE_VALID;
2307
2308         return err;
2309 }
2310
2311 EXPORT_SYMBOL(xfrm_init_state);
2312
2313 int __net_init xfrm_state_init(struct net *net)
2314 {
2315         unsigned int sz;
2316
2317         if (net_eq(net, &init_net))
2318                 xfrm_state_cache = KMEM_CACHE(xfrm_state,
2319                                               SLAB_HWCACHE_ALIGN | SLAB_PANIC);
2320
2321         INIT_LIST_HEAD(&net->xfrm.state_all);
2322
2323         sz = sizeof(struct hlist_head) * 8;
2324
2325         net->xfrm.state_bydst = xfrm_hash_alloc(sz);
2326         if (!net->xfrm.state_bydst)
2327                 goto out_bydst;
2328         net->xfrm.state_bysrc = xfrm_hash_alloc(sz);
2329         if (!net->xfrm.state_bysrc)
2330                 goto out_bysrc;
2331         net->xfrm.state_byspi = xfrm_hash_alloc(sz);
2332         if (!net->xfrm.state_byspi)
2333                 goto out_byspi;
2334         net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
2335
2336         net->xfrm.state_num = 0;
2337         INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize);
2338         spin_lock_init(&net->xfrm.xfrm_state_lock);
2339         return 0;
2340
2341 out_byspi:
2342         xfrm_hash_free(net->xfrm.state_bysrc, sz);
2343 out_bysrc:
2344         xfrm_hash_free(net->xfrm.state_bydst, sz);
2345 out_bydst:
2346         return -ENOMEM;
2347 }
2348
2349 void xfrm_state_fini(struct net *net)
2350 {
2351         unsigned int sz;
2352
2353         flush_work(&net->xfrm.state_hash_work);
2354         xfrm_state_flush(net, IPSEC_PROTO_ANY, false);
2355         flush_work(&xfrm_state_gc_work);
2356
2357         WARN_ON(!list_empty(&net->xfrm.state_all));
2358
2359         sz = (net->xfrm.state_hmask + 1) * sizeof(struct hlist_head);
2360         WARN_ON(!hlist_empty(net->xfrm.state_byspi));
2361         xfrm_hash_free(net->xfrm.state_byspi, sz);
2362         WARN_ON(!hlist_empty(net->xfrm.state_bysrc));
2363         xfrm_hash_free(net->xfrm.state_bysrc, sz);
2364         WARN_ON(!hlist_empty(net->xfrm.state_bydst));
2365         xfrm_hash_free(net->xfrm.state_bydst, sz);
2366 }
2367
2368 #ifdef CONFIG_AUDITSYSCALL
2369 static void xfrm_audit_helper_sainfo(struct xfrm_state *x,
2370                                      struct audit_buffer *audit_buf)
2371 {
2372         struct xfrm_sec_ctx *ctx = x->security;
2373         u32 spi = ntohl(x->id.spi);
2374
2375         if (ctx)
2376                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2377                                  ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2378
2379         switch (x->props.family) {
2380         case AF_INET:
2381                 audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2382                                  &x->props.saddr.a4, &x->id.daddr.a4);
2383                 break;
2384         case AF_INET6:
2385                 audit_log_format(audit_buf, " src=%pI6 dst=%pI6",
2386                                  x->props.saddr.a6, x->id.daddr.a6);
2387                 break;
2388         }
2389
2390         audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2391 }
2392
2393 static void xfrm_audit_helper_pktinfo(struct sk_buff *skb, u16 family,
2394                                       struct audit_buffer *audit_buf)
2395 {
2396         const struct iphdr *iph4;
2397         const struct ipv6hdr *iph6;
2398
2399         switch (family) {
2400         case AF_INET:
2401                 iph4 = ip_hdr(skb);
2402                 audit_log_format(audit_buf, " src=%pI4 dst=%pI4",
2403                                  &iph4->saddr, &iph4->daddr);
2404                 break;
2405         case AF_INET6:
2406                 iph6 = ipv6_hdr(skb);
2407                 audit_log_format(audit_buf,
2408                                  " src=%pI6 dst=%pI6 flowlbl=0x%x%02x%02x",
2409                                  &iph6->saddr, &iph6->daddr,
2410                                  iph6->flow_lbl[0] & 0x0f,
2411                                  iph6->flow_lbl[1],
2412                                  iph6->flow_lbl[2]);
2413                 break;
2414         }
2415 }
2416
2417 void xfrm_audit_state_add(struct xfrm_state *x, int result, bool task_valid)
2418 {
2419         struct audit_buffer *audit_buf;
2420
2421         audit_buf = xfrm_audit_start("SAD-add");
2422         if (audit_buf == NULL)
2423                 return;
2424         xfrm_audit_helper_usrinfo(task_valid, audit_buf);
2425         xfrm_audit_helper_sainfo(x, audit_buf);
2426         audit_log_format(audit_buf, " res=%u", result);
2427         audit_log_end(audit_buf);
2428 }
2429 EXPORT_SYMBOL_GPL(xfrm_audit_state_add);
2430
2431 void xfrm_audit_state_delete(struct xfrm_state *x, int result, bool task_valid)
2432 {
2433         struct audit_buffer *audit_buf;
2434
2435         audit_buf = xfrm_audit_start("SAD-delete");
2436         if (audit_buf == NULL)
2437                 return;
2438         xfrm_audit_helper_usrinfo(task_valid, audit_buf);
2439         xfrm_audit_helper_sainfo(x, audit_buf);
2440         audit_log_format(audit_buf, " res=%u", result);
2441         audit_log_end(audit_buf);
2442 }
2443 EXPORT_SYMBOL_GPL(xfrm_audit_state_delete);
2444
2445 void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
2446                                       struct sk_buff *skb)
2447 {
2448         struct audit_buffer *audit_buf;
2449         u32 spi;
2450
2451         audit_buf = xfrm_audit_start("SA-replay-overflow");
2452         if (audit_buf == NULL)
2453                 return;
2454         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2455         /* don't record the sequence number because it's inherent in this kind
2456          * of audit message */
2457         spi = ntohl(x->id.spi);
2458         audit_log_format(audit_buf, " spi=%u(0x%x)", spi, spi);
2459         audit_log_end(audit_buf);
2460 }
2461 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay_overflow);
2462
2463 void xfrm_audit_state_replay(struct xfrm_state *x,
2464                              struct sk_buff *skb, __be32 net_seq)
2465 {
2466         struct audit_buffer *audit_buf;
2467         u32 spi;
2468
2469         audit_buf = xfrm_audit_start("SA-replayed-pkt");
2470         if (audit_buf == NULL)
2471                 return;
2472         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2473         spi = ntohl(x->id.spi);
2474         audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2475                          spi, spi, ntohl(net_seq));
2476         audit_log_end(audit_buf);
2477 }
2478 EXPORT_SYMBOL_GPL(xfrm_audit_state_replay);
2479
2480 void xfrm_audit_state_notfound_simple(struct sk_buff *skb, u16 family)
2481 {
2482         struct audit_buffer *audit_buf;
2483
2484         audit_buf = xfrm_audit_start("SA-notfound");
2485         if (audit_buf == NULL)
2486                 return;
2487         xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2488         audit_log_end(audit_buf);
2489 }
2490 EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound_simple);
2491
2492 void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
2493                                __be32 net_spi, __be32 net_seq)
2494 {
2495         struct audit_buffer *audit_buf;
2496         u32 spi;
2497
2498         audit_buf = xfrm_audit_start("SA-notfound");
2499         if (audit_buf == NULL)
2500                 return;
2501         xfrm_audit_helper_pktinfo(skb, family, audit_buf);
2502         spi = ntohl(net_spi);
2503         audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2504                          spi, spi, ntohl(net_seq));
2505         audit_log_end(audit_buf);
2506 }
2507 EXPORT_SYMBOL_GPL(xfrm_audit_state_notfound);
2508
2509 void xfrm_audit_state_icvfail(struct xfrm_state *x,
2510                               struct sk_buff *skb, u8 proto)
2511 {
2512         struct audit_buffer *audit_buf;
2513         __be32 net_spi;
2514         __be32 net_seq;
2515
2516         audit_buf = xfrm_audit_start("SA-icv-failure");
2517         if (audit_buf == NULL)
2518                 return;
2519         xfrm_audit_helper_pktinfo(skb, x->props.family, audit_buf);
2520         if (xfrm_parse_spi(skb, proto, &net_spi, &net_seq) == 0) {
2521                 u32 spi = ntohl(net_spi);
2522                 audit_log_format(audit_buf, " spi=%u(0x%x) seqno=%u",
2523                                  spi, spi, ntohl(net_seq));
2524         }
2525         audit_log_end(audit_buf);
2526 }
2527 EXPORT_SYMBOL_GPL(xfrm_audit_state_icvfail);
2528 #endif /* CONFIG_AUDITSYSCALL */