net/openvswitch/flow_netlink.c
1 /*
2  * Copyright (c) 2007-2017 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include "flow.h"
22 #include "datapath.h"
23 #include <linux/uaccess.h>
24 #include <linux/netdevice.h>
25 #include <linux/etherdevice.h>
26 #include <linux/if_ether.h>
27 #include <linux/if_vlan.h>
28 #include <net/llc_pdu.h>
29 #include <linux/kernel.h>
30 #include <linux/jhash.h>
31 #include <linux/jiffies.h>
32 #include <linux/llc.h>
33 #include <linux/module.h>
34 #include <linux/in.h>
35 #include <linux/rcupdate.h>
36 #include <linux/if_arp.h>
37 #include <linux/ip.h>
38 #include <linux/ipv6.h>
39 #include <linux/sctp.h>
40 #include <linux/tcp.h>
41 #include <linux/udp.h>
42 #include <linux/icmp.h>
43 #include <linux/icmpv6.h>
44 #include <linux/rculist.h>
45 #include <net/geneve.h>
46 #include <net/ip.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/mpls.h>
50 #include <net/vxlan.h>
51 #include <net/tun_proto.h>
52 #include <net/erspan.h>
53
54 #include "flow_netlink.h"
55
56 struct ovs_len_tbl {
57         int len;
58         const struct ovs_len_tbl *next;
59 };
60
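/* Sentinel values for ovs_len_tbl.len: OVS_ATTR_NESTED marks an attribute
 * whose payload is itself a list of attributes (described by .next), and
 * OVS_ATTR_VARIABLE marks an attribute with no fixed length.
 */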
61 #define OVS_ATTR_NESTED -1
62 #define OVS_ATTR_VARIABLE -2
63
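/* Conservatively report whether any action in 'actions' can modify the
 * flow key: only output, recirc, trunc and userspace are treated as
 * leaving the key untouched; everything else, including unknown actions,
 * is assumed to change it.
 */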
64 static bool actions_may_change_flow(const struct nlattr *actions)
65 {
66         struct nlattr *nla;
67         int rem;
68
69         nla_for_each_nested(nla, actions, rem) {
70                 u16 action = nla_type(nla);
71
72                 switch (action) {
73                 case OVS_ACTION_ATTR_OUTPUT:
74                 case OVS_ACTION_ATTR_RECIRC:
75                 case OVS_ACTION_ATTR_TRUNC:
76                 case OVS_ACTION_ATTR_USERSPACE:
77                         break;
78
79                 case OVS_ACTION_ATTR_CT:
80                 case OVS_ACTION_ATTR_CT_CLEAR:
81                 case OVS_ACTION_ATTR_HASH:
82                 case OVS_ACTION_ATTR_POP_ETH:
83                 case OVS_ACTION_ATTR_POP_MPLS:
84                 case OVS_ACTION_ATTR_POP_NSH:
85                 case OVS_ACTION_ATTR_POP_VLAN:
86                 case OVS_ACTION_ATTR_PUSH_ETH:
87                 case OVS_ACTION_ATTR_PUSH_MPLS:
88                 case OVS_ACTION_ATTR_PUSH_NSH:
89                 case OVS_ACTION_ATTR_PUSH_VLAN:
90                 case OVS_ACTION_ATTR_SAMPLE:
91                 case OVS_ACTION_ATTR_SET:
92                 case OVS_ACTION_ATTR_SET_MASKED:
93                 case OVS_ACTION_ATTR_METER:
94                 default:
95                         return true;
96                 }
97         }
98         return false;
99 }
100
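/* Extend the key (or mask) range recorded in 'match' so that it covers
 * the bytes [offset, offset + size), rounded out to long boundaries.
 */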
101 static void update_range(struct sw_flow_match *match,
102                          size_t offset, size_t size, bool is_mask)
103 {
104         struct sw_flow_key_range *range;
105         size_t start = rounddown(offset, sizeof(long));
106         size_t end = roundup(offset + size, sizeof(long));
107
108         if (!is_mask)
109                 range = &match->range;
110         else
111                 range = &match->mask->range;
112
113         if (range->start == range->end) {
114                 range->start = start;
115                 range->end = end;
116                 return;
117         }
118
119         if (range->start > start)
120                 range->start = start;
121
122         if (range->end < end)
123                 range->end = end;
124 }
125
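/* Helpers that store a parsed value into either the flow key or the flow
 * mask (selected by 'is_mask') and widen the match range accordingly,
 * e.g. SW_FLOW_KEY_PUT(match, ip.proto, nla_get_u8(a), is_mask).
 */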
126 #define SW_FLOW_KEY_PUT(match, field, value, is_mask) \
127         do { \
128                 update_range(match, offsetof(struct sw_flow_key, field),    \
129                              sizeof((match)->key->field), is_mask);         \
130                 if (is_mask)                                                \
131                         (match)->mask->key.field = value;                   \
132                 else                                                        \
133                         (match)->key->field = value;                        \
134         } while (0)
135
136 #define SW_FLOW_KEY_MEMCPY_OFFSET(match, offset, value_p, len, is_mask)     \
137         do {                                                                \
138                 update_range(match, offset, len, is_mask);                  \
139                 if (is_mask)                                                \
140                         memcpy((u8 *)&(match)->mask->key + offset, value_p, \
141                                len);                                       \
142                 else                                                        \
143                         memcpy((u8 *)(match)->key + offset, value_p, len);  \
144         } while (0)
145
146 #define SW_FLOW_KEY_MEMCPY(match, field, value_p, len, is_mask)               \
147         SW_FLOW_KEY_MEMCPY_OFFSET(match, offsetof(struct sw_flow_key, field), \
148                                   value_p, len, is_mask)
149
150 #define SW_FLOW_KEY_MEMSET_FIELD(match, field, value, is_mask)              \
151         do {                                                                \
152                 update_range(match, offsetof(struct sw_flow_key, field),    \
153                              sizeof((match)->key->field), is_mask);         \
154                 if (is_mask)                                                \
155                         memset((u8 *)&(match)->mask->key.field, value,      \
156                                sizeof((match)->mask->key.field));           \
157                 else                                                        \
158                         memset((u8 *)&(match)->key->field, value,           \
159                                sizeof((match)->key->field));                \
160         } while (0)
161
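/* Cross-check the key and mask attribute bitmaps supplied by userspace:
 * every attribute implied by the EtherType / IP protocol in the key must
 * be present, and mask attributes are only allowed where the
 * corresponding selector field is matched exactly.
 */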
162 static bool match_validate(const struct sw_flow_match *match,
163                            u64 key_attrs, u64 mask_attrs, bool log)
164 {
165         u64 key_expected = 0;
166         u64 mask_allowed = key_attrs;  /* At most allow all key attributes */
167
168         /* The following mask attributes are allowed only if they
169          * pass the validation tests. */
170         mask_allowed &= ~((1 << OVS_KEY_ATTR_IPV4)
171                         | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)
172                         | (1 << OVS_KEY_ATTR_IPV6)
173                         | (1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)
174                         | (1 << OVS_KEY_ATTR_TCP)
175                         | (1 << OVS_KEY_ATTR_TCP_FLAGS)
176                         | (1 << OVS_KEY_ATTR_UDP)
177                         | (1 << OVS_KEY_ATTR_SCTP)
178                         | (1 << OVS_KEY_ATTR_ICMP)
179                         | (1 << OVS_KEY_ATTR_ICMPV6)
180                         | (1 << OVS_KEY_ATTR_ARP)
181                         | (1 << OVS_KEY_ATTR_ND)
182                         | (1 << OVS_KEY_ATTR_MPLS)
183                         | (1 << OVS_KEY_ATTR_NSH));
184
185         /* Always allowed mask fields. */
186         mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL)
187                        | (1 << OVS_KEY_ATTR_IN_PORT)
188                        | (1 << OVS_KEY_ATTR_ETHERTYPE));
189
190         /* Check key attributes. */
191         if (match->key->eth.type == htons(ETH_P_ARP)
192                         || match->key->eth.type == htons(ETH_P_RARP)) {
193                 key_expected |= 1 << OVS_KEY_ATTR_ARP;
194                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
195                         mask_allowed |= 1 << OVS_KEY_ATTR_ARP;
196         }
197
198         if (eth_p_mpls(match->key->eth.type)) {
199                 key_expected |= 1 << OVS_KEY_ATTR_MPLS;
200                 if (match->mask && (match->mask->key.eth.type == htons(0xffff)))
201                         mask_allowed |= 1 << OVS_KEY_ATTR_MPLS;
202         }
203
204         if (match->key->eth.type == htons(ETH_P_IP)) {
205                 key_expected |= 1 << OVS_KEY_ATTR_IPV4;
206                 if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
207                         mask_allowed |= 1 << OVS_KEY_ATTR_IPV4;
208                         mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4;
209                 }
210
211                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
212                         if (match->key->ip.proto == IPPROTO_UDP) {
213                                 key_expected |= 1 << OVS_KEY_ATTR_UDP;
214                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
215                                         mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
216                         }
217
218                         if (match->key->ip.proto == IPPROTO_SCTP) {
219                                 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
220                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
221                                         mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
222                         }
223
224                         if (match->key->ip.proto == IPPROTO_TCP) {
225                                 key_expected |= 1 << OVS_KEY_ATTR_TCP;
226                                 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
227                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
228                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
229                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
230                                 }
231                         }
232
233                         if (match->key->ip.proto == IPPROTO_ICMP) {
234                                 key_expected |= 1 << OVS_KEY_ATTR_ICMP;
235                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
236                                         mask_allowed |= 1 << OVS_KEY_ATTR_ICMP;
237                         }
238                 }
239         }
240
241         if (match->key->eth.type == htons(ETH_P_IPV6)) {
242                 key_expected |= 1 << OVS_KEY_ATTR_IPV6;
243                 if (match->mask && match->mask->key.eth.type == htons(0xffff)) {
244                         mask_allowed |= 1 << OVS_KEY_ATTR_IPV6;
245                         mask_allowed |= 1 << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6;
246                 }
247
248                 if (match->key->ip.frag != OVS_FRAG_TYPE_LATER) {
249                         if (match->key->ip.proto == IPPROTO_UDP) {
250                                 key_expected |= 1 << OVS_KEY_ATTR_UDP;
251                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
252                                         mask_allowed |= 1 << OVS_KEY_ATTR_UDP;
253                         }
254
255                         if (match->key->ip.proto == IPPROTO_SCTP) {
256                                 key_expected |= 1 << OVS_KEY_ATTR_SCTP;
257                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
258                                         mask_allowed |= 1 << OVS_KEY_ATTR_SCTP;
259                         }
260
261                         if (match->key->ip.proto == IPPROTO_TCP) {
262                                 key_expected |= 1 << OVS_KEY_ATTR_TCP;
263                                 key_expected |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
264                                 if (match->mask && (match->mask->key.ip.proto == 0xff)) {
265                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP;
266                                         mask_allowed |= 1 << OVS_KEY_ATTR_TCP_FLAGS;
267                                 }
268                         }
269
270                         if (match->key->ip.proto == IPPROTO_ICMPV6) {
271                                 key_expected |= 1 << OVS_KEY_ATTR_ICMPV6;
272                                 if (match->mask && (match->mask->key.ip.proto == 0xff))
273                                         mask_allowed |= 1 << OVS_KEY_ATTR_ICMPV6;
274
275                                 if (match->key->tp.src ==
276                                                 htons(NDISC_NEIGHBOUR_SOLICITATION) ||
277                                     match->key->tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
278                                         key_expected |= 1 << OVS_KEY_ATTR_ND;
279                                         /* Original direction conntrack tuple
280                                          * uses the same space as the ND fields
281                                          * in the key, so both are not allowed
282                                          * at the same time.
283                                          */
284                                         mask_allowed &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
285                                         if (match->mask && (match->mask->key.tp.src == htons(0xff)))
286                                                 mask_allowed |= 1 << OVS_KEY_ATTR_ND;
287                                 }
288                         }
289                 }
290         }
291
292         if (match->key->eth.type == htons(ETH_P_NSH)) {
293                 key_expected |= 1 << OVS_KEY_ATTR_NSH;
294                 if (match->mask &&
295                     match->mask->key.eth.type == htons(0xffff)) {
296                         mask_allowed |= 1 << OVS_KEY_ATTR_NSH;
297                 }
298         }
299
300         if ((key_attrs & key_expected) != key_expected) {
301                 /* Key attributes check failed. */
302                 OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)",
303                           (unsigned long long)key_attrs,
304                           (unsigned long long)key_expected);
305                 return false;
306         }
307
308         if ((mask_attrs & mask_allowed) != mask_attrs) {
309                 /* Mask attributes check failed. */
310                 OVS_NLERR(log, "Unexpected mask (mask=%llx, allowed=%llx)",
311                           (unsigned long long)mask_attrs,
312                           (unsigned long long)mask_allowed);
313                 return false;
314         }
315
316         return true;
317 }
318
319 size_t ovs_tun_key_attr_size(void)
320 {
321         /* Whenever adding new OVS_TUNNEL_KEY_ FIELDS, we should consider
322          * updating this function.
323          */
324         return    nla_total_size_64bit(8) /* OVS_TUNNEL_KEY_ATTR_ID */
325                 + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_SRC */
326                 + nla_total_size(16)   /* OVS_TUNNEL_KEY_ATTR_IPV[46]_DST */
327                 + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TOS */
328                 + nla_total_size(1)    /* OVS_TUNNEL_KEY_ATTR_TTL */
329                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT */
330                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_CSUM */
331                 + nla_total_size(0)    /* OVS_TUNNEL_KEY_ATTR_OAM */
332                 + nla_total_size(256)  /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */
333                 /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with
334                  * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it.
335                  */
336                 + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_SRC */
337                 + nla_total_size(2)    /* OVS_TUNNEL_KEY_ATTR_TP_DST */
338                 + nla_total_size(4);   /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */
339 }
340
341 static size_t ovs_nsh_key_attr_size(void)
342 {
343         /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider
344          * updating this function.
345          */
346         return  nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */
347                 /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are
348                  * mutually exclusive, so the bigger one can cover
349                  * the small one.
350                  */
351                 + nla_total_size(NSH_CTX_HDRS_MAX_LEN);
352 }
353
354 size_t ovs_key_attr_size(void)
355 {
356         /* Whenever adding new OVS_KEY_ FIELDS, we should consider
357          * updating this function.
358          */
359         BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29);
360
361         return    nla_total_size(4)   /* OVS_KEY_ATTR_PRIORITY */
362                 + nla_total_size(0)   /* OVS_KEY_ATTR_TUNNEL */
363                   + ovs_tun_key_attr_size()
364                 + nla_total_size(4)   /* OVS_KEY_ATTR_IN_PORT */
365                 + nla_total_size(4)   /* OVS_KEY_ATTR_SKB_MARK */
366                 + nla_total_size(4)   /* OVS_KEY_ATTR_DP_HASH */
367                 + nla_total_size(4)   /* OVS_KEY_ATTR_RECIRC_ID */
368                 + nla_total_size(4)   /* OVS_KEY_ATTR_CT_STATE */
369                 + nla_total_size(2)   /* OVS_KEY_ATTR_CT_ZONE */
370                 + nla_total_size(4)   /* OVS_KEY_ATTR_CT_MARK */
371                 + nla_total_size(16)  /* OVS_KEY_ATTR_CT_LABELS */
372                 + nla_total_size(40)  /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */
373                 + nla_total_size(0)   /* OVS_KEY_ATTR_NSH */
374                   + ovs_nsh_key_attr_size()
375                 + nla_total_size(12)  /* OVS_KEY_ATTR_ETHERNET */
376                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
377                 + nla_total_size(4)   /* OVS_KEY_ATTR_VLAN */
378                 + nla_total_size(0)   /* OVS_KEY_ATTR_ENCAP */
379                 + nla_total_size(2)   /* OVS_KEY_ATTR_ETHERTYPE */
380                 + nla_total_size(40)  /* OVS_KEY_ATTR_IPV6 */
381                 + nla_total_size(2)   /* OVS_KEY_ATTR_ICMPV6 */
382                 + nla_total_size(28); /* OVS_KEY_ATTR_ND */
383 }
384
385 static const struct ovs_len_tbl ovs_vxlan_ext_key_lens[OVS_VXLAN_EXT_MAX + 1] = {
386         [OVS_VXLAN_EXT_GBP]         = { .len = sizeof(u32) },
387 };
388
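/* Expected payload length for each OVS_TUNNEL_KEY_ATTR_* attribute. */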
389 static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] = {
390         [OVS_TUNNEL_KEY_ATTR_ID]            = { .len = sizeof(u64) },
391         [OVS_TUNNEL_KEY_ATTR_IPV4_SRC]      = { .len = sizeof(u32) },
392         [OVS_TUNNEL_KEY_ATTR_IPV4_DST]      = { .len = sizeof(u32) },
393         [OVS_TUNNEL_KEY_ATTR_TOS]           = { .len = 1 },
394         [OVS_TUNNEL_KEY_ATTR_TTL]           = { .len = 1 },
395         [OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT] = { .len = 0 },
396         [OVS_TUNNEL_KEY_ATTR_CSUM]          = { .len = 0 },
397         [OVS_TUNNEL_KEY_ATTR_TP_SRC]        = { .len = sizeof(u16) },
398         [OVS_TUNNEL_KEY_ATTR_TP_DST]        = { .len = sizeof(u16) },
399         [OVS_TUNNEL_KEY_ATTR_OAM]           = { .len = 0 },
400         [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS]   = { .len = OVS_ATTR_VARIABLE },
401         [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS]    = { .len = OVS_ATTR_NESTED,
402                                                 .next = ovs_vxlan_ext_key_lens },
403         [OVS_TUNNEL_KEY_ATTR_IPV6_SRC]      = { .len = sizeof(struct in6_addr) },
404         [OVS_TUNNEL_KEY_ATTR_IPV6_DST]      = { .len = sizeof(struct in6_addr) },
405         [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS]   = { .len = sizeof(u32) },
406 };
407
408 static const struct ovs_len_tbl
409 ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = {
410         [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) },
411         [OVS_NSH_KEY_ATTR_MD1]  = { .len = sizeof(struct ovs_nsh_key_md1) },
412         [OVS_NSH_KEY_ATTR_MD2]  = { .len = OVS_ATTR_VARIABLE },
413 };
414
415 /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
416 static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
417         [OVS_KEY_ATTR_ENCAP]     = { .len = OVS_ATTR_NESTED },
418         [OVS_KEY_ATTR_PRIORITY]  = { .len = sizeof(u32) },
419         [OVS_KEY_ATTR_IN_PORT]   = { .len = sizeof(u32) },
420         [OVS_KEY_ATTR_SKB_MARK]  = { .len = sizeof(u32) },
421         [OVS_KEY_ATTR_ETHERNET]  = { .len = sizeof(struct ovs_key_ethernet) },
422         [OVS_KEY_ATTR_VLAN]      = { .len = sizeof(__be16) },
423         [OVS_KEY_ATTR_ETHERTYPE] = { .len = sizeof(__be16) },
424         [OVS_KEY_ATTR_IPV4]      = { .len = sizeof(struct ovs_key_ipv4) },
425         [OVS_KEY_ATTR_IPV6]      = { .len = sizeof(struct ovs_key_ipv6) },
426         [OVS_KEY_ATTR_TCP]       = { .len = sizeof(struct ovs_key_tcp) },
427         [OVS_KEY_ATTR_TCP_FLAGS] = { .len = sizeof(__be16) },
428         [OVS_KEY_ATTR_UDP]       = { .len = sizeof(struct ovs_key_udp) },
429         [OVS_KEY_ATTR_SCTP]      = { .len = sizeof(struct ovs_key_sctp) },
430         [OVS_KEY_ATTR_ICMP]      = { .len = sizeof(struct ovs_key_icmp) },
431         [OVS_KEY_ATTR_ICMPV6]    = { .len = sizeof(struct ovs_key_icmpv6) },
432         [OVS_KEY_ATTR_ARP]       = { .len = sizeof(struct ovs_key_arp) },
433         [OVS_KEY_ATTR_ND]        = { .len = sizeof(struct ovs_key_nd) },
434         [OVS_KEY_ATTR_RECIRC_ID] = { .len = sizeof(u32) },
435         [OVS_KEY_ATTR_DP_HASH]   = { .len = sizeof(u32) },
436         [OVS_KEY_ATTR_TUNNEL]    = { .len = OVS_ATTR_NESTED,
437                                      .next = ovs_tunnel_key_lens, },
438         [OVS_KEY_ATTR_MPLS]      = { .len = sizeof(struct ovs_key_mpls) },
439         [OVS_KEY_ATTR_CT_STATE]  = { .len = sizeof(u32) },
440         [OVS_KEY_ATTR_CT_ZONE]   = { .len = sizeof(u16) },
441         [OVS_KEY_ATTR_CT_MARK]   = { .len = sizeof(u32) },
442         [OVS_KEY_ATTR_CT_LABELS] = { .len = sizeof(struct ovs_key_ct_labels) },
443         [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4] = {
444                 .len = sizeof(struct ovs_key_ct_tuple_ipv4) },
445         [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = {
446                 .len = sizeof(struct ovs_key_ct_tuple_ipv6) },
447         [OVS_KEY_ATTR_NSH]       = { .len = OVS_ATTR_NESTED,
448                                      .next = ovs_nsh_key_attr_lens, },
449 };
450
451 static bool check_attr_len(unsigned int attr_len, unsigned int expected_len)
452 {
453         return expected_len == attr_len ||
454                expected_len == OVS_ATTR_NESTED ||
455                expected_len == OVS_ATTR_VARIABLE;
456 }
457
458 static bool is_all_zero(const u8 *fp, size_t size)
459 {
460         int i;
461
462         if (!fp)
463                 return false;
464
465         for (i = 0; i < size; i++)
466                 if (fp[i])
467                         return false;
468
469         return true;
470 }
471
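/* Walk a nested list of OVS_KEY_ATTR_* attributes, checking each type and
 * length against ovs_key_lens[], and record the attributes seen in a[] and
 * as bits in '*attrsp'.  With 'nz' set (mask parsing), all-zero attributes
 * are skipped so that they do not constrain the match.
 */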
472 static int __parse_flow_nlattrs(const struct nlattr *attr,
473                                 const struct nlattr *a[],
474                                 u64 *attrsp, bool log, bool nz)
475 {
476         const struct nlattr *nla;
477         u64 attrs;
478         int rem;
479
480         attrs = *attrsp;
481         nla_for_each_nested(nla, attr, rem) {
482                 u16 type = nla_type(nla);
483                 int expected_len;
484
485                 if (type > OVS_KEY_ATTR_MAX) {
486                         OVS_NLERR(log, "Key type %d is out of range max %d",
487                                   type, OVS_KEY_ATTR_MAX);
488                         return -EINVAL;
489                 }
490
491                 if (attrs & (1 << type)) {
492                         OVS_NLERR(log, "Duplicate key (type %d).", type);
493                         return -EINVAL;
494                 }
495
496                 expected_len = ovs_key_lens[type].len;
497                 if (!check_attr_len(nla_len(nla), expected_len)) {
498                         OVS_NLERR(log, "Key %d has unexpected len %d expected %d",
499                                   type, nla_len(nla), expected_len);
500                         return -EINVAL;
501                 }
502
503                 if (!nz || !is_all_zero(nla_data(nla), expected_len)) {
504                         attrs |= 1 << type;
505                         a[type] = nla;
506                 }
507         }
508         if (rem) {
509                 OVS_NLERR(log, "Message has %d unknown bytes.", rem);
510                 return -EINVAL;
511         }
512
513         *attrsp = attrs;
514         return 0;
515 }
516
517 static int parse_flow_mask_nlattrs(const struct nlattr *attr,
518                                    const struct nlattr *a[], u64 *attrsp,
519                                    bool log)
520 {
521         return __parse_flow_nlattrs(attr, a, attrsp, log, true);
522 }
523
524 int parse_flow_nlattrs(const struct nlattr *attr, const struct nlattr *a[],
525                        u64 *attrsp, bool log)
526 {
527         return __parse_flow_nlattrs(attr, a, attrsp, log, false);
528 }
529
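/* Copy Geneve TLV options from the netlink attribute into the tun_opts
 * area of the flow key or mask.  The option length is itself part of the
 * match: the key records the real length and the mask records 0xff, so
 * packets carrying extra options cannot silently match the same flow.
 */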
530 static int genev_tun_opt_from_nlattr(const struct nlattr *a,
531                                      struct sw_flow_match *match, bool is_mask,
532                                      bool log)
533 {
534         unsigned long opt_key_offset;
535
536         if (nla_len(a) > sizeof(match->key->tun_opts)) {
537                 OVS_NLERR(log, "Geneve option length err (len %d, max %zu).",
538                           nla_len(a), sizeof(match->key->tun_opts));
539                 return -EINVAL;
540         }
541
542         if (nla_len(a) % 4 != 0) {
543                 OVS_NLERR(log, "Geneve opt len %d is not a multiple of 4.",
544                           nla_len(a));
545                 return -EINVAL;
546         }
547
548         /* We need to record the length of the options passed
549          * down, otherwise packets with the same format but
550          * additional options will be silently matched.
551          */
552         if (!is_mask) {
553                 SW_FLOW_KEY_PUT(match, tun_opts_len, nla_len(a),
554                                 false);
555         } else {
556                 /* This is somewhat unusual because it looks at
557                  * both the key and mask while parsing the
558                  * attributes (and by extension assumes the key
559                  * is parsed first). Normally, we would verify
560                  * that each is the correct length and that the
561                  * attributes line up in the validate function.
562                  * However, that is difficult because this is
563                  * variable length and we won't have the
564                  * information later.
565                  */
566                 if (match->key->tun_opts_len != nla_len(a)) {
567                         OVS_NLERR(log, "Geneve option len %d != mask len %d",
568                                   match->key->tun_opts_len, nla_len(a));
569                         return -EINVAL;
570                 }
571
572                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
573         }
574
575         opt_key_offset = TUN_METADATA_OFFSET(nla_len(a));
576         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, nla_data(a),
577                                   nla_len(a), is_mask);
578         return 0;
579 }
580
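/* Parse the nested OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS block (currently only
 * the GBP extension) into a struct vxlan_metadata stored in tun_opts.
 */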
581 static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr,
582                                      struct sw_flow_match *match, bool is_mask,
583                                      bool log)
584 {
585         struct nlattr *a;
586         int rem;
587         unsigned long opt_key_offset;
588         struct vxlan_metadata opts;
589
590         BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
591
592         memset(&opts, 0, sizeof(opts));
593         nla_for_each_nested(a, attr, rem) {
594                 int type = nla_type(a);
595
596                 if (type > OVS_VXLAN_EXT_MAX) {
597                         OVS_NLERR(log, "VXLAN extension %d out of range max %d",
598                                   type, OVS_VXLAN_EXT_MAX);
599                         return -EINVAL;
600                 }
601
602                 if (!check_attr_len(nla_len(a),
603                                     ovs_vxlan_ext_key_lens[type].len)) {
604                         OVS_NLERR(log, "VXLAN extension %d has unexpected len %d expected %d",
605                                   type, nla_len(a),
606                                   ovs_vxlan_ext_key_lens[type].len);
607                         return -EINVAL;
608                 }
609
610                 switch (type) {
611                 case OVS_VXLAN_EXT_GBP:
612                         opts.gbp = nla_get_u32(a);
613                         break;
614                 default:
615                         OVS_NLERR(log, "Unknown VXLAN extension attribute %d",
616                                   type);
617                         return -EINVAL;
618                 }
619         }
620         if (rem) {
621                 OVS_NLERR(log, "VXLAN extension message has %d unknown bytes.",
622                           rem);
623                 return -EINVAL;
624         }
625
626         if (!is_mask)
627                 SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false);
628         else
629                 SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true);
630
631         opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
632         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
633                                   is_mask);
634         return 0;
635 }
636
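/* Parse OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: a single be32 carrying the
 * 20-bit ERSPAN index, stored as struct erspan_metadata in tun_opts.
 */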
637 static int erspan_tun_opt_from_nlattr(const struct nlattr *attr,
638                                       struct sw_flow_match *match, bool is_mask,
639                                       bool log)
640 {
641         unsigned long opt_key_offset;
642         struct erspan_metadata opts;
643
644         BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts));
645
646         memset(&opts, 0, sizeof(opts));
647         opts.index = nla_get_be32(attr);
648
649         /* The index field is only 20 bits wide. */
650         if (ntohl(opts.index) & ~INDEX_MASK) {
651                 OVS_NLERR(log, "ERSPAN index number %x too large.",
652                           ntohl(opts.index));
653                 return -EINVAL;
654         }
655
656         SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask);
657         opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts));
658         SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts),
659                                   is_mask);
660
661         return 0;
662 }
663
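/* Parse the nested OVS_KEY_ATTR_TUNNEL attribute into the tunnel key or
 * mask.  Returns a negative errno on failure; on success returns the
 * OVS_TUNNEL_KEY_ATTR_* type of the option block found (Geneve, VXLAN or
 * ERSPAN), or 0 if the tunnel carries no options.
 */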
664 static int ip_tun_from_nlattr(const struct nlattr *attr,
665                               struct sw_flow_match *match, bool is_mask,
666                               bool log)
667 {
668         bool ttl = false, ipv4 = false, ipv6 = false;
669         __be16 tun_flags = 0;
670         int opts_type = 0;
671         struct nlattr *a;
672         int rem;
673
674         nla_for_each_nested(a, attr, rem) {
675                 int type = nla_type(a);
676                 int err;
677
678                 if (type > OVS_TUNNEL_KEY_ATTR_MAX) {
679                         OVS_NLERR(log, "Tunnel attr %d out of range max %d",
680                                   type, OVS_TUNNEL_KEY_ATTR_MAX);
681                         return -EINVAL;
682                 }
683
684                 if (!check_attr_len(nla_len(a),
685                                     ovs_tunnel_key_lens[type].len)) {
686                         OVS_NLERR(log, "Tunnel attr %d has unexpected len %d expected %d",
687                                   type, nla_len(a), ovs_tunnel_key_lens[type].len);
688                         return -EINVAL;
689                 }
690
691                 switch (type) {
692                 case OVS_TUNNEL_KEY_ATTR_ID:
693                         SW_FLOW_KEY_PUT(match, tun_key.tun_id,
694                                         nla_get_be64(a), is_mask);
695                         tun_flags |= TUNNEL_KEY;
696                         break;
697                 case OVS_TUNNEL_KEY_ATTR_IPV4_SRC:
698                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.src,
699                                         nla_get_in_addr(a), is_mask);
700                         ipv4 = true;
701                         break;
702                 case OVS_TUNNEL_KEY_ATTR_IPV4_DST:
703                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv4.dst,
704                                         nla_get_in_addr(a), is_mask);
705                         ipv4 = true;
706                         break;
707                 case OVS_TUNNEL_KEY_ATTR_IPV6_SRC:
708                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.src,
709                                         nla_get_in6_addr(a), is_mask);
710                         ipv6 = true;
711                         break;
712                 case OVS_TUNNEL_KEY_ATTR_IPV6_DST:
713                         SW_FLOW_KEY_PUT(match, tun_key.u.ipv6.dst,
714                                         nla_get_in6_addr(a), is_mask);
715                         ipv6 = true;
716                         break;
717                 case OVS_TUNNEL_KEY_ATTR_TOS:
718                         SW_FLOW_KEY_PUT(match, tun_key.tos,
719                                         nla_get_u8(a), is_mask);
720                         break;
721                 case OVS_TUNNEL_KEY_ATTR_TTL:
722                         SW_FLOW_KEY_PUT(match, tun_key.ttl,
723                                         nla_get_u8(a), is_mask);
724                         ttl = true;
725                         break;
726                 case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT:
727                         tun_flags |= TUNNEL_DONT_FRAGMENT;
728                         break;
729                 case OVS_TUNNEL_KEY_ATTR_CSUM:
730                         tun_flags |= TUNNEL_CSUM;
731                         break;
732                 case OVS_TUNNEL_KEY_ATTR_TP_SRC:
733                         SW_FLOW_KEY_PUT(match, tun_key.tp_src,
734                                         nla_get_be16(a), is_mask);
735                         break;
736                 case OVS_TUNNEL_KEY_ATTR_TP_DST:
737                         SW_FLOW_KEY_PUT(match, tun_key.tp_dst,
738                                         nla_get_be16(a), is_mask);
739                         break;
740                 case OVS_TUNNEL_KEY_ATTR_OAM:
741                         tun_flags |= TUNNEL_OAM;
742                         break;
743                 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
744                         if (opts_type) {
745                                 OVS_NLERR(log, "Multiple metadata blocks provided");
746                                 return -EINVAL;
747                         }
748
749                         err = genev_tun_opt_from_nlattr(a, match, is_mask, log);
750                         if (err)
751                                 return err;
752
753                         tun_flags |= TUNNEL_GENEVE_OPT;
754                         opts_type = type;
755                         break;
756                 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
757                         if (opts_type) {
758                                 OVS_NLERR(log, "Multiple metadata blocks provided");
759                                 return -EINVAL;
760                         }
761
762                         err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log);
763                         if (err)
764                                 return err;
765
766                         tun_flags |= TUNNEL_VXLAN_OPT;
767                         opts_type = type;
768                         break;
769                 case OVS_TUNNEL_KEY_ATTR_PAD:
770                         break;
771                 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
772                         if (opts_type) {
773                                 OVS_NLERR(log, "Multiple metadata blocks provided");
774                                 return -EINVAL;
775                         }
776
777                         err = erspan_tun_opt_from_nlattr(a, match, is_mask, log);
778                         if (err)
779                                 return err;
780
781                         tun_flags |= TUNNEL_ERSPAN_OPT;
782                         opts_type = type;
783                         break;
784                 default:
785                         OVS_NLERR(log, "Unknown IP tunnel attribute %d",
786                                   type);
787                         return -EINVAL;
788                 }
789         }
790
791         SW_FLOW_KEY_PUT(match, tun_key.tun_flags, tun_flags, is_mask);
792         if (is_mask)
793                 SW_FLOW_KEY_MEMSET_FIELD(match, tun_proto, 0xff, true);
794         else
795                 SW_FLOW_KEY_PUT(match, tun_proto, ipv6 ? AF_INET6 : AF_INET,
796                                 false);
797
798         if (rem > 0) {
799                 OVS_NLERR(log, "IP tunnel attribute has %d unknown bytes.",
800                           rem);
801                 return -EINVAL;
802         }
803
804         if (ipv4 && ipv6) {
805                 OVS_NLERR(log, "Mixed IPv4 and IPv6 tunnel attributes");
806                 return -EINVAL;
807         }
808
809         if (!is_mask) {
810                 if (!ipv4 && !ipv6) {
811                         OVS_NLERR(log, "IP tunnel dst address not specified");
812                         return -EINVAL;
813                 }
814                 if (ipv4 && !match->key->tun_key.u.ipv4.dst) {
815                         OVS_NLERR(log, "IPv4 tunnel dst address is zero");
816                         return -EINVAL;
817                 }
818                 if (ipv6 && ipv6_addr_any(&match->key->tun_key.u.ipv6.dst)) {
819                         OVS_NLERR(log, "IPv6 tunnel dst address is zero");
820                         return -EINVAL;
821                 }
822
823                 if (!ttl) {
824                         OVS_NLERR(log, "IP tunnel TTL not specified.");
825                         return -EINVAL;
826                 }
827         }
828
829         return opts_type;
830 }
831
832 static int vxlan_opt_to_nlattr(struct sk_buff *skb,
833                                const void *tun_opts, int swkey_tun_opts_len)
834 {
835         const struct vxlan_metadata *opts = tun_opts;
836         struct nlattr *nla;
837
838         nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS);
839         if (!nla)
840                 return -EMSGSIZE;
841
842         if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0)
843                 return -EMSGSIZE;
844
845         nla_nest_end(skb, nla);
846         return 0;
847 }
848
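/* Emit 'output' (plus any tunnel options) as OVS_TUNNEL_KEY_ATTR_*
 * attributes on 'skb'.  Addresses, TOS and port numbers are only written
 * when non-zero; flag attributes follow tun_flags.
 */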
849 static int __ip_tun_to_nlattr(struct sk_buff *skb,
850                               const struct ip_tunnel_key *output,
851                               const void *tun_opts, int swkey_tun_opts_len,
852                               unsigned short tun_proto)
853 {
854         if (output->tun_flags & TUNNEL_KEY &&
855             nla_put_be64(skb, OVS_TUNNEL_KEY_ATTR_ID, output->tun_id,
856                          OVS_TUNNEL_KEY_ATTR_PAD))
857                 return -EMSGSIZE;
858         switch (tun_proto) {
859         case AF_INET:
860                 if (output->u.ipv4.src &&
861                     nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_SRC,
862                                     output->u.ipv4.src))
863                         return -EMSGSIZE;
864                 if (output->u.ipv4.dst &&
865                     nla_put_in_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV4_DST,
866                                     output->u.ipv4.dst))
867                         return -EMSGSIZE;
868                 break;
869         case AF_INET6:
870                 if (!ipv6_addr_any(&output->u.ipv6.src) &&
871                     nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_SRC,
872                                      &output->u.ipv6.src))
873                         return -EMSGSIZE;
874                 if (!ipv6_addr_any(&output->u.ipv6.dst) &&
875                     nla_put_in6_addr(skb, OVS_TUNNEL_KEY_ATTR_IPV6_DST,
876                                      &output->u.ipv6.dst))
877                         return -EMSGSIZE;
878                 break;
879         }
880         if (output->tos &&
881             nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TOS, output->tos))
882                 return -EMSGSIZE;
883         if (nla_put_u8(skb, OVS_TUNNEL_KEY_ATTR_TTL, output->ttl))
884                 return -EMSGSIZE;
885         if ((output->tun_flags & TUNNEL_DONT_FRAGMENT) &&
886             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
887                 return -EMSGSIZE;
888         if ((output->tun_flags & TUNNEL_CSUM) &&
889             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_CSUM))
890                 return -EMSGSIZE;
891         if (output->tp_src &&
892             nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_SRC, output->tp_src))
893                 return -EMSGSIZE;
894         if (output->tp_dst &&
895             nla_put_be16(skb, OVS_TUNNEL_KEY_ATTR_TP_DST, output->tp_dst))
896                 return -EMSGSIZE;
897         if ((output->tun_flags & TUNNEL_OAM) &&
898             nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
899                 return -EMSGSIZE;
900         if (swkey_tun_opts_len) {
901                 if (output->tun_flags & TUNNEL_GENEVE_OPT &&
902                     nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
903                             swkey_tun_opts_len, tun_opts))
904                         return -EMSGSIZE;
905                 else if (output->tun_flags & TUNNEL_VXLAN_OPT &&
906                          vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len))
907                         return -EMSGSIZE;
908                 else if (output->tun_flags & TUNNEL_ERSPAN_OPT &&
909                          nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
910                                       ((struct erspan_metadata *)tun_opts)->index))
911                         return -EMSGSIZE;
912         }
913
914         return 0;
915 }
916
917 static int ip_tun_to_nlattr(struct sk_buff *skb,
918                             const struct ip_tunnel_key *output,
919                             const void *tun_opts, int swkey_tun_opts_len,
920                             unsigned short tun_proto)
921 {
922         struct nlattr *nla;
923         int err;
924
925         nla = nla_nest_start(skb, OVS_KEY_ATTR_TUNNEL);
926         if (!nla)
927                 return -EMSGSIZE;
928
929         err = __ip_tun_to_nlattr(skb, output, tun_opts, swkey_tun_opts_len,
930                                  tun_proto);
931         if (err)
932                 return err;
933
934         nla_nest_end(skb, nla);
935         return 0;
936 }
937
938 int ovs_nla_put_tunnel_info(struct sk_buff *skb,
939                             struct ip_tunnel_info *tun_info)
940 {
941         return __ip_tun_to_nlattr(skb, &tun_info->key,
942                                   ip_tunnel_info_opts(tun_info),
943                                   tun_info->options_len,
944                                   ip_tunnel_info_af(tun_info));
945 }
946
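/* Store the 802.1Q TPID/TCI from the attributes into the outer or inner
 * (customer) VLAN slot of the flow key or mask.
 */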
947 static int encode_vlan_from_nlattrs(struct sw_flow_match *match,
948                                     const struct nlattr *a[],
949                                     bool is_mask, bool inner)
950 {
951         __be16 tci = 0;
952         __be16 tpid = 0;
953
954         if (a[OVS_KEY_ATTR_VLAN])
955                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
956
957         if (a[OVS_KEY_ATTR_ETHERTYPE])
958                 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
959
960         if (likely(!inner)) {
961                 SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask);
962                 SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask);
963         } else {
964                 SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask);
965                 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask);
966         }
967         return 0;
968 }
969
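/* Returns 1 if the attributes describe a VLAN header that should be
 * encoded and descended into, 0 if this is not a VLAN frame, or a
 * negative errno if the attribute combination is invalid.
 */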
970 static int validate_vlan_from_nlattrs(const struct sw_flow_match *match,
971                                       u64 key_attrs, bool inner,
972                                       const struct nlattr **a, bool log)
973 {
974         __be16 tci = 0;
975
976         if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) &&
977               (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) &&
978                eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) {
979                 /* Not a VLAN. */
980                 return 0;
981         }
982
983         if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) &&
984               (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) {
985                 OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN");
986                 return -EINVAL;
987         }
988
989         if (a[OVS_KEY_ATTR_VLAN])
990                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
991
992         if (!(tci & htons(VLAN_TAG_PRESENT))) {
993                 if (tci) {
994                         OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.",
995                                   (inner) ? "C-VLAN" : "VLAN");
996                         return -EINVAL;
997                 } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) {
998                         /* Corner case for truncated VLAN header. */
999                         OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.",
1000                                   (inner) ? "C-VLAN" : "VLAN");
1001                         return -EINVAL;
1002                 }
1003         }
1004
1005         return 1;
1006 }
1007
1008 static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match,
1009                                            u64 key_attrs, bool inner,
1010                                            const struct nlattr **a, bool log)
1011 {
1012         __be16 tci = 0;
1013         __be16 tpid = 0;
1014         bool encap_valid = !!(match->key->eth.vlan.tci &
1015                               htons(VLAN_TAG_PRESENT));
1016         bool i_encap_valid = !!(match->key->eth.cvlan.tci &
1017                                 htons(VLAN_TAG_PRESENT));
1018
1019         if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) {
1020                 /* Not a VLAN. */
1021                 return 0;
1022         }
1023
1024         if ((!inner && !encap_valid) || (inner && !i_encap_valid)) {
1025                 OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.",
1026                           (inner) ? "C-VLAN" : "VLAN");
1027                 return -EINVAL;
1028         }
1029
1030         if (a[OVS_KEY_ATTR_VLAN])
1031                 tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
1032
1033         if (a[OVS_KEY_ATTR_ETHERTYPE])
1034                 tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1035
1036         if (tpid != htons(0xffff)) {
1037                 OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).",
1038                           (inner) ? "C-VLAN" : "VLAN", ntohs(tpid));
1039                 return -EINVAL;
1040         }
1041         if (!(tci & htons(VLAN_TAG_PRESENT))) {
1042                 OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.",
1043                           (inner) ? "C-VLAN" : "VLAN");
1044                 return -EINVAL;
1045         }
1046
1047         return 1;
1048 }
1049
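/* Validate and encode one VLAN level (outer or inner, key or mask), then
 * parse the OVS_KEY_ATTR_ENCAP payload so the caller sees the
 * encapsulated attributes as though they were top level.
 */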
1050 static int __parse_vlan_from_nlattrs(struct sw_flow_match *match,
1051                                      u64 *key_attrs, bool inner,
1052                                      const struct nlattr **a, bool is_mask,
1053                                      bool log)
1054 {
1055         int err;
1056         const struct nlattr *encap;
1057
1058         if (!is_mask)
1059                 err = validate_vlan_from_nlattrs(match, *key_attrs, inner,
1060                                                  a, log);
1061         else
1062                 err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner,
1063                                                       a, log);
1064         if (err <= 0)
1065                 return err;
1066
1067         err = encode_vlan_from_nlattrs(match, a, is_mask, inner);
1068         if (err)
1069                 return err;
1070
1071         *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP);
1072         *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN);
1073         *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1074
1075         encap = a[OVS_KEY_ATTR_ENCAP];
1076
1077         if (!is_mask)
1078                 err = parse_flow_nlattrs(encap, a, key_attrs, log);
1079         else
1080                 err = parse_flow_mask_nlattrs(encap, a, key_attrs, log);
1081
1082         return err;
1083 }
1084
1085 static int parse_vlan_from_nlattrs(struct sw_flow_match *match,
1086                                    u64 *key_attrs, const struct nlattr **a,
1087                                    bool is_mask, bool log)
1088 {
1089         int err;
1090         bool encap_valid = false;
1091
1092         err = __parse_vlan_from_nlattrs(match, key_attrs, false, a,
1093                                         is_mask, log);
1094         if (err)
1095                 return err;
1096
1097         encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT));
1098         if (encap_valid) {
1099                 err = __parse_vlan_from_nlattrs(match, key_attrs, true, a,
1100                                                 is_mask, log);
1101                 if (err)
1102                         return err;
1103         }
1104
1105         return 0;
1106 }
1107
1108 static int parse_eth_type_from_nlattrs(struct sw_flow_match *match,
1109                                        u64 *attrs, const struct nlattr **a,
1110                                        bool is_mask, bool log)
1111 {
1112         __be16 eth_type;
1113
1114         eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
1115         if (is_mask) {
1116                 /* Always exact match EtherType. */
1117                 eth_type = htons(0xffff);
1118         } else if (!eth_proto_is_802_3(eth_type)) {
1119                 OVS_NLERR(log, "EtherType %x is less than min %x",
1120                                 ntohs(eth_type), ETH_P_802_3_MIN);
1121                 return -EINVAL;
1122         }
1123
1124         SW_FLOW_KEY_PUT(match, eth.type, eth_type, is_mask);
1125         *attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
1126         return 0;
1127 }
1128
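/* Consume the packet-metadata attributes (dp_hash, recirc_id, priority,
 * in_port, skb_mark, tunnel and conntrack state) from a[], clearing each
 * handled bit in '*attrs'.  Also derives mac_proto: an EtherType without
 * an Ethernet header attribute indicates an L3 (MAC_PROTO_NONE) packet.
 */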
1129 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
1130                                  u64 *attrs, const struct nlattr **a,
1131                                  bool is_mask, bool log)
1132 {
1133         u8 mac_proto = MAC_PROTO_ETHERNET;
1134
1135         if (*attrs & (1 << OVS_KEY_ATTR_DP_HASH)) {
1136                 u32 hash_val = nla_get_u32(a[OVS_KEY_ATTR_DP_HASH]);
1137
1138                 SW_FLOW_KEY_PUT(match, ovs_flow_hash, hash_val, is_mask);
1139                 *attrs &= ~(1 << OVS_KEY_ATTR_DP_HASH);
1140         }
1141
1142         if (*attrs & (1 << OVS_KEY_ATTR_RECIRC_ID)) {
1143                 u32 recirc_id = nla_get_u32(a[OVS_KEY_ATTR_RECIRC_ID]);
1144
1145                 SW_FLOW_KEY_PUT(match, recirc_id, recirc_id, is_mask);
1146                 *attrs &= ~(1 << OVS_KEY_ATTR_RECIRC_ID);
1147         }
1148
1149         if (*attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
1150                 SW_FLOW_KEY_PUT(match, phy.priority,
1151                           nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]), is_mask);
1152                 *attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
1153         }
1154
1155         if (*attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
1156                 u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
1157
1158                 if (is_mask) {
1159                         in_port = 0xffffffff; /* Always exact match in_port. */
1160                 } else if (in_port >= DP_MAX_PORTS) {
1161                         OVS_NLERR(log, "Port %d exceeds max allowable %d",
1162                                   in_port, DP_MAX_PORTS);
1163                         return -EINVAL;
1164                 }
1165
1166                 SW_FLOW_KEY_PUT(match, phy.in_port, in_port, is_mask);
1167                 *attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
1168         } else if (!is_mask) {
1169                 SW_FLOW_KEY_PUT(match, phy.in_port, DP_MAX_PORTS, is_mask);
1170         }
1171
1172         if (*attrs & (1 << OVS_KEY_ATTR_SKB_MARK)) {
1173                 uint32_t mark = nla_get_u32(a[OVS_KEY_ATTR_SKB_MARK]);
1174
1175                 SW_FLOW_KEY_PUT(match, phy.skb_mark, mark, is_mask);
1176                 *attrs &= ~(1 << OVS_KEY_ATTR_SKB_MARK);
1177         }
1178         if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) {
1179                 if (ip_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match,
1180                                        is_mask, log) < 0)
1181                         return -EINVAL;
1182                 *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL);
1183         }
1184
1185         if (*attrs & (1 << OVS_KEY_ATTR_CT_STATE) &&
1186             ovs_ct_verify(net, OVS_KEY_ATTR_CT_STATE)) {
1187                 u32 ct_state = nla_get_u32(a[OVS_KEY_ATTR_CT_STATE]);
1188
1189                 if (ct_state & ~CT_SUPPORTED_MASK) {
1190                         OVS_NLERR(log, "ct_state flags %08x unsupported",
1191                                   ct_state);
1192                         return -EINVAL;
1193                 }
1194
1195                 SW_FLOW_KEY_PUT(match, ct_state, ct_state, is_mask);
1196                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_STATE);
1197         }
1198         if (*attrs & (1 << OVS_KEY_ATTR_CT_ZONE) &&
1199             ovs_ct_verify(net, OVS_KEY_ATTR_CT_ZONE)) {
1200                 u16 ct_zone = nla_get_u16(a[OVS_KEY_ATTR_CT_ZONE]);
1201
1202                 SW_FLOW_KEY_PUT(match, ct_zone, ct_zone, is_mask);
1203                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ZONE);
1204         }
1205         if (*attrs & (1 << OVS_KEY_ATTR_CT_MARK) &&
1206             ovs_ct_verify(net, OVS_KEY_ATTR_CT_MARK)) {
1207                 u32 mark = nla_get_u32(a[OVS_KEY_ATTR_CT_MARK]);
1208
1209                 SW_FLOW_KEY_PUT(match, ct.mark, mark, is_mask);
1210                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_MARK);
1211         }
1212         if (*attrs & (1 << OVS_KEY_ATTR_CT_LABELS) &&
1213             ovs_ct_verify(net, OVS_KEY_ATTR_CT_LABELS)) {
1214                 const struct ovs_key_ct_labels *cl;
1215
1216                 cl = nla_data(a[OVS_KEY_ATTR_CT_LABELS]);
1217                 SW_FLOW_KEY_MEMCPY(match, ct.labels, cl->ct_labels,
1218                                    sizeof(*cl), is_mask);
1219                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_LABELS);
1220         }
1221         if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4)) {
1222                 const struct ovs_key_ct_tuple_ipv4 *ct;
1223
1224                 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4]);
1225
1226                 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.src, ct->ipv4_src, is_mask);
1227                 SW_FLOW_KEY_PUT(match, ipv4.ct_orig.dst, ct->ipv4_dst, is_mask);
1228                 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1229                 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1230                 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv4_proto, is_mask);
1231                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4);
1232         }
1233         if (*attrs & (1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6)) {
1234                 const struct ovs_key_ct_tuple_ipv6 *ct;
1235
1236                 ct = nla_data(a[OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6]);
1237
1238                 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.src, &ct->ipv6_src,
1239                                    sizeof(match->key->ipv6.ct_orig.src),
1240                                    is_mask);
1241                 SW_FLOW_KEY_MEMCPY(match, ipv6.ct_orig.dst, &ct->ipv6_dst,
1242                                    sizeof(match->key->ipv6.ct_orig.dst),
1243                                    is_mask);
1244                 SW_FLOW_KEY_PUT(match, ct.orig_tp.src, ct->src_port, is_mask);
1245                 SW_FLOW_KEY_PUT(match, ct.orig_tp.dst, ct->dst_port, is_mask);
1246                 SW_FLOW_KEY_PUT(match, ct_orig_proto, ct->ipv6_proto, is_mask);
1247                 *attrs &= ~(1ULL << OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6);
1248         }
1249
1250         /* For layer 3 packets the Ethernet type is provided
1251          * and treated as metadata but no MAC addresses are provided.
1252          */
1253         if (!(*attrs & (1ULL << OVS_KEY_ATTR_ETHERNET)) &&
1254             (*attrs & (1ULL << OVS_KEY_ATTR_ETHERTYPE)))
1255                 mac_proto = MAC_PROTO_NONE;
1256
1257         /* Always exact match mac_proto */
1258         SW_FLOW_KEY_PUT(match, mac_proto, is_mask ? 0xff : mac_proto, is_mask);
1259
1260         if (mac_proto == MAC_PROTO_NONE)
1261                 return parse_eth_type_from_nlattrs(match, attrs, a, is_mask,
1262                                                    log);
1263
1264         return 0;
1265 }
1266
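/* Build an on-the-wire NSH header from OVS_NSH_KEY_ATTR_* attributes.
 * 'nh' must provide at least 'size' bytes: the base header is taken from
 * OVS_NSH_KEY_ATTR_BASE, the metadata from OVS_NSH_KEY_ATTR_MD1/MD2, and
 * the version/flags/ttl/length word is assembled last from the pieces
 * collected while walking the attributes.
 */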
1267 int nsh_hdr_from_nlattr(const struct nlattr *attr,
1268                         struct nshhdr *nh, size_t size)
1269 {
1270         struct nlattr *a;
1271         int rem;
1272         u8 flags = 0;
1273         u8 ttl = 0;
1274         int mdlen = 0;
1275
1276         /* validate_nsh() has already checked this; no need to recheck here
1277          */
1278         if (size < NSH_BASE_HDR_LEN)
1279                 return -ENOBUFS;
1280
1281         nla_for_each_nested(a, attr, rem) {
1282                 int type = nla_type(a);
1283
1284                 switch (type) {
1285                 case OVS_NSH_KEY_ATTR_BASE: {
1286                         const struct ovs_nsh_key_base *base = nla_data(a);
1287
1288                         flags = base->flags;
1289                         ttl = base->ttl;
1290                         nh->np = base->np;
1291                         nh->mdtype = base->mdtype;
1292                         nh->path_hdr = base->path_hdr;
1293                         break;
1294                 }
1295                 case OVS_NSH_KEY_ATTR_MD1:
1296                         mdlen = nla_len(a);
1297                         if (mdlen > size - NSH_BASE_HDR_LEN)
1298                                 return -ENOBUFS;
1299                         memcpy(&nh->md1, nla_data(a), mdlen);
1300                         break;
1301
1302                 case OVS_NSH_KEY_ATTR_MD2:
1303                         mdlen = nla_len(a);
1304                         if (mdlen > size - NSH_BASE_HDR_LEN)
1305                                 return -ENOBUFS;
1306                         memcpy(&nh->md2, nla_data(a), mdlen);
1307                         break;
1308
1309                 default:
1310                         return -EINVAL;
1311                 }
1312         }
1313
1314         /* NSH header length = NSH_BASE_HDR_LEN + mdlen */
1315         nh->ver_flags_ttl_len = 0;
1316         nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen);
1317
1318         return 0;
1319 }
1320
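/* Extract an NSH key and mask from a push_nsh-style attribute in which each
 * nested attribute carries the value immediately followed by its mask (hence
 * the 'base + 1' / 'md1 + 1' pointers below).  MD type 2 is not supported.
 */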
1321 int nsh_key_from_nlattr(const struct nlattr *attr,
1322                         struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask)
1323 {
1324         struct nlattr *a;
1325         int rem;
1326
1327         /* validate_nsh() has already checked this; no need to recheck here
1328          */
1329         nla_for_each_nested(a, attr, rem) {
1330                 int type = nla_type(a);
1331
1332                 switch (type) {
1333                 case OVS_NSH_KEY_ATTR_BASE: {
1334                         const struct ovs_nsh_key_base *base = nla_data(a);
1335                         const struct ovs_nsh_key_base *base_mask = base + 1;
1336
1337                         nsh->base = *base;
1338                         nsh_mask->base = *base_mask;
1339                         break;
1340                 }
1341                 case OVS_NSH_KEY_ATTR_MD1: {
1342                         const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1343                         const struct ovs_nsh_key_md1 *md1_mask = md1 + 1;
1344
1345                         memcpy(nsh->context, md1->context, sizeof(*md1));
1346                         memcpy(nsh_mask->context, md1_mask->context,
1347                                sizeof(*md1_mask));
1348                         break;
1349                 }
1350                 case OVS_NSH_KEY_ATTR_MD2:
1351                         /* Not supported yet */
1352                         return -ENOTSUPP;
1353                 default:
1354                         return -EINVAL;
1355                 }
1356         }
1357
1358         return 0;
1359 }
1360
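/* Validate nested NSH attributes and store them into 'match'.  Attribute
 * types and lengths are checked against ovs_nsh_key_attr_lens; MD1 and MD2
 * are mutually exclusive, and MD2 is only accepted while validating a
 * push_nsh action, since it cannot be matched on yet.
 */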
1361 static int nsh_key_put_from_nlattr(const struct nlattr *attr,
1362                                    struct sw_flow_match *match, bool is_mask,
1363                                    bool is_push_nsh, bool log)
1364 {
1365         struct nlattr *a;
1366         int rem;
1367         bool has_base = false;
1368         bool has_md1 = false;
1369         bool has_md2 = false;
1370         u8 mdtype = 0;
1371         int mdlen = 0;
1372
1373         if (WARN_ON(is_push_nsh && is_mask))
1374                 return -EINVAL;
1375
1376         nla_for_each_nested(a, attr, rem) {
1377                 int type = nla_type(a);
1378                 int i;
1379
1380                 if (type > OVS_NSH_KEY_ATTR_MAX) {
1381                         OVS_NLERR(log, "nsh attr %d is out of range max %d",
1382                                   type, OVS_NSH_KEY_ATTR_MAX);
1383                         return -EINVAL;
1384                 }
1385
1386                 if (!check_attr_len(nla_len(a),
1387                                     ovs_nsh_key_attr_lens[type].len)) {
1388                         OVS_NLERR(log, "nsh attr %d has unexpected len %d expected %d",
1389                                   type, nla_len(a),
1390                                   ovs_nsh_key_attr_lens[type].len);
1395                         return -EINVAL;
1396                 }
1397
1398                 switch (type) {
1399                 case OVS_NSH_KEY_ATTR_BASE: {
1400                         const struct ovs_nsh_key_base *base = nla_data(a);
1401
1402                         has_base = true;
1403                         mdtype = base->mdtype;
1404                         SW_FLOW_KEY_PUT(match, nsh.base.flags,
1405                                         base->flags, is_mask);
1406                         SW_FLOW_KEY_PUT(match, nsh.base.ttl,
1407                                         base->ttl, is_mask);
1408                         SW_FLOW_KEY_PUT(match, nsh.base.mdtype,
1409                                         base->mdtype, is_mask);
1410                         SW_FLOW_KEY_PUT(match, nsh.base.np,
1411                                         base->np, is_mask);
1412                         SW_FLOW_KEY_PUT(match, nsh.base.path_hdr,
1413                                         base->path_hdr, is_mask);
1414                         break;
1415                 }
1416                 case OVS_NSH_KEY_ATTR_MD1: {
1417                         const struct ovs_nsh_key_md1 *md1 = nla_data(a);
1418
1419                         has_md1 = true;
1420                         for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++)
1421                                 SW_FLOW_KEY_PUT(match, nsh.context[i],
1422                                                 md1->context[i], is_mask);
1423                         break;
1424                 }
1425                 case OVS_NSH_KEY_ATTR_MD2:
1426                         if (!is_push_nsh) /* MD type 2 not supported yet */
1427                                 return -ENOTSUPP;
1428
1429                         has_md2 = true;
1430                         mdlen = nla_len(a);
1431                         if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) {
1432                                 OVS_NLERR(log, "Invalid MD length %d for MD type %d",
1433                                           mdlen, mdtype);
1438                                 return -EINVAL;
1439                         }
1440                         break;
1441                 default:
1442                         OVS_NLERR(log, "Unknown nsh attribute %d",
1443                                   type);
1444                         return -EINVAL;
1445                 }
1446         }
1447
1448         if (rem > 0) {
1449                 OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem);
1450                 return -EINVAL;
1451         }
1452
1453         if (has_md1 && has_md2) {
1454                 OVS_NLERR(1, "invalid nsh attribute: md1 and md2 are exclusive.");
1458                 return -EINVAL;
1459         }
1460
1461         if (!is_mask) {
1462                 if ((has_md1 && mdtype != NSH_M_TYPE1) ||
1463                     (has_md2 && mdtype != NSH_M_TYPE2)) {
1464                         OVS_NLERR(1, "nsh attribute has unmatched MD type %d.",
1465                                   mdtype);
1466                         return -EINVAL;
1467                 }
1468
1469                 if (is_push_nsh &&
1470                     (!has_base || (!has_md1 && !has_md2))) {
1471                         OVS_NLERR(1, "push_nsh: missing base or metadata attributes");
1475                         return -EINVAL;
1476                 }
1477         }
1478
1479         return 0;
1480 }
1481
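/* Fill 'match' from the parsed attribute array 'a'.  Metadata attributes are
 * consumed first by metadata_from_nlattrs(), then each per-protocol key
 * attribute is copied into the key (or the mask, when 'is_mask' is set) and
 * its bit cleared from 'attrs'; any bit still set at the end is rejected.
 */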
1482 static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match,
1483                                 u64 attrs, const struct nlattr **a,
1484                                 bool is_mask, bool log)
1485 {
1486         int err;
1487
1488         err = metadata_from_nlattrs(net, match, &attrs, a, is_mask, log);
1489         if (err)
1490                 return err;
1491
1492         if (attrs & (1 << OVS_KEY_ATTR_ETHERNET)) {
1493                 const struct ovs_key_ethernet *eth_key;
1494
1495                 eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
1496                 SW_FLOW_KEY_MEMCPY(match, eth.src,
1497                                 eth_key->eth_src, ETH_ALEN, is_mask);
1498                 SW_FLOW_KEY_MEMCPY(match, eth.dst,
1499                                 eth_key->eth_dst, ETH_ALEN, is_mask);
1500                 attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
1501
1502                 if (attrs & (1 << OVS_KEY_ATTR_VLAN)) {
1503                         /* VLAN attribute is always parsed before getting here since it
1504                          * may occur multiple times.
1505                          */
1506                         OVS_NLERR(log, "VLAN attribute unexpected.");
1507                         return -EINVAL;
1508                 }
1509
1510                 if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
1511                         err = parse_eth_type_from_nlattrs(match, &attrs, a, is_mask,
1512                                                           log);
1513                         if (err)
1514                                 return err;
1515                 } else if (!is_mask) {
1516                         SW_FLOW_KEY_PUT(match, eth.type, htons(ETH_P_802_2), is_mask);
1517                 }
1518         } else if (!match->key->eth.type) {
1519                 OVS_NLERR(log, "Either Ethernet header or EtherType is required.");
1520                 return -EINVAL;
1521         }
1522
1523         if (attrs & (1 << OVS_KEY_ATTR_IPV4)) {
1524                 const struct ovs_key_ipv4 *ipv4_key;
1525
1526                 ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
1527                 if (!is_mask && ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX) {
1528                         OVS_NLERR(log, "IPv4 frag type %d is out of range max %d",
1529                                   ipv4_key->ipv4_frag, OVS_FRAG_TYPE_MAX);
1530                         return -EINVAL;
1531                 }
1532                 SW_FLOW_KEY_PUT(match, ip.proto,
1533                                 ipv4_key->ipv4_proto, is_mask);
1534                 SW_FLOW_KEY_PUT(match, ip.tos,
1535                                 ipv4_key->ipv4_tos, is_mask);
1536                 SW_FLOW_KEY_PUT(match, ip.ttl,
1537                                 ipv4_key->ipv4_ttl, is_mask);
1538                 SW_FLOW_KEY_PUT(match, ip.frag,
1539                                 ipv4_key->ipv4_frag, is_mask);
1540                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1541                                 ipv4_key->ipv4_src, is_mask);
1542                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1543                                 ipv4_key->ipv4_dst, is_mask);
1544                 attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
1545         }
1546
1547         if (attrs & (1 << OVS_KEY_ATTR_IPV6)) {
1548                 const struct ovs_key_ipv6 *ipv6_key;
1549
1550                 ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
1551                 if (!is_mask && ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX) {
1552                         OVS_NLERR(log, "IPv6 frag type %d is out of range max %d",
1553                                   ipv6_key->ipv6_frag, OVS_FRAG_TYPE_MAX);
1554                         return -EINVAL;
1555                 }
1556
1557                 if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) {
1558                         OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)",
1559                                   ntohl(ipv6_key->ipv6_label), (1 << 20) - 1);
1560                         return -EINVAL;
1561                 }
1562
1563                 SW_FLOW_KEY_PUT(match, ipv6.label,
1564                                 ipv6_key->ipv6_label, is_mask);
1565                 SW_FLOW_KEY_PUT(match, ip.proto,
1566                                 ipv6_key->ipv6_proto, is_mask);
1567                 SW_FLOW_KEY_PUT(match, ip.tos,
1568                                 ipv6_key->ipv6_tclass, is_mask);
1569                 SW_FLOW_KEY_PUT(match, ip.ttl,
1570                                 ipv6_key->ipv6_hlimit, is_mask);
1571                 SW_FLOW_KEY_PUT(match, ip.frag,
1572                                 ipv6_key->ipv6_frag, is_mask);
1573                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.src,
1574                                 ipv6_key->ipv6_src,
1575                                 sizeof(match->key->ipv6.addr.src),
1576                                 is_mask);
1577                 SW_FLOW_KEY_MEMCPY(match, ipv6.addr.dst,
1578                                 ipv6_key->ipv6_dst,
1579                                 sizeof(match->key->ipv6.addr.dst),
1580                                 is_mask);
1581
1582                 attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
1583         }
1584
1585         if (attrs & (1 << OVS_KEY_ATTR_ARP)) {
1586                 const struct ovs_key_arp *arp_key;
1587
1588                 arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
1589                 if (!is_mask && (arp_key->arp_op & htons(0xff00))) {
1590                         OVS_NLERR(log, "Unknown ARP opcode (opcode=%d).",
1591                                   arp_key->arp_op);
1592                         return -EINVAL;
1593                 }
1594
1595                 SW_FLOW_KEY_PUT(match, ipv4.addr.src,
1596                                 arp_key->arp_sip, is_mask);
1597                 SW_FLOW_KEY_PUT(match, ipv4.addr.dst,
1598                         arp_key->arp_tip, is_mask);
1599                 SW_FLOW_KEY_PUT(match, ip.proto,
1600                                 ntohs(arp_key->arp_op), is_mask);
1601                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.sha,
1602                                 arp_key->arp_sha, ETH_ALEN, is_mask);
1603                 SW_FLOW_KEY_MEMCPY(match, ipv4.arp.tha,
1604                                 arp_key->arp_tha, ETH_ALEN, is_mask);
1605
1606                 attrs &= ~(1 << OVS_KEY_ATTR_ARP);
1607         }
1608
1609         if (attrs & (1 << OVS_KEY_ATTR_NSH)) {
1610                 if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match,
1611                                             is_mask, false, log) < 0)
1612                         return -EINVAL;
1613                 attrs &= ~(1 << OVS_KEY_ATTR_NSH);
1614         }
1615
1616         if (attrs & (1 << OVS_KEY_ATTR_MPLS)) {
1617                 const struct ovs_key_mpls *mpls_key;
1618
1619                 mpls_key = nla_data(a[OVS_KEY_ATTR_MPLS]);
1620                 SW_FLOW_KEY_PUT(match, mpls.top_lse,
1621                                 mpls_key->mpls_lse, is_mask);
1622
1623                 attrs &= ~(1 << OVS_KEY_ATTR_MPLS);
1624          }
1625
1626         if (attrs & (1 << OVS_KEY_ATTR_TCP)) {
1627                 const struct ovs_key_tcp *tcp_key;
1628
1629                 tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
1630                 SW_FLOW_KEY_PUT(match, tp.src, tcp_key->tcp_src, is_mask);
1631                 SW_FLOW_KEY_PUT(match, tp.dst, tcp_key->tcp_dst, is_mask);
1632                 attrs &= ~(1 << OVS_KEY_ATTR_TCP);
1633         }
1634
1635         if (attrs & (1 << OVS_KEY_ATTR_TCP_FLAGS)) {
1636                 SW_FLOW_KEY_PUT(match, tp.flags,
1637                                 nla_get_be16(a[OVS_KEY_ATTR_TCP_FLAGS]),
1638                                 is_mask);
1639                 attrs &= ~(1 << OVS_KEY_ATTR_TCP_FLAGS);
1640         }
1641
1642         if (attrs & (1 << OVS_KEY_ATTR_UDP)) {
1643                 const struct ovs_key_udp *udp_key;
1644
1645                 udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
1646                 SW_FLOW_KEY_PUT(match, tp.src, udp_key->udp_src, is_mask);
1647                 SW_FLOW_KEY_PUT(match, tp.dst, udp_key->udp_dst, is_mask);
1648                 attrs &= ~(1 << OVS_KEY_ATTR_UDP);
1649         }
1650
1651         if (attrs & (1 << OVS_KEY_ATTR_SCTP)) {
1652                 const struct ovs_key_sctp *sctp_key;
1653
1654                 sctp_key = nla_data(a[OVS_KEY_ATTR_SCTP]);
1655                 SW_FLOW_KEY_PUT(match, tp.src, sctp_key->sctp_src, is_mask);
1656                 SW_FLOW_KEY_PUT(match, tp.dst, sctp_key->sctp_dst, is_mask);
1657                 attrs &= ~(1 << OVS_KEY_ATTR_SCTP);
1658         }
1659
1660         if (attrs & (1 << OVS_KEY_ATTR_ICMP)) {
1661                 const struct ovs_key_icmp *icmp_key;
1662
1663                 icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
1664                 SW_FLOW_KEY_PUT(match, tp.src,
1665                                 htons(icmp_key->icmp_type), is_mask);
1666                 SW_FLOW_KEY_PUT(match, tp.dst,
1667                                 htons(icmp_key->icmp_code), is_mask);
1668                 attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
1669         }
1670
1671         if (attrs & (1 << OVS_KEY_ATTR_ICMPV6)) {
1672                 const struct ovs_key_icmpv6 *icmpv6_key;
1673
1674                 icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
1675                 SW_FLOW_KEY_PUT(match, tp.src,
1676                                 htons(icmpv6_key->icmpv6_type), is_mask);
1677                 SW_FLOW_KEY_PUT(match, tp.dst,
1678                                 htons(icmpv6_key->icmpv6_code), is_mask);
1679                 attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
1680         }
1681
1682         if (attrs & (1 << OVS_KEY_ATTR_ND)) {
1683                 const struct ovs_key_nd *nd_key;
1684
1685                 nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
1686                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.target,
1687                         nd_key->nd_target,
1688                         sizeof(match->key->ipv6.nd.target),
1689                         is_mask);
1690                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.sll,
1691                         nd_key->nd_sll, ETH_ALEN, is_mask);
1692                 SW_FLOW_KEY_MEMCPY(match, ipv6.nd.tll,
1693                                 nd_key->nd_tll, ETH_ALEN, is_mask);
1694                 attrs &= ~(1 << OVS_KEY_ATTR_ND);
1695         }
1696
1697         if (attrs != 0) {
1698                 OVS_NLERR(log, "Unknown key attributes %llx",
1699                           (unsigned long long)attrs);
1700                 return -EINVAL;
1701         }
1702
1703         return 0;
1704 }
1705
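/* Recursively overwrite the payload of every attribute in 'attr' with 'val',
 * descending into nested attributes according to 'tbl'.  mask_set_nlattr()
 * uses this to turn a copy of the key attributes into an exact-match mask;
 * the CT state value is additionally clamped to CT_SUPPORTED_MASK.
 */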
1706 static void nlattr_set(struct nlattr *attr, u8 val,
1707                        const struct ovs_len_tbl *tbl)
1708 {
1709         struct nlattr *nla;
1710         int rem;
1711
1712         /* The nlattr stream should already have been validated */
1713         nla_for_each_nested(nla, attr, rem) {
1714                 if (tbl[nla_type(nla)].len == OVS_ATTR_NESTED) {
1715                         if (tbl[nla_type(nla)].next)
1716                                 tbl = tbl[nla_type(nla)].next;
1717                         nlattr_set(nla, val, tbl);
1718                 } else {
1719                         memset(nla_data(nla), val, nla_len(nla));
1720                 }
1721
1722                 if (nla_type(nla) == OVS_KEY_ATTR_CT_STATE)
1723                         *(u32 *)nla_data(nla) &= CT_SUPPORTED_MASK;
1724         }
1725 }
1726
1727 static void mask_set_nlattr(struct nlattr *attr, u8 val)
1728 {
1729         nlattr_set(attr, val, ovs_key_lens);
1730 }
1731
1732 /**
1733  * ovs_nla_get_match - parses Netlink attributes into a flow key and
1734  * mask. If 'nla_mask' is NULL, the flow is treated as an exact-match
1735  * flow. Otherwise, it is treated as a wildcarded flow, unless the mask
1736  * does not include any don't-care bits.
1737  * @net: Used to determine per-namespace field support.
1738  * @match: receives the extracted flow match information.
1739  * @nla_key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink
1740  * attribute sequence. The fields should be taken from the packet that
1741  * triggered the creation of this flow.
1742  * @nla_mask: Optional. Netlink attribute holding nested %OVS_KEY_ATTR_*
1743  * Netlink attributes that specify the mask of the wildcarded flow.
1744  * @log: Boolean to allow kernel error logging.  Normally true, but when
1745  * probing for feature compatibility this should be passed in as false to
1746  * suppress unnecessary error logging.
1747  */
1748 int ovs_nla_get_match(struct net *net, struct sw_flow_match *match,
1749                       const struct nlattr *nla_key,
1750                       const struct nlattr *nla_mask,
1751                       bool log)
1752 {
1753         const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
1754         struct nlattr *newmask = NULL;
1755         u64 key_attrs = 0;
1756         u64 mask_attrs = 0;
1757         int err;
1758
1759         err = parse_flow_nlattrs(nla_key, a, &key_attrs, log);
1760         if (err)
1761                 return err;
1762
1763         err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log);
1764         if (err)
1765                 return err;
1766
1767         err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log);
1768         if (err)
1769                 return err;
1770
1771         if (match->mask) {
1772                 if (!nla_mask) {
1773                         /* Create an exact match mask. We need to set to 0xff
1774                          * all the 'match->mask' fields that have been touched
1775                          * in 'match->key'. We cannot simply memset
1776                          * 'match->mask', because padding bytes and fields not
1777                          * specified in 'match->key' should be left to 0.
1778                          * Instead, we use a stream of netlink attributes,
1779                          * copied from 'key' and set to 0xff.
1780                          * ovs_key_from_nlattrs() will take care of filling
1781                          * 'match->mask' appropriately.
1782                          */
1783                         newmask = kmemdup(nla_key,
1784                                           nla_total_size(nla_len(nla_key)),
1785                                           GFP_KERNEL);
1786                         if (!newmask)
1787                                 return -ENOMEM;
1788
1789                         mask_set_nlattr(newmask, 0xff);
1790
1791                         /* Userspace does not send tunnel attributes that
1792                          * are 0, but we should not wildcard them nonetheless.
1793                          */
1794                         if (match->key->tun_proto)
1795                                 SW_FLOW_KEY_MEMSET_FIELD(match, tun_key,
1796                                                          0xff, true);
1797
1798                         nla_mask = newmask;
1799                 }
1800
1801                 err = parse_flow_mask_nlattrs(nla_mask, a, &mask_attrs, log);
1802                 if (err)
1803                         goto free_newmask;
1804
1805                 /* Always match on tci. */
1806                 SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true);
1807                 SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true);
1808
1809                 err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log);
1810                 if (err)
1811                         goto free_newmask;
1812
1813                 err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true,
1814                                            log);
1815                 if (err)
1816                         goto free_newmask;
1817         }
1818
1819         if (!match_validate(match, key_attrs, mask_attrs, log))
1820                 err = -EINVAL;
1821
1822 free_newmask:
1823         kfree(newmask);
1824         return err;
1825 }
1826
1827 static size_t get_ufid_len(const struct nlattr *attr, bool log)
1828 {
1829         size_t len;
1830
1831         if (!attr)
1832                 return 0;
1833
1834         len = nla_len(attr);
1835         if (len < 1 || len > MAX_UFID_LENGTH) {
1836                 OVS_NLERR(log, "ufid size %u bytes is outside the valid range (1, %d)",
1837                           nla_len(attr), MAX_UFID_LENGTH);
1838                 return 0;
1839         }
1840
1841         return len;
1842 }
1843
1844 /* Initializes 'sfid', returning true if 'attr' contains a valid UFID,
1845  * or false otherwise.
1846  */
1847 bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr,
1848                       bool log)
1849 {
1850         sfid->ufid_len = get_ufid_len(attr, log);
1851         if (sfid->ufid_len)
1852                 memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len);
1853
1854         return sfid->ufid_len;
1855 }
1856
1857 int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
1858                            const struct sw_flow_key *key, bool log)
1859 {
1860         struct sw_flow_key *new_key;
1861
1862         if (ovs_nla_get_ufid(sfid, ufid, log))
1863                 return 0;
1864
1865         /* If UFID was not provided, use unmasked key. */
1866         new_key = kmalloc(sizeof(*new_key), GFP_KERNEL);
1867         if (!new_key)
1868                 return -ENOMEM;
1869         memcpy(new_key, key, sizeof(*key));
1870         sfid->unmasked_key = new_key;
1871
1872         return 0;
1873 }
1874
1875 u32 ovs_nla_get_ufid_flags(const struct nlattr *attr)
1876 {
1877         return attr ? nla_get_u32(attr) : 0;
1878 }
1879
1880 /**
1881  * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key.
1882  * @net: Network namespace.
1883  * @key: Receives extracted in_port, priority, tun_key, skb_mark and conntrack
1884  * metadata.
1885  * @a: Array of netlink attributes holding parsed %OVS_KEY_ATTR_* Netlink
1886  * attributes.
1887  * @attrs: Bit mask for the netlink attributes included in @a.
1888  * @log: Boolean to allow kernel error logging.  Normally true, but when
1889  * probing for feature compatibility this should be passed in as false to
1890  * suppress unnecessary error logging.
1891  *
1892  * This parses a series of Netlink attributes that form a flow key, which must
1893  * take the same form accepted by flow_from_nlattrs(), but only enough of it to
1894  * get the metadata, that is, the parts of the flow key that cannot be
1895  * extracted from the packet itself.
1896  *
1897  * This must be called before the packet key fields are filled in 'key'.
1898  */
1900 int ovs_nla_get_flow_metadata(struct net *net,
1901                               const struct nlattr *a[OVS_KEY_ATTR_MAX + 1],
1902                               u64 attrs, struct sw_flow_key *key, bool log)
1903 {
1904         struct sw_flow_match match;
1905
1906         memset(&match, 0, sizeof(match));
1907         match.key = key;
1908
1909         key->ct_state = 0;
1910         key->ct_zone = 0;
1911         key->ct_orig_proto = 0;
1912         memset(&key->ct, 0, sizeof(key->ct));
1913         memset(&key->ipv4.ct_orig, 0, sizeof(key->ipv4.ct_orig));
1914         memset(&key->ipv6.ct_orig, 0, sizeof(key->ipv6.ct_orig));
1915
1916         key->phy.in_port = DP_MAX_PORTS;
1917
1918         return metadata_from_nlattrs(net, &match, &attrs, a, false, log);
1919 }
1920
1921 static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh,
1922                             bool is_mask)
1923 {
1924         __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff);
1925
1926         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) ||
1927             nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci))
1928                 return -EMSGSIZE;
1929         return 0;
1930 }
1931
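/* Serialize the NSH part of a key or mask as a nested OVS_KEY_ATTR_NSH
 * attribute.  The MD1 context is emitted for masks and for MD type 1 keys;
 * MD type 2 is not dumped yet.
 */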
1932 static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask,
1933                              struct sk_buff *skb)
1934 {
1935         struct nlattr *start;
1936
1937         start = nla_nest_start(skb, OVS_KEY_ATTR_NSH);
1938         if (!start)
1939                 return -EMSGSIZE;
1940
1941         if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base))
1942                 goto nla_put_failure;
1943
1944         if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) {
1945                 if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1,
1946                             sizeof(nsh->context), nsh->context))
1947                         goto nla_put_failure;
1948         }
1949
1950         /* Don't support MD type 2 yet */
1951
1952         nla_nest_end(skb, start);
1953
1954         return 0;
1955
1956 nla_put_failure:
1957         return -EMSGSIZE;
1958 }
1959
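/* Dump a flow key or mask to netlink.  'swkey' is always the flow key and
 * decides which attributes are emitted, while 'output' supplies the values
 * (either the key itself or the corresponding mask), so the same code path
 * serves OVS_FLOW_ATTR_KEY and OVS_FLOW_ATTR_MASK.
 */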
1960 static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
1961                              const struct sw_flow_key *output, bool is_mask,
1962                              struct sk_buff *skb)
1963 {
1964         struct ovs_key_ethernet *eth_key;
1965         struct nlattr *nla;
1966         struct nlattr *encap = NULL;
1967         struct nlattr *in_encap = NULL;
1968
1969         if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
1970                 goto nla_put_failure;
1971
1972         if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
1973                 goto nla_put_failure;
1974
1975         if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
1976                 goto nla_put_failure;
1977
1978         if ((swkey->tun_proto || is_mask)) {
1979                 const void *opts = NULL;
1980
1981                 if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
1982                         opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);
1983
1984                 if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
1985                                      swkey->tun_opts_len, swkey->tun_proto))
1986                         goto nla_put_failure;
1987         }
1988
1989         if (swkey->phy.in_port == DP_MAX_PORTS) {
1990                 if (is_mask && (output->phy.in_port == 0xffff))
1991                         if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
1992                                 goto nla_put_failure;
1993         } else {
1994                 u16 upper_u16;
1995                 upper_u16 = !is_mask ? 0 : 0xffff;
1996
1997                 if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
1998                                 (upper_u16 << 16) | output->phy.in_port))
1999                         goto nla_put_failure;
2000         }
2001
2002         if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
2003                 goto nla_put_failure;
2004
2005         if (ovs_ct_put_key(swkey, output, skb))
2006                 goto nla_put_failure;
2007
2008         if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
2009                 nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
2010                 if (!nla)
2011                         goto nla_put_failure;
2012
2013                 eth_key = nla_data(nla);
2014                 ether_addr_copy(eth_key->eth_src, output->eth.src);
2015                 ether_addr_copy(eth_key->eth_dst, output->eth.dst);
2016
2017                 if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
2018                         if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
2019                                 goto nla_put_failure;
2020                         encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2021                         if (!swkey->eth.vlan.tci)
2022                                 goto unencap;
2023
2024                         if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
2025                                 if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
2026                                         goto nla_put_failure;
2027                                 in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
2028                                 if (!swkey->eth.cvlan.tci)
2029                                         goto unencap;
2030                         }
2031                 }
2032
2033                 if (swkey->eth.type == htons(ETH_P_802_2)) {
2034                         /*
2035                         * Ethertype 802.2 is represented in the netlink with omitted
2036                         * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
2037                         * 0xffff in the mask attribute.  Ethertype can also
2038                         * be wildcarded.
2039                         */
2040                         if (is_mask && output->eth.type)
2041                                 if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
2042                                                         output->eth.type))
2043                                         goto nla_put_failure;
2044                         goto unencap;
2045                 }
2046         }
2047
2048         if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
2049                 goto nla_put_failure;
2050
2051         if (eth_type_vlan(swkey->eth.type)) {
2052                 /* There are 3 VLAN tags; we don't know anything about the rest
2053                  * of the packet, so truncate here.
2054                  */
2055                 WARN_ON_ONCE(!(encap && in_encap));
2056                 goto unencap;
2057         }
2058
2059         if (swkey->eth.type == htons(ETH_P_IP)) {
2060                 struct ovs_key_ipv4 *ipv4_key;
2061
2062                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
2063                 if (!nla)
2064                         goto nla_put_failure;
2065                 ipv4_key = nla_data(nla);
2066                 ipv4_key->ipv4_src = output->ipv4.addr.src;
2067                 ipv4_key->ipv4_dst = output->ipv4.addr.dst;
2068                 ipv4_key->ipv4_proto = output->ip.proto;
2069                 ipv4_key->ipv4_tos = output->ip.tos;
2070                 ipv4_key->ipv4_ttl = output->ip.ttl;
2071                 ipv4_key->ipv4_frag = output->ip.frag;
2072         } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
2073                 struct ovs_key_ipv6 *ipv6_key;
2074
2075                 nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
2076                 if (!nla)
2077                         goto nla_put_failure;
2078                 ipv6_key = nla_data(nla);
2079                 memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
2080                                 sizeof(ipv6_key->ipv6_src));
2081                 memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
2082                                 sizeof(ipv6_key->ipv6_dst));
2083                 ipv6_key->ipv6_label = output->ipv6.label;
2084                 ipv6_key->ipv6_proto = output->ip.proto;
2085                 ipv6_key->ipv6_tclass = output->ip.tos;
2086                 ipv6_key->ipv6_hlimit = output->ip.ttl;
2087                 ipv6_key->ipv6_frag = output->ip.frag;
2088         } else if (swkey->eth.type == htons(ETH_P_NSH)) {
2089                 if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
2090                         goto nla_put_failure;
2091         } else if (swkey->eth.type == htons(ETH_P_ARP) ||
2092                    swkey->eth.type == htons(ETH_P_RARP)) {
2093                 struct ovs_key_arp *arp_key;
2094
2095                 nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
2096                 if (!nla)
2097                         goto nla_put_failure;
2098                 arp_key = nla_data(nla);
2099                 memset(arp_key, 0, sizeof(struct ovs_key_arp));
2100                 arp_key->arp_sip = output->ipv4.addr.src;
2101                 arp_key->arp_tip = output->ipv4.addr.dst;
2102                 arp_key->arp_op = htons(output->ip.proto);
2103                 ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
2104                 ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
2105         } else if (eth_p_mpls(swkey->eth.type)) {
2106                 struct ovs_key_mpls *mpls_key;
2107
2108                 nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
2109                 if (!nla)
2110                         goto nla_put_failure;
2111                 mpls_key = nla_data(nla);
2112                 mpls_key->mpls_lse = output->mpls.top_lse;
2113         }
2114
2115         if ((swkey->eth.type == htons(ETH_P_IP) ||
2116              swkey->eth.type == htons(ETH_P_IPV6)) &&
2117              swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
2118
2119                 if (swkey->ip.proto == IPPROTO_TCP) {
2120                         struct ovs_key_tcp *tcp_key;
2121
2122                         nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
2123                         if (!nla)
2124                                 goto nla_put_failure;
2125                         tcp_key = nla_data(nla);
2126                         tcp_key->tcp_src = output->tp.src;
2127                         tcp_key->tcp_dst = output->tp.dst;
2128                         if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
2129                                          output->tp.flags))
2130                                 goto nla_put_failure;
2131                 } else if (swkey->ip.proto == IPPROTO_UDP) {
2132                         struct ovs_key_udp *udp_key;
2133
2134                         nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
2135                         if (!nla)
2136                                 goto nla_put_failure;
2137                         udp_key = nla_data(nla);
2138                         udp_key->udp_src = output->tp.src;
2139                         udp_key->udp_dst = output->tp.dst;
2140                 } else if (swkey->ip.proto == IPPROTO_SCTP) {
2141                         struct ovs_key_sctp *sctp_key;
2142
2143                         nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
2144                         if (!nla)
2145                                 goto nla_put_failure;
2146                         sctp_key = nla_data(nla);
2147                         sctp_key->sctp_src = output->tp.src;
2148                         sctp_key->sctp_dst = output->tp.dst;
2149                 } else if (swkey->eth.type == htons(ETH_P_IP) &&
2150                            swkey->ip.proto == IPPROTO_ICMP) {
2151                         struct ovs_key_icmp *icmp_key;
2152
2153                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
2154                         if (!nla)
2155                                 goto nla_put_failure;
2156                         icmp_key = nla_data(nla);
2157                         icmp_key->icmp_type = ntohs(output->tp.src);
2158                         icmp_key->icmp_code = ntohs(output->tp.dst);
2159                 } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
2160                            swkey->ip.proto == IPPROTO_ICMPV6) {
2161                         struct ovs_key_icmpv6 *icmpv6_key;
2162
2163                         nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
2164                                                 sizeof(*icmpv6_key));
2165                         if (!nla)
2166                                 goto nla_put_failure;
2167                         icmpv6_key = nla_data(nla);
2168                         icmpv6_key->icmpv6_type = ntohs(output->tp.src);
2169                         icmpv6_key->icmpv6_code = ntohs(output->tp.dst);
2170
2171                         if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
2172                             icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
2173                                 struct ovs_key_nd *nd_key;
2174
2175                                 nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
2176                                 if (!nla)
2177                                         goto nla_put_failure;
2178                                 nd_key = nla_data(nla);
2179                                 memcpy(nd_key->nd_target, &output->ipv6.nd.target,
2180                                                         sizeof(nd_key->nd_target));
2181                                 ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
2182                                 ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
2183                         }
2184                 }
2185         }
2186
2187 unencap:
2188         if (in_encap)
2189                 nla_nest_end(skb, in_encap);
2190         if (encap)
2191                 nla_nest_end(skb, encap);
2192
2193         return 0;
2194
2195 nla_put_failure:
2196         return -EMSGSIZE;
2197 }
2198
2199 int ovs_nla_put_key(const struct sw_flow_key *swkey,
2200                     const struct sw_flow_key *output, int attr, bool is_mask,
2201                     struct sk_buff *skb)
2202 {
2203         int err;
2204         struct nlattr *nla;
2205
2206         nla = nla_nest_start(skb, attr);
2207         if (!nla)
2208                 return -EMSGSIZE;
2209         err = __ovs_nla_put_key(swkey, output, is_mask, skb);
2210         if (err)
2211                 return err;
2212         nla_nest_end(skb, nla);
2213
2214         return 0;
2215 }
2216
2217 /* Called with ovs_mutex or RCU read lock. */
2218 int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb)
2219 {
2220         if (ovs_identifier_is_ufid(&flow->id))
2221                 return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len,
2222                                flow->id.ufid);
2223
2224         return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key,
2225                                OVS_FLOW_ATTR_KEY, false, skb);
2226 }
2227
2228 /* Called with ovs_mutex or RCU read lock. */
2229 int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb)
2230 {
2231         return ovs_nla_put_key(&flow->key, &flow->key,
2232                                 OVS_FLOW_ATTR_KEY, false, skb);
2233 }
2234
2235 /* Called with ovs_mutex or RCU read lock. */
2236 int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb)
2237 {
2238         return ovs_nla_put_key(&flow->key, &flow->mask->key,
2239                                 OVS_FLOW_ATTR_MASK, true, skb);
2240 }
2241
2242 #define MAX_ACTIONS_BUFSIZE     (32 * 1024)
2243
2244 static struct sw_flow_actions *nla_alloc_flow_actions(int size)
2245 {
2246         struct sw_flow_actions *sfa;
2247
2248         WARN_ON_ONCE(size > MAX_ACTIONS_BUFSIZE);
2249
2250         sfa = kmalloc(sizeof(*sfa) + size, GFP_KERNEL);
2251         if (!sfa)
2252                 return ERR_PTR(-ENOMEM);
2253
2254         sfa->actions_len = 0;
2255         return sfa;
2256 }
2257
2258 static void ovs_nla_free_set_action(const struct nlattr *a)
2259 {
2260         const struct nlattr *ovs_key = nla_data(a);
2261         struct ovs_tunnel_info *ovs_tun;
2262
2263         switch (nla_type(ovs_key)) {
2264         case OVS_KEY_ATTR_TUNNEL_INFO:
2265                 ovs_tun = nla_data(ovs_key);
2266                 dst_release((struct dst_entry *)ovs_tun->tun_dst);
2267                 break;
2268         }
2269 }
2270
2271 void ovs_nla_free_flow_actions(struct sw_flow_actions *sf_acts)
2272 {
2273         const struct nlattr *a;
2274         int rem;
2275
2276         if (!sf_acts)
2277                 return;
2278
2279         nla_for_each_attr(a, sf_acts->actions, sf_acts->actions_len, rem) {
2280                 switch (nla_type(a)) {
2281                 case OVS_ACTION_ATTR_SET:
2282                         ovs_nla_free_set_action(a);
2283                         break;
2284                 case OVS_ACTION_ATTR_CT:
2285                         ovs_ct_free_action(a);
2286                         break;
2287                 }
2288         }
2289
2290         kfree(sf_acts);
2291 }
2292
2293 static void __ovs_nla_free_flow_actions(struct rcu_head *head)
2294 {
2295         ovs_nla_free_flow_actions(container_of(head, struct sw_flow_actions, rcu));
2296 }
2297
2298 /* Schedules 'sf_acts' to be freed after the next RCU grace period.
2299  * The caller must hold rcu_read_lock for this to be sensible. */
2300 void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *sf_acts)
2301 {
2302         call_rcu(&sf_acts->rcu, __ovs_nla_free_flow_actions);
2303 }
2304
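/* Reserve 'attr_len' bytes at the tail of the actions buffer, growing it if
 * needed.  The buffer doubles in size (based on ksize()) up to
 * MAX_ACTIONS_BUFSIZE; on growth the old actions are copied over and the old
 * buffer freed.  Returns a pointer to the newly reserved space.
 */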
2305 static struct nlattr *reserve_sfa_size(struct sw_flow_actions **sfa,
2306                                        int attr_len, bool log)
2307 {
2308
2309         struct sw_flow_actions *acts;
2310         int new_acts_size;
2311         int req_size = NLA_ALIGN(attr_len);
2312         int next_offset = offsetof(struct sw_flow_actions, actions) +
2313                                         (*sfa)->actions_len;
2314
2315         if (req_size <= (ksize(*sfa) - next_offset))
2316                 goto out;
2317
2318         new_acts_size = ksize(*sfa) * 2;
2319
2320         if (new_acts_size > MAX_ACTIONS_BUFSIZE) {
2321                 if ((MAX_ACTIONS_BUFSIZE - next_offset) < req_size) {
2322                         OVS_NLERR(log, "Flow action size exceeds max %u",
2323                                   MAX_ACTIONS_BUFSIZE);
2324                         return ERR_PTR(-EMSGSIZE);
2325                 }
2326                 new_acts_size = MAX_ACTIONS_BUFSIZE;
2327         }
2328
2329         acts = nla_alloc_flow_actions(new_acts_size);
2330         if (IS_ERR(acts))
2331                 return (void *)acts;
2332
2333         memcpy(acts->actions, (*sfa)->actions, (*sfa)->actions_len);
2334         acts->actions_len = (*sfa)->actions_len;
2335         acts->orig_len = (*sfa)->orig_len;
2336         kfree(*sfa);
2337         *sfa = acts;
2338
2339 out:
2340         (*sfa)->actions_len += req_size;
2341         return  (struct nlattr *) ((unsigned char *)(*sfa) + next_offset);
2342 }
2343
2344 static struct nlattr *__add_action(struct sw_flow_actions **sfa,
2345                                    int attrtype, void *data, int len, bool log)
2346 {
2347         struct nlattr *a;
2348
2349         a = reserve_sfa_size(sfa, nla_attr_size(len), log);
2350         if (IS_ERR(a))
2351                 return a;
2352
2353         a->nla_type = attrtype;
2354         a->nla_len = nla_attr_size(len);
2355
2356         if (data)
2357                 memcpy(nla_data(a), data, len);
2358         memset((unsigned char *) a + a->nla_len, 0, nla_padlen(len));
2359
2360         return a;
2361 }
2362
2363 int ovs_nla_add_action(struct sw_flow_actions **sfa, int attrtype, void *data,
2364                        int len, bool log)
2365 {
2366         struct nlattr *a;
2367
2368         a = __add_action(sfa, attrtype, data, len, log);
2369
2370         return PTR_ERR_OR_ZERO(a);
2371 }
2372
2373 static inline int add_nested_action_start(struct sw_flow_actions **sfa,
2374                                           int attrtype, bool log)
2375 {
2376         int used = (*sfa)->actions_len;
2377         int err;
2378
2379         err = ovs_nla_add_action(sfa, attrtype, NULL, 0, log);
2380         if (err)
2381                 return err;
2382
2383         return used;
2384 }
2385
2386 static inline void add_nested_action_end(struct sw_flow_actions *sfa,
2387                                          int st_offset)
2388 {
2389         struct nlattr *a = (struct nlattr *) ((unsigned char *)sfa->actions +
2390                                                                st_offset);
2391
2392         a->nla_len = sfa->actions_len - st_offset;
2393 }
2394
2395 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2396                                   const struct sw_flow_key *key,
2397                                   struct sw_flow_actions **sfa,
2398                                   __be16 eth_type, __be16 vlan_tci, bool log);
2399
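/* Validate a sample() action and copy it into 'sfa'.  The kernel-internal
 * layout differs from the uAPI one: the probability and the precomputed
 * 'exec' flag are stored as a struct sample_arg under OVS_SAMPLE_ATTR_ARG,
 * followed by the copied nested actions.
 */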
2400 static int validate_and_copy_sample(struct net *net, const struct nlattr *attr,
2401                                     const struct sw_flow_key *key,
2402                                     struct sw_flow_actions **sfa,
2403                                     __be16 eth_type, __be16 vlan_tci,
2404                                     bool log, bool last)
2405 {
2406         const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
2407         const struct nlattr *probability, *actions;
2408         const struct nlattr *a;
2409         int rem, start, err;
2410         struct sample_arg arg;
2411
2412         memset(attrs, 0, sizeof(attrs));
2413         nla_for_each_nested(a, attr, rem) {
2414                 int type = nla_type(a);
2415                 if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
2416                         return -EINVAL;
2417                 attrs[type] = a;
2418         }
2419         if (rem)
2420                 return -EINVAL;
2421
2422         probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
2423         if (!probability || nla_len(probability) != sizeof(u32))
2424                 return -EINVAL;
2425
2426         actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
2427         if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
2428                 return -EINVAL;
2429
2430         /* validation done, copy sample action. */
2431         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SAMPLE, log);
2432         if (start < 0)
2433                 return start;
2434
2435         /* When both skb and flow may be changed, put the sample
2436          * into a deferred fifo. On the other hand, if only skb
2437          * may be modified, the actions can be executed in place.
2438          *
2439          * Do this analysis at the flow installation time.
2440          * Set 'clone_action->exec' to true if the actions can be
2441          * executed without being deferred.
2442          *
2443          * If the sample is the last action, it can always be executed
2444          * rather than deferred.
2445          */
2446         arg.exec = last || !actions_may_change_flow(actions);
2447         arg.probability = nla_get_u32(probability);
2448
2449         err = ovs_nla_add_action(sfa, OVS_SAMPLE_ATTR_ARG, &arg, sizeof(arg),
2450                                  log);
2451         if (err)
2452                 return err;
2453
2454         err = __ovs_nla_copy_actions(net, actions, key, sfa,
2455                                      eth_type, vlan_tci, log);
2456
2457         if (err)
2458                 return err;
2459
2460         add_nested_action_end(*sfa, start);
2461
2462         return 0;
2463 }
2464
2465 void ovs_match_init(struct sw_flow_match *match,
2466                     struct sw_flow_key *key,
2467                     bool reset_key,
2468                     struct sw_flow_mask *mask)
2469 {
2470         memset(match, 0, sizeof(*match));
2471         match->key = key;
2472         match->mask = mask;
2473
2474         if (reset_key)
2475                 memset(key, 0, sizeof(*key));
2476
2477         if (mask) {
2478                 memset(&mask->key, 0, sizeof(mask->key));
2479                 mask->range.start = mask->range.end = 0;
2480         }
2481 }
2482
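/* Walk the Geneve options stored in 'key', checking that every TLV fits
 * within tun_opts_len, and set TUNNEL_CRIT_OPT in the tunnel flags if any
 * critical option is present.
 */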
2483 static int validate_geneve_opts(struct sw_flow_key *key)
2484 {
2485         struct geneve_opt *option;
2486         int opts_len = key->tun_opts_len;
2487         bool crit_opt = false;
2488
2489         option = (struct geneve_opt *)TUN_METADATA_OPTS(key, key->tun_opts_len);
2490         while (opts_len > 0) {
2491                 int len;
2492
2493                 if (opts_len < sizeof(*option))
2494                         return -EINVAL;
2495
2496                 len = sizeof(*option) + option->length * 4;
2497                 if (len > opts_len)
2498                         return -EINVAL;
2499
2500                 crit_opt |= !!(option->type & GENEVE_CRIT_OPT_TYPE);
2501
2502                 option = (struct geneve_opt *)((u8 *)option + len);
2503                 opts_len -= len;
2504         }
2505
2506         key->tun_key.tun_flags |= crit_opt ? TUNNEL_CRIT_OPT : 0;
2507
2508         return 0;
2509 }
2510
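/* A set(tunnel(...)) action is converted at validation time: the tunnel key
 * and options are parsed into a preallocated metadata_dst and stored under a
 * nested OVS_KEY_ATTR_TUNNEL_INFO attribute, so the tunnel info does not have
 * to be rebuilt for every packet at execution time.
 */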
2511 static int validate_and_copy_set_tun(const struct nlattr *attr,
2512                                      struct sw_flow_actions **sfa, bool log)
2513 {
2514         struct sw_flow_match match;
2515         struct sw_flow_key key;
2516         struct metadata_dst *tun_dst;
2517         struct ip_tunnel_info *tun_info;
2518         struct ovs_tunnel_info *ovs_tun;
2519         struct nlattr *a;
2520         int err = 0, start, opts_type;
2521
2522         ovs_match_init(&match, &key, true, NULL);
2523         opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log);
2524         if (opts_type < 0)
2525                 return opts_type;
2526
2527         if (key.tun_opts_len) {
2528                 switch (opts_type) {
2529                 case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS:
2530                         err = validate_geneve_opts(&key);
2531                         if (err < 0)
2532                                 return err;
2533                         break;
2534                 case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS:
2535                         break;
2536                 case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS:
2537                         break;
2538                 }
2539         }
2540
2541         start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log);
2542         if (start < 0)
2543                 return start;
2544
2545         tun_dst = metadata_dst_alloc(key.tun_opts_len, METADATA_IP_TUNNEL,
2546                                      GFP_KERNEL);
2547
2548         if (!tun_dst)
2549                 return -ENOMEM;
2550
2551         err = dst_cache_init(&tun_dst->u.tun_info.dst_cache, GFP_KERNEL);
2552         if (err) {
2553                 dst_release((struct dst_entry *)tun_dst);
2554                 return err;
2555         }
2556
2557         a = __add_action(sfa, OVS_KEY_ATTR_TUNNEL_INFO, NULL,
2558                          sizeof(*ovs_tun), log);
2559         if (IS_ERR(a)) {
2560                 dst_release((struct dst_entry *)tun_dst);
2561                 return PTR_ERR(a);
2562         }
2563
2564         ovs_tun = nla_data(a);
2565         ovs_tun->tun_dst = tun_dst;
2566
2567         tun_info = &tun_dst->u.tun_info;
2568         tun_info->mode = IP_TUNNEL_INFO_TX;
2569         if (key.tun_proto == AF_INET6)
2570                 tun_info->mode |= IP_TUNNEL_INFO_IPV6;
2571         tun_info->key = key.tun_key;
2572
2573         /* We need to store the options in the action itself since
2574          * everything else will go away after flow setup. We can append
2575          * it to tun_info and then point there.
2576          */
2577         ip_tunnel_info_opts_set(tun_info,
2578                                 TUN_METADATA_OPTS(&key, key.tun_opts_len),
2579                                 key.tun_opts_len);
2580         add_nested_action_end(*sfa, start);
2581
2582         return err;
2583 }
2584
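/* Check that 'attr' holds valid NSH key attributes by parsing them into a
 * scratch match; 'is_push_nsh' applies the stricter push_nsh rules.
 */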
2585 static bool validate_nsh(const struct nlattr *attr, bool is_mask,
2586                          bool is_push_nsh, bool log)
2587 {
2588         struct sw_flow_match match;
2589         struct sw_flow_key key;
2590         int ret = 0;
2591
2592         ovs_match_init(&match, &key, true, NULL);
2593         ret = nsh_key_put_from_nlattr(attr, &match, is_mask,
2594                                       is_push_nsh, log);
2595         return !ret;
2596 }
2597
2598 /* Return false if there are any non-masked bits set.
2599  * Mask follows data immediately, before any netlink padding.
2600  */
2601 static bool validate_masked(u8 *data, int len)
2602 {
2603         u8 *mask = data + len;
2604
2605         while (len--)
2606                 if (*data++ & ~*mask++)
2607                         return false;
2608
2609         return true;
2610 }
2611
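/* Validate the single key attribute nested in a set() or masked set()
 * action against the flow key and ethertype.  Tunnel sets are validated
 * and copied separately; all other non-masked sets are rewritten as
 * masked sets with an all-ones mask.
 */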
2612 static int validate_set(const struct nlattr *a,
2613                         const struct sw_flow_key *flow_key,
2614                         struct sw_flow_actions **sfa, bool *skip_copy,
2615                         u8 mac_proto, __be16 eth_type, bool masked, bool log)
2616 {
2617         const struct nlattr *ovs_key = nla_data(a);
2618         int key_type = nla_type(ovs_key);
2619         size_t key_len;
2620
2621         /* There can be only one key in an action */
2622         if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
2623                 return -EINVAL;
2624
2625         key_len = nla_len(ovs_key);
2626         if (masked)
2627                 key_len /= 2;
2628
2629         if (key_type > OVS_KEY_ATTR_MAX ||
2630             !check_attr_len(key_len, ovs_key_lens[key_type].len))
2631                 return -EINVAL;
2632
2633         if (masked && !validate_masked(nla_data(ovs_key), key_len))
2634                 return -EINVAL;
2635
2636         switch (key_type) {
2637         const struct ovs_key_ipv4 *ipv4_key;
2638         const struct ovs_key_ipv6 *ipv6_key;
2639         int err;
2640
2641         case OVS_KEY_ATTR_PRIORITY:
2642         case OVS_KEY_ATTR_SKB_MARK:
2643         case OVS_KEY_ATTR_CT_MARK:
2644         case OVS_KEY_ATTR_CT_LABELS:
2645                 break;
2646
2647         case OVS_KEY_ATTR_ETHERNET:
2648                 if (mac_proto != MAC_PROTO_ETHERNET)
2649                         return -EINVAL;
2650                 break;
2651
2652         case OVS_KEY_ATTR_TUNNEL:
2653                 if (masked)
2654                         return -EINVAL; /* Masked tunnel set not supported. */
2655
2656                 *skip_copy = true;
2657                 err = validate_and_copy_set_tun(a, sfa, log);
2658                 if (err)
2659                         return err;
2660                 break;
2661
2662         case OVS_KEY_ATTR_IPV4:
2663                 if (eth_type != htons(ETH_P_IP))
2664                         return -EINVAL;
2665
2666                 ipv4_key = nla_data(ovs_key);
2667
2668                 if (masked) {
2669                         const struct ovs_key_ipv4 *mask = ipv4_key + 1;
2670
2671                         /* Non-writeable fields. */
2672                         if (mask->ipv4_proto || mask->ipv4_frag)
2673                                 return -EINVAL;
2674                 } else {
2675                         if (ipv4_key->ipv4_proto != flow_key->ip.proto)
2676                                 return -EINVAL;
2677
2678                         if (ipv4_key->ipv4_frag != flow_key->ip.frag)
2679                                 return -EINVAL;
2680                 }
2681                 break;
2682
2683         case OVS_KEY_ATTR_IPV6:
2684                 if (eth_type != htons(ETH_P_IPV6))
2685                         return -EINVAL;
2686
2687                 ipv6_key = nla_data(ovs_key);
2688
2689                 if (masked) {
2690                         const struct ovs_key_ipv6 *mask = ipv6_key + 1;
2691
2692                         /* Non-writeable fields. */
2693                         if (mask->ipv6_proto || mask->ipv6_frag)
2694                                 return -EINVAL;
2695
2696                         /* Invalid bits in the flow label mask? */
2697                         if (ntohl(mask->ipv6_label) & 0xFFF00000)
2698                                 return -EINVAL;
2699                 } else {
2700                         if (ipv6_key->ipv6_proto != flow_key->ip.proto)
2701                                 return -EINVAL;
2702
2703                         if (ipv6_key->ipv6_frag != flow_key->ip.frag)
2704                                 return -EINVAL;
2705                 }
2706                 if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000)
2707                         return -EINVAL;
2708
2709                 break;
2710
2711         case OVS_KEY_ATTR_TCP:
2712                 if ((eth_type != htons(ETH_P_IP) &&
2713                      eth_type != htons(ETH_P_IPV6)) ||
2714                     flow_key->ip.proto != IPPROTO_TCP)
2715                         return -EINVAL;
2716
2717                 break;
2718
2719         case OVS_KEY_ATTR_UDP:
2720                 if ((eth_type != htons(ETH_P_IP) &&
2721                      eth_type != htons(ETH_P_IPV6)) ||
2722                     flow_key->ip.proto != IPPROTO_UDP)
2723                         return -EINVAL;
2724
2725                 break;
2726
2727         case OVS_KEY_ATTR_MPLS:
2728                 if (!eth_p_mpls(eth_type))
2729                         return -EINVAL;
2730                 break;
2731
2732         case OVS_KEY_ATTR_SCTP:
2733                 if ((eth_type != htons(ETH_P_IP) &&
2734                      eth_type != htons(ETH_P_IPV6)) ||
2735                     flow_key->ip.proto != IPPROTO_SCTP)
2736                         return -EINVAL;
2737
2738                 break;
2739
2740         case OVS_KEY_ATTR_NSH:
2741                 if (eth_type != htons(ETH_P_NSH))
2742                         return -EINVAL;
2743                 if (!validate_nsh(nla_data(a), masked, false, log))
2744                         return -EINVAL;
2745                 break;
2746
2747         default:
2748                 return -EINVAL;
2749         }
2750
2751         /* Convert non-masked non-tunnel set actions to masked set actions. */
2752         if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) {
2753                 int start, len = key_len * 2;
2754                 struct nlattr *at;
2755
2756                 *skip_copy = true;
2757
2758                 start = add_nested_action_start(sfa,
2759                                                 OVS_ACTION_ATTR_SET_TO_MASKED,
2760                                                 log);
2761                 if (start < 0)
2762                         return start;
2763
2764                 at = __add_action(sfa, key_type, NULL, len, log);
2765                 if (IS_ERR(at))
2766                         return PTR_ERR(at);
2767
2768                 memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */
2769                 memset(nla_data(at) + key_len, 0xff, key_len);    /* Mask. */
2770                 /* Clear non-writeable bits from otherwise writeable fields. */
2771                 if (key_type == OVS_KEY_ATTR_IPV6) {
2772                         struct ovs_key_ipv6 *mask = nla_data(at) + key_len;
2773
2774                         mask->ipv6_label &= htonl(0x000FFFFF);
2775                 }
2776                 add_nested_action_end(*sfa, start);
2777         }
2778
2779         return 0;
2780 }
2781
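/* A userspace action must carry a valid, non-zero Netlink upcall PID. */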
2782 static int validate_userspace(const struct nlattr *attr)
2783 {
2784         static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] = {
2785                 [OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
2786                 [OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_UNSPEC },
2787                 [OVS_USERSPACE_ATTR_EGRESS_TUN_PORT] = {.type = NLA_U32 },
2788         };
2789         struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
2790         int error;
2791
2792         error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX, attr,
2793                                  userspace_policy, NULL);
2794         if (error)
2795                 return error;
2796
2797         if (!a[OVS_USERSPACE_ATTR_PID] ||
2798             !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
2799                 return -EINVAL;
2800
2801         return 0;
2802 }
2803
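/* Append a verbatim copy of the action 'from' to the action buffer in 'sfa'. */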
2804 static int copy_action(const struct nlattr *from,
2805                        struct sw_flow_actions **sfa, bool log)
2806 {
2807         int totlen = NLA_ALIGN(from->nla_len);
2808         struct nlattr *to;
2809
2810         to = reserve_sfa_size(sfa, from->nla_len, log);
2811         if (IS_ERR(to))
2812                 return PTR_ERR(to);
2813
2814         memcpy(to, from, totlen);
2815         return 0;
2816 }
2817
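/* Validate each action in 'attr' against the masked flow key and copy it
 * into 'sfa', tracking how earlier actions (push/pop VLAN, MPLS, Ethernet,
 * NSH) change the ethertype, VLAN TCI and MAC layer seen by later ones.
 * Actions that need translation (set, sample, ct) are copied by their
 * validators instead.
 */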
2818 static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
2819                                   const struct sw_flow_key *key,
2820                                   struct sw_flow_actions **sfa,
2821                                   __be16 eth_type, __be16 vlan_tci, bool log)
2822 {
2823         u8 mac_proto = ovs_key_mac_proto(key);
2824         const struct nlattr *a;
2825         int rem, err;
2826
2827         nla_for_each_nested(a, attr, rem) {
2828                 /* Expected argument lengths, (u32)-1 for variable length. */
2829                 static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
2830                         [OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
2831                         [OVS_ACTION_ATTR_RECIRC] = sizeof(u32),
2832                         [OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
2833                         [OVS_ACTION_ATTR_PUSH_MPLS] = sizeof(struct ovs_action_push_mpls),
2834                         [OVS_ACTION_ATTR_POP_MPLS] = sizeof(__be16),
2835                         [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
2836                         [OVS_ACTION_ATTR_POP_VLAN] = 0,
2837                         [OVS_ACTION_ATTR_SET] = (u32)-1,
2838                         [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1,
2839                         [OVS_ACTION_ATTR_SAMPLE] = (u32)-1,
2840                         [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash),
2841                         [OVS_ACTION_ATTR_CT] = (u32)-1,
2842                         [OVS_ACTION_ATTR_CT_CLEAR] = 0,
2843                         [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc),
2844                         [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth),
2845                         [OVS_ACTION_ATTR_POP_ETH] = 0,
2846                         [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1,
2847                         [OVS_ACTION_ATTR_POP_NSH] = 0,
2848                         [OVS_ACTION_ATTR_METER] = sizeof(u32),
2849                 };
2850                 const struct ovs_action_push_vlan *vlan;
2851                 int type = nla_type(a);
2852                 bool skip_copy;
2853
2854                 if (type > OVS_ACTION_ATTR_MAX ||
2855                     (action_lens[type] != nla_len(a) &&
2856                      action_lens[type] != (u32)-1))
2857                         return -EINVAL;
2858
2859                 skip_copy = false;
2860                 switch (type) {
2861                 case OVS_ACTION_ATTR_UNSPEC:
2862                         return -EINVAL;
2863
2864                 case OVS_ACTION_ATTR_USERSPACE:
2865                         err = validate_userspace(a);
2866                         if (err)
2867                                 return err;
2868                         break;
2869
2870                 case OVS_ACTION_ATTR_OUTPUT:
2871                         if (nla_get_u32(a) >= DP_MAX_PORTS)
2872                                 return -EINVAL;
2873                         break;
2874
2875                 case OVS_ACTION_ATTR_TRUNC: {
2876                         const struct ovs_action_trunc *trunc = nla_data(a);
2877
2878                         if (trunc->max_len < ETH_HLEN)
2879                                 return -EINVAL;
2880                         break;
2881                 }
2882
2883                 case OVS_ACTION_ATTR_HASH: {
2884                         const struct ovs_action_hash *act_hash = nla_data(a);
2885
2886                         switch (act_hash->hash_alg) {
2887                         case OVS_HASH_ALG_L4:
2888                                 break;
2889                         default:
2890                                 return  -EINVAL;
2891                         }
2892
2893                         break;
2894                 }
2895
2896                 case OVS_ACTION_ATTR_POP_VLAN:
2897                         if (mac_proto != MAC_PROTO_ETHERNET)
2898                                 return -EINVAL;
2899                         vlan_tci = htons(0);
2900                         break;
2901
2902                 case OVS_ACTION_ATTR_PUSH_VLAN:
2903                         if (mac_proto != MAC_PROTO_ETHERNET)
2904                                 return -EINVAL;
2905                         vlan = nla_data(a);
2906                         if (!eth_type_vlan(vlan->vlan_tpid))
2907                                 return -EINVAL;
2908                         if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
2909                                 return -EINVAL;
2910                         vlan_tci = vlan->vlan_tci;
2911                         break;
2912
2913                 case OVS_ACTION_ATTR_RECIRC:
2914                         break;
2915
2916                 case OVS_ACTION_ATTR_PUSH_MPLS: {
2917                         const struct ovs_action_push_mpls *mpls = nla_data(a);
2918
2919                         if (!eth_p_mpls(mpls->mpls_ethertype))
2920                                 return -EINVAL;
2921                         /* Only push MPLS onto packets with no VLAN tag
2922                          * and a whitelisted ethertype (known tag order).
2923                          */
2924                         if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2925                             (eth_type != htons(ETH_P_IP) &&
2926                              eth_type != htons(ETH_P_IPV6) &&
2927                              eth_type != htons(ETH_P_ARP) &&
2928                              eth_type != htons(ETH_P_RARP) &&
2929                              !eth_p_mpls(eth_type)))
2930                                 return -EINVAL;
2931                         eth_type = mpls->mpls_ethertype;
2932                         break;
2933                 }
2934
2935                 case OVS_ACTION_ATTR_POP_MPLS:
2936                         if (vlan_tci & htons(VLAN_TAG_PRESENT) ||
2937                             !eth_p_mpls(eth_type))
2938                                 return -EINVAL;
2939
2940                         /* Disallow subsequent L2.5+ set and mpls_pop actions
2941                          * as there is no check here to ensure that the new
2942                          * eth_type is valid and thus set actions could
2943                          * write off the end of the packet or otherwise
2944                          * corrupt it.
2945                          *
2946                          * Support for these actions is planned using packet
2947                          * recirculation.
2948                          */
2949                         eth_type = htons(0);
2950                         break;
2951
2952                 case OVS_ACTION_ATTR_SET:
2953                         err = validate_set(a, key, sfa,
2954                                            &skip_copy, mac_proto, eth_type,
2955                                            false, log);
2956                         if (err)
2957                                 return err;
2958                         break;
2959
2960                 case OVS_ACTION_ATTR_SET_MASKED:
2961                         err = validate_set(a, key, sfa,
2962                                            &skip_copy, mac_proto, eth_type,
2963                                            true, log);
2964                         if (err)
2965                                 return err;
2966                         break;
2967
2968                 case OVS_ACTION_ATTR_SAMPLE: {
2969                         bool last = nla_is_last(a, rem);
2970
2971                         err = validate_and_copy_sample(net, a, key, sfa,
2972                                                        eth_type, vlan_tci,
2973                                                        log, last);
2974                         if (err)
2975                                 return err;
2976                         skip_copy = true;
2977                         break;
2978                 }
2979
2980                 case OVS_ACTION_ATTR_CT:
2981                         err = ovs_ct_copy_action(net, a, key, sfa, log);
2982                         if (err)
2983                                 return err;
2984                         skip_copy = true;
2985                         break;
2986
2987                 case OVS_ACTION_ATTR_CT_CLEAR:
2988                         break;
2989
2990                 case OVS_ACTION_ATTR_PUSH_ETH:
2991                         /* Disallow pushing an Ethernet header if one
2992                          * is already present */
2993                         if (mac_proto != MAC_PROTO_NONE)
2994                                 return -EINVAL;
2995                         mac_proto = MAC_PROTO_ETHERNET;
2996                         break;
2997
2998                 case OVS_ACTION_ATTR_POP_ETH:
2999                         if (mac_proto != MAC_PROTO_ETHERNET)
3000                                 return -EINVAL;
3001                         if (vlan_tci & htons(VLAN_TAG_PRESENT))
3002                                 return -EINVAL;
3003                         mac_proto = MAC_PROTO_NONE;
3004                         break;
3005
3006                 case OVS_ACTION_ATTR_PUSH_NSH:
3007                         if (mac_proto != MAC_PROTO_ETHERNET) {
3008                                 u8 next_proto;
3009
3010                                 next_proto = tun_p_from_eth_p(eth_type);
3011                                 if (!next_proto)
3012                                         return -EINVAL;
3013                         }
3014                         mac_proto = MAC_PROTO_NONE;
3015                         if (!validate_nsh(nla_data(a), false, true, true))
3016                                 return -EINVAL;
3017                         break;
3018
3019                 case OVS_ACTION_ATTR_POP_NSH: {
3020                         __be16 inner_proto;
3021
3022                         if (eth_type != htons(ETH_P_NSH))
3023                                 return -EINVAL;
3024                         inner_proto = tun_p_to_eth_p(key->nsh.base.np);
3025                         if (!inner_proto)
3026                                 return -EINVAL;
3027                         if (key->nsh.base.np == TUN_P_ETHERNET)
3028                                 mac_proto = MAC_PROTO_ETHERNET;
3029                         else
3030                                 mac_proto = MAC_PROTO_NONE;
3031                         break;
3032                 }
3033
3034                 case OVS_ACTION_ATTR_METER:
3035                         /* Non-existent meters are simply ignored.  */
3036                         break;
3037
3038                 default:
3039                         OVS_NLERR(log, "Unknown Action type %d", type);
3040                         return -EINVAL;
3041                 }
3042                 if (!skip_copy) {
3043                         err = copy_action(a, sfa, log);
3044                         if (err)
3045                                 return err;
3046                 }
3047         }
3048
3049         if (rem > 0)
3050                 return -EINVAL;
3051
3052         return 0;
3053 }
3054
3055 /* 'key' must be the masked key. */
3056 int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr,
3057                          const struct sw_flow_key *key,
3058                          struct sw_flow_actions **sfa, bool log)
3059 {
3060         int err;
3061
3062         *sfa = nla_alloc_flow_actions(min(nla_len(attr), MAX_ACTIONS_BUFSIZE));
3063         if (IS_ERR(*sfa))
3064                 return PTR_ERR(*sfa);
3065
3066         (*sfa)->orig_len = nla_len(attr);
3067         err = __ovs_nla_copy_actions(net, attr, key, sfa, key->eth.type,
3068                                      key->eth.vlan.tci, log);
3069         if (err)
3070                 ovs_nla_free_flow_actions(*sfa);
3071
3072         return err;
3073 }
3074
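/* Convert the internal sample() layout (struct sample_arg followed by the
 * nested actions) back into OVS_SAMPLE_ATTR_PROBABILITY and
 * OVS_SAMPLE_ATTR_ACTIONS attributes for userspace.
 */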
3075 static int sample_action_to_attr(const struct nlattr *attr,
3076                                  struct sk_buff *skb)
3077 {
3078         struct nlattr *start, *ac_start = NULL, *sample_arg;
3079         int err = 0, rem = nla_len(attr);
3080         const struct sample_arg *arg;
3081         struct nlattr *actions;
3082
3083         start = nla_nest_start(skb, OVS_ACTION_ATTR_SAMPLE);
3084         if (!start)
3085                 return -EMSGSIZE;
3086
3087         sample_arg = nla_data(attr);
3088         arg = nla_data(sample_arg);
3089         actions = nla_next(sample_arg, &rem);
3090
3091         if (nla_put_u32(skb, OVS_SAMPLE_ATTR_PROBABILITY, arg->probability)) {
3092                 err = -EMSGSIZE;
3093                 goto out;
3094         }
3095
3096         ac_start = nla_nest_start(skb, OVS_SAMPLE_ATTR_ACTIONS);
3097         if (!ac_start) {
3098                 err = -EMSGSIZE;
3099                 goto out;
3100         }
3101
3102         err = ovs_nla_put_actions(actions, rem, skb);
3103
3104 out:
3105         if (err) {
3106                 nla_nest_cancel(skb, ac_start);
3107                 nla_nest_cancel(skb, start);
3108         } else {
3109                 nla_nest_end(skb, ac_start);
3110                 nla_nest_end(skb, start);
3111         }
3112
3113         return err;
3114 }
3115
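/* Re-encode a set() action for userspace.  Tunnel sets are rebuilt from the
 * ip_tunnel_info stored in the OVS_KEY_ATTR_TUNNEL_INFO action; everything
 * else is copied verbatim.
 */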
3116 static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
3117 {
3118         const struct nlattr *ovs_key = nla_data(a);
3119         int key_type = nla_type(ovs_key);
3120         struct nlattr *start;
3121         int err;
3122
3123         switch (key_type) {
3124         case OVS_KEY_ATTR_TUNNEL_INFO: {
3125                 struct ovs_tunnel_info *ovs_tun = nla_data(ovs_key);
3126                 struct ip_tunnel_info *tun_info = &ovs_tun->tun_dst->u.tun_info;
3127
3128                 start = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3129                 if (!start)
3130                         return -EMSGSIZE;
3131
3132                 err =  ip_tun_to_nlattr(skb, &tun_info->key,
3133                                         ip_tunnel_info_opts(tun_info),
3134                                         tun_info->options_len,
3135                                         ip_tunnel_info_af(tun_info));
3136                 if (err)
3137                         return err;
3138                 nla_nest_end(skb, start);
3139                 break;
3140         }
3141         default:
3142                 if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a), ovs_key))
3143                         return -EMSGSIZE;
3144                 break;
3145         }
3146
3147         return 0;
3148 }
3149
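/* OVS_ACTION_ATTR_SET_TO_MASKED is kernel-internal: emit only the key half
 * so userspace sees the original non-masked set() it installed.
 */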
3150 static int masked_set_action_to_set_action_attr(const struct nlattr *a,
3151                                                 struct sk_buff *skb)
3152 {
3153         const struct nlattr *ovs_key = nla_data(a);
3154         struct nlattr *nla;
3155         size_t key_len = nla_len(ovs_key) / 2;
3156
3157         /* Revert the conversion we did from a non-masked set action to
3158          * a masked set action.
3159          */
3160         nla = nla_nest_start(skb, OVS_ACTION_ATTR_SET);
3161         if (!nla)
3162                 return -EMSGSIZE;
3163
3164         if (nla_put(skb, nla_type(ovs_key), key_len, nla_data(ovs_key)))
3165                 return -EMSGSIZE;
3166
3167         nla_nest_end(skb, nla);
3168         return 0;
3169 }
3170
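/* Translate the kernel-internal action list back into the netlink
 * representation expected by userspace.
 */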
3171 int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb)
3172 {
3173         const struct nlattr *a;
3174         int rem, err;
3175
3176         nla_for_each_attr(a, attr, len, rem) {
3177                 int type = nla_type(a);
3178
3179                 switch (type) {
3180                 case OVS_ACTION_ATTR_SET:
3181                         err = set_action_to_attr(a, skb);
3182                         if (err)
3183                                 return err;
3184                         break;
3185
3186                 case OVS_ACTION_ATTR_SET_TO_MASKED:
3187                         err = masked_set_action_to_set_action_attr(a, skb);
3188                         if (err)
3189                                 return err;
3190                         break;
3191
3192                 case OVS_ACTION_ATTR_SAMPLE:
3193                         err = sample_action_to_attr(a, skb);
3194                         if (err)
3195                                 return err;
3196                         break;
3197
3198                 case OVS_ACTION_ATTR_CT:
3199                         err = ovs_ct_action_to_attr(nla_data(a), skb);
3200                         if (err)
3201                                 return err;
3202                         break;
3203
3204                 default:
3205                         if (nla_put(skb, type, nla_len(a), nla_data(a)))
3206                                 return -EMSGSIZE;
3207                         break;
3208                 }
3209         }
3210
3211         return 0;
3212 }