// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req);

static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

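/* sk_rx_dst_set() callback: cache the incoming dst (plus ifindex and route
 * cookie) on the socket so the established fast path can validate and reuse
 * it without a fresh route lookup.
 */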
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}

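/* The initial sequence number and timestamp offset of a new connection are
 * derived from the address/port 4-tuple via keyed (secure) hashes, so they
 * are hard to predict but stable for a given flow.
 */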
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent the BPF program called below from accessing bytes that are
	 * outside the bound specified by the user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(sk))
			mptcpv6_handle_mapped(sk, true);
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			if (sk_is_mptcp(sk))
				mptcpv6_handle_mapped(sk, false);
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

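/* Apply a deferred PMTU update (tp->mtu_info): nothing to do for LISTEN or
 * CLOSE sockets; otherwise refresh the route and, if the cached MSS is now
 * too large for the path, shrink it and retransmit.
 */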
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		      u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (see tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		tp->mtu_info = ntohl(info);
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}


	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket is
		 * already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	case TCP_LISTEN:
		break;
	default:
		/* check if this ICMP message allows revert of backoff.
		 * (see RFC 6069)
		 */
		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
		    code == ICMPV6_NOROUTE)
			tcp_ld_RTO_revert(sk, seq);
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}


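/* Transmit a SYN-ACK for @req. The route is looked up here when the caller
 * did not already provide one; the traffic class may reflect the ToS of the
 * incoming SYN when the tcp_reflect_tos sysctl is enabled.
 */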
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type,
			      struct sk_buff *syn_skb)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;
	u8 tclass;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type, syn_skb);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
				(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
				(np->tclass & INET_ECN_MASK) :
				np->tclass;

		if (!INET_ECN_is_capable(tclass) &&
		    tcp_bpf_ca_needs_ecn((struct sock *)req))
			tclass |= INET_ECN_ECT_0;

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt,
			       tclass, sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}


static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr,
						   int l3index)
{
	return tcp_md5_do_lookup(sk, l3index,
				 (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	int l3index;

	l3index = l3mdev_master_ifindex_by_index(sock_net(sk),
						 addr_sk->sk_bound_dev_if);
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr,
				    l3index);
}

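/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT): install or remove an MD5 key for
 * a peer address. V4-mapped addresses are stored as AF_INET keys; the _EXT
 * form can additionally scope the key by prefix length or L3 master device.
 */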
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 sockptr_t optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	int l3index = 0;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_sockptr(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_IFINDEX) {
		struct net_device *dev;

		rcu_read_lock();
		dev = dev_get_by_index_rcu(sock_net(sk), cmd.tcpm_ifindex);
		if (dev && netif_is_l3_master(dev))
			l3index = dev->ifindex;
		rcu_read_unlock();

		/* ok to reference set/not set outside of rcu;
		 * right now device MUST be an L3 master
		 */
		if (!dev || !l3index)
			return -EINVAL;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen,
					      l3index);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen, l3index);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, l3index,
				      cmd.tcpm_key, cmd.tcpm_keylen,
				      GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, l3index,
			      cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
}

static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

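/* Check the TCP-MD5 signature of an incoming segment against the key (if
 * any) configured for its source address. Returns true when the segment
 * must be dropped: missing, unexpected or invalid signature.
 */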
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb,
				    int dif, int sdif)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash, l3index;
	u8 newhash[16];

	/* sdif set, means packet ingressed via a device
	 * in an L3 domain and dif is set to the l3mdev
	 */
	l3index = sdif ? dif : 0;

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr, l3index);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u L3 index %d\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest), l3index);
		return true;
	}
#endif
	return false;
}

static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

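/* Build and send a stand-alone reply segment (RST when @rst is set,
 * otherwise a bare ACK) on the per-netns control socket. Used for resets,
 * time-wait ACKs and request-sock ACKs, i.e. whenever there is no full
 * socket to transmit from.
 */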
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif)
		fl6.flowi6_oif = tcp_v6_iif(skb);
	else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow even when this is for a RST;
	 * the underlying function uses it to retrieve the network namespace.
	 */
	dst = ip6_dst_lookup_flow(sock_net(ctl_sk), ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL,
			 tclass & ~INET_ECN_MASK, priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk not NULL, it means we did a successful lookup and incoming
	 * route had to be correct. prequeue might have dropped our dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		int l3index;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and inet_iif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr, l3index);
	} else if (hash_location) {
		int dif = tcp_v6_iif_l3_slave(skb);
		int sdif = tcp_v6_sdif(skb);
		int l3index;

		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the MD5 key through
		 * the listening socket. We do not lose security here:
		 * the incoming packet is verified against the found key;
		 * no RST is generated if the MD5 hash doesn't match.
		 */
		sk1 = inet6_lookup_listener(net,
					    &tcp_hashinfo, NULL, 0,
					    &ipv6h->saddr,
					    th->source, &ipv6h->daddr,
					    ntohs(th->source), dif, sdif);
		if (!sk1)
			goto out;

		/* sdif set, means packet ingressed via a device
		 * in an L3 domain and dif is set to it.
		 */
		l3index = tcp_v6_sdif(skb) ? dif : 0;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr, l3index);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1,
			     ipv6_get_dsfield(ipv6h), label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	int l3index;

	l3index = tcp_v6_sdif(skb) ? tcp_v6_iif_l3_slave(skb) : 0;

	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
			ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority);
}


static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

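/* Create the child socket once the handshake completes (or, for Fast Open,
 * on the SYN itself). The ETH_P_IP branch handles v4-mapped connections
 * accepted on an AF_INET6 listener; otherwise a native IPv6 child is built
 * and the listener's IPv6 options and MD5 key are cloned onto it.
 */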
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	bool found_dup_sk = false;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
	int l3index;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		if (sk_is_mptcp(newsk))
			mptcpv6_handle_mapped(newsk, true);
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = inet_iif(skb);
		newnp->mcast_hops = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Set ToS of the new socket based upon the value of incoming SYN.
	 * ECT bits are set later in tcp_init_transfer().
	 */
	if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
		newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	l3index = l3mdev_master_ifindex_by_index(sock_net(sk), ireq->ir_iif);

	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr, l3index);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, l3index, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash),
				       &found_dup_sk);
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	} else {
		if (!req_unhash && found_dup_sk) {
			/* This code path should only be executed in the
			 * syncookie case
			 */
			bh_unlock_sock(newsk);
			sock_put(newsk);
			newsk = NULL;
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it without affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   does not look very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb);

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

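/* Entry point for IPv6 TCP segments handed up by the IP layer: validate the
 * header and checksum, look up the owning socket (request, time-wait or
 * full), and either process the segment directly or queue it to the
 * socket's backlog when the owner holds the socket lock.
 */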
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct sk_buff *skb_to_free;
	int sdif = inet6_sdif(skb);
	int dif = inet6_iif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb, dif, sdif))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		skb_to_free = sk->sk_rx_skb_cache;
		sk->sk_rx_skb_cache = NULL;
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
		skb_to_free = NULL;
	}
	bh_unlock_sock(sk);
	if (skb_to_free)
		__kfree_skb(skb_to_free);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		fallthrough;
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

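/* Early demux: before routing, try to match the packet to an established
 * socket so its cached rx dst can be attached to the skb and the normal
 * route lookup skipped.
 */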
tcp_v6_early_demux(struct sk_buff * skb)1798 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1799 {
1800 const struct ipv6hdr *hdr;
1801 const struct tcphdr *th;
1802 struct sock *sk;
1803
1804 if (skb->pkt_type != PACKET_HOST)
1805 return;
1806
1807 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1808 return;
1809
1810 hdr = ipv6_hdr(skb);
1811 th = tcp_hdr(skb);
1812
1813 if (th->doff < sizeof(struct tcphdr) / 4)
1814 return;
1815
1816 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1817 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1818 &hdr->saddr, th->source,
1819 &hdr->daddr, ntohs(th->dest),
1820 inet6_iif(skb), inet6_sdif(skb));
1821 if (sk) {
1822 skb->sk = sk;
1823 skb->destructor = sock_edemux;
1824 if (sk_fullsock(sk)) {
1825 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1826
1827 if (dst)
1828 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1829 if (dst &&
1830 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1831 skb_dst_set_noref(skb, dst);
1832 }
1833 }
1834 }
1835
1836 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1837 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1838 .twsk_unique = tcp_twsk_unique,
1839 .twsk_destructor = tcp_twsk_destructor,
1840 };
1841
tcp_v6_send_check(struct sock * sk,struct sk_buff * skb)1842 INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
1843 {
1844 struct ipv6_pinfo *np = inet6_sk(sk);
1845
1846 __tcp_v6_send_check(skb, &np->saddr, &sk->sk_v6_daddr);
1847 }
1848
1849 const struct inet_connection_sock_af_ops ipv6_specific = {
1850 .queue_xmit = inet6_csk_xmit,
1851 .send_check = tcp_v6_send_check,
1852 .rebuild_header = inet6_sk_rebuild_header,
1853 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1854 .conn_request = tcp_v6_conn_request,
1855 .syn_recv_sock = tcp_v6_syn_recv_sock,
1856 .net_header_len = sizeof(struct ipv6hdr),
1857 .net_frag_header_len = sizeof(struct frag_hdr),
1858 .setsockopt = ipv6_setsockopt,
1859 .getsockopt = ipv6_getsockopt,
1860 .addr2sockaddr = inet6_csk_addr2sockaddr,
1861 .sockaddr_len = sizeof(struct sockaddr_in6),
1862 .mtu_reduced = tcp_v6_mtu_reduced,
1863 };
1864
1865 #ifdef CONFIG_TCP_MD5SIG
1866 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1867 .md5_lookup = tcp_v6_md5_lookup,
1868 .calc_md5_hash = tcp_v6_md5_hash_skb,
1869 .md5_parse = tcp_v6_parse_md5_keys,
1870 };
1871 #endif
1872
/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif

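/* New AF_INET6 TCP sockets start out with the native IPv6 ops installed
 * below.  If a socket ends up talking to an IPv4-mapped peer, the connect
 * and accept paths earlier in this file switch icsk_af_ops (and, with MD5
 * enabled, the af_specific ops) over to the ipv6_mapped variants above.
 */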
/* NOTE: A lot of things are set to zero explicitly by the call to
 *	 sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

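/* Emit one /proc/net/tcp6 row for a full socket.  timer_active encodes the
 * pending timer (1 = retransmit/loss probe, 2 = keepalive via sk_timer,
 * 4 = zero window probe, 0 = none).  rx_queue is computed locklessly, so a
 * transient negative rcv_nxt - copied_seq difference is clamped to zero.
 */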
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = READ_ONCE(sp->sk_ack_backlog);
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

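/* Emit one /proc/net/tcp6 row for a TIME-WAIT socket.  Most per-connection
 * counters no longer exist in this state, so they are printed as zeros;
 * only the remaining timewait lifetime is reported.
 */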
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

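/* seq_file ->show() callback behind /proc/net/tcp6: print the header line
 * for the start token, then dispatch on socket state, because the shared
 * TCP iterator hands us full sockets, TIME-WAIT sockets and request
 * sockets (TCP_NEW_SYN_RECV) from the same hash tables.
 */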
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

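/* Protocol hooks handed to the core socket layer for AF_INET6 stream
 * sockets.  Most of the work is shared with IPv4 TCP; only the pieces
 * that depend on the address family (connect, socket init/destroy,
 * hashing, backlog receive) point at the v6 variants in this file.
 */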
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
	.diag_destroy		= tcp_abort,
};
EXPORT_SYMBOL_GPL(tcpv6_prot);

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux		= tcp_v6_early_demux,
	.early_demux_handler	= tcp_v6_early_demux,
	.handler		= tcp_v6_rcv,
	.err_handler		= tcp_v6_err,
	.flags			= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

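/* Entry registered with the inet6 socket switch: it maps
 * socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP) onto tcpv6_prot and the
 * generic inet6 stream ops.
 */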
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

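/* Per-network-namespace setup: create the kernel control socket that the
 * IPv6 TCP code uses to send packets not tied to any user socket, such as
 * the RST and timewait ACK replies built by tcp_v6_send_response() earlier
 * in this file.
 */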
static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

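/* Module init: hook TCP into the IPv6 protocol handler table, register the
 * socket-switch entry and the per-netns ops, then let MPTCP piggy-back on
 * the IPv6 setup.  On failure everything is unwound in reverse order of
 * registration.
 */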
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;

	ret = mptcpv6_init();
	if (ret)
		goto out_tcpv6_pernet_subsys;

out:
	return ret;

out_tcpv6_pernet_subsys:
	unregister_pernet_subsys(&tcpv6_net_ops);
out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}