// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/indirect_call_wrapper.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>
#include <net/busy_poll.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <crypto/hash.h>
#include <linux/scatterlist.h>

#include <trace/events/tcp.h>

static void	tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif

/* Helper returning the inet6 address from a given tcp socket.
 * It can be used in the TCP stack instead of inet6_sk(sk).
 * This avoids a dereference and allows compiler optimizations.
 * It is a specialized version of inet6_sk_generic().
 */
static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
{
	unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);

	return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
}

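/* Cache the received dst on the socket for the established fast path:
 * the incoming ifindex and the IPv6 route cookie are stored alongside
 * sk->sk_rx_dst so the cached route can be validated later.
 */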
static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	if (dst && dst_hold_safe(dst)) {
		const struct rt6_info *rt = (const struct rt6_info *)dst;

		sk->sk_rx_dst = dst;
		inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
		tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
	}
}

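/* Derive the initial sequence number of a connection from the address
 * and port four-tuple of the received segment.
 */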
static u32 tcp_v6_init_seq(const struct sk_buff *skb)
{
	return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
				ipv6_hdr(skb)->saddr.s6_addr32,
				tcp_hdr(skb)->dest,
				tcp_hdr(skb)->source);
}

static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
{
	return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
				   ipv6_hdr(skb)->saddr.s6_addr32);
}

static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
			      int addr_len)
{
	/* This check is replicated from tcp_v6_connect() and intended to
	 * prevent the BPF program called below from accessing bytes that are
	 * out of the bounds specified by the user in addr_len.
	 */
	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	sock_owned_by_me(sk);

	return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
}

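/* Active open (connect()): validate the destination, resolve flow label
 * and scope, route the flow, choose a source address, hash the socket
 * into the established table and send the SYN. V4-mapped destinations
 * are handed over to tcp_v4_connect().
 */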
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct ipv6_txoptions *opt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;
	struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;

			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (IS_ERR(flowlabel))
				return -EINVAL;
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr)) {
		if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
			ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
					       &usin->sin6_addr);
		else
			usin->sin6_addr = in6addr_loopback;
	}

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		WRITE_ONCE(tp->write_seq, 0);
	}

	sk->sk_v6_daddr = usin->sin6_addr;
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type & IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		}
		np->saddr = sk->sk_v6_rcv_saddr;

		return err;
	}

	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
		saddr = &sk->sk_v6_rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.daddr = sk->sk_v6_daddr;
	fl6.saddr = saddr ? *saddr : np->saddr;
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;
	fl6.flowi6_uid = sk->sk_uid;

	opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
	final_p = fl6_update_dst(&fl6, opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (!saddr) {
		saddr = &fl6.saddr;
		sk->sk_v6_rcv_saddr = *saddr;
	}

	/* set the source address */
	np->saddr = *saddr;
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(sk, dst, NULL, NULL);

	icsk->icsk_ext_hdr_len = 0;
	if (opt)
		icsk->icsk_ext_hdr_len = opt->opt_flen +
					 opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(tcp_death_row, sk);
	if (err)
		goto late_failure;

	sk_set_txhash(sk);

	if (likely(!tp->repair)) {
		if (!tp->write_seq)
			WRITE_ONCE(tp->write_seq,
				   secure_tcpv6_seq(np->saddr.s6_addr32,
						    sk->sk_v6_daddr.s6_addr32,
						    inet->inet_sport,
						    inet->inet_dport));
		tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
						   np->saddr.s6_addr32,
						   sk->sk_v6_daddr.s6_addr32);
	}

	if (tcp_fastopen_defer_connect(sk, &err))
		return err;
	if (err)
		goto late_failure;

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

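/* React to a previously recorded ICMPV6_PKT_TOOBIG (tp->mtu_info):
 * update the cached route's PMTU and, if our MSS estimate shrank,
 * trigger a simple retransmit of the affected segments.
 */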
static void tcp_v6_mtu_reduced(struct sock *sk)
{
	struct dst_entry *dst;

	if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
		return;

	dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
	if (!dst)
		return;

	if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
		tcp_sync_mss(sk, dst_mtu(dst));
		tcp_simple_retransmit(sk);
	}
}

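/* ICMPv6 error handler: map the reported error onto the socket that
 * elicited it. Redirects update the cached route, PKT_TOOBIG feeds PMTU
 * discovery, and everything else is reported to the socket according to
 * its state.
 */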
static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct net *net = dev_net(skb->dev);
	struct request_sock *fastopen;
	struct ipv6_pinfo *np;
	struct tcp_sock *tp;
	__u32 seq, snd_una;
	struct sock *sk;
	bool fatal;
	int err;

	sk = __inet6_lookup_established(net, &tcp_hashinfo,
					&hdr->daddr, th->dest,
					&hdr->saddr, ntohs(th->source),
					skb->dev->ifindex, inet6_sdif(skb));

	if (!sk) {
		__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
				  ICMP6_MIB_INERRORS);
		return -ENOENT;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return 0;
	}
	seq = ntohl(th->seq);
	fatal = icmpv6_err_convert(type, code, &err);
	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		tcp_req_err(sk, seq, fatal);
		return 0;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
		__NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
	fastopen = rcu_dereference(tp->fastopen_rsk);
	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, snd_una, tp->snd_nxt)) {
		__NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = tcp_inet6_sk(sk);

	if (type == NDISC_REDIRECT) {
		if (!sock_owned_by_user(sk)) {
			struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);

			if (dst)
				dst->ops->redirect(dst, sk, skb);
		}
		goto out;
	}

	if (type == ICMPV6_PKT_TOOBIG) {
		/* We are not interested in TCP_LISTEN and open_requests
		 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
		 * they should go through unfragmented).
		 */
		if (sk->sk_state == TCP_LISTEN)
			goto out;

		if (!ip6_sk_accept_pmtu(sk))
			goto out;

		tp->mtu_info = ntohl(info);
		if (!sock_owned_by_user(sk))
			tcp_v6_mtu_reduced(sk);
		else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
					   &sk->sk_tsq_flags))
			sock_hold(sk);
		goto out;
	}

	/* Might be for a request_sock */
	switch (sk->sk_state) {
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:
		/* Only in fast or simultaneous open. If a fast open socket
		 * is already accepted it is treated as a connected one below.
		 */
		if (fastopen && !fastopen->sk)
			break;

		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else {
			sk->sk_err_soft = err;
		}
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else {
		sk->sk_err_soft = err;
	}

out:
	bh_unlock_sock(sk);
	sock_put(sk);
	return 0;
}

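/* Send a SYN-ACK for a (possibly Fast Open) request: grab a route if
 * none was supplied, build the segment, checksum it, and transmit it
 * with the listener's IPv6 transmit options.
 */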
static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
			      struct flowi *fl,
			      struct request_sock *req,
			      struct tcp_fastopen_cookie *foc,
			      enum tcp_synack_type synack_type)
{
	struct inet_request_sock *ireq = inet_rsk(req);
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct flowi6 *fl6 = &fl->u.ip6;
	struct sk_buff *skb;
	int err = -ENOMEM;

	/* First, grab a route. */
	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
					       IPPROTO_TCP)) == NULL)
		goto done;

	skb = tcp_make_synack(sk, dst, req, foc, synack_type);

	if (skb) {
		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
				    &ireq->ir_v6_rmt_addr);

		fl6->daddr = ireq->ir_v6_rmt_addr;
		if (np->repflow && ireq->pktopts)
			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));

		rcu_read_lock();
		opt = ireq->ipv6_opt;
		if (!opt)
			opt = rcu_dereference(np->opt);
		err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass,
			       sk->sk_priority);
		rcu_read_unlock();
		err = net_xmit_eval(err);
	}

done:
	return err;
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree(inet_rsk(req)->ipv6_opt);
	kfree_skb(inet_rsk(req)->pktopts);
}

#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
						   const struct in6_addr *addr)
{
	return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
						const struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
}

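/* setsockopt(TCP_MD5SIG / TCP_MD5SIG_EXT) handler: add or delete an MD5
 * key for a peer address; v4-mapped addresses are stored as IPv4 keys.
 *
 * Userspace side, sketched for illustration only (struct tcp_md5sig is
 * from <linux/tcp.h>; the peer address and key values below are made up):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *
 *	memcpy(&md5.tcpm_addr, &peer_sin6, sizeof(peer_sin6));
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */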
static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
				 char __user *optval, int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 prefixlen;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (optname == TCP_MD5SIG_EXT &&
	    cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
		prefixlen = cmd.tcpm_prefixlen;
		if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
					prefixlen > 32))
			return -EINVAL;
	} else {
		prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
	}

	if (!cmd.tcpm_keylen) {
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
					      AF_INET, prefixlen);
		return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
				      AF_INET6, prefixlen);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (ipv6_addr_v4mapped(&sin6->sin6_addr))
		return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
				      AF_INET, prefixlen, cmd.tcpm_key,
				      cmd.tcpm_keylen, GFP_KERNEL);

	return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
			      AF_INET6, prefixlen, cmd.tcpm_key,
			      cmd.tcpm_keylen, GFP_KERNEL);
}

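/* Feed the IPv6 pseudo-header plus the TCP header (with its checksum
 * field zeroed) into the pool's MD5 hash request.
 */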
static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
				   const struct in6_addr *daddr,
				   const struct in6_addr *saddr,
				   const struct tcphdr *th, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;
	struct tcphdr *_th;

	bp = hp->scratch;
	/* 1. TCP pseudo-header (RFC2460) */
	bp->saddr = *saddr;
	bp->daddr = *daddr;
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	_th = (struct tcphdr *)(bp + 1);
	memcpy(_th, th, sizeof(*th));
	_th->check = 0;

	sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
	ahash_request_set_crypt(hp->md5_req, &sg, NULL,
				sizeof(*bp) + sizeof(*th));
	return crypto_ahash_update(hp->md5_req);
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       const struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;
	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash,
			       const struct tcp_md5sig_key *key,
			       const struct sock *sk,
			       const struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct ahash_request *req;
	const struct tcphdr *th = tcp_hdr(skb);

	if (sk) { /* valid for establish/request sockets */
		saddr = &sk->sk_v6_rcv_saddr;
		daddr = &sk->sk_v6_daddr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);

		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	req = hp->md5_req;

	if (crypto_ahash_init(req))
		goto clear_hash;

	if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	ahash_request_set_crypt(req, NULL, md5_hash, 0);
	if (crypto_ahash_final(req))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

#endif

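/* Returns true if the segment must be dropped: an MD5 option was
 * expected but is missing, is present but unexpected, or its digest
 * does not match the one we compute over the segment.
 */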
static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
				    const struct sk_buff *skb)
{
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	const struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return false;

	if (hash_expected && !hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return true;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return true;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
		net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
				     genhash ? "failed" : "mismatch",
				     &ip6h->saddr, ntohs(th->source),
				     &ip6h->daddr, ntohs(th->dest));
		return true;
	}
#endif
	return false;
}

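/* Fill the IPv6 part of a new request sock from the incoming SYN: the
 * addresses, the inbound interface for link-local peers, and a
 * reference to the skb when the listener asked for packet options.
 */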
static void tcp_v6_init_req(struct request_sock *req,
			    const struct sock *sk_listener,
			    struct sk_buff *skb)
{
	bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
	struct inet_request_sock *ireq = inet_rsk(req);
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);

	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;

	/* So that link locals have meaning */
	if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
		ireq->ir_iif = tcp_v6_iif(skb);

	if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
	    (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
	     np->rxopt.bits.rxinfo ||
	     np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
	     np->rxopt.bits.rxohlim || np->repflow)) {
		refcount_inc(&skb->users);
		ireq->pktopts = skb;
	}
}

static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
					  struct flowi *fl,
					  const struct request_sock *req)
{
	return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
}

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.mss_clamp	=	IPV6_MIN_MTU - sizeof(struct tcphdr) -
				sizeof(struct ipv6hdr),
#ifdef CONFIG_TCP_MD5SIG
	.req_md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
#endif
	.init_req	=	tcp_v6_init_req,
#ifdef CONFIG_SYN_COOKIES
	.cookie_init_seq =	cookie_v6_init_sequence,
#endif
	.route_req	=	tcp_v6_route_req,
	.init_seq	=	tcp_v6_init_seq,
	.init_ts_off	=	tcp_v6_init_ts_off,
	.send_synack	=	tcp_v6_send_synack,
};

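/* Build and send a bare ACK or RST on behalf of @sk (which may be NULL,
 * a timewait sock or a full sock), swapping the addresses and ports of
 * the segment being answered. Shared by tcp_v6_send_reset() and
 * tcp_v6_send_ack() below.
 */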
static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
				 u32 ack, u32 win, u32 tsval, u32 tsecr,
				 int oif, struct tcp_md5sig_key *key, int rst,
				 u8 tclass, __be32 label, u32 priority)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct tcphdr *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;
	__u32 mark = 0;

	if (tsecr)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (!buff)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (tsecr) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tsval);
		*topt++ = htonl(tsecr);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = ipv6_hdr(skb)->saddr;
	fl6.saddr = ipv6_hdr(skb)->daddr;
	fl6.flowlabel = label;

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	if (rt6_need_strict(&fl6.daddr) && !oif) {
		fl6.flowi6_oif = tcp_v6_iif(skb);
	} else {
		if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
			oif = skb->skb_iif;

		fl6.flowi6_oif = oif;
	}

	if (sk) {
		if (sk->sk_state == TCP_TIME_WAIT) {
			mark = inet_twsk(sk)->tw_mark;
			/* autoflowlabel relies on buff->hash */
			skb_set_hash(buff, inet_twsk(sk)->tw_txhash,
				     PKT_HASH_TYPE_L4);
		} else {
			mark = sk->sk_mark;
		}
		buff->tstamp = tcp_transmit_time(sk);
	}
	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup_flow even when it is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass,
			 priority);
		TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

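/* Answer an offending segment with a RST. With MD5 configured and no
 * socket at hand, the listener is looked up so the RST can be signed
 * with the right key; no RST is sent at all if the incoming segment
 * carries an MD5 option that does not verify.
 */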
static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
{
	const struct tcphdr *th = tcp_hdr(skb);
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;
#ifdef CONFIG_TCP_MD5SIG
	const __u8 *hash_location = NULL;
	unsigned char newhash[16];
	int genhash;
	struct sock *sk1 = NULL;
#endif
	__be32 label = 0;
	u32 priority = 0;
	struct net *net;
	int oif = 0;

	if (th->rst)
		return;

	/* If sk is not NULL, it means we did a successful lookup and the
	 * incoming route had to be correct. prequeue might have dropped our
	 * dst.
	 */
	if (!sk && !ipv6_unicast_destination(skb))
		return;

	net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
#ifdef CONFIG_TCP_MD5SIG
	rcu_read_lock();
	hash_location = tcp_parse_md5sig_option(th);
	if (sk && sk_fullsock(sk)) {
		key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
	} else if (hash_location) {
		/*
		 * The active side is lost. Try to find the listening socket
		 * through the source port, and then find the md5 key through
		 * the listening socket. We are not losing security here:
		 * the incoming packet is checked with the md5 hash of the
		 * key found; no RST is generated if the md5 hash doesn't
		 * match.
		 */
		sk1 = inet6_lookup_listener(net,
					   &tcp_hashinfo, NULL, 0,
					   &ipv6h->saddr,
					   th->source, &ipv6h->daddr,
					   ntohs(th->source),
					   tcp_v6_iif_l3_slave(skb),
					   tcp_v6_sdif(skb));
		if (!sk1)
			goto out;

		key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
		if (!key)
			goto out;

		genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
		if (genhash || memcmp(hash_location, newhash, 16) != 0)
			goto out;
	}
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	if (sk) {
		oif = sk->sk_bound_dev_if;
		if (sk_fullsock(sk)) {
			const struct ipv6_pinfo *np = tcp_inet6_sk(sk);

			trace_tcp_send_reset(sk, skb);
			if (np->repflow)
				label = ip6_flowlabel(ipv6h);
			priority = sk->sk_priority;
		}
		if (sk->sk_state == TCP_TIME_WAIT) {
			label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel);
			priority = inet_twsk(sk)->tw_priority;
		}
	} else {
		if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET)
			label = ip6_flowlabel(ipv6h);
	}

	tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0,
			     label, priority);

#ifdef CONFIG_TCP_MD5SIG
out:
	rcu_read_unlock();
#endif
}

static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
			    u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
			    struct tcp_md5sig_key *key, u8 tclass,
			    __be32 label, u32 priority)
{
	tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
			     tclass, label, priority);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcp_time_stamp_raw() + tcptw->tw_ts_offset,
			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel), tw->tw_priority);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
	 */
	/* RFC 7323 2.3
	 * The window field (SEG.WND) of every outgoing segment, with the
	 * exception of <SYN> segments, MUST be right-shifted by
	 * Rcv.Wind.Shift bits:
	 */
	tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
			tcp_rsk(req)->rcv_nxt,
			req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
			tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
			req->ts_recent, sk->sk_bound_dev_if,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
			0, 0, sk->sk_priority);
}

static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	const struct tcphdr *th = tcp_hdr(skb);

	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

u16 tcp_v6_get_syncookie(struct sock *sk, struct ipv6hdr *iph,
			 struct tcphdr *th, u32 *cookie)
{
	u16 mss = 0;
#ifdef CONFIG_SYN_COOKIES
	mss = tcp_get_syncookie_mss(&tcp6_request_sock_ops,
				    &tcp_request_sock_ipv6_ops, sk, th);
	if (mss) {
		*cookie = __cookie_v6_init_sequence(iph, th, &mss);
		tcp_synq_overflow(sk);
	}
#endif
	return mss;
}

static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	return tcp_conn_request(&tcp6_request_sock_ops,
				&tcp_request_sock_ipv6_ops, sk, skb);

drop:
	tcp_listendrop(sk);
	return 0; /* don't send reset */
}

static void tcp_v6_restore_cb(struct sk_buff *skb)
{
	/* We need to move header back to the beginning if xfrm6_policy_check()
	 * and tcp_v6_fill_cb() are going to be called again.
	 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
	 */
	memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
		sizeof(struct inet6_skb_parm));
}

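/* Create the child socket completing the handshake. SYNs that arrived
 * as IPv4 are delegated to tcp_v4_syn_recv_sock() and the result is
 * dressed up as a v4-mapped IPv6 socket; otherwise the IPv6 addresses,
 * options and (optionally) the MD5 key are cloned from the request and
 * the listener.
 */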
static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst,
					 struct request_sock *req_unhash,
					 bool *own_req)
{
	struct inet_request_sock *ireq;
	struct ipv6_pinfo *newnp;
	const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct ipv6_txoptions *opt;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif
	struct flowi6 fl6;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
					     req_unhash, own_req);

		if (!newsk)
			return NULL;

		inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

		newinet = inet_sk(newsk);
		newnp = tcp_inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		newnp->saddr = newsk->sk_v6_rcv_saddr;

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_mc_list = NULL;
		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet_iif(skb);
		newnp->mcast_hops  = ip_hdr(skb)->ttl;
		newnp->rcv_flowinfo = 0;
		if (np->repflow)
			newnp->flow_label = 0;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	ireq = inet_rsk(req);

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (!newsk)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	ip6_dst_store(newsk, dst, NULL, NULL);
	inet6_sk_rx_dst_set(newsk, skb);

	inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = tcp_inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
	newnp->saddr = ireq->ir_v6_loc_addr;
	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
	newsk->sk_bound_dev_if = ireq->ir_iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_mc_list = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	newnp->pktoptions = NULL;
	newnp->opt	  = NULL;
	newnp->mcast_oif  = tcp_v6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
	if (np->repflow)
		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));

	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever, but we
	   do one more thing here: reattach optmem to newsk.
	 */
	opt = ireq->ipv6_opt;
	if (!opt)
		opt = rcu_dereference(np->opt);
	if (opt) {
		opt = ipv6_dup_options(newsk, opt);
		RCU_INIT_POINTER(newnp->opt, opt);
	}
	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (opt)
		inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
						    opt->opt_flen;

	tcp_ca_openreq_child(newsk, dst);

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
	if (key) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
			       AF_INET6, 128, key->key, key->keylen,
			       sk_gfp_mask(sk, GFP_ATOMIC));
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		inet_csk_prepare_forced_close(newsk);
		tcp_done(newsk);
		goto out;
	}
	*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
	if (*own_req) {
		tcp_move_syn(newtp, req);

		/* Clone pktoptions received with SYN, if we own the req */
		if (ireq->pktopts) {
			newnp->pktoptions = skb_clone(ireq->pktopts,
						      sk_gfp_mask(sk, GFP_ATOMIC));
			consume_skb(ireq->pktopts);
			ireq->pktopts = NULL;
			if (newnp->pktoptions) {
				tcp_v6_restore_cb(newnp->pktoptions);
				skb_set_owner_r(newnp->pktoptions, newsk);
			}
		}
	}

	return newsk;

out_overflow:
	__NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	dst_release(dst);
out:
	tcp_listendrop(sk);
	return NULL;
}

/* The socket must have its spinlock held when we get
 * here, unless it is a TCP_LISTEN socket.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = tcp_inet6_sk(sk);
	struct sk_buff *opt_skb = NULL;
	struct tcp_sock *tp;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		struct dst_entry *dst = sk->sk_rx_dst;

		sock_rps_save_rxhash(sk, skb);
		sk_mark_napi_id(sk, skb);
		if (dst) {
			if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
			    dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
				dst_release(dst);
				sk->sk_rx_dst = NULL;
			}
		}

		tcp_rcv_established(sk, skb);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_cookie_check(sk, skb);

		if (!nsk)
			goto discard;

		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else {
		sock_rps_save_rxhash(sk, skb);
	}

	if (tcp_rcv_state_process(sk, skb))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
	TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
		if (np->repflow)
			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
			skb_set_owner_r(opt_skb, sk);
			tcp_v6_restore_cb(opt_skb);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
			   const struct tcphdr *th)
{
	/* This is tricky: we move IP6CB at its correct location into
	 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
	 * _decode_session6() uses IP6CB().
	 * barrier() makes sure compiler won't play aliasing games.
	 */
	memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
		sizeof(struct inet6_skb_parm));
	barrier();

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff * 4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
	TCP_SKB_CB(skb)->tcp_tw_isn = 0;
	TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;
	TCP_SKB_CB(skb)->has_rxtstamp =
			skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
}

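/* Main receive path: validate and checksum the headers, look up the
 * owning socket, and process the segment directly, via the backlog, or
 * through the request/timewait mini-socket paths.
 */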
INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
{
	struct sk_buff *skb_to_free;
	int sdif = inet6_sdif(skb);
	const struct tcphdr *th;
	const struct ipv6hdr *hdr;
	bool refcounted;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	__TCP_INC_STATS(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = (const struct tcphdr *)skb->data;

	if (unlikely(th->doff < sizeof(struct tcphdr) / 4))
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff * 4))
		goto discard_it;

	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
		goto csum_error;

	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);

lookup:
	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
				th->source, th->dest, inet6_iif(skb), sdif,
				&refcounted);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (sk->sk_state == TCP_NEW_SYN_RECV) {
		struct request_sock *req = inet_reqsk(sk);
		bool req_stolen = false;
		struct sock *nsk;

		sk = req->rsk_listener;
		if (tcp_v6_inbound_md5_hash(sk, skb)) {
			sk_drops_add(sk, skb);
			reqsk_put(req);
			goto discard_it;
		}
		if (tcp_checksum_complete(skb)) {
			reqsk_put(req);
			goto csum_error;
		}
		if (unlikely(sk->sk_state != TCP_LISTEN)) {
			inet_csk_reqsk_queue_drop_and_put(sk, req);
			goto lookup;
		}
		sock_hold(sk);
		refcounted = true;
		nsk = NULL;
		if (!tcp_filter(sk, skb)) {
			th = (const struct tcphdr *)skb->data;
			hdr = ipv6_hdr(skb);
			tcp_v6_fill_cb(skb, hdr, th);
			nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
		}
		if (!nsk) {
			reqsk_put(req);
			if (req_stolen) {
				/* Another cpu got exclusive access to req
				 * and created a full blown socket.
				 * Try to feed this packet to this socket
				 * instead of discarding it.
				 */
				tcp_v6_restore_cb(skb);
				sock_put(sk);
				goto lookup;
			}
			goto discard_and_relse;
		}
		if (nsk == sk) {
			reqsk_put(req);
			tcp_v6_restore_cb(skb);
		} else if (tcp_child_process(sk, nsk, skb)) {
			tcp_v6_send_reset(nsk, skb);
			goto discard_and_relse;
		} else {
			sock_put(sk);
			return 0;
		}
	}
	if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
		__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard_and_relse;

	if (tcp_filter(sk, skb))
		goto discard_and_relse;
	th = (const struct tcphdr *)skb->data;
	hdr = ipv6_hdr(skb);
	tcp_v6_fill_cb(skb, hdr, th);

	skb->dev = NULL;

	if (sk->sk_state == TCP_LISTEN) {
		ret = tcp_v6_do_rcv(sk, skb);
		goto put_and_return;
	}

	sk_incoming_cpu_update(sk);

	bh_lock_sock_nested(sk);
	tcp_segs_in(tcp_sk(sk), skb);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		skb_to_free = sk->sk_rx_skb_cache;
		sk->sk_rx_skb_cache = NULL;
		ret = tcp_v6_do_rcv(sk, skb);
	} else {
		if (tcp_add_backlog(sk, skb))
			goto discard_and_relse;
		skb_to_free = NULL;
	}
	bh_unlock_sock(sk);
	if (skb_to_free)
		__kfree_skb(skb_to_free);
put_and_return:
	if (refcounted)
		sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
csum_error:
		__TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
bad_packet:
		__TCP_INC_STATS(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sk_drops_add(sk, skb);
	if (refcounted)
		sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	tcp_v6_fill_cb(skb, hdr, th);

	if (tcp_checksum_complete(skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto csum_error;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    skb, __tcp_hdrlen(th),
					    &ipv6_hdr(skb)->saddr, th->source,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest),
					    tcp_v6_iif_l3_slave(skb),
					    sdif);
		if (sk2) {
			struct inet_timewait_sock *tw = inet_twsk(sk);

			inet_twsk_deschedule_put(tw);
			sk = sk2;
			tcp_v6_restore_cb(skb);
			refcounted = false;
			goto process;
		}
	}
		/* to ACK */
		/* fall through */
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		tcp_v6_send_reset(sk, skb);
		inet_twsk_deschedule_put(inet_twsk(sk));
		goto discard_it;
	case TCP_TW_SUCCESS:
		;
	}
	goto discard_it;
}

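/* Early demux: look up an established socket before routing so that its
 * cached and still-valid rx dst can be reused, saving a route lookup
 * for the segment.
 */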
INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
{
	const struct ipv6hdr *hdr;
	const struct tcphdr *th;
	struct sock *sk;

	if (skb->pkt_type != PACKET_HOST)
		return;

	if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
		return;

	hdr = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr) / 4)
		return;

	/* Note : We use inet6_iif() here, not tcp_v6_iif() */
	sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
					&hdr->saddr, th->source,
					&hdr->daddr, ntohs(th->dest),
					inet6_iif(skb), inet6_sdif(skb));
	if (sk) {
		skb->sk = sk;
		skb->destructor = sock_edemux;
		if (sk_fullsock(sk)) {
			struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);

			if (dst)
				dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
			if (dst &&
			    inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
				skb_dst_set_noref(skb, dst);
		}
	}
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor = tcp_twsk_destructor,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.sk_rx_dst_set	   = inet6_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.net_frag_header_len = sizeof(struct frag_hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v6_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */
static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.sk_rx_dst_set	   = inet_sk_rx_dst_set,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
	.mtu_reduced	   = tcp_v4_mtu_reduced,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things are set to zero explicitly by the call to
 *       sk_alloc(), so they need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);

	tcp_init_sock(sk);

	icsk->icsk_af_ops = &ipv6_specific;

#ifdef CONFIG_TCP_MD5SIG
	tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
#endif

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 const struct request_sock *req, int i)
{
	long ttd = req->rsk_timer.expires - jiffies;
	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   inet_rsk(req)->ir_num,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->ir_rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->num_timeout,
		   from_kuid_munged(seq_user_ns(seq),
				    sock_i_uid(req->rsk_listener)),
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

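/* Emit one line of /proc/net/tcp6 for a full socket.  The columns match
 * the header printed by tcp6_seq_show():
 *   sl local_address remote_address st tx_queue rx_queue tr tm->when
 *   retrnsmt uid timeout inode
 * followed by refcount, socket pointer, rto, ato, quickack/pingpong,
 * cwnd and ssthresh (max_qlen for listeners, -1 while still in initial
 * slow start).  Timer codes in "tr": 0 none, 1 retransmit/loss-probe,
 * 2 keepalive, 4 zero-window probe.
 */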
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	const struct inet_sock *inet = inet_sk(sp);
	const struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
	int rx_queue;
	int state;

	dest  = &sp->sk_v6_daddr;
	src   = &sp->sk_v6_rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
	    icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
	    icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	state = inet_sk_state_load(sp);
	if (state == TCP_LISTEN)
		rx_queue = sp->sk_ack_backlog;
	else
		/* Because we don't lock the socket,
		 * we might find a transient negative value.
		 */
		rx_queue = max_t(int, READ_ONCE(tp->rcv_nxt) -
				      READ_ONCE(tp->copied_seq), 0);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   state,
		   READ_ONCE(tp->write_seq) - tp->snd_una,
		   rx_queue,
		   timer_active,
		   jiffies_delta_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   refcount_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
		   tp->snd_cwnd,
		   state == TCP_LISTEN ?
			fastopenq->max_qlen :
			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
		   );
}

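/* Emit one line of /proc/net/tcp6 for a TIME_WAIT socket.  Most fields
 * are meaningless here and printed as zero; timer code 3 reports the
 * remaining TIME_WAIT lifetime.
 */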
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	long delta = tw->tw_timer.expires - jiffies;
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
		   refcount_read(&tw->tw_refcnt), tw);
}

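/* seq_file ->show() hook: print the header line for the start token,
 * then dispatch on socket state, since the iterator yields full sockets,
 * TIME_WAIT sockets and SYN_RECV request sockets alike.
 */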
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;
	struct sock *sk = v;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	if (sk->sk_state == TCP_TIME_WAIT)
		get_timewait6_sock(seq, v, st->num);
	else if (sk->sk_state == TCP_NEW_SYN_RECV)
		get_openreq6(seq, v, st->num);
	else
		get_tcp6_sock(seq, v, st->num);
out:
	return 0;
}

static const struct seq_operations tcp6_seq_ops = {
	.show		= tcp6_seq_show,
	.start		= tcp_seq_start,
	.next		= tcp_seq_next,
	.stop		= tcp_seq_stop,
};

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.family		= AF_INET6,
};

int __net_init tcp6_proc_init(struct net *net)
{
	if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
			sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
		return -ENOMEM;
	return 0;
}

void tcp6_proc_exit(struct net *net)
{
	remove_proc_entry("tcp6", net->proc_net);
}
#endif

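/* The proto block exported to the core socket layer for AF_INET6
 * SOCK_STREAM sockets.  Most hooks are shared with IPv4 TCP; only the
 * address-family specific pieces (connect, receive, hashing) differ.
 */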
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.pre_connect		= tcp_v6_pre_connect,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.keepalive		= tcp_set_keepalive,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.release_cb		= tcp_release_cb,
	.hash			= inet6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.leave_memory_pressure	= tcp_leave_memory_pressure,
	.stream_memory_free	= tcp_stream_memory_free,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_wmem),
	.sysctl_rmem_offset	= offsetof(struct net, ipv4.sysctl_tcp_rmem),
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_TYPESAFE_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
	.diag_destroy		= tcp_abort,
};

/* thinking of making this const? Don't.
 * early_demux can change based on sysctl.
 */
static struct inet6_protocol tcpv6_protocol = {
	.early_demux	=	tcp_v6_early_demux,
	.early_demux_handler =  tcp_v6_early_demux,
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

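/* Called once per batch of exiting network namespaces: purge any
 * remaining TIME_WAIT sockets so their timers don't outlive the netns.
 */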
static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

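/* Module init: register the IPv6 protocol handler first, then the
 * socket-level protosw, then the per-namespace state; on failure unwind
 * in reverse order, mirroring tcpv6_exit() below.
 */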
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}