1 /* (C) 1999-2001 Paul `Rusty' Russell
2  * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3  * (C) 2002-2013 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4  * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10 
11 #include <linux/types.h>
12 #include <linux/timer.h>
13 #include <linux/module.h>
14 #include <linux/in.h>
15 #include <linux/tcp.h>
16 #include <linux/spinlock.h>
17 #include <linux/skbuff.h>
18 #include <linux/ipv6.h>
19 #include <net/ip6_checksum.h>
20 #include <asm/unaligned.h>
21 
22 #include <net/tcp.h>
23 
24 #include <linux/netfilter.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_l4proto.h>
29 #include <net/netfilter/nf_conntrack_ecache.h>
30 #include <net/netfilter/nf_conntrack_seqadj.h>
31 #include <net/netfilter/nf_conntrack_synproxy.h>
32 #include <net/netfilter/nf_conntrack_timeout.h>
33 #include <net/netfilter/nf_log.h>
34 #include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
35 #include <net/netfilter/ipv6/nf_conntrack_ipv6.h>
36 
37 /* "Be conservative in what you do,
38     be liberal in what you accept from others."
39     If it's non-zero, we mark only out of window RST segments as INVALID. */
40 static int nf_ct_tcp_be_liberal __read_mostly = 0;
41 
42 /* If it is set to zero, we disable picking up already established
43    connections. */
44 static int nf_ct_tcp_loose __read_mostly = 1;
45 
46 /* Max number of retransmitted packets without receiving an (acceptable)
47    ACK from the destination. If this number is reached, a shorter timer
48    will be started. */
49 static int nf_ct_tcp_max_retrans __read_mostly = 3;
50 
51   /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
52      closely.  They're more complex. --RR */
53 
54 static const char *const tcp_conntrack_names[] = {
55 	"NONE",
56 	"SYN_SENT",
57 	"SYN_RECV",
58 	"ESTABLISHED",
59 	"FIN_WAIT",
60 	"CLOSE_WAIT",
61 	"LAST_ACK",
62 	"TIME_WAIT",
63 	"CLOSE",
64 	"SYN_SENT2",
65 };
66 
67 #define SECS * HZ
68 #define MINS * 60 SECS
69 #define HOURS * 60 MINS
70 #define DAYS * 24 HOURS
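/* The postfix macros above expand to plain multiplications in jiffies,
 * e.g. "2 MINS" becomes "2 * 60 * HZ" and "5 DAYS" becomes
 * "5 * 24 * 60 * 60 * HZ".
 */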
71 
72 static const unsigned int tcp_timeouts[TCP_CONNTRACK_TIMEOUT_MAX] = {
73 	[TCP_CONNTRACK_SYN_SENT]	= 2 MINS,
74 	[TCP_CONNTRACK_SYN_RECV]	= 60 SECS,
75 	[TCP_CONNTRACK_ESTABLISHED]	= 5 DAYS,
76 	[TCP_CONNTRACK_FIN_WAIT]	= 2 MINS,
77 	[TCP_CONNTRACK_CLOSE_WAIT]	= 60 SECS,
78 	[TCP_CONNTRACK_LAST_ACK]	= 30 SECS,
79 	[TCP_CONNTRACK_TIME_WAIT]	= 2 MINS,
80 	[TCP_CONNTRACK_CLOSE]		= 10 SECS,
81 	[TCP_CONNTRACK_SYN_SENT2]	= 2 MINS,
82 /* RFC1122 says the R2 limit should be at least 100 seconds.
83    Linux uses 15 packets as limit, which corresponds
84    to ~13-30min depending on RTO. */
85 	[TCP_CONNTRACK_RETRANS]		= 5 MINS,
86 	[TCP_CONNTRACK_UNACK]		= 5 MINS,
87 };
88 
89 #define sNO TCP_CONNTRACK_NONE
90 #define sSS TCP_CONNTRACK_SYN_SENT
91 #define sSR TCP_CONNTRACK_SYN_RECV
92 #define sES TCP_CONNTRACK_ESTABLISHED
93 #define sFW TCP_CONNTRACK_FIN_WAIT
94 #define sCW TCP_CONNTRACK_CLOSE_WAIT
95 #define sLA TCP_CONNTRACK_LAST_ACK
96 #define sTW TCP_CONNTRACK_TIME_WAIT
97 #define sCL TCP_CONNTRACK_CLOSE
98 #define sS2 TCP_CONNTRACK_SYN_SENT2
99 #define sIV TCP_CONNTRACK_MAX
100 #define sIG TCP_CONNTRACK_IGNORE
101 
102 /* What TCP flags are set from RST/SYN/FIN/ACK. */
103 enum tcp_bit_set {
104 	TCP_SYN_SET,
105 	TCP_SYNACK_SET,
106 	TCP_FIN_SET,
107 	TCP_ACK_SET,
108 	TCP_RST_SET,
109 	TCP_NONE_SET,
110 };
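/* get_conntrack_index() below maps a TCP header to one of these,
 * checking RST first, then SYN/SYN-ACK, then FIN, then ACK.
 */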
111 
112 /*
113  * The TCP state transition table needs a few words...
114  *
115  * We are the man in the middle. All the packets go through us
116  * but might get lost in transit to the destination.
117  * It is assumed that the destinations can't receive segments
118  * we haven't seen.
119  *
120  * The checked segment is in window, but our windows are *not*
121  * equivalent to those of the sender/receiver. We always
122  * try to guess the state of the current sender.
123  *
124  * The meaning of the states are:
125  *
126  * NONE:	initial state
127  * SYN_SENT:	SYN-only packet seen
128  * SYN_SENT2:	SYN-only packet seen from reply dir, simultaneous open
129  * SYN_RECV:	SYN-ACK packet seen
130  * ESTABLISHED:	ACK packet seen
131  * FIN_WAIT:	FIN packet seen
132  * CLOSE_WAIT:	ACK seen (after FIN)
133  * LAST_ACK:	FIN seen (after FIN)
134  * TIME_WAIT:	last ACK seen
135  * CLOSE:	closed connection (RST)
136  *
137  * Packets marked as IGNORED (sIG):
138  *	if they may be either invalid or valid
139  *	and the receiver may send back a connection
140  *	closing RST or a SYN/ACK.
141  *
142  * Packets marked as INVALID (sIV):
143  *	if we regard them as truly invalid packets
144  */
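/* The table below is indexed as
 * tcp_conntracks[direction][get_conntrack_index(tcph)][old_state]
 * and yields the new conntrack state for the packet.
 */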
145 static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
146 	{
147 /* ORIGINAL */
148 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
149 /*syn*/	   { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sS2 },
150 /*
151  *	sNO -> sSS	Initialize a new connection
152  *	sSS -> sSS	Retransmitted SYN
153  *	sS2 -> sS2	Late retransmitted SYN
154  *	sSR -> sIG
155  *	sES -> sIG	Error: SYNs in window outside the SYN_SENT state
156  *			are errors. Receiver will reply with RST
157  *			and close the connection.
158  *			Or we are not in sync and hold a dead connection.
159  *	sFW -> sIG
160  *	sCW -> sIG
161  *	sLA -> sIG
162  *	sTW -> sSS	Reopened connection (RFC 1122).
163  *	sCL -> sSS
164  */
165 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
166 /*synack*/ { sIV, sIV, sSR, sIV, sIV, sIV, sIV, sIV, sIV, sSR },
167 /*
168  *	sNO -> sIV	Too late and no reason to do anything
169  *	sSS -> sIV	Client can't send SYN and then SYN/ACK
170  *	sS2 -> sSR	SYN/ACK sent to SYN2 in simultaneous open
171  *	sSR -> sSR	Late retransmitted SYN/ACK in simultaneous open
172  *	sES -> sIV	Invalid SYN/ACK packets sent by the client
173  *	sFW -> sIV
174  *	sCW -> sIV
175  *	sLA -> sIV
176  *	sTW -> sIV
177  *	sCL -> sIV
178  */
179 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
180 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
181 /*
182  *	sNO -> sIV	Too late and no reason to do anything...
183  *	sSS -> sIV	Client might not send FIN in this state:
184  *			we enforce waiting for a SYN/ACK reply first.
185  *	sS2 -> sIV
186  *	sSR -> sFW	Close started.
187  *	sES -> sFW
188  *	sFW -> sLA	FIN seen in both directions, waiting for
189  *			the last ACK.
190  *			Might be a retransmitted FIN as well...
191  *	sCW -> sLA
192  *	sLA -> sLA	Retransmitted FIN. Remain in the same state.
193  *	sTW -> sTW
194  *	sCL -> sCL
195  */
196 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
197 /*ack*/	   { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
198 /*
199  *	sNO -> sES	Assumed.
200  *	sSS -> sIV	ACK is invalid: we haven't seen a SYN/ACK yet.
201  *	sS2 -> sIV
202  *	sSR -> sES	Established state is reached.
203  *	sES -> sES	:-)
204  *	sFW -> sCW	Normal close request answered by ACK.
205  *	sCW -> sCW
206  *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
207  *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
208  *	sCL -> sCL
209  */
210 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
211 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
212 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
213 	},
214 	{
215 /* REPLY */
216 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
217 /*syn*/	   { sIV, sS2, sIV, sIV, sIV, sIV, sIV, sSS, sIV, sS2 },
218 /*
219  *	sNO -> sIV	Never reached.
220  *	sSS -> sS2	Simultaneous open
221  *	sS2 -> sS2	Retransmitted simultaneous SYN
222  *	sSR -> sIV	Invalid SYN packets sent by the server
223  *	sES -> sIV
224  *	sFW -> sIV
225  *	sCW -> sIV
226  *	sLA -> sIV
227  *	sTW -> sSS	Reopened connection, but server may have switched role
228  *	sCL -> sIV
229  */
230 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
231 /*synack*/ { sIV, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIG, sSR },
232 /*
233  *	sSS -> sSR	Standard open.
234  *	sS2 -> sSR	Simultaneous open
235  *	sSR -> sIG	Retransmitted SYN/ACK, ignore it.
236  *	sES -> sIG	Late retransmitted SYN/ACK?
237  *	sFW -> sIG	Might be SYN/ACK answering ignored SYN
238  *	sCW -> sIG
239  *	sLA -> sIG
240  *	sTW -> sIG
241  *	sCL -> sIG
242  */
243 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
244 /*fin*/    { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
245 /*
246  *	sSS -> sIV	Server might not send FIN in this state.
247  *	sS2 -> sIV
248  *	sSR -> sFW	Close started.
249  *	sES -> sFW
250  *	sFW -> sLA	FIN seen in both directions.
251  *	sCW -> sLA
252  *	sLA -> sLA	Retransmitted FIN.
253  *	sTW -> sTW
254  *	sCL -> sCL
255  */
256 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
257 /*ack*/	   { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIG },
258 /*
259  *	sSS -> sIG	Might be a half-open connection.
260  *	sS2 -> sIG
261  *	sSR -> sSR	Might answer late resent SYN.
262  *	sES -> sES	:-)
263  *	sFW -> sCW	Normal close request answered by ACK.
264  *	sCW -> sCW
265  *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
266  *	sTW -> sTW	Retransmitted last ACK.
267  *	sCL -> sCL
268  */
269 /* 	     sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sS2	*/
270 /*rst*/    { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL },
271 /*none*/   { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
272 	}
273 };
274 
275 static inline struct nf_tcp_net *tcp_pernet(struct net *net)
276 {
277 	return &net->ct.nf_ct_proto.tcp;
278 }
279 
280 #ifdef CONFIG_NF_CONNTRACK_PROCFS
281 /* Print out the private part of the conntrack. */
282 static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
283 {
284 	if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
285 		return;
286 
287 	seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]);
288 }
289 #endif
290 
291 static unsigned int get_conntrack_index(const struct tcphdr *tcph)
292 {
293 	if (tcph->rst) return TCP_RST_SET;
294 	else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
295 	else if (tcph->fin) return TCP_FIN_SET;
296 	else if (tcph->ack) return TCP_ACK_SET;
297 	else return TCP_NONE_SET;
298 }
299 
300 /* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
301    in IP Filter' by Guido van Rooij.
302 
303    http://www.sane.nl/events/sane2000/papers.html
304    http://www.darkart.com/mirrors/www.obfuscation.org/ipf/
305 
306    The boundaries and the conditions are changed according to RFC793:
307    the packet must intersect the window (i.e. segments may be
308    after the right or before the left edge) and thus receivers may ACK
309    segments after the right edge of the window.
310 
311 	td_maxend = max(sack + max(win,1)) seen in reply packets
312 	td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
313 	td_maxwin += seq + len - sender.td_maxend
314 			if seq + len > sender.td_maxend
315 	td_end    = max(seq + len) seen in sent packets
316 
317    I.   Upper bound for valid data:	seq <= sender.td_maxend
318    II.  Lower bound for valid data:	seq + len >= sender.td_end - receiver.td_maxwin
319    III.	Upper bound for valid (s)ack:   sack <= receiver.td_end
320    IV.	Lower bound for valid (s)ack:	sack >= receiver.td_end - MAXACKWINDOW
321 
322    where sack is the highest right edge of sack block found in the packet
323    or ack in the case of packet without SACK option.
324 
325    The upper bound limit for a valid (s)ack is not ignored -
326    we don't have to deal with fragments.
327 */
328 
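/* Return the sequence number just past this segment: seq plus the TCP
 * payload length, counting SYN and FIN as one octet each.
 */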
329 static inline __u32 segment_seq_plus_len(__u32 seq,
330 					 size_t len,
331 					 unsigned int dataoff,
332 					 const struct tcphdr *tcph)
333 {
334 	/* XXX Should I use payload length field in IP/IPv6 header ?
335 	 * - YK */
336 	return (seq + len - dataoff - tcph->doff*4
337 		+ (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
338 }
339 
340 /* Fixme: what about big packets? */
341 #define MAXACKWINCONST			66000
342 #define MAXACKWINDOW(sender)						\
343 	((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin	\
344 					      : MAXACKWINCONST)
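/* Lower bound window used in check IV above: at least a full unscaled
 * 64K window plus some slack, or the largest window the sender has
 * advertised so far, whichever is bigger.
 */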
345 
346 /*
347  * Simplified tcp_parse_options routine from tcp_input.c
348  */
349 static void tcp_options(const struct sk_buff *skb,
350 			unsigned int dataoff,
351 			const struct tcphdr *tcph,
352 			struct ip_ct_tcp_state *state)
353 {
354 	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
355 	const unsigned char *ptr;
356 	int length = (tcph->doff*4) - sizeof(struct tcphdr);
357 
358 	if (!length)
359 		return;
360 
361 	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
362 				 length, buff);
363 	BUG_ON(ptr == NULL);
364 
365 	state->td_scale =
366 	state->flags = 0;
367 
368 	while (length > 0) {
369 		int opcode=*ptr++;
370 		int opsize;
371 
372 		switch (opcode) {
373 		case TCPOPT_EOL:
374 			return;
375 		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
376 			length--;
377 			continue;
378 		default:
379 			if (length < 2)
380 				return;
381 			opsize=*ptr++;
382 			if (opsize < 2) /* "silly options" */
383 				return;
384 			if (opsize > length)
385 				return;	/* don't parse partial options */
386 
387 			if (opcode == TCPOPT_SACK_PERM
388 			    && opsize == TCPOLEN_SACK_PERM)
389 				state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
390 			else if (opcode == TCPOPT_WINDOW
391 				 && opsize == TCPOLEN_WINDOW) {
392 				state->td_scale = *(u_int8_t *)ptr;
393 
394 				if (state->td_scale > TCP_MAX_WSCALE)
395 					state->td_scale = TCP_MAX_WSCALE;
396 
397 				state->flags |=
398 					IP_CT_TCP_FLAG_WINDOW_SCALE;
399 			}
400 			ptr += opsize - 2;
401 			length -= opsize;
402 		}
403 	}
404 }
405 
406 static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff,
407                      const struct tcphdr *tcph, __u32 *sack)
408 {
409 	unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
410 	const unsigned char *ptr;
411 	int length = (tcph->doff*4) - sizeof(struct tcphdr);
412 	__u32 tmp;
413 
414 	if (!length)
415 		return;
416 
417 	ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
418 				 length, buff);
419 	BUG_ON(ptr == NULL);
420 
421 	/* Fast path for timestamp-only option */
422 	if (length == TCPOLEN_TSTAMP_ALIGNED
423 	    && *(__be32 *)ptr == htonl((TCPOPT_NOP << 24)
424 				       | (TCPOPT_NOP << 16)
425 				       | (TCPOPT_TIMESTAMP << 8)
426 				       | TCPOLEN_TIMESTAMP))
427 		return;
428 
429 	while (length > 0) {
430 		int opcode = *ptr++;
431 		int opsize, i;
432 
433 		switch (opcode) {
434 		case TCPOPT_EOL:
435 			return;
436 		case TCPOPT_NOP:	/* Ref: RFC 793 section 3.1 */
437 			length--;
438 			continue;
439 		default:
440 			if (length < 2)
441 				return;
442 			opsize = *ptr++;
443 			if (opsize < 2) /* "silly options" */
444 				return;
445 			if (opsize > length)
446 				return;	/* don't parse partial options */
447 
448 			if (opcode == TCPOPT_SACK
449 			    && opsize >= (TCPOLEN_SACK_BASE
450 					  + TCPOLEN_SACK_PERBLOCK)
451 			    && !((opsize - TCPOLEN_SACK_BASE)
452 				 % TCPOLEN_SACK_PERBLOCK)) {
453 				for (i = 0;
454 				     i < (opsize - TCPOLEN_SACK_BASE);
455 				     i += TCPOLEN_SACK_PERBLOCK) {
456 					tmp = get_unaligned_be32((__be32 *)(ptr+i)+1);
457 
458 					if (after(tmp, *sack))
459 						*sack = tmp;
460 				}
461 				return;
462 			}
463 			ptr += opsize - 2;
464 			length -= opsize;
465 		}
466 	}
467 }
468 
469 static bool tcp_in_window(const struct nf_conn *ct,
470 			  struct ip_ct_tcp *state,
471 			  enum ip_conntrack_dir dir,
472 			  unsigned int index,
473 			  const struct sk_buff *skb,
474 			  unsigned int dataoff,
475 			  const struct tcphdr *tcph)
476 {
477 	struct net *net = nf_ct_net(ct);
478 	struct nf_tcp_net *tn = tcp_pernet(net);
479 	struct ip_ct_tcp_state *sender = &state->seen[dir];
480 	struct ip_ct_tcp_state *receiver = &state->seen[!dir];
481 	const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
482 	__u32 seq, ack, sack, end, win, swin;
483 	s32 receiver_offset;
484 	bool res, in_recv_win;
485 
486 	/*
487 	 * Get the required data from the packet.
488 	 */
489 	seq = ntohl(tcph->seq);
490 	ack = sack = ntohl(tcph->ack_seq);
491 	win = ntohs(tcph->window);
492 	end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
493 
494 	if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
495 		tcp_sack(skb, dataoff, tcph, &sack);
496 
497 	/* Take into account NAT sequence number mangling */
498 	receiver_offset = nf_ct_seq_offset(ct, !dir, ack - 1);
499 	ack -= receiver_offset;
500 	sack -= receiver_offset;
501 
502 	pr_debug("tcp_in_window: START\n");
503 	pr_debug("tcp_in_window: ");
504 	nf_ct_dump_tuple(tuple);
505 	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
506 		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
507 	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
508 		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
509 		 sender->td_end, sender->td_maxend, sender->td_maxwin,
510 		 sender->td_scale,
511 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
512 		 receiver->td_scale);
513 
514 	if (sender->td_maxwin == 0) {
515 		/*
516 		 * Initialize sender data.
517 		 */
518 		if (tcph->syn) {
519 			/*
520 			 * SYN-ACK in reply to a SYN
521 			 * or SYN from reply direction in simultaneous open.
522 			 */
523 			sender->td_end =
524 			sender->td_maxend = end;
525 			sender->td_maxwin = (win == 0 ? 1 : win);
526 
527 			tcp_options(skb, dataoff, tcph, sender);
528 			/*
529 			 * RFC 1323:
530 			 * Both sides must send the Window Scale option
531 			 * to enable window scaling in either direction.
532 			 */
533 			if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
534 			      && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
535 				sender->td_scale =
536 				receiver->td_scale = 0;
537 			if (!tcph->ack)
538 				/* Simultaneous open */
539 				return true;
540 		} else {
541 			/*
542 			 * We are in the middle of a connection,
543 			 * its history is lost for us.
544 			 * Let's try to use the data from the packet.
545 			 */
546 			sender->td_end = end;
547 			swin = win << sender->td_scale;
548 			sender->td_maxwin = (swin == 0 ? 1 : swin);
549 			sender->td_maxend = end + sender->td_maxwin;
550 			/*
551 			 * We haven't seen traffic in the other direction yet
552 			 * but we have to tweak window tracking to pass III
553 			 * and IV until that happens.
554 			 */
555 			if (receiver->td_maxwin == 0)
556 				receiver->td_end = receiver->td_maxend = sack;
557 		}
558 	} else if (((state->state == TCP_CONNTRACK_SYN_SENT
559 		     && dir == IP_CT_DIR_ORIGINAL)
560 		   || (state->state == TCP_CONNTRACK_SYN_RECV
561 		     && dir == IP_CT_DIR_REPLY))
562 		   && after(end, sender->td_end)) {
563 		/*
564 		 * RFC 793: "if a TCP is reinitialized ... then it need
565 		 * not wait at all; it must only be sure to use sequence
566 		 * numbers larger than those recently used."
567 		 */
568 		sender->td_end =
569 		sender->td_maxend = end;
570 		sender->td_maxwin = (win == 0 ? 1 : win);
571 
572 		tcp_options(skb, dataoff, tcph, sender);
573 	}
574 
575 	if (!(tcph->ack)) {
576 		/*
577 		 * If there is no ACK, just pretend it was set and OK.
578 		 */
579 		ack = sack = receiver->td_end;
580 	} else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
581 		    (TCP_FLAG_ACK|TCP_FLAG_RST))
582 		   && (ack == 0)) {
583 		/*
584 		 * Broken TCP stacks that set the ACK flag in RST packets
585 		 * but carry a zero ack value.
586 		 */
587 		ack = sack = receiver->td_end;
588 	}
589 
590 	if (tcph->rst && seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)
591 		/*
592 		 * RST sent answering SYN.
593 		 */
594 		seq = end = sender->td_end;
595 
596 	pr_debug("tcp_in_window: ");
597 	nf_ct_dump_tuple(tuple);
598 	pr_debug("seq=%u ack=%u+(%d) sack=%u+(%d) win=%u end=%u\n",
599 		 seq, ack, receiver_offset, sack, receiver_offset, win, end);
600 	pr_debug("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
601 		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
602 		 sender->td_end, sender->td_maxend, sender->td_maxwin,
603 		 sender->td_scale,
604 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
605 		 receiver->td_scale);
606 
607 	/* Is the ending sequence in the receive window (if available)? */
608 	in_recv_win = !receiver->td_maxwin ||
609 		      after(end, sender->td_end - receiver->td_maxwin - 1);
610 
611 	pr_debug("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
612 		 before(seq, sender->td_maxend + 1),
613 		 (in_recv_win ? 1 : 0),
614 		 before(sack, receiver->td_end + 1),
615 		 after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1));
616 
617 	if (before(seq, sender->td_maxend + 1) &&
618 	    in_recv_win &&
619 	    before(sack, receiver->td_end + 1) &&
620 	    after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) {
621 		/*
622 		 * Take into account window scaling (RFC 1323).
623 		 */
624 		if (!tcph->syn)
625 			win <<= sender->td_scale;
626 
627 		/*
628 		 * Update sender data.
629 		 */
630 		swin = win + (sack - ack);
631 		if (sender->td_maxwin < swin)
632 			sender->td_maxwin = swin;
633 		if (after(end, sender->td_end)) {
634 			sender->td_end = end;
635 			sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
636 		}
637 		if (tcph->ack) {
638 			if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) {
639 				sender->td_maxack = ack;
640 				sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET;
641 			} else if (after(ack, sender->td_maxack))
642 				sender->td_maxack = ack;
643 		}
644 
645 		/*
646 		 * Update receiver data.
647 		 */
648 		if (receiver->td_maxwin != 0 && after(end, sender->td_maxend))
649 			receiver->td_maxwin += end - sender->td_maxend;
650 		if (after(sack + win, receiver->td_maxend - 1)) {
651 			receiver->td_maxend = sack + win;
652 			if (win == 0)
653 				receiver->td_maxend++;
654 		}
655 		if (ack == receiver->td_end)
656 			receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED;
657 
658 		/*
659 		 * Check retransmissions.
660 		 */
661 		if (index == TCP_ACK_SET) {
662 			if (state->last_dir == dir
663 			    && state->last_seq == seq
664 			    && state->last_ack == ack
665 			    && state->last_end == end
666 			    && state->last_win == win)
667 				state->retrans++;
668 			else {
669 				state->last_dir = dir;
670 				state->last_seq = seq;
671 				state->last_ack = ack;
672 				state->last_end = end;
673 				state->last_win = win;
674 				state->retrans = 0;
675 			}
676 		}
677 		res = true;
678 	} else {
679 		res = false;
680 		if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL ||
681 		    tn->tcp_be_liberal)
682 			res = true;
683 		if (!res) {
684 			nf_ct_l4proto_log_invalid(skb, ct,
685 			"%s",
686 			before(seq, sender->td_maxend + 1) ?
687 			in_recv_win ?
688 			before(sack, receiver->td_end + 1) ?
689 			after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG"
690 			: "ACK is under the lower bound (possible overly delayed ACK)"
691 			: "ACK is over the upper bound (ACKed data not seen yet)"
692 			: "SEQ is under the lower bound (already ACKed data retransmitted)"
693 			: "SEQ is over the upper bound (over the window of the receiver)");
694 		}
695 	}
696 
697 	pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u "
698 		 "receiver end=%u maxend=%u maxwin=%u\n",
699 		 res, sender->td_end, sender->td_maxend, sender->td_maxwin,
700 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
701 
702 	return res;
703 }
704 
705 /* table of valid flag combinations - PUSH, ECE and CWR are always valid */
706 static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK|
707 				 TCPHDR_URG) + 1] =
708 {
709 	[TCPHDR_SYN]				= 1,
710 	[TCPHDR_SYN|TCPHDR_URG]			= 1,
711 	[TCPHDR_SYN|TCPHDR_ACK]			= 1,
712 	[TCPHDR_RST]				= 1,
713 	[TCPHDR_RST|TCPHDR_ACK]			= 1,
714 	[TCPHDR_FIN|TCPHDR_ACK]			= 1,
715 	[TCPHDR_FIN|TCPHDR_ACK|TCPHDR_URG]	= 1,
716 	[TCPHDR_ACK]				= 1,
717 	[TCPHDR_ACK|TCPHDR_URG]			= 1,
718 };
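/* Indexed by the TCP flag byte with PSH, ECE and CWR masked off (see
 * tcp_error() below); combinations not listed above are invalid.
 */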
719 
720 static void tcp_error_log(const struct sk_buff *skb, struct net *net,
721 			  u8 pf, const char *msg)
722 {
723 	nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg);
724 }
725 
726 /* Protect conntrack against broken packets. Code taken from ipt_unclean.c.  */
727 static int tcp_error(struct net *net, struct nf_conn *tmpl,
728 		     struct sk_buff *skb,
729 		     unsigned int dataoff,
730 		     u_int8_t pf,
731 		     unsigned int hooknum)
732 {
733 	const struct tcphdr *th;
734 	struct tcphdr _tcph;
735 	unsigned int tcplen = skb->len - dataoff;
736 	u_int8_t tcpflags;
737 
738 	/* Smaller than minimal TCP header? */
739 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
740 	if (th == NULL) {
741 		tcp_error_log(skb, net, pf, "short packet");
742 		return -NF_ACCEPT;
743 	}
744 
745 	/* Not whole TCP header or malformed packet */
746 	if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
747 		tcp_error_log(skb, net, pf, "truncated packet");
748 		return -NF_ACCEPT;
749 	}
750 
751 	/* Checksum invalid? Ignore.
752 	 * We skip checking packets on the outgoing path
753 	 * because the checksum is assumed to be correct.
754 	 */
755 	/* FIXME: Source route IP option packets --RR */
756 	if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
757 	    nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) {
758 		tcp_error_log(skb, net, pf, "bad checksum");
759 		return -NF_ACCEPT;
760 	}
761 
762 	/* Check TCP flags. */
763 	tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH));
764 	if (!tcp_valid_flags[tcpflags]) {
765 		tcp_error_log(skb, net, pf, "invalid tcp flag combination");
766 		return -NF_ACCEPT;
767 	}
768 
769 	return NF_ACCEPT;
770 }
771 
772 /* Returns verdict for packet, or -1 for invalid. */
773 static int tcp_packet(struct nf_conn *ct,
774 		      const struct sk_buff *skb,
775 		      unsigned int dataoff,
776 		      enum ip_conntrack_info ctinfo)
777 {
778 	struct net *net = nf_ct_net(ct);
779 	struct nf_tcp_net *tn = tcp_pernet(net);
780 	struct nf_conntrack_tuple *tuple;
781 	enum tcp_conntrack new_state, old_state;
782 	unsigned int index, *timeouts;
783 	enum ip_conntrack_dir dir;
784 	const struct tcphdr *th;
785 	struct tcphdr _tcph;
786 	unsigned long timeout;
787 
788 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
789 	BUG_ON(th == NULL);
790 
791 	spin_lock_bh(&ct->lock);
792 	old_state = ct->proto.tcp.state;
793 	dir = CTINFO2DIR(ctinfo);
794 	index = get_conntrack_index(th);
795 	new_state = tcp_conntracks[dir][index][old_state];
796 	tuple = &ct->tuplehash[dir].tuple;
797 
798 	switch (new_state) {
799 	case TCP_CONNTRACK_SYN_SENT:
800 		if (old_state < TCP_CONNTRACK_TIME_WAIT)
801 			break;
802 		/* RFC 1122: "When a connection is closed actively,
803 		 * it MUST linger in TIME-WAIT state for a time 2xMSL
804 		 * (Maximum Segment Lifetime). However, it MAY accept
805 		 * a new SYN from the remote TCP to reopen the connection
806 		 * directly from TIME-WAIT state, if..."
807 		 * We ignore the conditions because we are in the
808 		 * TIME-WAIT state anyway.
809 		 *
810 		 * Handle aborted connections: we and the server
811 		 * think there is an existing connection but the client
812 		 * aborts it and starts a new one.
813 		 */
814 		if (((ct->proto.tcp.seen[dir].flags
815 		      | ct->proto.tcp.seen[!dir].flags)
816 		     & IP_CT_TCP_FLAG_CLOSE_INIT)
817 		    || (ct->proto.tcp.last_dir == dir
818 		        && ct->proto.tcp.last_index == TCP_RST_SET)) {
819 			/* Attempt to reopen a closed/aborted connection.
820 			 * Delete this connection and look up again. */
821 			spin_unlock_bh(&ct->lock);
822 
823 			/* Only repeat if we can actually remove the timer.
824 			 * Destruction may already be in progress in process
825 			 * context and we must give it a chance to terminate.
826 			 */
827 			if (nf_ct_kill(ct))
828 				return -NF_REPEAT;
829 			return NF_DROP;
830 		}
831 		/* Fall through */
832 	case TCP_CONNTRACK_IGNORE:
833 		/* Ignored packets:
834 		 *
835 		 * Our connection entry may be out of sync, so ignore
836 		 * packets which may signal the real connection between
837 		 * the client and the server.
838 		 *
839 		 * a) SYN in ORIGINAL
840 		 * b) SYN/ACK in REPLY
841 		 * c) ACK in reply direction after initial SYN in original.
842 		 *
843 		 * If the ignored packet is invalid, the receiver will send
844 		 * a RST we'll catch below.
845 		 */
846 		if (index == TCP_SYNACK_SET
847 		    && ct->proto.tcp.last_index == TCP_SYN_SET
848 		    && ct->proto.tcp.last_dir != dir
849 		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
850 			/* b) This SYN/ACK acknowledges a SYN that we earlier
851 			 * ignored as invalid. This means that the client and
852 			 * the server are both in sync, while the firewall is
853 			 * not. We get in sync from the previously annotated
854 			 * values.
855 			 */
856 			old_state = TCP_CONNTRACK_SYN_SENT;
857 			new_state = TCP_CONNTRACK_SYN_RECV;
858 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_end =
859 				ct->proto.tcp.last_end;
860 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxend =
861 				ct->proto.tcp.last_end;
862 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_maxwin =
863 				ct->proto.tcp.last_win == 0 ?
864 					1 : ct->proto.tcp.last_win;
865 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
866 				ct->proto.tcp.last_wscale;
867 			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
868 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
869 				ct->proto.tcp.last_flags;
870 			memset(&ct->proto.tcp.seen[dir], 0,
871 			       sizeof(struct ip_ct_tcp_state));
872 			break;
873 		}
874 		ct->proto.tcp.last_index = index;
875 		ct->proto.tcp.last_dir = dir;
876 		ct->proto.tcp.last_seq = ntohl(th->seq);
877 		ct->proto.tcp.last_end =
878 		    segment_seq_plus_len(ntohl(th->seq), skb->len, dataoff, th);
879 		ct->proto.tcp.last_win = ntohs(th->window);
880 
881 		/* a) This is a SYN in ORIGINAL. The client and the server
882 		 * may be in sync but we are not. In that case, we annotate
883 		 * the TCP options and let the packet go through. If it is a
884 		 * valid SYN packet, the server will reply with a SYN/ACK, and
885 		 * then we'll get in sync. Otherwise, the server potentially
886 		 * responds with a challenge ACK if implementing RFC5961.
887 		 */
888 		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
889 			struct ip_ct_tcp_state seen = {};
890 
891 			ct->proto.tcp.last_flags =
892 			ct->proto.tcp.last_wscale = 0;
893 			tcp_options(skb, dataoff, th, &seen);
894 			if (seen.flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
895 				ct->proto.tcp.last_flags |=
896 					IP_CT_TCP_FLAG_WINDOW_SCALE;
897 				ct->proto.tcp.last_wscale = seen.td_scale;
898 			}
899 			if (seen.flags & IP_CT_TCP_FLAG_SACK_PERM) {
900 				ct->proto.tcp.last_flags |=
901 					IP_CT_TCP_FLAG_SACK_PERM;
902 			}
903 			/* Mark the potential for RFC5961 challenge ACK,
904 			 * this poses a special problem for the LAST_ACK state
905 			 * as the ACK is interpreted as ACKing the last FIN.
906 			 */
907 			if (old_state == TCP_CONNTRACK_LAST_ACK)
908 				ct->proto.tcp.last_flags |=
909 					IP_CT_EXP_CHALLENGE_ACK;
910 		}
911 		spin_unlock_bh(&ct->lock);
912 		nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
913 					  "state %s ", tcp_conntrack_names[old_state]);
914 		return NF_ACCEPT;
915 	case TCP_CONNTRACK_MAX:
916 		/* Special case for SYN proxy: when the SYN to the server or
917 		 * the SYN/ACK from the server is lost, the client may transmit
918 		 * a keep-alive packet while in SYN_SENT state. This needs to
919 		 * be associated with the original conntrack entry in order to
920 		 * generate a new SYN with the correct sequence number.
921 		 */
922 		if (nfct_synproxy(ct) && old_state == TCP_CONNTRACK_SYN_SENT &&
923 		    index == TCP_ACK_SET && dir == IP_CT_DIR_ORIGINAL &&
924 		    ct->proto.tcp.last_dir == IP_CT_DIR_ORIGINAL &&
925 		    ct->proto.tcp.seen[dir].td_end - 1 == ntohl(th->seq)) {
926 			pr_debug("nf_ct_tcp: SYN proxy client keep alive\n");
927 			spin_unlock_bh(&ct->lock);
928 			return NF_ACCEPT;
929 		}
930 
931 		/* Invalid packet */
932 		pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
933 			 dir, get_conntrack_index(th), old_state);
934 		spin_unlock_bh(&ct->lock);
935 		nf_ct_l4proto_log_invalid(skb, ct, "invalid state");
936 		return -NF_ACCEPT;
937 	case TCP_CONNTRACK_TIME_WAIT:
938 		/* RFC 5961 compliance causes stacks to send a "challenge ACK",
939 		 * e.g. in response to spurious SYNs.  Conntrack MUST
940 		 * not believe this ACK is acking the last FIN.
941 		 */
942 		if (old_state == TCP_CONNTRACK_LAST_ACK &&
943 		    index == TCP_ACK_SET &&
944 		    ct->proto.tcp.last_dir != dir &&
945 		    ct->proto.tcp.last_index == TCP_SYN_SET &&
946 		    (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
947 			/* Detected RFC5961 challenge ACK */
948 			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
949 			spin_unlock_bh(&ct->lock);
950 			nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored");
951 			return NF_ACCEPT; /* Don't change state */
952 		}
953 		break;
954 	case TCP_CONNTRACK_SYN_SENT2:
955 		/* tcp_conntracks table is not smart enough to handle
956 		 * simultaneous open.
957 		 */
958 		ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN;
959 		break;
960 	case TCP_CONNTRACK_SYN_RECV:
961 		if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET &&
962 		    ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN)
963 			new_state = TCP_CONNTRACK_ESTABLISHED;
964 		break;
965 	case TCP_CONNTRACK_CLOSE:
966 		if (index == TCP_RST_SET
967 		    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
968 		    && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) {
969 			/* Invalid RST  */
970 			spin_unlock_bh(&ct->lock);
971 			nf_ct_l4proto_log_invalid(skb, ct, "invalid rst");
972 			return -NF_ACCEPT;
973 		}
974 		if (index == TCP_RST_SET
975 		    && ((test_bit(IPS_SEEN_REPLY_BIT, &ct->status)
976 			 && ct->proto.tcp.last_index == TCP_SYN_SET)
977 			|| (!test_bit(IPS_ASSURED_BIT, &ct->status)
978 			    && ct->proto.tcp.last_index == TCP_ACK_SET))
979 		    && ntohl(th->ack_seq) == ct->proto.tcp.last_end) {
980 			/* RST sent to invalid SYN or ACK we had let through
981 			 * at a) and c) above:
982 			 *
983 			 * a) SYN was in window then
984 			 * c) we hold a half-open connection.
985 			 *
986 			 * Delete our connection entry.
987 			 * We skip window checking, because packet might ACK
988 			 * segments we ignored. */
989 			goto in_window;
990 		}
991 		/* Just fall through */
992 	default:
993 		/* Keep compilers happy. */
994 		break;
995 	}
996 
997 	if (!tcp_in_window(ct, &ct->proto.tcp, dir, index,
998 			   skb, dataoff, th)) {
999 		spin_unlock_bh(&ct->lock);
1000 		return -NF_ACCEPT;
1001 	}
1002      in_window:
1003 	/* From now on we have got in-window packets */
1004 	ct->proto.tcp.last_index = index;
1005 	ct->proto.tcp.last_dir = dir;
1006 
1007 	pr_debug("tcp_conntracks: ");
1008 	nf_ct_dump_tuple(tuple);
1009 	pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
1010 		 (th->syn ? 1 : 0), (th->ack ? 1 : 0),
1011 		 (th->fin ? 1 : 0), (th->rst ? 1 : 0),
1012 		 old_state, new_state);
1013 
1014 	ct->proto.tcp.state = new_state;
1015 	if (old_state != new_state
1016 	    && new_state == TCP_CONNTRACK_FIN_WAIT)
1017 		ct->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
1018 
1019 	timeouts = nf_ct_timeout_lookup(ct);
1020 	if (!timeouts)
1021 		timeouts = tn->timeouts;
1022 
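	/* Choose the timeout: use the (shorter) RETRANS timeout after too
	 * many retransmissions or after a zero window announcement, the
	 * (shorter) UNACK timeout while data is still unacknowledged, and
	 * the per-state timeout otherwise.
	 */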
1023 	if (ct->proto.tcp.retrans >= tn->tcp_max_retrans &&
1024 	    timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1025 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
1026 	else if ((ct->proto.tcp.seen[0].flags | ct->proto.tcp.seen[1].flags) &
1027 		 IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED &&
1028 		 timeouts[new_state] > timeouts[TCP_CONNTRACK_UNACK])
1029 		timeout = timeouts[TCP_CONNTRACK_UNACK];
1030 	else if (ct->proto.tcp.last_win == 0 &&
1031 		 timeouts[new_state] > timeouts[TCP_CONNTRACK_RETRANS])
1032 		timeout = timeouts[TCP_CONNTRACK_RETRANS];
1033 	else
1034 		timeout = timeouts[new_state];
1035 	spin_unlock_bh(&ct->lock);
1036 
1037 	if (new_state != old_state)
1038 		nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
1039 
1040 	if (!test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
1041 		/* If only reply is a RST, we can consider ourselves not to
1042 		   have an established connection: this is a fairly common
1043 		   problem case, so we can delete the conntrack
1044 		   immediately.  --RR */
1045 		if (th->rst) {
1046 			nf_ct_kill_acct(ct, ctinfo, skb);
1047 			return NF_ACCEPT;
1048 		}
1049 		/* ESTABLISHED without SEEN_REPLY, i.e. mid-connection
1050 		 * pickup with loose=1. Avoid large ESTABLISHED timeout.
1051 		 */
1052 		if (new_state == TCP_CONNTRACK_ESTABLISHED &&
1053 		    timeout > timeouts[TCP_CONNTRACK_UNACK])
1054 			timeout = timeouts[TCP_CONNTRACK_UNACK];
1055 	} else if (!test_bit(IPS_ASSURED_BIT, &ct->status)
1056 		   && (old_state == TCP_CONNTRACK_SYN_RECV
1057 		       || old_state == TCP_CONNTRACK_ESTABLISHED)
1058 		   && new_state == TCP_CONNTRACK_ESTABLISHED) {
1059 		/* Set ASSURED if we see a valid ACK in ESTABLISHED
1060 		   after SYN_RECV or a valid answer for a picked up
1061 		   connection. */
1062 		set_bit(IPS_ASSURED_BIT, &ct->status);
1063 		nf_conntrack_event_cache(IPCT_ASSURED, ct);
1064 	}
1065 	nf_ct_refresh_acct(ct, ctinfo, skb, timeout);
1066 
1067 	return NF_ACCEPT;
1068 }
1069 
1070 /* Called when a new connection for this protocol found. */
1071 static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,
1072 		    unsigned int dataoff)
1073 {
1074 	enum tcp_conntrack new_state;
1075 	const struct tcphdr *th;
1076 	struct tcphdr _tcph;
1077 	struct net *net = nf_ct_net(ct);
1078 	struct nf_tcp_net *tn = tcp_pernet(net);
1079 	const struct ip_ct_tcp_state *sender = &ct->proto.tcp.seen[0];
1080 	const struct ip_ct_tcp_state *receiver = &ct->proto.tcp.seen[1];
1081 
1082 	th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph);
1083 	BUG_ON(th == NULL);
1084 
1085 	/* Don't need lock here: this conntrack not in circulation yet */
1086 	new_state = tcp_conntracks[0][get_conntrack_index(th)][TCP_CONNTRACK_NONE];
1087 
1088 	/* Invalid: delete conntrack */
1089 	if (new_state >= TCP_CONNTRACK_MAX) {
1090 		pr_debug("nf_ct_tcp: invalid new deleting.\n");
1091 		return false;
1092 	}
1093 
1094 	if (new_state == TCP_CONNTRACK_SYN_SENT) {
1095 		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1096 		/* SYN packet */
1097 		ct->proto.tcp.seen[0].td_end =
1098 			segment_seq_plus_len(ntohl(th->seq), skb->len,
1099 					     dataoff, th);
1100 		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1101 		if (ct->proto.tcp.seen[0].td_maxwin == 0)
1102 			ct->proto.tcp.seen[0].td_maxwin = 1;
1103 		ct->proto.tcp.seen[0].td_maxend =
1104 			ct->proto.tcp.seen[0].td_end;
1105 
1106 		tcp_options(skb, dataoff, th, &ct->proto.tcp.seen[0]);
1107 	} else if (tn->tcp_loose == 0) {
1108 		/* Don't try to pick up connections. */
1109 		return false;
1110 	} else {
1111 		memset(&ct->proto.tcp, 0, sizeof(ct->proto.tcp));
1112 		/*
1113 		 * We are in the middle of a connection,
1114 		 * its history is lost for us.
1115 		 * Let's try to use the data from the packet.
1116 		 */
1117 		ct->proto.tcp.seen[0].td_end =
1118 			segment_seq_plus_len(ntohl(th->seq), skb->len,
1119 					     dataoff, th);
1120 		ct->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
1121 		if (ct->proto.tcp.seen[0].td_maxwin == 0)
1122 			ct->proto.tcp.seen[0].td_maxwin = 1;
1123 		ct->proto.tcp.seen[0].td_maxend =
1124 			ct->proto.tcp.seen[0].td_end +
1125 			ct->proto.tcp.seen[0].td_maxwin;
1126 
1127 		/* We assume SACK and liberal window checking to handle
1128 		 * window scaling */
1129 		ct->proto.tcp.seen[0].flags =
1130 		ct->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM |
1131 					      IP_CT_TCP_FLAG_BE_LIBERAL;
1132 	}
1133 
1134 	/* tcp_packet will set them */
1135 	ct->proto.tcp.last_index = TCP_NONE_SET;
1136 
1137 	pr_debug("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
1138 		 "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
1139 		 sender->td_end, sender->td_maxend, sender->td_maxwin,
1140 		 sender->td_scale,
1141 		 receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
1142 		 receiver->td_scale);
1143 	return true;
1144 }
1145 
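/* Conntrack entries in the closing states below carry little value and
 * may be dropped early, e.g. when the conntrack table is full.
 */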
1146 static bool tcp_can_early_drop(const struct nf_conn *ct)
1147 {
1148 	switch (ct->proto.tcp.state) {
1149 	case TCP_CONNTRACK_FIN_WAIT:
1150 	case TCP_CONNTRACK_LAST_ACK:
1151 	case TCP_CONNTRACK_TIME_WAIT:
1152 	case TCP_CONNTRACK_CLOSE:
1153 	case TCP_CONNTRACK_CLOSE_WAIT:
1154 		return true;
1155 	default:
1156 		break;
1157 	}
1158 
1159 	return false;
1160 }
1161 
1162 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1163 
1164 #include <linux/netfilter/nfnetlink.h>
1165 #include <linux/netfilter/nfnetlink_conntrack.h>
1166 
1167 static int tcp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,
1168 			 struct nf_conn *ct)
1169 {
1170 	struct nlattr *nest_parms;
1171 	struct nf_ct_tcp_flags tmp = {};
1172 
1173 	spin_lock_bh(&ct->lock);
1174 	nest_parms = nla_nest_start(skb, CTA_PROTOINFO_TCP | NLA_F_NESTED);
1175 	if (!nest_parms)
1176 		goto nla_put_failure;
1177 
1178 	if (nla_put_u8(skb, CTA_PROTOINFO_TCP_STATE, ct->proto.tcp.state) ||
1179 	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_ORIGINAL,
1180 		       ct->proto.tcp.seen[0].td_scale) ||
1181 	    nla_put_u8(skb, CTA_PROTOINFO_TCP_WSCALE_REPLY,
1182 		       ct->proto.tcp.seen[1].td_scale))
1183 		goto nla_put_failure;
1184 
1185 	tmp.flags = ct->proto.tcp.seen[0].flags;
1186 	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL,
1187 		    sizeof(struct nf_ct_tcp_flags), &tmp))
1188 		goto nla_put_failure;
1189 
1190 	tmp.flags = ct->proto.tcp.seen[1].flags;
1191 	if (nla_put(skb, CTA_PROTOINFO_TCP_FLAGS_REPLY,
1192 		    sizeof(struct nf_ct_tcp_flags), &tmp))
1193 		goto nla_put_failure;
1194 	spin_unlock_bh(&ct->lock);
1195 
1196 	nla_nest_end(skb, nest_parms);
1197 
1198 	return 0;
1199 
1200 nla_put_failure:
1201 	spin_unlock_bh(&ct->lock);
1202 	return -1;
1203 }
1204 
1205 static const struct nla_policy tcp_nla_policy[CTA_PROTOINFO_TCP_MAX+1] = {
1206 	[CTA_PROTOINFO_TCP_STATE]	    = { .type = NLA_U8 },
1207 	[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] = { .type = NLA_U8 },
1208 	[CTA_PROTOINFO_TCP_WSCALE_REPLY]    = { .type = NLA_U8 },
1209 	[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]  = { .len = sizeof(struct nf_ct_tcp_flags) },
1210 	[CTA_PROTOINFO_TCP_FLAGS_REPLY]	    = { .len =  sizeof(struct nf_ct_tcp_flags) },
1211 };
1212 
1213 #define TCP_NLATTR_SIZE	( \
1214 	NLA_ALIGN(NLA_HDRLEN + 1) + \
1215 	NLA_ALIGN(NLA_HDRLEN + 1) + \
1216 	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)) + \
1217 	NLA_ALIGN(NLA_HDRLEN + sizeof(struct nf_ct_tcp_flags)))
1218 
1219 static int nlattr_to_tcp(struct nlattr *cda[], struct nf_conn *ct)
1220 {
1221 	struct nlattr *pattr = cda[CTA_PROTOINFO_TCP];
1222 	struct nlattr *tb[CTA_PROTOINFO_TCP_MAX+1];
1223 	int err;
1224 
1225 	/* An update might not contain anything about the private
1226 	 * protocol info; in that case skip the parsing. */
1227 	if (!pattr)
1228 		return 0;
1229 
1230 	err = nla_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, pattr,
1231 			       tcp_nla_policy, NULL);
1232 	if (err < 0)
1233 		return err;
1234 
1235 	if (tb[CTA_PROTOINFO_TCP_STATE] &&
1236 	    nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]) >= TCP_CONNTRACK_MAX)
1237 		return -EINVAL;
1238 
1239 	spin_lock_bh(&ct->lock);
1240 	if (tb[CTA_PROTOINFO_TCP_STATE])
1241 		ct->proto.tcp.state = nla_get_u8(tb[CTA_PROTOINFO_TCP_STATE]);
1242 
1243 	if (tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]) {
1244 		struct nf_ct_tcp_flags *attr =
1245 			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_ORIGINAL]);
1246 		ct->proto.tcp.seen[0].flags &= ~attr->mask;
1247 		ct->proto.tcp.seen[0].flags |= attr->flags & attr->mask;
1248 	}
1249 
1250 	if (tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]) {
1251 		struct nf_ct_tcp_flags *attr =
1252 			nla_data(tb[CTA_PROTOINFO_TCP_FLAGS_REPLY]);
1253 		ct->proto.tcp.seen[1].flags &= ~attr->mask;
1254 		ct->proto.tcp.seen[1].flags |= attr->flags & attr->mask;
1255 	}
1256 
1257 	if (tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL] &&
1258 	    tb[CTA_PROTOINFO_TCP_WSCALE_REPLY] &&
1259 	    ct->proto.tcp.seen[0].flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
1260 	    ct->proto.tcp.seen[1].flags & IP_CT_TCP_FLAG_WINDOW_SCALE) {
1261 		ct->proto.tcp.seen[0].td_scale =
1262 			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_ORIGINAL]);
1263 		ct->proto.tcp.seen[1].td_scale =
1264 			nla_get_u8(tb[CTA_PROTOINFO_TCP_WSCALE_REPLY]);
1265 	}
1266 	spin_unlock_bh(&ct->lock);
1267 
1268 	return 0;
1269 }
1270 
1271 static unsigned int tcp_nlattr_tuple_size(void)
1272 {
1273 	static unsigned int size __read_mostly;
1274 
1275 	if (!size)
1276 		size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1);
1277 
1278 	return size;
1279 }
1280 #endif
1281 
1282 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1283 
1284 #include <linux/netfilter/nfnetlink.h>
1285 #include <linux/netfilter/nfnetlink_cttimeout.h>
1286 
1287 static int tcp_timeout_nlattr_to_obj(struct nlattr *tb[],
1288 				     struct net *net, void *data)
1289 {
1290 	struct nf_tcp_net *tn = tcp_pernet(net);
1291 	unsigned int *timeouts = data;
1292 	int i;
1293 
1294 	if (!timeouts)
1295 		timeouts = tn->timeouts;
1296 	/* set default TCP timeouts. */
1297 	for (i=0; i<TCP_CONNTRACK_TIMEOUT_MAX; i++)
1298 		timeouts[i] = tn->timeouts[i];
1299 
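	/* The attributes carry timeouts in seconds; convert to jiffies. */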
1300 	if (tb[CTA_TIMEOUT_TCP_SYN_SENT]) {
1301 		timeouts[TCP_CONNTRACK_SYN_SENT] =
1302 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT]))*HZ;
1303 	}
1304 
1305 	if (tb[CTA_TIMEOUT_TCP_SYN_RECV]) {
1306 		timeouts[TCP_CONNTRACK_SYN_RECV] =
1307 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_RECV]))*HZ;
1308 	}
1309 	if (tb[CTA_TIMEOUT_TCP_ESTABLISHED]) {
1310 		timeouts[TCP_CONNTRACK_ESTABLISHED] =
1311 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_ESTABLISHED]))*HZ;
1312 	}
1313 	if (tb[CTA_TIMEOUT_TCP_FIN_WAIT]) {
1314 		timeouts[TCP_CONNTRACK_FIN_WAIT] =
1315 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_FIN_WAIT]))*HZ;
1316 	}
1317 	if (tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]) {
1318 		timeouts[TCP_CONNTRACK_CLOSE_WAIT] =
1319 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE_WAIT]))*HZ;
1320 	}
1321 	if (tb[CTA_TIMEOUT_TCP_LAST_ACK]) {
1322 		timeouts[TCP_CONNTRACK_LAST_ACK] =
1323 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_LAST_ACK]))*HZ;
1324 	}
1325 	if (tb[CTA_TIMEOUT_TCP_TIME_WAIT]) {
1326 		timeouts[TCP_CONNTRACK_TIME_WAIT] =
1327 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_TIME_WAIT]))*HZ;
1328 	}
1329 	if (tb[CTA_TIMEOUT_TCP_CLOSE]) {
1330 		timeouts[TCP_CONNTRACK_CLOSE] =
1331 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_CLOSE]))*HZ;
1332 	}
1333 	if (tb[CTA_TIMEOUT_TCP_SYN_SENT2]) {
1334 		timeouts[TCP_CONNTRACK_SYN_SENT2] =
1335 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_SYN_SENT2]))*HZ;
1336 	}
1337 	if (tb[CTA_TIMEOUT_TCP_RETRANS]) {
1338 		timeouts[TCP_CONNTRACK_RETRANS] =
1339 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_RETRANS]))*HZ;
1340 	}
1341 	if (tb[CTA_TIMEOUT_TCP_UNACK]) {
1342 		timeouts[TCP_CONNTRACK_UNACK] =
1343 			ntohl(nla_get_be32(tb[CTA_TIMEOUT_TCP_UNACK]))*HZ;
1344 	}
1345 
1346 	timeouts[CTA_TIMEOUT_TCP_UNSPEC] = timeouts[CTA_TIMEOUT_TCP_SYN_SENT];
1347 	return 0;
1348 }
1349 
1350 static int
1351 tcp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
1352 {
1353 	const unsigned int *timeouts = data;
1354 
1355 	if (nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT,
1356 			htonl(timeouts[TCP_CONNTRACK_SYN_SENT] / HZ)) ||
1357 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_RECV,
1358 			 htonl(timeouts[TCP_CONNTRACK_SYN_RECV] / HZ)) ||
1359 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_ESTABLISHED,
1360 			 htonl(timeouts[TCP_CONNTRACK_ESTABLISHED] / HZ)) ||
1361 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_FIN_WAIT,
1362 			 htonl(timeouts[TCP_CONNTRACK_FIN_WAIT] / HZ)) ||
1363 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE_WAIT,
1364 			 htonl(timeouts[TCP_CONNTRACK_CLOSE_WAIT] / HZ)) ||
1365 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_LAST_ACK,
1366 			 htonl(timeouts[TCP_CONNTRACK_LAST_ACK] / HZ)) ||
1367 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_TIME_WAIT,
1368 			 htonl(timeouts[TCP_CONNTRACK_TIME_WAIT] / HZ)) ||
1369 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_CLOSE,
1370 			 htonl(timeouts[TCP_CONNTRACK_CLOSE] / HZ)) ||
1371 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_SYN_SENT2,
1372 			 htonl(timeouts[TCP_CONNTRACK_SYN_SENT2] / HZ)) ||
1373 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_RETRANS,
1374 			 htonl(timeouts[TCP_CONNTRACK_RETRANS] / HZ)) ||
1375 	    nla_put_be32(skb, CTA_TIMEOUT_TCP_UNACK,
1376 			 htonl(timeouts[TCP_CONNTRACK_UNACK] / HZ)))
1377 		goto nla_put_failure;
1378 	return 0;
1379 
1380 nla_put_failure:
1381 	return -ENOSPC;
1382 }
1383 
1384 static const struct nla_policy tcp_timeout_nla_policy[CTA_TIMEOUT_TCP_MAX+1] = {
1385 	[CTA_TIMEOUT_TCP_SYN_SENT]	= { .type = NLA_U32 },
1386 	[CTA_TIMEOUT_TCP_SYN_RECV]	= { .type = NLA_U32 },
1387 	[CTA_TIMEOUT_TCP_ESTABLISHED]	= { .type = NLA_U32 },
1388 	[CTA_TIMEOUT_TCP_FIN_WAIT]	= { .type = NLA_U32 },
1389 	[CTA_TIMEOUT_TCP_CLOSE_WAIT]	= { .type = NLA_U32 },
1390 	[CTA_TIMEOUT_TCP_LAST_ACK]	= { .type = NLA_U32 },
1391 	[CTA_TIMEOUT_TCP_TIME_WAIT]	= { .type = NLA_U32 },
1392 	[CTA_TIMEOUT_TCP_CLOSE]		= { .type = NLA_U32 },
1393 	[CTA_TIMEOUT_TCP_SYN_SENT2]	= { .type = NLA_U32 },
1394 	[CTA_TIMEOUT_TCP_RETRANS]	= { .type = NLA_U32 },
1395 	[CTA_TIMEOUT_TCP_UNACK]		= { .type = NLA_U32 },
1396 };
1397 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1398 
1399 #ifdef CONFIG_SYSCTL
1400 static struct ctl_table tcp_sysctl_table[] = {
1401 	{
1402 		.procname	= "nf_conntrack_tcp_timeout_syn_sent",
1403 		.maxlen		= sizeof(unsigned int),
1404 		.mode		= 0644,
1405 		.proc_handler	= proc_dointvec_jiffies,
1406 	},
1407 	{
1408 		.procname	= "nf_conntrack_tcp_timeout_syn_recv",
1409 		.maxlen		= sizeof(unsigned int),
1410 		.mode		= 0644,
1411 		.proc_handler	= proc_dointvec_jiffies,
1412 	},
1413 	{
1414 		.procname	= "nf_conntrack_tcp_timeout_established",
1415 		.maxlen		= sizeof(unsigned int),
1416 		.mode		= 0644,
1417 		.proc_handler	= proc_dointvec_jiffies,
1418 	},
1419 	{
1420 		.procname	= "nf_conntrack_tcp_timeout_fin_wait",
1421 		.maxlen		= sizeof(unsigned int),
1422 		.mode		= 0644,
1423 		.proc_handler	= proc_dointvec_jiffies,
1424 	},
1425 	{
1426 		.procname	= "nf_conntrack_tcp_timeout_close_wait",
1427 		.maxlen		= sizeof(unsigned int),
1428 		.mode		= 0644,
1429 		.proc_handler	= proc_dointvec_jiffies,
1430 	},
1431 	{
1432 		.procname	= "nf_conntrack_tcp_timeout_last_ack",
1433 		.maxlen		= sizeof(unsigned int),
1434 		.mode		= 0644,
1435 		.proc_handler	= proc_dointvec_jiffies,
1436 	},
1437 	{
1438 		.procname	= "nf_conntrack_tcp_timeout_time_wait",
1439 		.maxlen		= sizeof(unsigned int),
1440 		.mode		= 0644,
1441 		.proc_handler	= proc_dointvec_jiffies,
1442 	},
1443 	{
1444 		.procname	= "nf_conntrack_tcp_timeout_close",
1445 		.maxlen		= sizeof(unsigned int),
1446 		.mode		= 0644,
1447 		.proc_handler	= proc_dointvec_jiffies,
1448 	},
1449 	{
1450 		.procname	= "nf_conntrack_tcp_timeout_max_retrans",
1451 		.maxlen		= sizeof(unsigned int),
1452 		.mode		= 0644,
1453 		.proc_handler	= proc_dointvec_jiffies,
1454 	},
1455 	{
1456 		.procname	= "nf_conntrack_tcp_timeout_unacknowledged",
1457 		.maxlen		= sizeof(unsigned int),
1458 		.mode		= 0644,
1459 		.proc_handler	= proc_dointvec_jiffies,
1460 	},
1461 	{
1462 		.procname	= "nf_conntrack_tcp_loose",
1463 		.maxlen		= sizeof(unsigned int),
1464 		.mode		= 0644,
1465 		.proc_handler	= proc_dointvec,
1466 	},
1467 	{
1468 		.procname       = "nf_conntrack_tcp_be_liberal",
1469 		.maxlen         = sizeof(unsigned int),
1470 		.mode           = 0644,
1471 		.proc_handler   = proc_dointvec,
1472 	},
1473 	{
1474 		.procname	= "nf_conntrack_tcp_max_retrans",
1475 		.maxlen		= sizeof(unsigned int),
1476 		.mode		= 0644,
1477 		.proc_handler	= proc_dointvec,
1478 	},
1479 	{ }
1480 };
1481 #endif /* CONFIG_SYSCTL */
1482 
1483 static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn,
1484 				    struct nf_tcp_net *tn)
1485 {
1486 #ifdef CONFIG_SYSCTL
1487 	if (pn->ctl_table)
1488 		return 0;
1489 
1490 	pn->ctl_table = kmemdup(tcp_sysctl_table,
1491 				sizeof(tcp_sysctl_table),
1492 				GFP_KERNEL);
1493 	if (!pn->ctl_table)
1494 		return -ENOMEM;
1495 
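	/* The indices below must stay in sync with the order of the
	 * entries in tcp_sysctl_table[] above.
	 */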
1496 	pn->ctl_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT];
1497 	pn->ctl_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV];
1498 	pn->ctl_table[2].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED];
1499 	pn->ctl_table[3].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT];
1500 	pn->ctl_table[4].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT];
1501 	pn->ctl_table[5].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK];
1502 	pn->ctl_table[6].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT];
1503 	pn->ctl_table[7].data = &tn->timeouts[TCP_CONNTRACK_CLOSE];
1504 	pn->ctl_table[8].data = &tn->timeouts[TCP_CONNTRACK_RETRANS];
1505 	pn->ctl_table[9].data = &tn->timeouts[TCP_CONNTRACK_UNACK];
1506 	pn->ctl_table[10].data = &tn->tcp_loose;
1507 	pn->ctl_table[11].data = &tn->tcp_be_liberal;
1508 	pn->ctl_table[12].data = &tn->tcp_max_retrans;
1509 #endif
1510 	return 0;
1511 }
1512 
1513 static int tcp_init_net(struct net *net, u_int16_t proto)
1514 {
1515 	struct nf_tcp_net *tn = tcp_pernet(net);
1516 	struct nf_proto_net *pn = &tn->pn;
1517 
1518 	if (!pn->users) {
1519 		int i;
1520 
1521 		for (i = 0; i < TCP_CONNTRACK_TIMEOUT_MAX; i++)
1522 			tn->timeouts[i] = tcp_timeouts[i];
1523 
1524 		/* timeouts[0] is unused, make it same as SYN_SENT so
1525 		 * ->timeouts[0] contains 'new' timeout, like udp or icmp.
1526 		 */
1527 		tn->timeouts[0] = tcp_timeouts[TCP_CONNTRACK_SYN_SENT];
1528 		tn->tcp_loose = nf_ct_tcp_loose;
1529 		tn->tcp_be_liberal = nf_ct_tcp_be_liberal;
1530 		tn->tcp_max_retrans = nf_ct_tcp_max_retrans;
1531 	}
1532 
1533 	return tcp_kmemdup_sysctl_table(pn, tn);
1534 }
1535 
1536 static struct nf_proto_net *tcp_get_net_proto(struct net *net)
1537 {
1538 	return &net->ct.nf_ct_proto.tcp.pn;
1539 }
1540 
1541 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 =
1542 {
1543 	.l3proto		= PF_INET,
1544 	.l4proto 		= IPPROTO_TCP,
1545 #ifdef CONFIG_NF_CONNTRACK_PROCFS
1546 	.print_conntrack 	= tcp_print_conntrack,
1547 #endif
1548 	.packet 		= tcp_packet,
1549 	.new 			= tcp_new,
1550 	.error			= tcp_error,
1551 	.can_early_drop		= tcp_can_early_drop,
1552 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1553 	.to_nlattr		= tcp_to_nlattr,
1554 	.from_nlattr		= nlattr_to_tcp,
1555 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
1556 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
1557 	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
1558 	.nlattr_size		= TCP_NLATTR_SIZE,
1559 	.nla_policy		= nf_ct_port_nla_policy,
1560 #endif
1561 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1562 	.ctnl_timeout		= {
1563 		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
1564 		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
1565 		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
1566 		.obj_size	= sizeof(unsigned int) *
1567 					TCP_CONNTRACK_TIMEOUT_MAX,
1568 		.nla_policy	= tcp_timeout_nla_policy,
1569 	},
1570 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1571 	.init_net		= tcp_init_net,
1572 	.get_net_proto		= tcp_get_net_proto,
1573 };
1574 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp4);
1575 
1576 const struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 =
1577 {
1578 	.l3proto		= PF_INET6,
1579 	.l4proto 		= IPPROTO_TCP,
1580 #ifdef CONFIG_NF_CONNTRACK_PROCFS
1581 	.print_conntrack 	= tcp_print_conntrack,
1582 #endif
1583 	.packet 		= tcp_packet,
1584 	.new 			= tcp_new,
1585 	.error			= tcp_error,
1586 	.can_early_drop		= tcp_can_early_drop,
1587 #if IS_ENABLED(CONFIG_NF_CT_NETLINK)
1588 	.nlattr_size		= TCP_NLATTR_SIZE,
1589 	.to_nlattr		= tcp_to_nlattr,
1590 	.from_nlattr		= nlattr_to_tcp,
1591 	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,
1592 	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,
1593 	.nlattr_tuple_size	= tcp_nlattr_tuple_size,
1594 	.nla_policy		= nf_ct_port_nla_policy,
1595 #endif
1596 #ifdef CONFIG_NF_CONNTRACK_TIMEOUT
1597 	.ctnl_timeout		= {
1598 		.nlattr_to_obj	= tcp_timeout_nlattr_to_obj,
1599 		.obj_to_nlattr	= tcp_timeout_obj_to_nlattr,
1600 		.nlattr_max	= CTA_TIMEOUT_TCP_MAX,
1601 		.obj_size	= sizeof(unsigned int) *
1602 					TCP_CONNTRACK_TIMEOUT_MAX,
1603 		.nla_policy	= tcp_timeout_nla_policy,
1604 	},
1605 #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
1606 	.init_net		= tcp_init_net,
1607 	.get_net_proto		= tcp_get_net_proto,
1608 };
1609 EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_tcp6);
1610