Lines Matching +full:rx +full:- +full:sample +full:- +full:delay +full:- +full:ns

1 /* SPDX-License-Identifier: GPL-2.0-or-later */
46 #include <linux/bpf-cgroup.h>
57 #define TCP_MIN_GSO_SIZE (TCP_MIN_SND_MSS - MAX_TCP_OPTION_SPACE)
65 /* Minimal accepted MSS. It is (60+60+8) - (20+20). */
80 /* Maximal number of ACKs sent quickly to accelerate slow-start. */
95 * to ~3sec-8min depending on RTO.
102 * 15 is ~13-30min depending on RTO.
121 #define TCP_TIMEWAIT_LEN (60*HZ) /* how long to wait to destroy TIME-WAIT
126 * to combine FIN-WAIT-2 timeout with
127 * TIME-WAIT timer.
131 #define TCP_DELACK_MAX ((unsigned)(HZ/5)) /* maximal time to delay before sending an ACK */
133 #define TCP_DELACK_MIN ((unsigned)(HZ/25)) /* minimal time to delay before sending an ACK */
146 * valid RTT sample has been acquired,
165 #define TCP_PAWS_MSL 60 /* Per-host timestamps are invalidated
171 #define TCP_PAWS_WINDOW 1 /* Replay window for per-host
191 * experimental options. See draft-ietf-tcpm-experimental-options-00.txt
220 /* Flags in tp->nonagle */
225 /* TCP thin-stream limits */
260 if (mem_cgroup_sockets_enabled && sk->sk_memcg && in tcp_under_memory_pressure()
261 mem_cgroup_under_socket_pressure(sk->sk_memcg)) in tcp_under_memory_pressure()
273 return (__s32)(seq1-seq2) < 0; in before()
280 return seq3 - seq2 >= seq1 - seq2; in between()
285 if (sk->sk_wmem_queued > SOCK_MIN_SNDBUF && in tcp_out_of_memory()
295 struct percpu_counter *ocp = sk->sk_prot->orphan_count; in tcp_too_many_orphans()
311 #define TCP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.tcp_statistics, field)
312 #define __TCP_INC_STATS(net, field) __SNMP_INC_STATS((net)->mib.tcp_statistics, field)
313 #define TCP_DEC_STATS(net, field) SNMP_DEC_STATS((net)->mib.tcp_statistics, field)
314 #define TCP_ADD_STATS(net, field, val) SNMP_ADD_STATS((net)->mib.tcp_statistics, field, val)
360 if (icsk->icsk_ack.quick) { in tcp_dec_quickack_mode()
361 if (pkts >= icsk->icsk_ack.quick) { in tcp_dec_quickack_mode()
362 icsk->icsk_ack.quick = 0; in tcp_dec_quickack_mode()
364 icsk->icsk_ack.ato = TCP_ATO_MIN; in tcp_dec_quickack_mode()
366 icsk->icsk_ack.quick -= pkts; in tcp_dec_quickack_mode()
430 * BPF SKB-less helpers
507 if (sk->sk_reuseport) { in tcp_synq_overflow()
510 reuse = rcu_dereference(sk->sk_reuseport_cb); in tcp_synq_overflow()
512 last_overflow = READ_ONCE(reuse->synq_overflow_ts); in tcp_synq_overflow()
515 WRITE_ONCE(reuse->synq_overflow_ts, now); in tcp_synq_overflow()
520 last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); in tcp_synq_overflow()
522 WRITE_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp, now); in tcp_synq_overflow()
531 if (sk->sk_reuseport) { in tcp_synq_no_recent_overflow()
534 reuse = rcu_dereference(sk->sk_reuseport_cb); in tcp_synq_no_recent_overflow()
536 last_overflow = READ_ONCE(reuse->synq_overflow_ts); in tcp_synq_no_recent_overflow()
537 return !time_between32(now, last_overflow - HZ, in tcp_synq_no_recent_overflow()
543 last_overflow = READ_ONCE(tcp_sk(sk)->rx_opt.ts_recent_stamp); in tcp_synq_no_recent_overflow()
547 * 'last_overflow - HZ' as lower bound. That's because a concurrent in tcp_synq_no_recent_overflow()
552 return !time_between32(now, last_overflow - HZ, in tcp_synq_no_recent_overflow()
627 if (hrtimer_try_to_cancel(&tcp_sk(sk)->pacing_timer) == 1) in tcp_clear_xmit_timers()
630 if (hrtimer_try_to_cancel(&tcp_sk(sk)->compressed_ack_timer) == 1) in tcp_clear_xmit_timers()
646 * to sub-MSS pieces for the sake of SWS or making sure there in tcp_bound_to_half_wnd()
652 if (tp->max_window > TCP_MSS_DEFAULT) in tcp_bound_to_half_wnd()
653 cutoff = (tp->max_window >> 1); in tcp_bound_to_half_wnd()
655 cutoff = tp->max_window; in tcp_bound_to_half_wnd()
658 return max_t(int, cutoff, 68U - tp->tcp_header_len); in tcp_bound_to_half_wnd()
666 /* Read 'sendfile()'-style from a TCP socket */
678 if (inet_csk(sk)->icsk_rto > TCP_RTO_MAX) in tcp_bound_rto()
679 inet_csk(sk)->icsk_rto = TCP_RTO_MAX; in tcp_bound_rto()
684 return usecs_to_jiffies((tp->srtt_us >> 3) + tp->rttvar_us); in __tcp_set_rto()
693 tp->pred_flags = htonl((tp->tcp_header_len << 26) | in __tcp_fast_path_on()
700 __tcp_fast_path_on(tp, tp->snd_wnd >> tp->rx_opt.snd_wscale); in tcp_fast_path_on()
707 if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && in tcp_fast_path_check()
708 tp->rcv_wnd && in tcp_fast_path_check()
709 atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && in tcp_fast_path_check()
710 !tp->urg_data) in tcp_fast_path_check()
718 u32 rto_min = inet_csk(sk)->icsk_rto_min; in tcp_rto_min()
738 return minmax_get(&tp->rtt_min); in tcp_min_rtt()
747 s32 win = tp->rcv_wup + tp->rcv_wnd - tp->rcv_nxt; in tcp_receive_window()
764 * historically has been the same until linux-4.13.
785 /* This should only be used in contexts where tp->tcp_mstamp is up to date */
788 return div_u64(tp->tcp_mstamp, USEC_PER_SEC / TCP_TS_HZ); in tcp_time_stamp()
792 static inline u32 tcp_ns_to_ts(u64 ns) in tcp_ns_to_ts() argument
794 return div_u64(ns, NSEC_PER_SEC / TCP_TS_HZ); in tcp_ns_to_ts()
807 return max_t(s64, t1 - t0, 0); in tcp_stamp_us_delta()
812 return tcp_ns_to_ts(skb->skb_mstamp_ns); in tcp_skb_timestamp()
818 return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC); in tcp_skb_timestamp_us()
836 * TCP per-packet control information to the transmission code.
837 * We also store the host-order sequence numbers in here too.
839 * If this grows please adjust skbuff.h:skbuff->cb[xxx] size appropriately.
872 has_rxtstamp:1, /* SKB has a RX timestamp */
897 #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
903 * as TCP moves IP6CB into a different location in skb->cb[]
907 return TCP_SKB_CB(skb)->header.h6.iif; in tcp_v6_iif()
912 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags); in tcp_v6_iif_l3_slave()
914 return l3_slave ? skb->skb_iif : TCP_SKB_CB(skb)->header.h6.iif; in tcp_v6_iif_l3_slave()
921 if (skb && ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags)) in tcp_v6_sdif()
922 return TCP_SKB_CB(skb)->header.h6.iif; in tcp_v6_sdif()
939 if (skb && ipv4_l3mdev_skb(TCP_SKB_CB(skb)->header.h4.flags)) in tcp_v4_sdif()
940 return TCP_SKB_CB(skb)->header.h4.iif; in tcp_v4_sdif()
950 return TCP_SKB_CB(skb)->tcp_gso_segs; in tcp_skb_pcount()
955 TCP_SKB_CB(skb)->tcp_gso_segs = segs; in tcp_skb_pcount_set()
960 TCP_SKB_CB(skb)->tcp_gso_segs += segs; in tcp_skb_pcount_add()
966 return TCP_SKB_CB(skb)->tcp_gso_size; in tcp_skb_mss()
971 return likely(!TCP_SKB_CB(skb)->eor); in tcp_skb_can_collapse_to()
991 /* Information about inbound ACK, passed to cong_ops->in_ack_event() */
1021 /* A rate sample measures the number of (original/retransmitted) data
1023 * The tcp_rate.c code fills in the rate sample, and congestion
1025 * of ACK processing can optionally chose to consult this sample when
1027 * A sample is invalid if "delivered" or "interval_us" is negative.
1031 u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
1033 long interval_us; /* time for tp->delivered to incr "delivered" */
1036 long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
1040 bool is_app_limited; /* is sample from packet with bubble in pipe? */
1041 bool is_retrans; /* is sample from retransmission? */
1064 void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
1134 return icsk->icsk_ca_ops->flags & TCP_CONG_NEEDS_ECN; in tcp_ca_needs_ecn()
1141 if (icsk->icsk_ca_ops->set_state) in tcp_set_ca_state()
1142 icsk->icsk_ca_ops->set_state(sk, ca_state); in tcp_set_ca_state()
1143 icsk->icsk_ca_state = ca_state; in tcp_set_ca_state()
1150 if (icsk->icsk_ca_ops->cwnd_event) in tcp_ca_event()
1151 icsk->icsk_ca_ops->cwnd_event(sk, event); in tcp_ca_event()
1166 * tcp_is_sack - SACK enabled
1167 * tcp_is_reno - No SACK
1171 return likely(tp->rx_opt.sack_ok); in tcp_is_sack()
1181 return tp->sacked_out + tp->lost_out; in tcp_left_out()
1190 * tp->packets_out to determine if the send queue is empty or not.
1200 return tp->packets_out - tcp_left_out(tp) + tp->retrans_out; in tcp_packets_in_flight()
1207 return tp->snd_cwnd < tp->snd_ssthresh; in tcp_in_slow_start()
1212 return tp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; in tcp_in_initial_slowstart()
1218 (1 << inet_csk(sk)->icsk_ca_state); in tcp_in_cwnd_reduction()
1221 /* If cwnd > ssthresh, we may raise ssthresh to be half-way to cwnd.
1230 return tp->snd_ssthresh; in tcp_current_ssthresh()
1232 return max(tp->snd_ssthresh, in tcp_current_ssthresh()
1233 ((tp->snd_cwnd >> 1) + in tcp_current_ssthresh()
1234 (tp->snd_cwnd >> 2))); in tcp_current_ssthresh()
1238 #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out)
1254 return tp->snd_una + tp->snd_wnd; in tcp_wnd_end()
1268 * usage, and allow application-limited process to probe bw more aggressively.
1276 return tp->snd_cwnd < 2 * tp->max_packets_out; in tcp_is_cwnd_limited()
1278 return tp->is_cwnd_limited; in tcp_is_cwnd_limited()
1289 return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED; in tcp_needs_internal_pacing()
1297 s64 delay = tcp_sk(sk)->tcp_wstamp_ns - tcp_sk(sk)->tcp_clock_cache; in tcp_pacing_delay() local
1299 return delay > 0 ? nsecs_to_jiffies(delay) : 0; in tcp_pacing_delay()
1319 return max_t(unsigned long, inet_csk(sk)->icsk_rto, TCP_RTO_MIN); in tcp_probe0_base()
1327 inet_csk(sk)->icsk_backoff); in tcp_probe0_when()
1335 if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending) in tcp_check_probe_timer()
1342 tp->snd_wl1 = seq; in tcp_init_wl()
1347 tp->snd_wl1 = seq; in tcp_update_wl()
1373 rx_opt->dsack = 0; in tcp_sack_reset()
1374 rx_opt->num_sacks = 0; in tcp_sack_reset()
1381 const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; in tcp_slow_start_after_idle_check()
1385 if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out || in tcp_slow_start_after_idle_check()
1386 ca_ops->cong_control) in tcp_slow_start_after_idle_check()
1388 delta = tcp_jiffies32 - tp->lsndtime; in tcp_slow_start_after_idle_check()
1389 if (delta > inet_csk(sk)->icsk_rto) in tcp_slow_start_after_idle_check()
1401 int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale; in tcp_win_from_space()
1404 (space>>(-tcp_adv_win_scale)) : in tcp_win_from_space()
1405 space - (space>>tcp_adv_win_scale); in tcp_win_from_space()
1411 return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf) - in tcp_space()
1412 READ_ONCE(sk->sk_backlog.len) - in tcp_space()
1413 atomic_read(&sk->sk_rmem_alloc)); in tcp_space()
1418 return tcp_win_from_space(sk, READ_ONCE(sk->sk_rcvbuf)); in tcp_full_space()
1435 rcvbuf = READ_ONCE(sk->sk_rcvbuf); in tcp_rmem_pressure()
1436 threshold = rcvbuf - (rcvbuf >> 3); in tcp_rmem_pressure()
1438 return atomic_read(&sk->sk_rmem_alloc) > threshold; in tcp_rmem_pressure()
1444 int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq); in tcp_epollin_ready()
1450 (tcp_receive_window(tp) <= inet_csk(sk)->icsk_ack.rcv_mss); in tcp_epollin_ready()
1464 return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl; in keepalive_intvl_when()
1471 return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time; in keepalive_time_when()
1478 return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes; in keepalive_probes()
1483 const struct inet_connection_sock *icsk = &tp->inet_conn; in keepalive_time_elapsed()
1485 return min_t(u32, tcp_jiffies32 - icsk->icsk_ack.lrcvtime, in keepalive_time_elapsed()
1486 tcp_jiffies32 - tp->rcv_tstamp); in keepalive_time_elapsed()
1491 int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout; in tcp_fin_time()
1492 const int rto = inet_csk(sk)->icsk_rto; in tcp_fin_time()
1494 if (fin_timeout < (rto << 2) - (rto >> 1)) in tcp_fin_time()
1495 fin_timeout = (rto << 2) - (rto >> 1); in tcp_fin_time()
1503 if ((s32)(rx_opt->ts_recent - rx_opt->rcv_tsval) <= paws_win) in tcp_paws_check()
1506 rx_opt->ts_recent_stamp + TCP_PAWS_24DAYS))) in tcp_paws_check()
1513 if (!rx_opt->ts_recent) in tcp_paws_check()
1529 out-of-sync and half-open connections will not be reset. in tcp_paws_reject()
1532 via reboots. Linux-2.2 DOES NOT! in tcp_paws_reject()
1537 rx_opt->ts_recent_stamp + TCP_PAWS_MSL)) in tcp_paws_reject()
1551 TCP_ADD_STATS(net, TCP_MIB_MAXCONN, -1); in tcp_mib_init()
1557 tp->lost_skb_hint = NULL; in tcp_clear_retrans_hints_partial()
1563 tp->retransmit_skb_hint = NULL; in tcp_clear_all_retrans_hints()
1573 /* - key database */
1586 /* - sock block */
1592 /* - pseudo header */
1615 /* - pool: digest algorithm, hash description and scratch buffer */
1621 /* - functions */
1647 #define tcp_twsk_md5_key(twsk) ((twsk)->tw_md5_key)
1724 ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx); in tcp_fastopen_get_ctx()
1726 ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx); in tcp_fastopen_get_ctx()
1734 if (orig->len == TCP_FASTOPEN_COOKIE_SIZE && in tcp_fastopen_cookie_match()
1735 orig->len == foc->len && in tcp_fastopen_cookie_match()
1736 !memcmp(orig->val, foc->val, foc->len)) in tcp_fastopen_cookie_match()
1744 return ctx->num; in tcp_fastopen_context_len()
1748 * chronograph-like stats that are mutually exclusive.
1752 TCP_CHRONO_BUSY, /* Actively sending data (non-empty write queue) */
1761 /* This helper is needed, because skb->tcp_tsorted_anchor uses
1762 * the same memory storage than skb->destructor/_skb_refdst
1766 skb->destructor = NULL; in tcp_skb_tsorted_anchor_cleanup()
1767 skb->_skb_refdst = 0UL; in tcp_skb_tsorted_anchor_cleanup()
1771 unsigned long _save = skb->_skb_refdst; \
1772 skb->_skb_refdst = 0UL;
1775 skb->_skb_refdst = _save; \
1782 return skb_rb_first(&sk->tcp_rtx_queue); in tcp_rtx_queue_head()
1787 return skb_rb_last(&sk->tcp_rtx_queue); in tcp_rtx_queue_tail()
1792 return skb_peek(&sk->sk_write_queue); in tcp_write_queue_head()
1797 return skb_peek_tail(&sk->sk_write_queue); in tcp_write_queue_tail()
1801 skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp)
1805 return skb_peek(&sk->sk_write_queue); in tcp_send_head()
1811 return skb_queue_is_last(&sk->sk_write_queue, skb); in tcp_skb_is_last()
1815 * tcp_write_queue_empty - test if any payload (or FIN) is available in write queue
1819 * we must not use "return skb_queue_empty(&sk->sk_write_queue)"
1825 return tp->write_seq == tp->snd_nxt; in tcp_write_queue_empty()
1830 return RB_EMPTY_ROOT(&sk->tcp_rtx_queue); in tcp_rtx_queue_empty()
1840 __skb_queue_tail(&sk->sk_write_queue, skb); in tcp_add_write_queue_tail()
1843 if (sk->sk_write_queue.next == skb) in tcp_add_write_queue_tail()
1852 __skb_queue_before(&sk->sk_write_queue, skb, new); in tcp_insert_write_queue_before()
1858 __skb_unlink(skb, &sk->sk_write_queue); in tcp_unlink_write_queue()
1866 rb_erase(&skb->rbnode, &sk->tcp_rtx_queue); in tcp_rtx_queue_unlink()
1871 list_del(&skb->tcp_tsorted_anchor); in tcp_rtx_queue_unlink_and_free()
1881 __tcp_push_pending_frames(sk, tcp_current_mss(sk), tp->nonagle); in tcp_push_pending_frames()
1891 if (!tp->sacked_out) in tcp_highest_sack_seq()
1892 return tp->snd_una; in tcp_highest_sack_seq()
1894 if (tp->highest_sack == NULL) in tcp_highest_sack_seq()
1895 return tp->snd_nxt; in tcp_highest_sack_seq()
1897 return TCP_SKB_CB(tp->highest_sack)->seq; in tcp_highest_sack_seq()
1902 tcp_sk(sk)->highest_sack = skb_rb_next(skb); in tcp_advance_highest_sack()
1907 return tcp_sk(sk)->highest_sack; in tcp_highest_sack()
1912 tcp_sk(sk)->highest_sack = tcp_rtx_queue_head(sk); in tcp_highest_sack_reset()
1921 tcp_sk(sk)->highest_sack = new; in tcp_highest_sack_replace()
1927 switch (sk->sk_state) { in inet_sk_transparent()
1929 return inet_twsk(sk)->tw_transparent; in inet_sk_transparent()
1931 return inet_rsk(inet_reqsk(sk))->no_srccheck; in inet_sk_transparent()
1933 return inet_sk(sk)->transparent; in inet_sk_transparent()
1937 * increased latency). Used to trigger latency-reducing mechanisms.
1941 return tp->packets_out < 4 && !tcp_in_initial_slowstart(tp); in tcp_stream_is_thin()
1985 return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat; in tcp_notsent_lowat()
2000 /* TCP af-specific functions */
2055 return ops->cookie_init_seq(skb, mss); in cookie_init_sequence()
2086 u32 rto = inet_csk(sk)->icsk_rto; in tcp_rto_delta_us()
2089 return rto_time_stamp_us - tcp_sk(sk)->tcp_mstamp; in tcp_rto_delta_us()
2098 const struct ip_options *opt = &TCP_SKB_CB(skb)->header.h4.opt; in tcp_v4_save_options()
2101 if (opt->optlen) { in tcp_v4_save_options()
2102 int opt_size = sizeof(*dopt) + opt->optlen; in tcp_v4_save_options()
2105 if (dopt && __ip_options_echo(net, &dopt->opt, skb, opt)) { in tcp_v4_save_options()
2113 /* locally generated TCP pure ACKs have skb->truesize == 2
2120 return skb->truesize == 2; in skb_is_tcp_pure_ack()
2125 skb->truesize = 2; in skb_set_tcp_pure_ack()
2133 if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { in tcp_inq()
2136 !tp->urg_data || in tcp_inq()
2137 before(tp->urg_seq, tp->copied_seq) || in tcp_inq()
2138 !before(tp->urg_seq, tp->rcv_nxt)) { in tcp_inq()
2140 answ = tp->rcv_nxt - tp->copied_seq; in tcp_inq()
2144 answ--; in tcp_inq()
2146 answ = tp->urg_seq - tp->copied_seq; in tcp_inq()
2158 segs_in = max_t(u16, 1, skb_shinfo(skb)->gso_segs); in tcp_segs_in()
2159 tp->segs_in += segs_in; in tcp_segs_in()
2160 if (skb->len > tcp_hdrlen(skb)) in tcp_segs_in()
2161 tp->data_segs_in += segs_in; in tcp_segs_in()
2173 atomic_inc(&((struct sock *)sk)->sk_drops); in tcp_listendrop()
2217 __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name)
2244 skops->skb = skb; in bpf_skops_init_skb()
2245 skops->skb_data_end = skb->data + end_offset; in bpf_skops_init_skb()
2281 ret = -1; in tcp_call_bpf()
2303 return -EPERM; in tcp_call_bpf()
2308 return -EPERM; in tcp_call_bpf_2arg()
2314 return -EPERM; in tcp_call_bpf_3arg()
2368 skb->skb_mstamp_ns += (u64)tp->tcp_tx_delay * NSEC_PER_USEC; in tcp_add_tx_delay()
2377 u32 delay = (sk->sk_state == TCP_TIME_WAIT) ? in tcp_transmit_time() local
2378 tcp_twsk(sk)->tw_tx_delay : tcp_sk(sk)->tcp_tx_delay; in tcp_transmit_time()
2380 return tcp_clock_ns() + (u64)delay * NSEC_PER_USEC; in tcp_transmit_time()