1 /*
2  * Copyright (c) 2018 Chelsio Communications, Inc.
3  *
4  * This program is free software; you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License version 2 as
6  * published by the Free Software Foundation.
7  *
8  * Written by: Atul Gupta (atul.gupta@chelsio.com)
9  */
10 
11 #include <linux/module.h>
12 #include <linux/list.h>
13 #include <linux/workqueue.h>
14 #include <linux/skbuff.h>
15 #include <linux/timer.h>
16 #include <linux/notifier.h>
17 #include <linux/inetdevice.h>
18 #include <linux/ip.h>
19 #include <linux/tcp.h>
20 #include <linux/sched/signal.h>
21 #include <net/tcp.h>
22 #include <net/busy_poll.h>
23 #include <crypto/aes.h>
24 
25 #include "chtls.h"
26 #include "chtls_cm.h"
27 
is_tls_tx(struct chtls_sock * csk)28 static bool is_tls_tx(struct chtls_sock *csk)
29 {
30 	return csk->tlshws.txkey >= 0;
31 }
32 
is_tls_rx(struct chtls_sock * csk)33 static bool is_tls_rx(struct chtls_sock *csk)
34 {
35 	return csk->tlshws.rxkey >= 0;
36 }
37 
data_sgl_len(const struct sk_buff * skb)38 static int data_sgl_len(const struct sk_buff *skb)
39 {
40 	unsigned int cnt;
41 
42 	cnt = skb_shinfo(skb)->nr_frags;
43 	return sgl_len(cnt) * 8;
44 }
45 
nos_ivs(struct sock * sk,unsigned int size)46 static int nos_ivs(struct sock *sk, unsigned int size)
47 {
48 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
49 
50 	return DIV_ROUND_UP(size, csk->tlshws.mfs);
51 }
52 
set_ivs_imm(struct sock * sk,const struct sk_buff * skb)53 static int set_ivs_imm(struct sock *sk, const struct sk_buff *skb)
54 {
55 	int ivs_size = nos_ivs(sk, skb->len) * CIPHER_BLOCK_SIZE;
56 	int hlen = TLS_WR_CPL_LEN + data_sgl_len(skb);
57 
58 	if ((hlen + KEY_ON_MEM_SZ + ivs_size) <
59 	    MAX_IMM_OFLD_TX_DATA_WR_LEN) {
60 		ULP_SKB_CB(skb)->ulp.tls.iv = 1;
61 		return 1;
62 	}
63 	ULP_SKB_CB(skb)->ulp.tls.iv = 0;
64 	return 0;
65 }
66 
max_ivs_size(struct sock * sk,int size)67 static int max_ivs_size(struct sock *sk, int size)
68 {
69 	return nos_ivs(sk, size) * CIPHER_BLOCK_SIZE;
70 }
71 
ivs_size(struct sock * sk,const struct sk_buff * skb)72 static int ivs_size(struct sock *sk, const struct sk_buff *skb)
73 {
74 	return set_ivs_imm(sk, skb) ? (nos_ivs(sk, skb->len) *
75 		 CIPHER_BLOCK_SIZE) : 0;
76 }
77 
flowc_wr_credits(int nparams,int * flowclenp)78 static int flowc_wr_credits(int nparams, int *flowclenp)
79 {
80 	int flowclen16, flowclen;
81 
82 	flowclen = offsetof(struct fw_flowc_wr, mnemval[nparams]);
83 	flowclen16 = DIV_ROUND_UP(flowclen, 16);
84 	flowclen = flowclen16 * 16;
85 
86 	if (flowclenp)
87 		*flowclenp = flowclen;
88 
89 	return flowclen16;
90 }
91 
create_flowc_wr_skb(struct sock * sk,struct fw_flowc_wr * flowc,int flowclen)92 static struct sk_buff *create_flowc_wr_skb(struct sock *sk,
93 					   struct fw_flowc_wr *flowc,
94 					   int flowclen)
95 {
96 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
97 	struct sk_buff *skb;
98 
99 	skb = alloc_skb(flowclen, GFP_ATOMIC);
100 	if (!skb)
101 		return NULL;
102 
103 	memcpy(__skb_put(skb, flowclen), flowc, flowclen);
104 	skb_set_queue_mapping(skb, (csk->txq_idx << 1) | CPL_PRIORITY_DATA);
105 
106 	return skb;
107 }
108 
send_flowc_wr(struct sock * sk,struct fw_flowc_wr * flowc,int flowclen)109 static int send_flowc_wr(struct sock *sk, struct fw_flowc_wr *flowc,
110 			 int flowclen)
111 {
112 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
113 	struct tcp_sock *tp = tcp_sk(sk);
114 	struct sk_buff *skb;
115 	int flowclen16;
116 	int ret;
117 
118 	flowclen16 = flowclen / 16;
119 
120 	if (csk_flag(sk, CSK_TX_DATA_SENT)) {
121 		skb = create_flowc_wr_skb(sk, flowc, flowclen);
122 		if (!skb)
123 			return -ENOMEM;
124 
125 		skb_entail(sk, skb,
126 			   ULPCB_FLAG_NO_HDR | ULPCB_FLAG_NO_APPEND);
127 		return 0;
128 	}
129 
130 	ret = cxgb4_immdata_send(csk->egress_dev,
131 				 csk->txq_idx,
132 				 flowc, flowclen);
133 	if (!ret)
134 		return flowclen16;
135 	skb = create_flowc_wr_skb(sk, flowc, flowclen);
136 	if (!skb)
137 		return -ENOMEM;
138 	send_or_defer(sk, tp, skb, 0);
139 	return flowclen16;
140 }
141 
/*
 * Map a TCP socket state to the matching FW_FLOWC_MNEM_TCPSTATE_* value.
 * Any state without an explicit mapping is reported as ESTABLISHED.
 */
static u8 tcp_state_to_flowc_state(u8 state)
{
	u8 fw_state;

	switch (state) {
	case TCP_CLOSE_WAIT:
		fw_state = FW_FLOWC_MNEM_TCPSTATE_CLOSEWAIT;
		break;
	case TCP_FIN_WAIT1:
		fw_state = FW_FLOWC_MNEM_TCPSTATE_FINWAIT1;
		break;
	case TCP_CLOSING:
		fw_state = FW_FLOWC_MNEM_TCPSTATE_CLOSING;
		break;
	case TCP_LAST_ACK:
		fw_state = FW_FLOWC_MNEM_TCPSTATE_LASTACK;
		break;
	case TCP_FIN_WAIT2:
		fw_state = FW_FLOWC_MNEM_TCPSTATE_FINWAIT2;
		break;
	case TCP_ESTABLISHED:
	default:
		fw_state = FW_FLOWC_MNEM_TCPSTATE_ESTABLISHED;
		break;
	}

	return fw_state;
}
161 
/*
 * Build a FLOWC work request describing this connection and send it with
 * send_flowc_wr().
 *
 * @sk:      offloaded socket
 * @compl:   value placed in the WR's completion-request field
 * @snd_nxt: not read by this function; tp->snd_nxt is used instead
 * @rcv_nxt: not read by this function; tp->rcv_nxt is used instead
 *
 * The parameter list always carries PFNVFN, CH, PORT, IQID, SNDNXT, RCVNXT,
 * SNDBUF, MSS and TCPSTATE, in that order; RCV_SCALE, ULD_MODE and
 * TXDATAPLEN_MAX are appended only when the corresponding socket value is
 * set.
 *
 * Returns whatever send_flowc_wr() returns.
 */
int send_tx_flowc_wr(struct sock *sk, int compl,
		     u32 snd_nxt, u32 rcv_nxt)
{
	struct flowc_packed {
		struct fw_flowc_wr fc;
		struct fw_flowc_mnemval mnemval[FW_FLOWC_MNEM_MAX];
	} __packed sflowc;
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct fw_flowc_wr *flowc = &sflowc.fc;
	int flowclen16, flowclen;
	int nparams = 0;

	memset(&sflowc, 0, sizeof(sflowc));

	/* Append one mnemonic/value pair and advance the parameter count. */
#define FLOWC_PARAM(__m, __v) \
	do { \
		flowc->mnemval[nparams].mnemonic = FW_FLOWC_MNEM_##__m; \
		flowc->mnemval[nparams].val = cpu_to_be32(__v); \
		nparams++; \
	} while (0)

	FLOWC_PARAM(PFNVFN, FW_PFVF_CMD_PFN_V(csk->cdev->lldi->pf));
	FLOWC_PARAM(CH, csk->tx_chan);
	FLOWC_PARAM(PORT, csk->tx_chan);
	FLOWC_PARAM(IQID, csk->rss_qid);
	FLOWC_PARAM(SNDNXT, tp->snd_nxt);
	FLOWC_PARAM(RCVNXT, tp->rcv_nxt);
	FLOWC_PARAM(SNDBUF, csk->sndbuf);
	FLOWC_PARAM(MSS, tp->mss_cache);
	FLOWC_PARAM(TCPSTATE, tcp_state_to_flowc_state(sk->sk_state));

	if (SND_WSCALE(tp)) {
		FLOWC_PARAM(RCV_SCALE, SND_WSCALE(tp));
	}

	if (csk->ulp_mode == ULP_MODE_TLS) {
		FLOWC_PARAM(ULD_MODE, ULP_MODE_TLS);
	}

	if (csk->tlshws.fcplenmax) {
		FLOWC_PARAM(TXDATAPLEN_MAX, csk->tlshws.fcplenmax);
	}

#undef FLOWC_PARAM

	flowclen16 = flowc_wr_credits(nparams, &flowclen);

	flowc->op_to_nparams = cpu_to_be32(FW_WR_OP_V(FW_FLOWC_WR) |
					   FW_WR_COMPL_V(compl) |
					   FW_FLOWC_WR_NPARAMS_V(nparams));
	flowc->flowid_len16 = cpu_to_be32(FW_WR_LEN16_V(flowclen16) |
					  FW_WR_FLOWID_V(csk->tid));

	return send_flowc_wr(sk, flowc, flowclen);
}
220 
221 /* Copy IVs to WR */
tls_copy_ivs(struct sock * sk,struct sk_buff * skb)222 static int tls_copy_ivs(struct sock *sk, struct sk_buff *skb)
223 
224 {
225 	struct chtls_sock *csk;
226 	unsigned char *iv_loc;
227 	struct chtls_hws *hws;
228 	unsigned char *ivs;
229 	u16 number_of_ivs;
230 	struct page *page;
231 	int err = 0;
232 
233 	csk = rcu_dereference_sk_user_data(sk);
234 	hws = &csk->tlshws;
235 	number_of_ivs = nos_ivs(sk, skb->len);
236 
237 	if (number_of_ivs > MAX_IVS_PAGE) {
238 		pr_warn("MAX IVs in PAGE exceeded %d\n", number_of_ivs);
239 		return -ENOMEM;
240 	}
241 
242 	/* generate the  IVs */
243 	ivs = kmalloc_array(CIPHER_BLOCK_SIZE, number_of_ivs, GFP_ATOMIC);
244 	if (!ivs)
245 		return -ENOMEM;
246 	get_random_bytes(ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
247 
248 	if (skb_ulp_tls_iv_imm(skb)) {
249 		/* send the IVs as immediate data in the WR */
250 		iv_loc = (unsigned char *)__skb_push(skb, number_of_ivs *
251 						CIPHER_BLOCK_SIZE);
252 		if (iv_loc)
253 			memcpy(iv_loc, ivs, number_of_ivs * CIPHER_BLOCK_SIZE);
254 
255 		hws->ivsize = number_of_ivs * CIPHER_BLOCK_SIZE;
256 	} else {
257 		/* Send the IVs as sgls */
258 		/* Already accounted IV DSGL for credits */
259 		skb_shinfo(skb)->nr_frags--;
260 		page = alloc_pages(sk->sk_allocation | __GFP_COMP, 0);
261 		if (!page) {
262 			pr_info("%s : Page allocation for IVs failed\n",
263 				__func__);
264 			err = -ENOMEM;
265 			goto out;
266 		}
267 		memcpy(page_address(page), ivs, number_of_ivs *
268 		       CIPHER_BLOCK_SIZE);
269 		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags, page, 0,
270 				   number_of_ivs * CIPHER_BLOCK_SIZE);
271 		hws->ivsize = 0;
272 	}
273 out:
274 	kfree(ivs);
275 	return err;
276 }
277 
278 /* Copy Key to WR */
tls_copy_tx_key(struct sock * sk,struct sk_buff * skb)279 static void tls_copy_tx_key(struct sock *sk, struct sk_buff *skb)
280 {
281 	struct ulptx_sc_memrd *sc_memrd;
282 	struct chtls_sock *csk;
283 	struct chtls_dev *cdev;
284 	struct ulptx_idata *sc;
285 	struct chtls_hws *hws;
286 	u32 immdlen;
287 	int kaddr;
288 
289 	csk = rcu_dereference_sk_user_data(sk);
290 	hws = &csk->tlshws;
291 	cdev = csk->cdev;
292 
293 	immdlen = sizeof(*sc) + sizeof(*sc_memrd);
294 	kaddr = keyid_to_addr(cdev->kmap.start, hws->txkey);
295 	sc = (struct ulptx_idata *)__skb_push(skb, immdlen);
296 	if (sc) {
297 		sc->cmd_more = htonl(ULPTX_CMD_V(ULP_TX_SC_NOOP));
298 		sc->len = htonl(0);
299 		sc_memrd = (struct ulptx_sc_memrd *)(sc + 1);
300 		sc_memrd->cmd_to_len =
301 				htonl(ULPTX_CMD_V(ULP_TX_SC_MEMRD) |
302 				ULP_TX_SC_MORE_V(1) |
303 				ULPTX_LEN16_V(hws->keylen >> 4));
304 		sc_memrd->addr = htonl(kaddr);
305 	}
306 }
307 
tlstx_incr_seqnum(struct chtls_hws * hws)308 static u64 tlstx_incr_seqnum(struct chtls_hws *hws)
309 {
310 	return hws->tx_seq_no++;
311 }
312 
is_sg_request(const struct sk_buff * skb)313 static bool is_sg_request(const struct sk_buff *skb)
314 {
315 	return skb->peeked ||
316 		(skb->len > MAX_IMM_ULPTX_WR_LEN);
317 }
318 
319 /*
320  * Returns true if an sk_buff carries urgent data.
321  */
skb_urgent(struct sk_buff * skb)322 static bool skb_urgent(struct sk_buff *skb)
323 {
324 	return ULP_SKB_CB(skb)->flags & ULPCB_FLAG_URG;
325 }
326 
327 /* TLS content type for CPL SFO */
tls_content_type(unsigned char content_type)328 static unsigned char tls_content_type(unsigned char content_type)
329 {
330 	switch (content_type) {
331 	case TLS_HDR_TYPE_CCS:
332 		return CPL_TX_TLS_SFO_TYPE_CCS;
333 	case TLS_HDR_TYPE_ALERT:
334 		return CPL_TX_TLS_SFO_TYPE_ALERT;
335 	case TLS_HDR_TYPE_HANDSHAKE:
336 		return CPL_TX_TLS_SFO_TYPE_HANDSHAKE;
337 	case TLS_HDR_TYPE_HEARTBEAT:
338 		return CPL_TX_TLS_SFO_TYPE_HEARTBEAT;
339 	}
340 	return CPL_TX_TLS_SFO_TYPE_DATA;
341 }
342 
/*
 * Prepend the FW_TLSTX_DATA_WR work request header and the CPL_TX_TLS_SFO
 * header to @skb and fill both in.
 *
 * @sk:       offloaded socket
 * @skb:      buffer being sent; headers are pushed in front of its data
 * @dlen:     TLS payload length
 * @tls_immd: non-zero when the payload travels as immediate data
 * @credits:  value for the WR's LEN16 field
 * @expn:     expansion size added to @dlen for the WR's plen field
 * @pdus:     number of IVs/PDUs described by this WR
 *
 * Side effects: bumps the adapter's tls_pdu_tx counter, rewrites the low
 * seven bits of hws->scmd.seqno_numivs with @pdus, and advances
 * hws->tx_seq_no by one.
 */
static void tls_tx_data_wr(struct sock *sk, struct sk_buff *skb,
			   int dlen, int tls_immd, u32 credits,
			   int expn, int pdus)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	int iv_imm = skb_ulp_tls_iv_imm(skb);
	struct adapter *adap = netdev2adap(csk->egress_dev);
	struct chtls_hws *hws = &csk->tlshws;
	struct fw_tlstx_data_wr *wr;
	struct cpl_tx_tls_sfo *cpl;
	unsigned int ulp_flags;
	unsigned char sfo_type;
	int total_len = dlen + expn;
	int seg_len;
	int immd_len;

	/* A single segment never exceeds the maximum fragment size. */
	seg_len = (dlen < hws->mfs) ? dlen : hws->mfs;
	atomic_inc(&adap->chcr_stats.tls_pdu_tx);

	hws->scmd.seqno_numivs &= 0xffffff80;
	hws->scmd.seqno_numivs |= SCMD_NUM_IVS_V(pdus);

	/* The CPL goes in first so that the WR header ends up in front. */
	cpl = (struct cpl_tx_tls_sfo *)__skb_push(skb, sizeof(*cpl));
	wr = (struct fw_tlstx_data_wr *)__skb_push(skb, sizeof(*wr));

	immd_len = tls_immd ? seg_len : 0;
	wr->op_to_immdlen = htonl(FW_WR_OP_V(FW_TLSTX_DATA_WR) |
				  FW_TLSTX_DATA_WR_COMPL_V(1) |
				  FW_TLSTX_DATA_WR_IMMDLEN_V(immd_len));
	wr->flowid_len16 = htonl(FW_TLSTX_DATA_WR_FLOWID_V(csk->tid) |
				 FW_TLSTX_DATA_WR_LEN16_V(credits));

	ulp_flags = TX_ULP_MODE_V(ULP_MODE_TLS);
	if (is_sg_request(skb)) {
		ulp_flags |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F;
		if (!(tcp_sk(sk)->nonagle & TCP_NAGLE_OFF))
			ulp_flags |= FW_OFLD_TX_DATA_WR_SHOVE_F;
	}

	wr->lsodisable_to_flags =
		htonl(TX_ULP_MODE_V(ULP_MODE_TLS) |
		      FW_OFLD_TX_DATA_WR_URGENT_V(skb_urgent(skb)) |
		      T6_TX_FORCE_F | ulp_flags |
		      TX_SHOVE_V((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
				 skb_queue_empty(&csk->txq)));

	wr->ctxloc_to_exp =
		htonl(FW_TLSTX_DATA_WR_NUMIVS_V(pdus) |
		      FW_TLSTX_DATA_WR_EXP_V(expn) |
		      FW_TLSTX_DATA_WR_CTXLOC_V(CHTLS_KEY_CONTEXT_DDR) |
		      FW_TLSTX_DATA_WR_IVDSGL_V(!iv_imm) |
		      FW_TLSTX_DATA_WR_KEYSIZE_V(hws->keylen >> 4));

	/* Lengths and the per-connection limits cached in hws. */
	wr->plen = htonl(total_len);
	wr->mfs = htons(hws->mfs);
	wr->adjustedplen_pkd =
		htons(FW_TLSTX_DATA_WR_ADJUSTEDPLEN_V(hws->adjustlen));
	wr->expinplenmax_pkd =
		htons(FW_TLSTX_DATA_WR_EXPINPLENMAX_V(hws->expansion));
	wr->pdusinplenmax_pkd = FW_TLSTX_DATA_WR_PDUSINPLENMAX_V(hws->pdus);
	wr->r10 = 0;

	sfo_type = tls_content_type(ULP_SKB_CB(skb)->ulp.tls.type);
	cpl->op_to_seg_len = htonl(CPL_TX_TLS_SFO_OPCODE_V(CPL_TX_TLS_SFO) |
				   CPL_TX_TLS_SFO_DATA_TYPE_V(sfo_type) |
				   CPL_TX_TLS_SFO_CPL_LEN_V(2) |
				   CPL_TX_TLS_SFO_SEG_LEN_V(seg_len));
	cpl->pld_len = htonl(total_len - expn);

	/* Only heartbeat records carry their TLS header type in this field. */
	cpl->type_protover =
		htonl(CPL_TX_TLS_SFO_TYPE_V
		      ((sfo_type == CPL_TX_TLS_SFO_TYPE_HEARTBEAT) ?
		       TLS_HDR_TYPE_HEARTBEAT : 0) |
		      CPL_TX_TLS_SFO_PROTOVER_V(0));

	/* create the s-command */
	cpl->r1_lo = 0;
	cpl->seqno_numivs = cpu_to_be32(hws->scmd.seqno_numivs);
	cpl->ivgen_hdrlen = cpu_to_be32(hws->scmd.ivgen_hdrlen);
	cpl->scmd1 = cpu_to_be64(tlstx_incr_seqnum(hws));
}
441 
442 /*
443  * Calculate the TLS data expansion size
444  */
chtls_expansion_size(struct sock * sk,int data_len,int fullpdu,unsigned short * pducnt)445 static int chtls_expansion_size(struct sock *sk, int data_len,
446 				int fullpdu,
447 				unsigned short *pducnt)
448 {
449 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
450 	struct chtls_hws *hws = &csk->tlshws;
451 	struct tls_scmd *scmd = &hws->scmd;
452 	int fragsize = hws->mfs;
453 	int expnsize = 0;
454 	int fragleft;
455 	int fragcnt;
456 	int expppdu;
457 
458 	if (SCMD_CIPH_MODE_G(scmd->seqno_numivs) ==
459 	    SCMD_CIPH_MODE_AES_GCM) {
460 		expppdu = GCM_TAG_SIZE + AEAD_EXPLICIT_DATA_SIZE +
461 			  TLS_HEADER_LENGTH;
462 
463 		if (fullpdu) {
464 			*pducnt = data_len / (expppdu + fragsize);
465 			if (*pducnt > 32)
466 				*pducnt = 32;
467 			else if (!*pducnt)
468 				*pducnt = 1;
469 			expnsize = (*pducnt) * expppdu;
470 			return expnsize;
471 		}
472 		fragcnt = (data_len / fragsize);
473 		expnsize =  fragcnt * expppdu;
474 		fragleft = data_len % fragsize;
475 		if (fragleft > 0)
476 			expnsize += expppdu;
477 	}
478 	return expnsize;
479 }
480 
481 /* WR with IV, KEY and CPL SFO added */
make_tlstx_data_wr(struct sock * sk,struct sk_buff * skb,int tls_tx_imm,int tls_len,u32 credits)482 static void make_tlstx_data_wr(struct sock *sk, struct sk_buff *skb,
483 			       int tls_tx_imm, int tls_len, u32 credits)
484 {
485 	unsigned short pdus_per_ulp = 0;
486 	struct chtls_sock *csk;
487 	struct chtls_hws *hws;
488 	int expn_sz;
489 	int pdus;
490 
491 	csk = rcu_dereference_sk_user_data(sk);
492 	hws = &csk->tlshws;
493 	pdus = DIV_ROUND_UP(tls_len, hws->mfs);
494 	expn_sz = chtls_expansion_size(sk, tls_len, 0, NULL);
495 	if (!hws->compute) {
496 		hws->expansion = chtls_expansion_size(sk,
497 						      hws->fcplenmax,
498 						      1, &pdus_per_ulp);
499 		hws->pdus = pdus_per_ulp;
500 		hws->adjustlen = hws->pdus *
501 			((hws->expansion / hws->pdus) + hws->mfs);
502 		hws->compute = 1;
503 	}
504 	if (tls_copy_ivs(sk, skb))
505 		return;
506 	tls_copy_tx_key(sk, skb);
507 	tls_tx_data_wr(sk, skb, tls_len, tls_tx_imm, credits, expn_sz, pdus);
508 	hws->tx_seq_no += (pdus - 1);
509 }
510 
/*
 * Prepend and fill in a FW_OFLD_TX_DATA_WR header for a plain (non-TLS)
 * data skb.
 *
 * @immdlen: immediate data length for the WR
 * @len:     payload length written to the WR's plen field
 * @credits: value for the WR's LEN16 field
 * @compl:   value for the WR's completion-request field
 *
 * The scatter/gather and shove decisions are taken after the header has
 * been pushed, so they see the skb length including the header.
 */
static void make_tx_data_wr(struct sock *sk, struct sk_buff *skb,
			    unsigned int immdlen, int len,
			    u32 credits, u32 compl)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	const unsigned int op = FW_OFLD_TX_DATA_WR;
	struct fw_ofld_tx_data_wr *wr;
	unsigned int ulp_flags;

	wr = (struct fw_ofld_tx_data_wr *)__skb_push(skb, sizeof(*wr));
	wr->op_to_immdlen = htonl(WR_OP_V(op) |
				  FW_WR_COMPL_V(compl) |
				  FW_WR_IMMDLEN_V(immdlen));
	wr->flowid_len16 = htonl(FW_WR_FLOWID_V(csk->tid) |
				 FW_WR_LEN16_V(credits));

	ulp_flags = TX_ULP_MODE_V(csk->ulp_mode);
	if (is_sg_request(skb)) {
		ulp_flags |= FW_OFLD_TX_DATA_WR_ALIGNPLD_F;
		if (!(tcp_sk(sk)->nonagle & TCP_NAGLE_OFF))
			ulp_flags |= FW_OFLD_TX_DATA_WR_SHOVE_F;
	}

	wr->tunnel_to_proxy =
		htonl(ulp_flags |
		      FW_OFLD_TX_DATA_WR_URGENT_V(skb_urgent(skb)) |
		      FW_OFLD_TX_DATA_WR_SHOVE_V
		      ((!csk_flag(sk, CSK_TX_MORE_DATA)) &&
		       skb_queue_empty(&csk->txq)));
	wr->plen = htonl(len);
}
543 
/*
 * Header overhead of a TLS TX work request for @skb: WR/CPL headers, key
 * reference and any immediate IVs.
 *
 * @size == true:  only compute the size.
 * @size == false: additionally, when the IVs are not immediate, bump the
 *   skb's nr_frags so that the IV fragment is counted (frags counted for
 *   IV dsgl).
 *
 * Either way ivs_size() refreshes the skb's "IV immediate" flag.
 */
static int chtls_wr_size(struct chtls_sock *csk, const struct sk_buff *skb,
			 bool size)
{
	int total = TLS_WR_CPL_LEN;

	total += KEY_ON_MEM_SZ;
	total += ivs_size(csk->sk, skb);

	if (!size && !skb_ulp_tls_iv_imm(skb))
		skb_shinfo(skb)->nr_frags++;

	return total;
}
562 
is_ofld_imm(struct chtls_sock * csk,const struct sk_buff * skb)563 static bool is_ofld_imm(struct chtls_sock *csk, const struct sk_buff *skb)
564 {
565 	int length = skb->len;
566 
567 	if (skb->peeked || skb->len > MAX_IMM_ULPTX_WR_LEN)
568 		return false;
569 
570 	if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
571 		/* Check TLS header len for Immediate */
572 		if (csk->ulp_mode == ULP_MODE_TLS &&
573 		    skb_ulp_tls_inline(skb))
574 			length += chtls_wr_size(csk, skb, true);
575 		else
576 			length += sizeof(struct fw_ofld_tx_data_wr);
577 
578 		return length <= MAX_IMM_OFLD_TX_DATA_WR_LEN;
579 	}
580 	return true;
581 }
582 
calc_tx_flits(const struct sk_buff * skb,unsigned int immdlen)583 static unsigned int calc_tx_flits(const struct sk_buff *skb,
584 				  unsigned int immdlen)
585 {
586 	unsigned int flits, cnt;
587 
588 	flits = immdlen / 8;   /* headers */
589 	cnt = skb_shinfo(skb)->nr_frags;
590 	if (skb_tail_pointer(skb) != skb_transport_header(skb))
591 		cnt++;
592 	return flits + sgl_len(cnt);
593 }
594 
/*
 * ARP failure handler installed by chtls_push_frames(): simply drops the
 * skb.  @handle is unused.
 */
static void arp_failure_discard(void *handle, struct sk_buff *skb)
{
	kfree_skb(skb);
}
599 
chtls_push_frames(struct chtls_sock * csk,int comp)600 int chtls_push_frames(struct chtls_sock *csk, int comp)
601 {
602 	struct chtls_hws *hws = &csk->tlshws;
603 	struct tcp_sock *tp;
604 	struct sk_buff *skb;
605 	int total_size = 0;
606 	struct sock *sk;
607 	int wr_size;
608 
609 	wr_size = sizeof(struct fw_ofld_tx_data_wr);
610 	sk = csk->sk;
611 	tp = tcp_sk(sk);
612 
613 	if (unlikely(sk_in_state(sk, TCPF_SYN_SENT | TCPF_CLOSE)))
614 		return 0;
615 
616 	if (unlikely(csk_flag(sk, CSK_ABORT_SHUTDOWN)))
617 		return 0;
618 
619 	while (csk->wr_credits && (skb = skb_peek(&csk->txq)) &&
620 	       (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_HOLD) ||
621 		skb_queue_len(&csk->txq) > 1)) {
622 		unsigned int credit_len = skb->len;
623 		unsigned int credits_needed;
624 		unsigned int completion = 0;
625 		int tls_len = skb->len;/* TLS data len before IV/key */
626 		unsigned int immdlen;
627 		int len = skb->len;    /* length [ulp bytes] inserted by hw */
628 		int flowclen16 = 0;
629 		int tls_tx_imm = 0;
630 
631 		immdlen = skb->len;
632 		if (!is_ofld_imm(csk, skb)) {
633 			immdlen = skb_transport_offset(skb);
634 			if (skb_ulp_tls_inline(skb))
635 				wr_size = chtls_wr_size(csk, skb, false);
636 			credit_len = 8 * calc_tx_flits(skb, immdlen);
637 		} else {
638 			if (skb_ulp_tls_inline(skb)) {
639 				wr_size = chtls_wr_size(csk, skb, false);
640 				tls_tx_imm = 1;
641 			}
642 		}
643 		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR))
644 			credit_len += wr_size;
645 		credits_needed = DIV_ROUND_UP(credit_len, 16);
646 		if (!csk_flag_nochk(csk, CSK_TX_DATA_SENT)) {
647 			flowclen16 = send_tx_flowc_wr(sk, 1, tp->snd_nxt,
648 						      tp->rcv_nxt);
649 			if (flowclen16 <= 0)
650 				break;
651 			csk->wr_credits -= flowclen16;
652 			csk->wr_unacked += flowclen16;
653 			csk->wr_nondata += flowclen16;
654 			csk_set_flag(csk, CSK_TX_DATA_SENT);
655 		}
656 
657 		if (csk->wr_credits < credits_needed) {
658 			if (skb_ulp_tls_inline(skb) &&
659 			    !skb_ulp_tls_iv_imm(skb))
660 				skb_shinfo(skb)->nr_frags--;
661 			break;
662 		}
663 
664 		__skb_unlink(skb, &csk->txq);
665 		skb_set_queue_mapping(skb, (csk->txq_idx << 1) |
666 				      CPL_PRIORITY_DATA);
667 		if (hws->ofld)
668 			hws->txqid = (skb->queue_mapping >> 1);
669 		skb->csum = (__force __wsum)(credits_needed + csk->wr_nondata);
670 		csk->wr_credits -= credits_needed;
671 		csk->wr_unacked += credits_needed;
672 		csk->wr_nondata = 0;
673 		enqueue_wr(csk, skb);
674 
675 		if (likely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NEED_HDR)) {
676 			if ((comp && csk->wr_unacked == credits_needed) ||
677 			    (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) ||
678 			    csk->wr_unacked >= csk->wr_max_credits / 2) {
679 				completion = 1;
680 				csk->wr_unacked = 0;
681 			}
682 			if (skb_ulp_tls_inline(skb))
683 				make_tlstx_data_wr(sk, skb, tls_tx_imm,
684 						   tls_len, credits_needed);
685 			else
686 				make_tx_data_wr(sk, skb, immdlen, len,
687 						credits_needed, completion);
688 			tp->snd_nxt += len;
689 			tp->lsndtime = tcp_time_stamp(tp);
690 			if (completion)
691 				ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_NEED_HDR;
692 		} else {
693 			struct cpl_close_con_req *req = cplhdr(skb);
694 			unsigned int cmd  = CPL_OPCODE_G(ntohl
695 					     (OPCODE_TID(req)));
696 
697 			if (cmd == CPL_CLOSE_CON_REQ)
698 				csk_set_flag(csk,
699 					     CSK_CLOSE_CON_REQUESTED);
700 
701 			if ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_COMPL) &&
702 			    (csk->wr_unacked >= csk->wr_max_credits / 2)) {
703 				req->wr.wr_hi |= htonl(FW_WR_COMPL_F);
704 				csk->wr_unacked = 0;
705 			}
706 		}
707 		total_size += skb->truesize;
708 		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_BARRIER)
709 			csk_set_flag(csk, CSK_TX_WAIT_IDLE);
710 		t4_set_arp_err_handler(skb, NULL, arp_failure_discard);
711 		cxgb4_l2t_send(csk->egress_dev, skb, csk->l2t_entry);
712 	}
713 	sk->sk_wmem_queued -= total_size;
714 	return total_size;
715 }
716 
mark_urg(struct tcp_sock * tp,int flags,struct sk_buff * skb)717 static void mark_urg(struct tcp_sock *tp, int flags,
718 		     struct sk_buff *skb)
719 {
720 	if (unlikely(flags & MSG_OOB)) {
721 		tp->snd_up = tp->write_seq;
722 		ULP_SKB_CB(skb)->flags = ULPCB_FLAG_URG |
723 					 ULPCB_FLAG_BARRIER |
724 					 ULPCB_FLAG_NO_APPEND |
725 					 ULPCB_FLAG_NEED_HDR;
726 	}
727 }
728 
729 /*
730  * Returns true if a connection should send more data to TCP engine
731  */
should_push(struct sock * sk)732 static bool should_push(struct sock *sk)
733 {
734 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
735 	struct chtls_dev *cdev = csk->cdev;
736 	struct tcp_sock *tp = tcp_sk(sk);
737 
738 	/*
739 	 * If we've released our offload resources there's nothing to do ...
740 	 */
741 	if (!cdev)
742 		return false;
743 
744 	/*
745 	 * If there aren't any work requests in flight, or there isn't enough
746 	 * data in flight, or Nagle is off then send the current TX_DATA
747 	 * otherwise hold it and wait to accumulate more data.
748 	 */
749 	return csk->wr_credits == csk->wr_max_credits ||
750 		(tp->nonagle & TCP_NAGLE_OFF);
751 }
752 
753 /*
754  * Returns true if a TCP socket is corked.
755  */
corked(const struct tcp_sock * tp,int flags)756 static bool corked(const struct tcp_sock *tp, int flags)
757 {
758 	return (flags & MSG_MORE) || (tp->nonagle & TCP_NAGLE_CORK);
759 }
760 
761 /*
762  * Returns true if a send should try to push new data.
763  */
send_should_push(struct sock * sk,int flags)764 static bool send_should_push(struct sock *sk, int flags)
765 {
766 	return should_push(sk) && !corked(tcp_sk(sk), flags);
767 }
768 
chtls_tcp_push(struct sock * sk,int flags)769 void chtls_tcp_push(struct sock *sk, int flags)
770 {
771 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
772 	int qlen = skb_queue_len(&csk->txq);
773 
774 	if (likely(qlen)) {
775 		struct sk_buff *skb = skb_peek_tail(&csk->txq);
776 		struct tcp_sock *tp = tcp_sk(sk);
777 
778 		mark_urg(tp, flags, skb);
779 
780 		if (!(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) &&
781 		    corked(tp, flags)) {
782 			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_HOLD;
783 			return;
784 		}
785 
786 		ULP_SKB_CB(skb)->flags &= ~ULPCB_FLAG_HOLD;
787 		if (qlen == 1 &&
788 		    ((ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
789 		     should_push(sk)))
790 			chtls_push_frames(csk, 1);
791 	}
792 }
793 
/*
 * Calculate the linear-area size for a new send sk_buff.
 *
 * @io_len: bytes the caller wants to send
 * @flags:  sendmsg flags (checked for corking)
 * @len:    header length passed to SKB_MAX_HEAD()
 *
 * - Data larger than SKB_MAX_HEAD(@len) would not fit in the main body
 *   anyway: return 0 so that only the header lives there and the payload
 *   goes into page fragments.
 * - If the data will not be pushed right away, return the maximum so more
 *   payload can be accumulated.
 * - Otherwise size the buffer tightly at @io_len.
 *
 * Note: MSS < PAGE_SIZE is not compensated for; it does not arise in
 * normal cases and only wastes memory when it does.
 */
static int select_size(struct sock *sk, int io_len, int flags, int len)
{
	const int head_max = SKB_MAX_HEAD(len);

	if (io_len > head_max)
		return 0;

	return send_should_push(sk, flags) ? io_len : head_max;
}
822 
skb_entail(struct sock * sk,struct sk_buff * skb,int flags)823 void skb_entail(struct sock *sk, struct sk_buff *skb, int flags)
824 {
825 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
826 	struct tcp_sock *tp = tcp_sk(sk);
827 
828 	ULP_SKB_CB(skb)->seq = tp->write_seq;
829 	ULP_SKB_CB(skb)->flags = flags;
830 	__skb_queue_tail(&csk->txq, skb);
831 	sk->sk_wmem_queued += skb->truesize;
832 
833 	if (TCP_PAGE(sk) && TCP_OFF(sk)) {
834 		put_page(TCP_PAGE(sk));
835 		TCP_PAGE(sk) = NULL;
836 		TCP_OFF(sk) = 0;
837 	}
838 }
839 
get_tx_skb(struct sock * sk,int size)840 static struct sk_buff *get_tx_skb(struct sock *sk, int size)
841 {
842 	struct sk_buff *skb;
843 
844 	skb = alloc_skb(size + TX_HEADER_LEN, sk->sk_allocation);
845 	if (likely(skb)) {
846 		skb_reserve(skb, TX_HEADER_LEN);
847 		skb_entail(sk, skb, ULPCB_FLAG_NEED_HDR);
848 		skb_reset_transport_header(skb);
849 	}
850 	return skb;
851 }
852 
/*
 * Allocate a TX skb for a TLS record.
 *
 * Headroom is reserved for the TLS WR headers, the key reference and the
 * maximum IV area for @size bytes.  With @zcopy the linear area holds only
 * that headroom; otherwise it also has room for @size payload bytes.  The
 * skb is queued with ULPCB_FLAG_NEED_HDR, flagged as TLS-offloaded, and
 * tagged with the connection's current record type.
 *
 * Returns the queued skb, or NULL when the allocation fails.
 */
static struct sk_buff *get_record_skb(struct sock *sk, int size, bool zcopy)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct sk_buff *rec_skb;

	rec_skb = alloc_skb((zcopy ? 0 : size) + TX_TLSHDR_LEN +
			    KEY_ON_MEM_SZ + max_ivs_size(sk, size),
			    sk->sk_allocation);
	if (unlikely(!rec_skb))
		return rec_skb;

	skb_reserve(rec_skb,
		    TX_TLSHDR_LEN + KEY_ON_MEM_SZ + max_ivs_size(sk, size));
	skb_entail(sk, rec_skb, ULPCB_FLAG_NEED_HDR);
	skb_reset_transport_header(rec_skb);
	ULP_SKB_CB(rec_skb)->ulp.tls.ofld = 1;
	ULP_SKB_CB(rec_skb)->ulp.tls.type = csk->tlshws.type;
	return rec_skb;
}
871 
tx_skb_finalize(struct sk_buff * skb)872 static void tx_skb_finalize(struct sk_buff *skb)
873 {
874 	struct ulp_skb_cb *cb = ULP_SKB_CB(skb);
875 
876 	if (!(cb->flags & ULPCB_FLAG_NO_HDR))
877 		cb->flags = ULPCB_FLAG_NEED_HDR;
878 	cb->flags |= ULPCB_FLAG_NO_APPEND;
879 }
880 
push_frames_if_head(struct sock * sk)881 static void push_frames_if_head(struct sock *sk)
882 {
883 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
884 
885 	if (skb_queue_len(&csk->txq) == 1)
886 		chtls_push_frames(csk, 1);
887 }
888 
chtls_skb_copy_to_page_nocache(struct sock * sk,struct iov_iter * from,struct sk_buff * skb,struct page * page,int off,int copy)889 static int chtls_skb_copy_to_page_nocache(struct sock *sk,
890 					  struct iov_iter *from,
891 					  struct sk_buff *skb,
892 					  struct page *page,
893 					  int off, int copy)
894 {
895 	int err;
896 
897 	err = skb_do_copy_data_nocache(sk, skb, from, page_address(page) +
898 				       off, copy, skb->len);
899 	if (err)
900 		return err;
901 
902 	skb->len             += copy;
903 	skb->data_len        += copy;
904 	skb->truesize        += copy;
905 	sk->sk_wmem_queued   += copy;
906 	return 0;
907 }
908 
909 /* Read TLS header to find content type and data length */
tls_header_read(struct tls_hdr * thdr,struct iov_iter * from)910 static int tls_header_read(struct tls_hdr *thdr, struct iov_iter *from)
911 {
912 	if (copy_from_iter(thdr, sizeof(*thdr), from) != sizeof(*thdr))
913 		return -EFAULT;
914 	return (__force int)cpu_to_be16(thdr->length);
915 }
916 
csk_mem_free(struct chtls_dev * cdev,struct sock * sk)917 static int csk_mem_free(struct chtls_dev *cdev, struct sock *sk)
918 {
919 	return (cdev->max_host_sndbuf - sk->sk_wmem_queued);
920 }
921 
csk_wait_memory(struct chtls_dev * cdev,struct sock * sk,long * timeo_p)922 static int csk_wait_memory(struct chtls_dev *cdev,
923 			   struct sock *sk, long *timeo_p)
924 {
925 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
926 	int sndbuf, err = 0;
927 	long current_timeo;
928 	long vm_wait = 0;
929 	bool noblock;
930 
931 	current_timeo = *timeo_p;
932 	noblock = (*timeo_p ? false : true);
933 	sndbuf = cdev->max_host_sndbuf;
934 	if (csk_mem_free(cdev, sk)) {
935 		current_timeo = (prandom_u32() % (HZ / 5)) + 2;
936 		vm_wait = (prandom_u32() % (HZ / 5)) + 2;
937 	}
938 
939 	add_wait_queue(sk_sleep(sk), &wait);
940 	while (1) {
941 		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
942 
943 		if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
944 			goto do_error;
945 		if (!*timeo_p) {
946 			if (noblock)
947 				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
948 			goto do_nonblock;
949 		}
950 		if (signal_pending(current))
951 			goto do_interrupted;
952 		sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
953 		if (csk_mem_free(cdev, sk) && !vm_wait)
954 			break;
955 
956 		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
957 		sk->sk_write_pending++;
958 		sk_wait_event(sk, &current_timeo, sk->sk_err ||
959 			      (sk->sk_shutdown & SEND_SHUTDOWN) ||
960 			      (csk_mem_free(cdev, sk) && !vm_wait), &wait);
961 		sk->sk_write_pending--;
962 
963 		if (vm_wait) {
964 			vm_wait -= current_timeo;
965 			current_timeo = *timeo_p;
966 			if (current_timeo != MAX_SCHEDULE_TIMEOUT) {
967 				current_timeo -= vm_wait;
968 				if (current_timeo < 0)
969 					current_timeo = 0;
970 			}
971 			vm_wait = 0;
972 		}
973 		*timeo_p = current_timeo;
974 	}
975 do_rm_wq:
976 	remove_wait_queue(sk_sleep(sk), &wait);
977 	return err;
978 do_error:
979 	err = -EPIPE;
980 	goto do_rm_wq;
981 do_nonblock:
982 	err = -EAGAIN;
983 	goto do_rm_wq;
984 do_interrupted:
985 	err = sock_intr_errno(*timeo_p);
986 	goto do_rm_wq;
987 }
988 
/*
 * chtls_sendmsg - queue user data for transmission on an offloaded socket.
 * @sk: socket to send on
 * @msg: message whose iterator supplies the data
 * @size: number of bytes the caller wants to send
 *
 * Data is appended to the skb at the tail of csk->txq, first into the skb's
 * linear tailroom and then into page fragments backed by TCP_PAGE(sk).  When
 * TLS TX is active and no record is in progress (tlshws.txleft == 0), a TLS
 * header is first read from @msg to learn the record size and type.
 *
 * Returns the number of bytes consumed from @msg (payload plus any TLS
 * headers read), or the negative value produced by sk_stream_error() when
 * nothing at all was consumed.
 */
int chtls_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_dev *cdev = csk->cdev;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sk_buff *skb;
	int mss, flags, err;
	int recordsz = 0;
	int copied = 0;
	int hdrlen = 0;
	long timeo;

	lock_sock(sk);
	flags = msg->msg_flags;
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
		err = sk_stream_wait_connect(sk, &timeo);
		if (err)
			goto out_err;
	}

	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
	err = -EPIPE;
	if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
		goto out_err;

	mss = csk->mss;
	csk_set_flag(csk, CSK_TX_MORE_DATA);

	while (msg_data_left(msg)) {
		int copy = 0;

		skb = skb_peek_tail(&csk->txq);
		if (skb) {
			copy = mss - skb->len;
			skb->ip_summed = CHECKSUM_UNNECESSARY;
		}
		if (!csk_mem_free(cdev, sk))
			goto wait_for_sndbuf;

		if (is_tls_tx(csk) && !csk->tlshws.txleft) {
			struct tls_hdr hdr;

			/*
			 * Start of a new TLS record: consume its header.
			 * NOTE(review): the result of tls_header_read() is
			 * used as the record length without an error check;
			 * confirm it cannot fail here.
			 */
			recordsz = tls_header_read(&hdr, &msg->msg_iter);
			size -= TLS_HEADER_LENGTH;
			hdrlen += TLS_HEADER_LENGTH;
			csk->tlshws.txleft = recordsz;
			csk->tlshws.type = hdr.type;
			if (skb)
				ULP_SKB_CB(skb)->ulp.tls.type = hdr.type;
		}

		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
		    copy <= 0) {
new_buf:
			if (skb) {
				tx_skb_finalize(skb);
				push_frames_if_head(sk);
			}

			if (is_tls_tx(csk)) {
				skb = get_record_skb(sk,
						     select_size(sk,
								 recordsz,
								 flags,
								 TX_TLSHDR_LEN),
						     false);
			} else {
				skb = get_tx_skb(sk,
						 select_size(sk, size, flags,
							     TX_HEADER_LEN));
			}
			if (unlikely(!skb))
				goto wait_for_memory;

			skb->ip_summed = CHECKSUM_UNNECESSARY;
			copy = mss;
		}
		if (copy > size)
			copy = size;

		if (skb_tailroom(skb) > 0) {
			/* Room left in the linear area: copy straight in. */
			copy = min(copy, skb_tailroom(skb));
			if (is_tls_tx(csk))
				copy = min_t(int, copy, csk->tlshws.txleft);
			err = skb_add_data_nocache(sk, skb,
						   &msg->msg_iter, copy);
			if (err)
				goto do_fault;
		} else {
			int i = skb_shinfo(skb)->nr_frags;
			struct page *page = TCP_PAGE(sk);
			int pg_size = PAGE_SIZE;
			int off = TCP_OFF(sk);
			bool merge;

			/*
			 * TCP_PAGE(sk) is legitimately NULL here (it is reset
			 * below once a page has been used up), so a missing
			 * page must fall through to the allocation further
			 * down instead of waiting for memory that nobody is
			 * going to provide.
			 */
			if (page)
				pg_size <<= compound_order(page);
			if (off < pg_size &&
			    skb_can_coalesce(skb, i, page, off)) {
				merge = true;
				goto copy;
			}
			merge = false;
			if (i == (is_tls_tx(csk) ? (MAX_SKB_FRAGS - 1) :
			    MAX_SKB_FRAGS))
				goto new_buf;

			if (page && off == pg_size) {
				/* cached page is full, drop our reference */
				put_page(page);
				TCP_PAGE(sk) = page = NULL;
				pg_size = PAGE_SIZE;
			}

			if (!page) {
				gfp_t gfp = sk->sk_allocation;
				int order = cdev->send_page_order;

				/* try a compound page first, then a single one */
				if (order) {
					page = alloc_pages(gfp | __GFP_COMP |
							   __GFP_NOWARN |
							   __GFP_NORETRY,
							   order);
					if (page)
						pg_size <<=
							compound_order(page);
				}
				if (!page) {
					page = alloc_page(gfp);
					pg_size = PAGE_SIZE;
				}
				if (!page)
					goto wait_for_memory;
				off = 0;
			}
copy:
			if (copy > pg_size - off)
				copy = pg_size - off;
			if (is_tls_tx(csk))
				copy = min_t(int, copy, csk->tlshws.txleft);

			err = chtls_skb_copy_to_page_nocache(sk, &msg->msg_iter,
							     skb, page,
							     off, copy);
			if (unlikely(err)) {
				/* keep a freshly allocated page for later use */
				if (!TCP_PAGE(sk)) {
					TCP_PAGE(sk) = page;
					TCP_OFF(sk) = 0;
				}
				goto do_fault;
			}
			/* Update the skb. */
			if (merge) {
				skb_shinfo(skb)->frags[i - 1].size += copy;
			} else {
				skb_fill_page_desc(skb, i, page, off, copy);
				if (off + copy < pg_size) {
					/* space left keep page */
					get_page(page);
					TCP_PAGE(sk) = page;
				} else {
					TCP_PAGE(sk) = NULL;
				}
			}
			TCP_OFF(sk) = off + copy;
		}
		if (unlikely(skb->len == mss))
			tx_skb_finalize(skb);
		tp->write_seq += copy;
		copied += copy;
		size -= copy;

		if (is_tls_tx(csk))
			csk->tlshws.txleft -= copy;

		if (corked(tp, flags) &&
		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;

		if (size == 0)
			goto out;

		if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND)
			push_frames_if_head(sk);
		continue;
wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		err = csk_wait_memory(cdev, sk, &timeo);
		if (err)
			goto do_error;
	}
out:
	csk_reset_flag(csk, CSK_TX_MORE_DATA);
	if (copied)
		chtls_tcp_push(sk, flags);
done:
	release_sock(sk);
	return copied + hdrlen;
do_fault:
	if (!skb->len) {
		/* nothing made it into this skb, take it back off the queue */
		__skb_unlink(skb, &csk->txq);
		sk->sk_wmem_queued -= skb->truesize;
		__kfree_skb(skb);
	}
do_error:
	/*
	 * A TLS header already pulled from the iterator counts as progress:
	 * report it as a short write.  Otherwise the error path below would
	 * return "errno + hdrlen", i.e. a corrupted error code.
	 */
	if (copied || hdrlen)
		goto out;
out_err:
	if (csk_conn_inline(csk))
		csk_reset_flag(csk, CSK_TX_MORE_DATA);
	copied = sk_stream_error(sk, flags, err);
	goto done;
}
1206 
/*
 * chtls_sendpage - queue part of a page for transmission without copying.
 * @sk: socket to send on
 * @page: page holding the data
 * @offset: offset of the data within @page
 * @size: number of bytes to send
 * @flags: MSG_* flags
 *
 * The page is attached as a fragment to the skb at the tail of csk->txq
 * (coalescing with the previous fragment when possible); a new skb is
 * started once the current one is full or has no free fragment slot.
 *
 * Returns the number of bytes queued, or the negative value produced by
 * sk_stream_error() if nothing was queued.
 */
int chtls_sendpage(struct sock *sk, struct page *page,
		   int offset, size_t size, int flags)
{
	struct chtls_sock *csk;
	struct chtls_dev *cdev;
	int mss, err, copied;
	struct tcp_sock *tp;
	long timeo;

	tp = tcp_sk(sk);
	copied = 0;
	csk = rcu_dereference_sk_user_data(sk);
	cdev = csk->cdev;
	/*
	 * Every exit path below ends in release_sock(), and the body touches
	 * the tx queue and socket accounting, so the socket lock must be
	 * taken here, as chtls_sendmsg() does.
	 */
	lock_sock(sk);
	timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);

	/*
	 * NOTE(review): the wait is issued unconditionally and its result is
	 * only honoured when the socket is not ESTABLISHED/CLOSE_WAIT;
	 * chtls_sendmsg() checks the state first.  Left as is.
	 */
	err = sk_stream_wait_connect(sk, &timeo);
	if (!sk_in_state(sk, TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
	    err != 0)
		goto out_err;

	mss = csk->mss;
	csk_set_flag(csk, CSK_TX_MORE_DATA);

	while (size > 0) {
		struct sk_buff *skb = skb_peek_tail(&csk->txq);
		int copy, i;

		if (!skb || (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND) ||
		    (copy = mss - skb->len) <= 0) {
new_buf:
			if (!csk_mem_free(cdev, sk))
				goto wait_for_sndbuf;

			if (is_tls_tx(csk)) {
				skb = get_record_skb(sk,
						     select_size(sk, size,
								 flags,
								 TX_TLSHDR_LEN),
						     true);
			} else {
				skb = get_tx_skb(sk, 0);
			}
			if (!skb)
				goto wait_for_memory;
			copy = mss;
		}
		if (copy > size)
			copy = size;

		i = skb_shinfo(skb)->nr_frags;
		if (skb_can_coalesce(skb, i, page, offset)) {
			/* contiguous with the last fragment, just extend it */
			skb_shinfo(skb)->frags[i - 1].size += copy;
		} else if (i < MAX_SKB_FRAGS) {
			get_page(page);
			skb_fill_page_desc(skb, i, page, offset, copy);
		} else {
			/* no fragment slot left, close this skb and retry */
			tx_skb_finalize(skb);
			push_frames_if_head(sk);
			goto new_buf;
		}

		skb->len += copy;
		if (skb->len == mss)
			tx_skb_finalize(skb);
		skb->data_len += copy;
		skb->truesize += copy;
		sk->sk_wmem_queued += copy;
		tp->write_seq += copy;
		copied += copy;
		offset += copy;
		size -= copy;

		if (corked(tp, flags) &&
		    (sk_stream_wspace(sk) < sk_stream_min_wspace(sk)))
			ULP_SKB_CB(skb)->flags |= ULPCB_FLAG_NO_APPEND;

		if (!size)
			break;

		if (unlikely(ULP_SKB_CB(skb)->flags & ULPCB_FLAG_NO_APPEND))
			push_frames_if_head(sk);
		continue;
wait_for_sndbuf:
		set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
wait_for_memory:
		err = csk_wait_memory(cdev, sk, &timeo);
		if (err)
			goto do_error;
	}
out:
	csk_reset_flag(csk, CSK_TX_MORE_DATA);
	if (copied)
		chtls_tcp_push(sk, flags);
done:
	release_sock(sk);
	return copied;

do_error:
	if (copied)
		goto out;

out_err:
	if (csk_conn_inline(csk))
		csk_reset_flag(csk, CSK_TX_MORE_DATA);
	copied = sk_stream_error(sk, flags, err);
	goto done;
}
1314 
chtls_select_window(struct sock * sk)1315 static void chtls_select_window(struct sock *sk)
1316 {
1317 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1318 	struct tcp_sock *tp = tcp_sk(sk);
1319 	unsigned int wnd = tp->rcv_wnd;
1320 
1321 	wnd = max_t(unsigned int, wnd, tcp_full_space(sk));
1322 	wnd = max_t(unsigned int, MIN_RCV_WND, wnd);
1323 
1324 	if (wnd > MAX_RCV_WND)
1325 		wnd = MAX_RCV_WND;
1326 
1327 /*
1328  * Check if we need to grow the receive window in response to an increase in
1329  * the socket's receive buffer size.  Some applications increase the buffer
1330  * size dynamically and rely on the window to grow accordingly.
1331  */
1332 
1333 	if (wnd > tp->rcv_wnd) {
1334 		tp->rcv_wup -= wnd - tp->rcv_wnd;
1335 		tp->rcv_wnd = wnd;
1336 		/* Mark the receive window as updated */
1337 		csk_reset_flag(csk, CSK_UPDATE_RCV_WND);
1338 	}
1339 }
1340 
1341 /*
1342  * Send RX credits through an RX_DATA_ACK CPL message.  We are permitted
1343  * to return without sending the message in case we cannot allocate
1344  * an sk_buff.  Returns the number of credits sent.
1345  */
send_rx_credits(struct chtls_sock * csk,u32 credits)1346 static u32 send_rx_credits(struct chtls_sock *csk, u32 credits)
1347 {
1348 	struct cpl_rx_data_ack *req;
1349 	struct sk_buff *skb;
1350 
1351 	skb = alloc_skb(sizeof(*req), GFP_ATOMIC);
1352 	if (!skb)
1353 		return 0;
1354 	__skb_put(skb, sizeof(*req));
1355 	req = (struct cpl_rx_data_ack *)skb->head;
1356 
1357 	set_wr_txq(skb, CPL_PRIORITY_ACK, csk->port_id);
1358 	INIT_TP_WR(req, csk->tid);
1359 	OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK,
1360 						    csk->tid));
1361 	req->credit_dack = cpu_to_be32(RX_CREDITS_V(credits) |
1362 				       RX_FORCE_ACK_F);
1363 	cxgb4_ofld_send(csk->cdev->ports[csk->port_id], skb);
1364 	return credits;
1365 }
1366 
1367 #define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | \
1368 			     TCPF_FIN_WAIT1 | \
1369 			     TCPF_FIN_WAIT2)
1370 
1371 /*
1372  * Called after some received data has been read.  It returns RX credits
1373  * to the HW for the amount of data processed.
1374  */
chtls_cleanup_rbuf(struct sock * sk,int copied)1375 static void chtls_cleanup_rbuf(struct sock *sk, int copied)
1376 {
1377 	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
1378 	struct tcp_sock *tp;
1379 	int must_send;
1380 	u32 credits;
1381 	u32 thres;
1382 
1383 	thres = 15 * 1024;
1384 
1385 	if (!sk_in_state(sk, CREDIT_RETURN_STATE))
1386 		return;
1387 
1388 	chtls_select_window(sk);
1389 	tp = tcp_sk(sk);
1390 	credits = tp->copied_seq - tp->rcv_wup;
1391 	if (unlikely(!credits))
1392 		return;
1393 
1394 /*
1395  * For coalescing to work effectively ensure the receive window has
1396  * at least 16KB left.
1397  */
1398 	must_send = credits + 16384 >= tp->rcv_wnd;
1399 
1400 	if (must_send || credits >= thres)
1401 		tp->rcv_wup += send_rx_credits(csk, credits);
1402 }
1403 
/*
 * chtls_pt_recvmsg - receive path used when TLS RX is active on the socket.
 * @sk: socket to read from
 * @msg: destination message
 * @len: maximum number of bytes to return
 * @nonblock: non-zero for a non-blocking read
 * @flags: MSG_* flags
 * @addr_len: unused
 *
 * Called by chtls_recvmsg() with the socket lock already held; the lock is
 * released here before returning.  The read position inside the skb at the
 * head of the receive queue is tracked in hws->copied_seq, and
 * tp->copied_seq is advanced only when an skb has been fully consumed.
 *
 * Returns the number of bytes copied, or a negative errno if nothing was
 * copied.
 */
static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
			    int nonblock, int flags, int *addr_len)
{
	struct chtls_sock *csk = rcu_dereference_sk_user_data(sk);
	struct chtls_hws *hws = &csk->tlshws;
	struct tcp_sock *tp = tcp_sk(sk);
	unsigned long avail;
	int buffers_freed;
	int copied = 0;
	int target;
	long timeo;

	buffers_freed = 0;

	timeo = sock_rcvtimeo(sk, nonblock);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
		chtls_cleanup_rbuf(sk, copied);

	do {
		struct sk_buff *skb;
		u32 offset = 0;

		if (unlikely(tp->urg_data &&
			     tp->urg_seq == tp->copied_seq)) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) :
					-EAGAIN;
				break;
			}
		}
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			goto found_ok_skb;
		/* Nothing to read: use the time to push pending TX frames. */
		if (csk->wr_credits &&
		    skb_queue_len(&csk->txq) &&
		    chtls_push_frames(csk, csk->wr_credits ==
				      csk->wr_max_credits))
			sk->sk_write_space(sk);

		if (copied >= target && !sk->sk_backlog.tail)
			break;

		if (copied) {
			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    signal_pending(current))
				break;

			if (!timeo)
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				copied = -ENOTCONN;
				break;
			}
			if (!timeo) {
				copied = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}
		if (sk->sk_backlog.tail) {
			/* Dropping and retaking the lock runs the backlog. */
			release_sock(sk);
			lock_sock(sk);
			chtls_cleanup_rbuf(sk, copied);
			continue;
		}

		if (copied >= target)
			break;
		chtls_cleanup_rbuf(sk, copied);
		sk_wait_data(sk, &timeo, NULL);
		continue;
found_ok_skb:
		if (!skb->len) {
			/* Zero-length skb: discard it. */
			skb_dst_set(skb, NULL);
			__skb_unlink(skb, &sk->sk_receive_queue);
			kfree_skb(skb);

			if (!copied && !timeo) {
				copied = -EAGAIN;
				break;
			}

			if (copied < target) {
				release_sock(sk);
				lock_sock(sk);
				continue;
			}
			break;
		}
		offset = hws->copied_seq;
		avail = skb->len - offset;
		if (len < avail)
			avail = len;

		if (unlikely(tp->urg_data)) {
			u32 urg_offset = tp->urg_seq - tp->copied_seq;

			if (urg_offset < avail) {
				if (urg_offset) {
					/* stop short of the urgent data */
					avail = urg_offset;
				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
					/* First byte is urgent, skip */
					tp->copied_seq++;
					offset++;
					avail--;
					if (!avail)
						goto skip_copy;
				}
			}
		}
		if (skb_copy_datagram_msg(skb, offset, msg, avail)) {
			/*
			 * The copy faulted: stop here in every case.  Bytes
			 * that never reached user space must not be counted
			 * as copied or consumed from the skb.
			 */
			if (!copied)
				copied = -EFAULT;
			break;
		}

		copied += avail;
		len -= avail;
		hws->copied_seq += avail;
skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
			tp->urg_data = 0;

		if ((avail + offset) >= skb->len) {
			/* skb fully consumed: account for it and free it */
			if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
				tp->copied_seq += skb->len;
				hws->rcvpld = skb->hdr_len;
			} else {
				tp->copied_seq += hws->rcvpld;
			}
			chtls_free_skb(sk, skb);
			buffers_freed++;
			hws->copied_seq = 0;
			if (copied >= target &&
			    !skb_peek(&sk->sk_receive_queue))
				break;
		}
	} while (len > 0);

	if (buffers_freed)
		chtls_cleanup_rbuf(sk, copied);
	release_sock(sk);
	return copied;
}
1571 
1572 /*
1573  * Peek at data in a socket's receive buffer.
1574  */
peekmsg(struct sock * sk,struct msghdr * msg,size_t len,int nonblock,int flags)1575 static int peekmsg(struct sock *sk, struct msghdr *msg,
1576 		   size_t len, int nonblock, int flags)
1577 {
1578 	struct tcp_sock *tp = tcp_sk(sk);
1579 	u32 peek_seq, offset;
1580 	struct sk_buff *skb;
1581 	int copied = 0;
1582 	size_t avail;          /* amount of available data in current skb */
1583 	long timeo;
1584 
1585 	lock_sock(sk);
1586 	timeo = sock_rcvtimeo(sk, nonblock);
1587 	peek_seq = tp->copied_seq;
1588 
1589 	do {
1590 		if (unlikely(tp->urg_data && tp->urg_seq == peek_seq)) {
1591 			if (copied)
1592 				break;
1593 			if (signal_pending(current)) {
1594 				copied = timeo ? sock_intr_errno(timeo) :
1595 				-EAGAIN;
1596 				break;
1597 			}
1598 		}
1599 
1600 		skb_queue_walk(&sk->sk_receive_queue, skb) {
1601 			offset = peek_seq - ULP_SKB_CB(skb)->seq;
1602 			if (offset < skb->len)
1603 				goto found_ok_skb;
1604 		}
1605 
1606 		/* empty receive queue */
1607 		if (copied)
1608 			break;
1609 		if (sock_flag(sk, SOCK_DONE))
1610 			break;
1611 		if (sk->sk_err) {
1612 			copied = sock_error(sk);
1613 			break;
1614 		}
1615 		if (sk->sk_shutdown & RCV_SHUTDOWN)
1616 			break;
1617 		if (sk->sk_state == TCP_CLOSE) {
1618 			copied = -ENOTCONN;
1619 			break;
1620 		}
1621 		if (!timeo) {
1622 			copied = -EAGAIN;
1623 			break;
1624 		}
1625 		if (signal_pending(current)) {
1626 			copied = sock_intr_errno(timeo);
1627 			break;
1628 		}
1629 
1630 		if (sk->sk_backlog.tail) {
1631 			/* Do not sleep, just process backlog. */
1632 			release_sock(sk);
1633 			lock_sock(sk);
1634 		} else {
1635 			sk_wait_data(sk, &timeo, NULL);
1636 		}
1637 
1638 		if (unlikely(peek_seq != tp->copied_seq)) {
1639 			if (net_ratelimit())
1640 				pr_info("TCP(%s:%d), race in MSG_PEEK.\n",
1641 					current->comm, current->pid);
1642 			peek_seq = tp->copied_seq;
1643 		}
1644 		continue;
1645 
1646 found_ok_skb:
1647 		avail = skb->len - offset;
1648 		if (len < avail)
1649 			avail = len;
1650 		/*
1651 		 * Do we have urgent data here?  We need to skip over the
1652 		 * urgent byte.
1653 		 */
1654 		if (unlikely(tp->urg_data)) {
1655 			u32 urg_offset = tp->urg_seq - peek_seq;
1656 
1657 			if (urg_offset < avail) {
1658 				/*
1659 				 * The amount of data we are preparing to copy
1660 				 * contains urgent data.
1661 				 */
1662 				if (!urg_offset) { /* First byte is urgent */
1663 					if (!sock_flag(sk, SOCK_URGINLINE)) {
1664 						peek_seq++;
1665 						offset++;
1666 						avail--;
1667 					}
1668 					if (!avail)
1669 						continue;
1670 				} else {
1671 					/* stop short of the urgent data */
1672 					avail = urg_offset;
1673 				}
1674 			}
1675 		}
1676 
1677 		/*
1678 		 * If MSG_TRUNC is specified the data is discarded.
1679 		 */
1680 		if (likely(!(flags & MSG_TRUNC)))
1681 			if (skb_copy_datagram_msg(skb, offset, msg, len)) {
1682 				if (!copied) {
1683 					copied = -EFAULT;
1684 					break;
1685 				}
1686 			}
1687 		peek_seq += avail;
1688 		copied += avail;
1689 		len -= avail;
1690 	} while (len > 0);
1691 
1692 	release_sock(sk);
1693 	return copied;
1694 }
1695 
/*
 * chtls_recvmsg - receive data from an offloaded socket.
 * @sk: socket to read from
 * @msg: destination message
 * @len: maximum number of bytes to return
 * @nonblock: non-zero for a non-blocking read
 * @flags: MSG_* flags
 * @addr_len: passed through to the handlers this function delegates to
 *
 * MSG_OOB is handed to tcp_prot.recvmsg(), MSG_PEEK to peekmsg(), and
 * sockets with TLS RX active to chtls_pt_recvmsg() (which releases the
 * socket lock taken here).  Everything else is served from
 * sk->sk_receive_queue, advancing tp->copied_seq as data is consumed.
 *
 * Returns the number of bytes received, or a negative errno if nothing was
 * received.
 */
int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
		  int nonblock, int flags, int *addr_len)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct chtls_sock *csk;
	unsigned long avail;    /* amount of available data in current skb */
	int buffers_freed;
	int copied = 0;
	long timeo;
	int target;             /* Read at least this many bytes */

	buffers_freed = 0;

	if (unlikely(flags & MSG_OOB))
		return tcp_prot.recvmsg(sk, msg, len, nonblock, flags,
					addr_len);

	if (unlikely(flags & MSG_PEEK))
		return peekmsg(sk, msg, len, nonblock, flags);

	if (sk_can_busy_loop(sk) &&
	    skb_queue_empty(&sk->sk_receive_queue) &&
	    sk->sk_state == TCP_ESTABLISHED)
		sk_busy_loop(sk, nonblock);

	lock_sock(sk);
	csk = rcu_dereference_sk_user_data(sk);

	if (is_tls_rx(csk))
		return chtls_pt_recvmsg(sk, msg, len, nonblock,
					flags, addr_len);

	timeo = sock_rcvtimeo(sk, nonblock);
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);

	if (unlikely(csk_flag(sk, CSK_UPDATE_RCV_WND)))
		chtls_cleanup_rbuf(sk, copied);

	do {
		struct sk_buff *skb;
		u32 offset;

		if (unlikely(tp->urg_data && tp->urg_seq == tp->copied_seq)) {
			if (copied)
				break;
			if (signal_pending(current)) {
				copied = timeo ? sock_intr_errno(timeo) :
					-EAGAIN;
				break;
			}
		}

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			goto found_ok_skb;

		/* Nothing to read: use the time to push pending TX frames. */
		if (csk->wr_credits &&
		    skb_queue_len(&csk->txq) &&
		    chtls_push_frames(csk, csk->wr_credits ==
				      csk->wr_max_credits))
			sk->sk_write_space(sk);

		if (copied >= target && !sk->sk_backlog.tail)
			break;

		if (copied) {
			/*
			 * Also stop once the timeout is exhausted, as
			 * chtls_pt_recvmsg() does; otherwise a non-blocking
			 * read that is still below the low-water mark would
			 * keep spinning through sk_wait_data().
			 */
			if (sk->sk_err || sk->sk_state == TCP_CLOSE ||
			    (sk->sk_shutdown & RCV_SHUTDOWN) ||
			    !timeo ||
			    signal_pending(current))
				break;
		} else {
			if (sock_flag(sk, SOCK_DONE))
				break;
			if (sk->sk_err) {
				copied = sock_error(sk);
				break;
			}
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				break;
			if (sk->sk_state == TCP_CLOSE) {
				copied = -ENOTCONN;
				break;
			}
			if (!timeo) {
				copied = -EAGAIN;
				break;
			}
			if (signal_pending(current)) {
				copied = sock_intr_errno(timeo);
				break;
			}
		}

		if (sk->sk_backlog.tail) {
			/* Dropping and retaking the lock runs the backlog. */
			release_sock(sk);
			lock_sock(sk);
			chtls_cleanup_rbuf(sk, copied);
			continue;
		}

		if (copied >= target)
			break;
		chtls_cleanup_rbuf(sk, copied);
		sk_wait_data(sk, &timeo, NULL);
		continue;

found_ok_skb:
		if (!skb->len) {
			/* Zero-length skb: discard it. */
			chtls_kfree_skb(sk, skb);
			if (!copied && !timeo) {
				copied = -EAGAIN;
				break;
			}

			if (copied < target)
				continue;

			break;
		}

		offset = tp->copied_seq - ULP_SKB_CB(skb)->seq;
		avail = skb->len - offset;
		if (len < avail)
			avail = len;

		if (unlikely(tp->urg_data)) {
			u32 urg_offset = tp->urg_seq - tp->copied_seq;

			if (urg_offset < avail) {
				if (urg_offset) {
					/* stop short of the urgent data */
					avail = urg_offset;
				} else if (!sock_flag(sk, SOCK_URGINLINE)) {
					/* first byte is urgent, skip it */
					tp->copied_seq++;
					offset++;
					avail--;
					if (!avail)
						goto skip_copy;
				}
			}
		}

		if (likely(!(flags & MSG_TRUNC))) {
			if (skb_copy_datagram_msg(skb, offset,
						  msg, avail)) {
				/*
				 * The copy faulted: stop here in every case.
				 * Bytes that never reached user space must
				 * not be counted or consumed.
				 */
				if (!copied)
					copied = -EFAULT;
				break;
			}
		}

		tp->copied_seq += avail;
		copied += avail;
		len -= avail;

skip_copy:
		if (tp->urg_data && after(tp->copied_seq, tp->urg_seq))
			tp->urg_data = 0;

		if (avail + offset >= skb->len) {
			/* skb fully consumed */
			chtls_free_skb(sk, skb);
			buffers_freed++;

			if  (copied >= target &&
			     !skb_peek(&sk->sk_receive_queue))
				break;
		}
	} while (len > 0);

	if (buffers_freed)
		chtls_cleanup_rbuf(sk, copied);

	release_sock(sk);
	return copied;
}
1876