// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2021 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include "gve_utils.h"
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <linux/skbuff.h>

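/* Ring the queue's doorbell in BAR2 so the device picks up newly posted
 * descriptors. @val is the producer count (tx->req), written big-endian.
 */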
static inline void gve_tx_put_doorbell(struct gve_priv *priv,
				       struct gve_queue_resources *q_resources,
				       u32 val)
{
	iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
}

/* gvnic can only transmit from a Registered Segment.
 * We copy skb payloads into the registered segment before writing Tx
 * descriptors and ringing the Tx doorbell.
 *
 * gve_tx_fifo_* manages the Registered Segment as a FIFO - clients must
 * free allocations in the order they were allocated.
 */

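/* Map the queue page list (QPL) backing the FIFO into one contiguous
 * kernel virtual range so FIFO offsets can be addressed linearly.
 */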
static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP,
			  PAGE_KERNEL);
	if (unlikely(!fifo->base)) {
		netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n",
			  fifo->qpl->id);
		return -ENOMEM;
	}

	fifo->size = fifo->qpl->num_entries * PAGE_SIZE;
	atomic_set(&fifo->available, fifo->size);
	fifo->head = 0;
	return 0;
}

static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	WARN(atomic_read(&fifo->available) != fifo->size,
	     "Releasing non-empty fifo");

	vunmap(fifo->base);
}

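/* Returns the padding (in bytes) needed to skip ahead to the start of the
 * FIFO when an allocation of @bytes from the current head would reach or
 * cross the end of the FIFO, or 0 if it fits below the boundary.
 */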
static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo,
					  size_t bytes)
{
	return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
}

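/* Returns true if the FIFO currently has more than @bytes bytes available. */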
static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
{
	return (atomic_read(&fifo->available) <= bytes) ? false : true;
}

/* gve_tx_alloc_fifo - Allocate fragment(s) from Tx FIFO
 * @fifo: FIFO to allocate from
 * @bytes: Allocation size
 * @iov: Scatter-gather elements to fill with allocation fragment base/len
 *
 * Returns number of valid elements in iov[] or negative on error.
 *
 * Allocations from a given FIFO must be externally synchronized but concurrent
 * allocation and frees are allowed.
 */
static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
			     struct gve_tx_iovec iov[2])
{
	size_t overflow, padding;
	u32 aligned_head;
	int nfrags = 0;

	if (!bytes)
		return 0;

	/* This check happens before we know how much padding is needed to
	 * align to a cacheline boundary for the payload, but that is fine
	 * because the FIFO head always starts aligned, and the FIFO's
	 * boundaries are aligned, so if there is space for the data, there is
	 * space for the padding to the next alignment.
	 */
	WARN(!gve_tx_fifo_can_alloc(fifo, bytes),
	     "Reached %s when there's not enough space in the fifo", __func__);

	nfrags++;

	iov[0].iov_offset = fifo->head;
	iov[0].iov_len = bytes;
	fifo->head += bytes;

	if (fifo->head > fifo->size) {
		/* If the allocation did not fit in the tail fragment of the
		 * FIFO, also use the head fragment.
		 */
		nfrags++;
		overflow = fifo->head - fifo->size;
		iov[0].iov_len -= overflow;
		iov[1].iov_offset = 0;	/* Start of fifo */
		iov[1].iov_len = overflow;

		fifo->head = overflow;
	}

	/* Re-align to a cacheline boundary */
	aligned_head = L1_CACHE_ALIGN(fifo->head);
	padding = aligned_head - fifo->head;
	iov[nfrags - 1].iov_padding = padding;
	atomic_sub(bytes + padding, &fifo->available);
	fifo->head = aligned_head;

	if (fifo->head == fifo->size)
		fifo->head = 0;

	return nfrags;
}

/* gve_tx_free_fifo - Return space to Tx FIFO
 * @fifo: FIFO to return fragments to
 * @bytes: Bytes to free
 */
static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
{
	atomic_add(bytes, &fifo->available);
}

static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
			     u32 to_do, bool try_to_wake);

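/* Tear down one tx ring: flush any outstanding completions, release the
 * FIFO and its QPL when not in raw-addressing mode, and free the queue
 * resources, descriptor ring, and per-descriptor metadata.
 */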
static void gve_tx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t bytes;
	u32 slots;

	gve_tx_remove_from_block(priv, idx);
	slots = tx->mask + 1;
	gve_clean_tx_done(priv, tx, tx->req, false);
	netdev_tx_reset_queue(tx->netdev_txq);

	dma_free_coherent(hdev, sizeof(*tx->q_resources),
			  tx->q_resources, tx->q_resources_bus);
	tx->q_resources = NULL;

	if (!tx->raw_addressing) {
		gve_tx_fifo_release(priv, &tx->tx_fifo);
		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
		tx->tx_fifo.qpl = NULL;
	}

	bytes = sizeof(*tx->desc) * slots;
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;

	vfree(tx->info);
	tx->info = NULL;

	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}

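/* Set up one tx ring: allocate the per-descriptor metadata and descriptor
 * ring, assign and map a tx QPL when not in raw-addressing mode, and
 * allocate the DMA-coherent queue resources block (doorbell and counter
 * indices).
 */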
static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots = priv->tx_desc_cnt;
	size_t bytes;

	/* Make sure everything is zeroed to start */
	memset(tx, 0, sizeof(*tx));
	tx->q_num = idx;

	tx->mask = slots - 1;

	/* alloc metadata */
	tx->info = vzalloc(sizeof(*tx->info) * slots);
	if (!tx->info)
		return -ENOMEM;

	/* alloc tx queue */
	bytes = sizeof(*tx->desc) * slots;
	tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
	if (!tx->desc)
		goto abort_with_info;

	tx->raw_addressing = priv->queue_format == GVE_GQI_RDA_FORMAT;
	tx->dev = &priv->pdev->dev;
	if (!tx->raw_addressing) {
		tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);
		if (!tx->tx_fifo.qpl)
			goto abort_with_desc;
		/* map Tx FIFO */
		if (gve_tx_fifo_init(priv, &tx->tx_fifo))
			goto abort_with_qpl;
	}

	tx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*tx->q_resources),
				   &tx->q_resources_bus,
				   GFP_KERNEL);
	if (!tx->q_resources)
		goto abort_with_fifo;

	netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
		  (unsigned long)tx->bus);
	tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
	gve_tx_add_to_block(priv, idx);

	return 0;

abort_with_fifo:
	if (!tx->raw_addressing)
		gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_qpl:
	if (!tx->raw_addressing)
		gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
abort_with_desc:
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;
abort_with_info:
	vfree(tx->info);
	tx->info = NULL;
	return -ENOMEM;
}

int gve_tx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_tx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc tx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Clean up any rings that were allocated before the error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_tx_free_ring(priv, j);
	}
	return err;
}

void gve_tx_free_rings_gqi(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_tx_free_ring(priv, i);
}

/* gve_tx_avail - Calculates the number of slots available in the ring
 * @tx: tx ring to check
 *
 * Returns the number of slots available
 *
 * The capacity of the queue is mask + 1. We don't need to reserve an entry.
 */
static inline u32 gve_tx_avail(struct gve_tx_ring *tx)
{
	return tx->mask + 1 - (tx->req - tx->done);
}

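/* Number of FIFO bytes needed for this skb: the full payload plus the
 * padding needed to keep the header from being split at the FIFO boundary
 * and to keep it cacheline aligned.
 */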
static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
					      struct sk_buff *skb)
{
	int pad_bytes, align_hdr_pad;
	int bytes;
	int hlen;

	hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
				 tcp_hdrlen(skb) : skb_headlen(skb);

	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
						   hlen);
	/* We need to take into account the header alignment padding. */
	align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen;
	bytes = align_hdr_pad + pad_bytes + skb->len;

	return bytes;
}

/* The most descriptors we could need is MAX_SKB_FRAGS + 3: 1 for each skb frag,
 * +1 for the skb linear portion, +1 for when the tcp hdr needs to be in its own
 * descriptor, and +1 if the payload wraps to the beginning of the FIFO.
 */
#define MAX_TX_DESC_NEEDED	(MAX_SKB_FRAGS + 3)
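/* Unmap a raw-addressing DMA mapping: the linear portion of an skb was
 * mapped with dma_map_single() (info->skb is set), page frags with
 * skb_frag_dma_map() (info->skb is NULL).
 */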
static void gve_tx_unmap_buf(struct device *dev, struct gve_tx_buffer_state *info)
{
	if (info->skb) {
		dma_unmap_single(dev, dma_unmap_addr(&info->buf, dma),
				 dma_unmap_len(&info->buf, len),
				 DMA_TO_DEVICE);
		dma_unmap_len_set(&info->buf, len, 0);
	} else {
		dma_unmap_page(dev, dma_unmap_addr(&info->buf, dma),
			       dma_unmap_len(&info->buf, len),
			       DMA_TO_DEVICE);
		dma_unmap_len_set(&info->buf, len, 0);
	}
}

/* Check if sufficient resources (descriptor ring space, FIFO space) are
 * available to transmit the given number of bytes.
 */
static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
{
	bool can_alloc = true;

	if (!tx->raw_addressing)
		can_alloc = gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required);

	return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED && can_alloc);
}

/* Stops the queue if the skb cannot be transmitted. */
static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
{
	int bytes_required = 0;

	if (!tx->raw_addressing)
		bytes_required = gve_skb_fifo_bytes_required(tx, skb);

	if (likely(gve_can_tx(tx, bytes_required)))
		return 0;

	/* No space, so stop the queue */
	tx->stop_queue++;
	netif_tx_stop_queue(tx->netdev_txq);
	smp_mb();	/* sync with restarting queue in gve_clean_tx_done() */

	/* Now check for resources again, in case gve_clean_tx_done() freed
	 * resources after we checked and we stopped the queue after
	 * gve_clean_tx_done() checked.
	 *
	 * gve_maybe_stop_tx()			gve_clean_tx_done()
	 *   nsegs/can_alloc test failed
	 *					  gve_tx_free_fifo()
	 *					  if (tx queue stopped)
	 *					    netif_tx_queue_wake()
	 *   netif_tx_stop_queue()
	 *   Need to check again for space here!
	 */
	if (likely(!gve_can_tx(tx, bytes_required)))
		return -EBUSY;

	netif_tx_start_queue(tx->netdev_txq);
	tx->wake_queue++;
	return 0;
}

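/* Fill the packet (first) descriptor: checksum/TSO flags, number of
 * descriptors in the packet, total packet length, and the length and
 * address of the first buffer.
 */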
static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc,
				 struct sk_buff *skb, bool is_gso,
				 int l4_hdr_offset, u32 desc_cnt,
				 u16 hlen, u64 addr)
{
	/* l4_hdr_offset and csum_offset are in units of 16-bit words */
	if (is_gso) {
		pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM;
		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
		pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM;
		pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1;
		pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1;
	} else {
		pkt_desc->pkt.type_flags = GVE_TXD_STD;
		pkt_desc->pkt.l4_csum_offset = 0;
		pkt_desc->pkt.l4_hdr_offset = 0;
	}
	pkt_desc->pkt.desc_cnt = desc_cnt;
	pkt_desc->pkt.len = cpu_to_be16(skb->len);
	pkt_desc->pkt.seg_len = cpu_to_be16(hlen);
	pkt_desc->pkt.seg_addr = cpu_to_be64(addr);
}

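/* Fill a segment descriptor for an additional buffer; for GSO packets also
 * record the L3 header offset and the MSS.
 */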
static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc,
				 struct sk_buff *skb, bool is_gso,
				 u16 len, u64 addr)
{
	seg_desc->seg.type_flags = GVE_TXD_SEG;
	if (is_gso) {
		if (skb_is_gso_v6(skb))
			seg_desc->seg.type_flags |= GVE_TXSF_IPV6;
		seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1;
		seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size);
	}
	seg_desc->seg.seg_len = cpu_to_be16(len);
	seg_desc->seg.seg_addr = cpu_to_be64(addr);
}

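/* Sync every QPL page touched by a FIFO fragment for device access before
 * the descriptors referencing it are posted.
 */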
static void gve_dma_sync_for_device(struct device *dev, dma_addr_t *page_buses,
				    u64 iov_offset, u64 iov_len)
{
	u64 last_page = (iov_offset + iov_len - 1) / PAGE_SIZE;
	u64 first_page = iov_offset / PAGE_SIZE;
	u64 page;

	for (page = first_page; page <= last_page; page++)
		dma_sync_single_for_device(dev, page_buses[page], PAGE_SIZE, DMA_TO_DEVICE);
}

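/* QPL (copy) datapath: copy the skb into FIFO fragments, then write one
 * packet descriptor for the header plus one segment descriptor per payload
 * fragment. Returns the number of descriptors used.
 */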
static int gve_tx_add_skb_copy(struct gve_priv *priv, struct gve_tx_ring *tx, struct sk_buff *skb)
{
	int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset;
	union gve_tx_desc *pkt_desc, *seg_desc;
	struct gve_tx_buffer_state *info;
	bool is_gso = skb_is_gso(skb);
	u32 idx = tx->req & tx->mask;
	int payload_iov = 2;
	int copy_offset;
	u32 next_idx;
	int i;

	info = &tx->info[idx];
	pkt_desc = &tx->desc[idx];

	l4_hdr_offset = skb_checksum_start_offset(skb);
	/* If the skb is gso, then we want the tcp header in the first segment
	 * otherwise we want the linear portion of the skb (which will contain
	 * the checksum because skb->csum_start and skb->csum_offset are given
	 * relative to skb->head) in the first segment.
	 */
	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) :
			skb_headlen(skb);

	info->skb = skb;
	/* We don't want to split the header, so if necessary, pad to the end
	 * of the fifo and then put the header at the beginning of the fifo.
	 */
	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen);
	hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes,
				       &info->iov[0]);
	WARN(!hdr_nfrags, "hdr_nfrags should never be 0!");
	payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen,
					   &info->iov[payload_iov]);

	gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
			     1 + payload_nfrags, hlen,
			     info->iov[hdr_nfrags - 1].iov_offset);

	skb_copy_bits(skb, 0,
		      tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset,
		      hlen);
	gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
				info->iov[hdr_nfrags - 1].iov_offset,
				info->iov[hdr_nfrags - 1].iov_len);
	copy_offset = hlen;

	for (i = payload_iov; i < payload_nfrags + payload_iov; i++) {
		next_idx = (tx->req + 1 + i - payload_iov) & tx->mask;
		seg_desc = &tx->desc[next_idx];

		gve_tx_fill_seg_desc(seg_desc, skb, is_gso,
				     info->iov[i].iov_len,
				     info->iov[i].iov_offset);

		skb_copy_bits(skb, copy_offset,
			      tx->tx_fifo.base + info->iov[i].iov_offset,
			      info->iov[i].iov_len);
		gve_dma_sync_for_device(&priv->pdev->dev, tx->tx_fifo.qpl->page_buses,
					info->iov[i].iov_offset,
					info->iov[i].iov_len);
		copy_offset += info->iov[i].iov_len;
	}

	return 1 + payload_nfrags;
}

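/* Raw-addressing (RDA) datapath: DMA-map the skb linear data and page frags
 * in place instead of copying. Returns the number of descriptors used, or 0
 * if a DMA mapping failed and the packet was dropped.
 */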
static int gve_tx_add_skb_no_copy(struct gve_priv *priv, struct gve_tx_ring *tx,
				  struct sk_buff *skb)
{
	const struct skb_shared_info *shinfo = skb_shinfo(skb);
	int hlen, payload_nfrags, l4_hdr_offset;
	union gve_tx_desc *pkt_desc, *seg_desc;
	struct gve_tx_buffer_state *info;
	bool is_gso = skb_is_gso(skb);
	u32 idx = tx->req & tx->mask;
	struct gve_tx_dma_buf *buf;
	u64 addr;
	u32 len;
	int i;

	info = &tx->info[idx];
	pkt_desc = &tx->desc[idx];

	l4_hdr_offset = skb_checksum_start_offset(skb);
	/* If the skb is gso, then we want only up to the tcp header in the first segment
	 * to efficiently replicate on each segment otherwise we want the linear portion
	 * of the skb (which will contain the checksum because skb->csum_start and
	 * skb->csum_offset are given relative to skb->head) in the first segment.
	 */
	hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : skb_headlen(skb);
	len = skb_headlen(skb);

	info->skb = skb;

	addr = dma_map_single(tx->dev, skb->data, len, DMA_TO_DEVICE);
	if (unlikely(dma_mapping_error(tx->dev, addr))) {
		tx->dma_mapping_error++;
		goto drop;
	}
	buf = &info->buf;
	dma_unmap_len_set(buf, len, len);
	dma_unmap_addr_set(buf, dma, addr);

	payload_nfrags = shinfo->nr_frags;
	if (hlen < len) {
		/* For gso the rest of the linear portion of the skb needs to
		 * be in its own descriptor.
		 */
		payload_nfrags++;
		gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
				     1 + payload_nfrags, hlen, addr);

		len -= hlen;
		addr += hlen;
		idx = (tx->req + 1) & tx->mask;
		seg_desc = &tx->desc[idx];
		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
	} else {
		gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset,
				     1 + payload_nfrags, hlen, addr);
	}

	for (i = 0; i < shinfo->nr_frags; i++) {
		const skb_frag_t *frag = &shinfo->frags[i];

		idx = (idx + 1) & tx->mask;
		seg_desc = &tx->desc[idx];
		len = skb_frag_size(frag);
		addr = skb_frag_dma_map(tx->dev, frag, 0, len, DMA_TO_DEVICE);
		if (unlikely(dma_mapping_error(tx->dev, addr))) {
			tx->dma_mapping_error++;
			goto unmap_drop;
		}
		buf = &tx->info[idx].buf;
		tx->info[idx].skb = NULL;
		dma_unmap_len_set(buf, len, len);
		dma_unmap_addr_set(buf, dma, addr);

		gve_tx_fill_seg_desc(seg_desc, skb, is_gso, len, addr);
	}

	return 1 + payload_nfrags;

unmap_drop:
	i += (payload_nfrags == shinfo->nr_frags ? 1 : 2);
	while (i--) {
		idx--;
		gve_tx_unmap_buf(tx->dev, &tx->info[idx & tx->mask]);
	}
drop:
	tx->dropped_pkt++;
	return 0;
}

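/* ndo_start_xmit handler: pick the ring from the skb's queue mapping, stop
 * the queue if resources are short, add the skb via the copy or no-copy
 * path, and ring the doorbell unless xmit_more allows batching.
 */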
netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev)
{
	struct gve_priv *priv = netdev_priv(dev);
	struct gve_tx_ring *tx;
	int nsegs;

	WARN(skb_get_queue_mapping(skb) >= priv->tx_cfg.num_queues,
	     "skb queue index out of range");
	tx = &priv->tx[skb_get_queue_mapping(skb)];
	if (unlikely(gve_maybe_stop_tx(tx, skb))) {
		/* We need to ring the txq doorbell -- we have stopped the Tx
		 * queue for want of resources, but prior calls to gve_tx()
		 * may have added descriptors without ringing the doorbell.
		 */

		gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
		return NETDEV_TX_BUSY;
	}
	if (tx->raw_addressing)
		nsegs = gve_tx_add_skb_no_copy(priv, tx, skb);
	else
		nsegs = gve_tx_add_skb_copy(priv, tx, skb);

	/* If the packet is getting sent, we need to update the skb */
	if (nsegs) {
		netdev_tx_sent_queue(tx->netdev_txq, skb->len);
		skb_tx_timestamp(skb);
		tx->req += nsegs;
	} else {
		dev_kfree_skb_any(skb);
	}

	if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more())
		return NETDEV_TX_OK;

	/* Give packets to NIC. Even if this packet failed to send, the
	 * doorbell might need to be rung because of xmit_more.
	 */
	gve_tx_put_doorbell(priv, tx->q_resources, tx->req);
	return NETDEV_TX_OK;
}

#define GVE_TX_START_THRESH	PAGE_SIZE

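/* Process up to @to_do completed descriptors: unmap buffers (raw addressing)
 * or reclaim FIFO space (QPL), update stats and BQL, and wake the queue if
 * it was stopped and resources are available again. Returns packets cleaned.
 */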
static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
			     u32 to_do, bool try_to_wake)
{
	struct gve_tx_buffer_state *info;
	u64 pkts = 0, bytes = 0;
	size_t space_freed = 0;
	struct sk_buff *skb;
	int i, j;
	u32 idx;

	for (j = 0; j < to_do; j++) {
		idx = tx->done & tx->mask;
		netif_info(priv, tx_done, priv->dev,
			   "[%d] %s: idx=%d (req=%u done=%u)\n",
			   tx->q_num, __func__, idx, tx->req, tx->done);
		info = &tx->info[idx];
		skb = info->skb;

		/* Unmap the buffer */
		if (tx->raw_addressing)
			gve_tx_unmap_buf(tx->dev, info);
		tx->done++;
		/* Mark as free */
		if (skb) {
			info->skb = NULL;
			bytes += skb->len;
			pkts++;
			dev_consume_skb_any(skb);
			if (tx->raw_addressing)
				continue;
			/* FIFO free */
			for (i = 0; i < ARRAY_SIZE(info->iov); i++) {
				space_freed += info->iov[i].iov_len + info->iov[i].iov_padding;
				info->iov[i].iov_len = 0;
				info->iov[i].iov_padding = 0;
			}
		}
	}

	if (!tx->raw_addressing)
		gve_tx_free_fifo(&tx->tx_fifo, space_freed);
	u64_stats_update_begin(&tx->statss);
	tx->bytes_done += bytes;
	tx->pkt_done += pkts;
	u64_stats_update_end(&tx->statss);
	netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes);

	/* start the queue if we've stopped it */
#ifndef CONFIG_BQL
	/* Make sure that the doorbells are synced */
	smp_mb();
#endif
	if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) &&
	    likely(gve_can_tx(tx, GVE_TX_START_THRESH))) {
		tx->wake_queue++;
		netif_tx_wake_queue(tx->netdev_txq);
	}

	return pkts;
}

__be32 gve_tx_load_event_counter(struct gve_priv *priv,
				 struct gve_tx_ring *tx)
{
	u32 counter_index = be32_to_cpu(tx->q_resources->counter_index);

	return READ_ONCE(priv->counter_array[counter_index]);
}

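/* NAPI tx poll: read the device's completion counter to see how many
 * descriptors have finished and clean up to @budget of them. Returns true
 * if completions remain outstanding.
 */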
bool gve_tx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_priv *priv = block->priv;
	struct gve_tx_ring *tx = block->tx;
	bool repoll = false;
	u32 nic_done;
	u32 to_do;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	/* Find out how much work there is to be done */
	tx->last_nic_done = gve_tx_load_event_counter(priv, tx);
	nic_done = be32_to_cpu(tx->last_nic_done);
	if (budget > 0) {
		/* Do as much work as we have that the budget will
		 * allow
		 */
		to_do = min_t(u32, (nic_done - tx->done), budget);
		gve_clean_tx_done(priv, tx, to_do, true);
	}
	/* If we still have work we want to repoll */
	repoll |= (nic_done != tx->done);
	return repoll;
}