1 /*
2 * Copyright(c) 2017 - 2018 Intel Corporation.
3 *
4 * This file is provided under a dual BSD/GPLv2 license. When using or
5 * redistributing this file, you may do so under either license.
6 *
7 * GPL LICENSE SUMMARY
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
17 *
18 * BSD LICENSE
19 *
20 * Redistribution and use in source and binary forms, with or without
21 * modification, are permitted provided that the following conditions
22 * are met:
23 *
24 * - Redistributions of source code must retain the above copyright
25 * notice, this list of conditions and the following disclaimer.
26 * - Redistributions in binary form must reproduce the above copyright
27 * notice, this list of conditions and the following disclaimer in
28 * the documentation and/or other materials provided with the
29 * distribution.
30 * - Neither the name of Intel Corporation nor the names of its
31 * contributors may be used to endorse or promote products derived
32 * from this software without specific prior written permission.
33 *
34 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45 *
46 */
47
48 /*
49 * This file contains HFI1 support for VNIC functionality
50 */
51
52 #include <linux/io.h>
53 #include <linux/if_vlan.h>
54
55 #include "vnic.h"
56
/* Bit number in vinfo->flags that marks the vport as up */
#define HFI1_VNIC_UP 0

/* Transmit watchdog timeout, in milliseconds */
#define HFI_TX_TIMEOUT_MS 1000

/* Receive skb queue length above which incoming packets are dropped */
#define HFI1_VNIC_RCV_Q_SIZE 1024

/* Guards the rx_nohandler update done for packets with an unknown vesw id */
static DEFINE_SPINLOCK(vport_cntr_lock);
64
setup_vnic_ctxt(struct hfi1_devdata * dd,struct hfi1_ctxtdata * uctxt)65 static int setup_vnic_ctxt(struct hfi1_devdata *dd, struct hfi1_ctxtdata *uctxt)
66 {
67 unsigned int rcvctrl_ops = 0;
68 int ret;
69
70 uctxt->do_interrupt = &handle_receive_interrupt;
71
72 /* Now allocate the RcvHdr queue and eager buffers. */
73 ret = hfi1_create_rcvhdrq(dd, uctxt);
74 if (ret)
75 goto done;
76
77 ret = hfi1_setup_eagerbufs(uctxt);
78 if (ret)
79 goto done;
80
81 if (uctxt->rcvhdrtail_kvaddr)
82 clear_rcvhdrtail(uctxt);
83
84 rcvctrl_ops = HFI1_RCVCTRL_CTXT_ENB;
85 rcvctrl_ops |= HFI1_RCVCTRL_INTRAVAIL_ENB;
86
87 if (!HFI1_CAP_KGET_MASK(uctxt->flags, MULTI_PKT_EGR))
88 rcvctrl_ops |= HFI1_RCVCTRL_ONE_PKT_EGR_ENB;
89 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_EGR_FULL))
90 rcvctrl_ops |= HFI1_RCVCTRL_NO_EGR_DROP_ENB;
91 if (HFI1_CAP_KGET_MASK(uctxt->flags, NODROP_RHQ_FULL))
92 rcvctrl_ops |= HFI1_RCVCTRL_NO_RHQ_DROP_ENB;
93 if (HFI1_CAP_KGET_MASK(uctxt->flags, DMA_RTAIL))
94 rcvctrl_ops |= HFI1_RCVCTRL_TAILUPD_ENB;
95
96 hfi1_rcvctrl(uctxt->dd, rcvctrl_ops, uctxt);
97 done:
98 return ret;
99 }
100
allocate_vnic_ctxt(struct hfi1_devdata * dd,struct hfi1_ctxtdata ** vnic_ctxt)101 static int allocate_vnic_ctxt(struct hfi1_devdata *dd,
102 struct hfi1_ctxtdata **vnic_ctxt)
103 {
104 struct hfi1_ctxtdata *uctxt;
105 int ret;
106
107 if (dd->flags & HFI1_FROZEN)
108 return -EIO;
109
110 ret = hfi1_create_ctxtdata(dd->pport, dd->node, &uctxt);
111 if (ret < 0) {
112 dd_dev_err(dd, "Unable to create ctxtdata, failing open\n");
113 return -ENOMEM;
114 }
115
116 uctxt->flags = HFI1_CAP_KGET(MULTI_PKT_EGR) |
117 HFI1_CAP_KGET(NODROP_RHQ_FULL) |
118 HFI1_CAP_KGET(NODROP_EGR_FULL) |
119 HFI1_CAP_KGET(DMA_RTAIL);
120 uctxt->seq_cnt = 1;
121 uctxt->is_vnic = true;
122
123 hfi1_set_vnic_msix_info(uctxt);
124
125 hfi1_stats.sps_ctxts++;
126 dd_dev_dbg(dd, "created vnic context %d\n", uctxt->ctxt);
127 *vnic_ctxt = uctxt;
128
129 return 0;
130 }
131
deallocate_vnic_ctxt(struct hfi1_devdata * dd,struct hfi1_ctxtdata * uctxt)132 static void deallocate_vnic_ctxt(struct hfi1_devdata *dd,
133 struct hfi1_ctxtdata *uctxt)
134 {
135 dd_dev_dbg(dd, "closing vnic context %d\n", uctxt->ctxt);
136 flush_wc();
137
138 hfi1_reset_vnic_msix_info(uctxt);
139
140 /*
141 * Disable receive context and interrupt available, reset all
142 * RcvCtxtCtrl bits to default values.
143 */
144 hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_DIS |
145 HFI1_RCVCTRL_TIDFLOW_DIS |
146 HFI1_RCVCTRL_INTRAVAIL_DIS |
147 HFI1_RCVCTRL_ONE_PKT_EGR_DIS |
148 HFI1_RCVCTRL_NO_RHQ_DROP_DIS |
149 HFI1_RCVCTRL_NO_EGR_DROP_DIS, uctxt);
150
151 uctxt->event_flags = 0;
152
153 hfi1_clear_tids(uctxt);
154 hfi1_clear_ctxt_pkey(dd, uctxt);
155
156 hfi1_stats.sps_ctxts--;
157
158 hfi1_free_ctxt(uctxt);
159 }
160
hfi1_vnic_setup(struct hfi1_devdata * dd)161 void hfi1_vnic_setup(struct hfi1_devdata *dd)
162 {
163 idr_init(&dd->vnic.vesw_idr);
164 }
165
hfi1_vnic_cleanup(struct hfi1_devdata * dd)166 void hfi1_vnic_cleanup(struct hfi1_devdata *dd)
167 {
168 idr_destroy(&dd->vnic.vesw_idr);
169 }
170
/*
 * SUM_GRP_COUNTERS - add one queue's group counters into the totals.
 *
 * Walks the x_grp member of both structures as an array of u64, starting at
 * the 'unicast' field and stopping after 's_1519_max', so it depends on
 * those counters being laid out back to back as u64 values.
 */
#define SUM_GRP_COUNTERS(stats, qstats, x_grp) do {			\
		u64 *grp_src = &(qstats)->x_grp.unicast;		\
		u64 *grp_dst = &(stats)->x_grp.unicast;			\
		u64 *grp_last = &(stats)->x_grp.s_1519_max;		\
									\
		while (grp_dst <= grp_last)				\
			*grp_dst++ += *grp_src++;			\
	} while (0)
179
180 /* hfi1_vnic_update_stats - update statistics */
hfi1_vnic_update_stats(struct hfi1_vnic_vport_info * vinfo,struct opa_vnic_stats * stats)181 static void hfi1_vnic_update_stats(struct hfi1_vnic_vport_info *vinfo,
182 struct opa_vnic_stats *stats)
183 {
184 struct net_device *netdev = vinfo->netdev;
185 u8 i;
186
187 /* add tx counters on different queues */
188 for (i = 0; i < vinfo->num_tx_q; i++) {
189 struct opa_vnic_stats *qstats = &vinfo->stats[i];
190 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
191
192 stats->netstats.tx_fifo_errors += qnstats->tx_fifo_errors;
193 stats->netstats.tx_carrier_errors += qnstats->tx_carrier_errors;
194 stats->tx_drop_state += qstats->tx_drop_state;
195 stats->tx_dlid_zero += qstats->tx_dlid_zero;
196
197 SUM_GRP_COUNTERS(stats, qstats, tx_grp);
198 stats->netstats.tx_packets += qnstats->tx_packets;
199 stats->netstats.tx_bytes += qnstats->tx_bytes;
200 }
201
202 /* add rx counters on different queues */
203 for (i = 0; i < vinfo->num_rx_q; i++) {
204 struct opa_vnic_stats *qstats = &vinfo->stats[i];
205 struct rtnl_link_stats64 *qnstats = &vinfo->stats[i].netstats;
206
207 stats->netstats.rx_fifo_errors += qnstats->rx_fifo_errors;
208 stats->netstats.rx_nohandler += qnstats->rx_nohandler;
209 stats->rx_drop_state += qstats->rx_drop_state;
210 stats->rx_oversize += qstats->rx_oversize;
211 stats->rx_runt += qstats->rx_runt;
212
213 SUM_GRP_COUNTERS(stats, qstats, rx_grp);
214 stats->netstats.rx_packets += qnstats->rx_packets;
215 stats->netstats.rx_bytes += qnstats->rx_bytes;
216 }
217
218 stats->netstats.tx_errors = stats->netstats.tx_fifo_errors +
219 stats->netstats.tx_carrier_errors +
220 stats->tx_drop_state + stats->tx_dlid_zero;
221 stats->netstats.tx_dropped = stats->netstats.tx_errors;
222
223 stats->netstats.rx_errors = stats->netstats.rx_fifo_errors +
224 stats->netstats.rx_nohandler +
225 stats->rx_drop_state + stats->rx_oversize +
226 stats->rx_runt;
227 stats->netstats.rx_dropped = stats->netstats.rx_errors;
228
229 netdev->stats.tx_packets = stats->netstats.tx_packets;
230 netdev->stats.tx_bytes = stats->netstats.tx_bytes;
231 netdev->stats.tx_fifo_errors = stats->netstats.tx_fifo_errors;
232 netdev->stats.tx_carrier_errors = stats->netstats.tx_carrier_errors;
233 netdev->stats.tx_errors = stats->netstats.tx_errors;
234 netdev->stats.tx_dropped = stats->netstats.tx_dropped;
235
236 netdev->stats.rx_packets = stats->netstats.rx_packets;
237 netdev->stats.rx_bytes = stats->netstats.rx_bytes;
238 netdev->stats.rx_fifo_errors = stats->netstats.rx_fifo_errors;
239 netdev->stats.multicast = stats->rx_grp.mcastbcast;
240 netdev->stats.rx_length_errors = stats->rx_oversize + stats->rx_runt;
241 netdev->stats.rx_errors = stats->netstats.rx_errors;
242 netdev->stats.rx_dropped = stats->netstats.rx_dropped;
243 }
244
245 /* update_len_counters - update pkt's len histogram counters */
update_len_counters(struct opa_vnic_grp_stats * grp,int len)246 static inline void update_len_counters(struct opa_vnic_grp_stats *grp,
247 int len)
248 {
249 /* account for 4 byte FCS */
250 if (len >= 1515)
251 grp->s_1519_max++;
252 else if (len >= 1020)
253 grp->s_1024_1518++;
254 else if (len >= 508)
255 grp->s_512_1023++;
256 else if (len >= 252)
257 grp->s_256_511++;
258 else if (len >= 124)
259 grp->s_128_255++;
260 else if (len >= 61)
261 grp->s_65_127++;
262 else
263 grp->s_64++;
264 }
265
266 /* hfi1_vnic_update_tx_counters - update transmit counters */
hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info * vinfo,u8 q_idx,struct sk_buff * skb,int err)267 static void hfi1_vnic_update_tx_counters(struct hfi1_vnic_vport_info *vinfo,
268 u8 q_idx, struct sk_buff *skb, int err)
269 {
270 struct ethhdr *mac_hdr = (struct ethhdr *)skb_mac_header(skb);
271 struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
272 struct opa_vnic_grp_stats *tx_grp = &stats->tx_grp;
273 u16 vlan_tci;
274
275 stats->netstats.tx_packets++;
276 stats->netstats.tx_bytes += skb->len + ETH_FCS_LEN;
277
278 update_len_counters(tx_grp, skb->len);
279
280 /* rest of the counts are for good packets only */
281 if (unlikely(err))
282 return;
283
284 if (is_multicast_ether_addr(mac_hdr->h_dest))
285 tx_grp->mcastbcast++;
286 else
287 tx_grp->unicast++;
288
289 if (!__vlan_get_tag(skb, &vlan_tci))
290 tx_grp->vlan++;
291 else
292 tx_grp->untagged++;
293 }
294
295 /* hfi1_vnic_update_rx_counters - update receive counters */
hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info * vinfo,u8 q_idx,struct sk_buff * skb,int err)296 static void hfi1_vnic_update_rx_counters(struct hfi1_vnic_vport_info *vinfo,
297 u8 q_idx, struct sk_buff *skb, int err)
298 {
299 struct ethhdr *mac_hdr = (struct ethhdr *)skb->data;
300 struct opa_vnic_stats *stats = &vinfo->stats[q_idx];
301 struct opa_vnic_grp_stats *rx_grp = &stats->rx_grp;
302 u16 vlan_tci;
303
304 stats->netstats.rx_packets++;
305 stats->netstats.rx_bytes += skb->len + ETH_FCS_LEN;
306
307 update_len_counters(rx_grp, skb->len);
308
309 /* rest of the counts are for good packets only */
310 if (unlikely(err))
311 return;
312
313 if (is_multicast_ether_addr(mac_hdr->h_dest))
314 rx_grp->mcastbcast++;
315 else
316 rx_grp->unicast++;
317
318 if (!__vlan_get_tag(skb, &vlan_tci))
319 rx_grp->vlan++;
320 else
321 rx_grp->untagged++;
322 }
323
/*
 * hfi1_vnic_get_stats64 - ndo_get_stats64 handler, overloaded for opa_vnic.
 *
 * The @stats buffer is handed to hfi1_vnic_update_stats() as a
 * struct opa_vnic_stats, not as a plain rtnl_link_stats64.
 * NOTE(review): this presumably relies on the caller supplying a buffer of
 * the larger opa_vnic type -- confirm against the opa_vnic caller.
 */
static void hfi1_vnic_get_stats64(struct net_device *netdev,
				  struct rtnl_link_stats64 *stats)
{
	hfi1_vnic_update_stats(opa_vnic_dev_priv(netdev),
			       (struct opa_vnic_stats *)stats);
}
333
create_bypass_pbc(u32 vl,u32 dw_len)334 static u64 create_bypass_pbc(u32 vl, u32 dw_len)
335 {
336 u64 pbc;
337
338 pbc = ((u64)PBC_IHCRC_NONE << PBC_INSERT_HCRC_SHIFT)
339 | PBC_INSERT_BYPASS_ICRC | PBC_CREDIT_RETURN
340 | PBC_PACKET_BYPASS
341 | ((vl & PBC_VL_MASK) << PBC_VL_SHIFT)
342 | (dw_len & PBC_LENGTH_DWS_MASK) << PBC_LENGTH_DWS_SHIFT;
343
344 return pbc;
345 }
346
347 /* hfi1_vnic_maybe_stop_tx - stop tx queue if required */
hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info * vinfo,u8 q_idx)348 static void hfi1_vnic_maybe_stop_tx(struct hfi1_vnic_vport_info *vinfo,
349 u8 q_idx)
350 {
351 netif_stop_subqueue(vinfo->netdev, q_idx);
352 if (!hfi1_vnic_sdma_write_avail(vinfo, q_idx))
353 return;
354
355 netif_start_subqueue(vinfo->netdev, q_idx);
356 }
357
hfi1_netdev_start_xmit(struct sk_buff * skb,struct net_device * netdev)358 static netdev_tx_t hfi1_netdev_start_xmit(struct sk_buff *skb,
359 struct net_device *netdev)
360 {
361 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
362 u8 pad_len, q_idx = skb->queue_mapping;
363 struct hfi1_devdata *dd = vinfo->dd;
364 struct opa_vnic_skb_mdata *mdata;
365 u32 pkt_len, total_len;
366 int err = -EINVAL;
367 u64 pbc;
368
369 v_dbg("xmit: queue %d skb len %d\n", q_idx, skb->len);
370 if (unlikely(!netif_oper_up(netdev))) {
371 vinfo->stats[q_idx].tx_drop_state++;
372 goto tx_finish;
373 }
374
375 /* take out meta data */
376 mdata = (struct opa_vnic_skb_mdata *)skb->data;
377 skb_pull(skb, sizeof(*mdata));
378 if (unlikely(mdata->flags & OPA_VNIC_SKB_MDATA_ENCAP_ERR)) {
379 vinfo->stats[q_idx].tx_dlid_zero++;
380 goto tx_finish;
381 }
382
383 /* add tail padding (for 8 bytes size alignment) and icrc */
384 pad_len = -(skb->len + OPA_VNIC_ICRC_TAIL_LEN) & 0x7;
385 pad_len += OPA_VNIC_ICRC_TAIL_LEN;
386
387 /*
388 * pkt_len is how much data we have to write, includes header and data.
389 * total_len is length of the packet in Dwords plus the PBC should not
390 * include the CRC.
391 */
392 pkt_len = (skb->len + pad_len) >> 2;
393 total_len = pkt_len + 2; /* PBC + packet */
394
395 pbc = create_bypass_pbc(mdata->vl, total_len);
396
397 skb_get(skb);
398 v_dbg("pbc 0x%016llX len %d pad_len %d\n", pbc, skb->len, pad_len);
399 err = dd->process_vnic_dma_send(dd, q_idx, vinfo, skb, pbc, pad_len);
400 if (unlikely(err)) {
401 if (err == -ENOMEM)
402 vinfo->stats[q_idx].netstats.tx_fifo_errors++;
403 else if (err != -EBUSY)
404 vinfo->stats[q_idx].netstats.tx_carrier_errors++;
405 }
406 /* remove the header before updating tx counters */
407 skb_pull(skb, OPA_VNIC_HDR_LEN);
408
409 if (unlikely(err == -EBUSY)) {
410 hfi1_vnic_maybe_stop_tx(vinfo, q_idx);
411 dev_kfree_skb_any(skb);
412 return NETDEV_TX_BUSY;
413 }
414
415 tx_finish:
416 /* update tx counters */
417 hfi1_vnic_update_tx_counters(vinfo, q_idx, skb, err);
418 dev_kfree_skb_any(skb);
419 return NETDEV_TX_OK;
420 }
421
hfi1_vnic_select_queue(struct net_device * netdev,struct sk_buff * skb,struct net_device * sb_dev,select_queue_fallback_t fallback)422 static u16 hfi1_vnic_select_queue(struct net_device *netdev,
423 struct sk_buff *skb,
424 struct net_device *sb_dev,
425 select_queue_fallback_t fallback)
426 {
427 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
428 struct opa_vnic_skb_mdata *mdata;
429 struct sdma_engine *sde;
430
431 mdata = (struct opa_vnic_skb_mdata *)skb->data;
432 sde = sdma_select_engine_vl(vinfo->dd, mdata->entropy, mdata->vl);
433 return sde->this_idx;
434 }
435
436 /* hfi1_vnic_decap_skb - strip OPA header from the skb (ethernet) packet */
hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue * rxq,struct sk_buff * skb)437 static inline int hfi1_vnic_decap_skb(struct hfi1_vnic_rx_queue *rxq,
438 struct sk_buff *skb)
439 {
440 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
441 int max_len = vinfo->netdev->mtu + VLAN_ETH_HLEN;
442 int rc = -EFAULT;
443
444 skb_pull(skb, OPA_VNIC_HDR_LEN);
445
446 /* Validate Packet length */
447 if (unlikely(skb->len > max_len))
448 vinfo->stats[rxq->idx].rx_oversize++;
449 else if (unlikely(skb->len < ETH_ZLEN))
450 vinfo->stats[rxq->idx].rx_runt++;
451 else
452 rc = 0;
453 return rc;
454 }
455
hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue * rxq)456 static inline struct sk_buff *hfi1_vnic_get_skb(struct hfi1_vnic_rx_queue *rxq)
457 {
458 unsigned char *pad_info;
459 struct sk_buff *skb;
460
461 skb = skb_dequeue(&rxq->skbq);
462 if (unlikely(!skb))
463 return NULL;
464
465 /* remove tail padding and icrc */
466 pad_info = skb->data + skb->len - 1;
467 skb_trim(skb, (skb->len - OPA_VNIC_ICRC_TAIL_LEN -
468 ((*pad_info) & 0x7)));
469
470 return skb;
471 }
472
473 /* hfi1_vnic_handle_rx - handle skb receive */
hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue * rxq,int * work_done,int work_to_do)474 static void hfi1_vnic_handle_rx(struct hfi1_vnic_rx_queue *rxq,
475 int *work_done, int work_to_do)
476 {
477 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
478 struct sk_buff *skb;
479 int rc;
480
481 while (1) {
482 if (*work_done >= work_to_do)
483 break;
484
485 skb = hfi1_vnic_get_skb(rxq);
486 if (unlikely(!skb))
487 break;
488
489 rc = hfi1_vnic_decap_skb(rxq, skb);
490 /* update rx counters */
491 hfi1_vnic_update_rx_counters(vinfo, rxq->idx, skb, rc);
492 if (unlikely(rc)) {
493 dev_kfree_skb_any(skb);
494 continue;
495 }
496
497 skb_checksum_none_assert(skb);
498 skb->protocol = eth_type_trans(skb, rxq->netdev);
499
500 napi_gro_receive(&rxq->napi, skb);
501 (*work_done)++;
502 }
503 }
504
505 /* hfi1_vnic_napi - napi receive polling callback function */
hfi1_vnic_napi(struct napi_struct * napi,int budget)506 static int hfi1_vnic_napi(struct napi_struct *napi, int budget)
507 {
508 struct hfi1_vnic_rx_queue *rxq = container_of(napi,
509 struct hfi1_vnic_rx_queue, napi);
510 struct hfi1_vnic_vport_info *vinfo = rxq->vinfo;
511 int work_done = 0;
512
513 v_dbg("napi %d budget %d\n", rxq->idx, budget);
514 hfi1_vnic_handle_rx(rxq, &work_done, budget);
515
516 v_dbg("napi %d work_done %d\n", rxq->idx, work_done);
517 if (work_done < budget)
518 napi_complete(napi);
519
520 return work_done;
521 }
522
hfi1_vnic_bypass_rcv(struct hfi1_packet * packet)523 void hfi1_vnic_bypass_rcv(struct hfi1_packet *packet)
524 {
525 struct hfi1_devdata *dd = packet->rcd->dd;
526 struct hfi1_vnic_vport_info *vinfo = NULL;
527 struct hfi1_vnic_rx_queue *rxq;
528 struct sk_buff *skb;
529 int l4_type, vesw_id = -1;
530 u8 q_idx;
531
532 l4_type = hfi1_16B_get_l4(packet->ebuf);
533 if (likely(l4_type == OPA_16B_L4_ETHR)) {
534 vesw_id = HFI1_VNIC_GET_VESWID(packet->ebuf);
535 vinfo = idr_find(&dd->vnic.vesw_idr, vesw_id);
536
537 /*
538 * In case of invalid vesw id, count the error on
539 * the first available vport.
540 */
541 if (unlikely(!vinfo)) {
542 struct hfi1_vnic_vport_info *vinfo_tmp;
543 int id_tmp = 0;
544
545 vinfo_tmp = idr_get_next(&dd->vnic.vesw_idr, &id_tmp);
546 if (vinfo_tmp) {
547 spin_lock(&vport_cntr_lock);
548 vinfo_tmp->stats[0].netstats.rx_nohandler++;
549 spin_unlock(&vport_cntr_lock);
550 }
551 }
552 }
553
554 if (unlikely(!vinfo)) {
555 dd_dev_warn(dd, "vnic rcv err: l4 %d vesw id %d ctx %d\n",
556 l4_type, vesw_id, packet->rcd->ctxt);
557 return;
558 }
559
560 q_idx = packet->rcd->vnic_q_idx;
561 rxq = &vinfo->rxq[q_idx];
562 if (unlikely(!netif_oper_up(vinfo->netdev))) {
563 vinfo->stats[q_idx].rx_drop_state++;
564 skb_queue_purge(&rxq->skbq);
565 return;
566 }
567
568 if (unlikely(skb_queue_len(&rxq->skbq) > HFI1_VNIC_RCV_Q_SIZE)) {
569 vinfo->stats[q_idx].netstats.rx_fifo_errors++;
570 return;
571 }
572
573 skb = netdev_alloc_skb(vinfo->netdev, packet->tlen);
574 if (unlikely(!skb)) {
575 vinfo->stats[q_idx].netstats.rx_fifo_errors++;
576 return;
577 }
578
579 memcpy(skb->data, packet->ebuf, packet->tlen);
580 skb_put(skb, packet->tlen);
581 skb_queue_tail(&rxq->skbq, skb);
582
583 if (napi_schedule_prep(&rxq->napi)) {
584 v_dbg("napi %d scheduling\n", q_idx);
585 __napi_schedule(&rxq->napi);
586 }
587 }
588
hfi1_vnic_up(struct hfi1_vnic_vport_info * vinfo)589 static int hfi1_vnic_up(struct hfi1_vnic_vport_info *vinfo)
590 {
591 struct hfi1_devdata *dd = vinfo->dd;
592 struct net_device *netdev = vinfo->netdev;
593 int i, rc;
594
595 /* ensure virtual eth switch id is valid */
596 if (!vinfo->vesw_id)
597 return -EINVAL;
598
599 rc = idr_alloc(&dd->vnic.vesw_idr, vinfo, vinfo->vesw_id,
600 vinfo->vesw_id + 1, GFP_NOWAIT);
601 if (rc < 0)
602 return rc;
603
604 for (i = 0; i < vinfo->num_rx_q; i++) {
605 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
606
607 skb_queue_head_init(&rxq->skbq);
608 napi_enable(&rxq->napi);
609 }
610
611 netif_carrier_on(netdev);
612 netif_tx_start_all_queues(netdev);
613 set_bit(HFI1_VNIC_UP, &vinfo->flags);
614
615 return 0;
616 }
617
hfi1_vnic_down(struct hfi1_vnic_vport_info * vinfo)618 static void hfi1_vnic_down(struct hfi1_vnic_vport_info *vinfo)
619 {
620 struct hfi1_devdata *dd = vinfo->dd;
621 u8 i;
622
623 clear_bit(HFI1_VNIC_UP, &vinfo->flags);
624 netif_carrier_off(vinfo->netdev);
625 netif_tx_disable(vinfo->netdev);
626 idr_remove(&dd->vnic.vesw_idr, vinfo->vesw_id);
627
628 /* ensure irqs see the change */
629 hfi1_vnic_synchronize_irq(dd);
630
631 /* remove unread skbs */
632 for (i = 0; i < vinfo->num_rx_q; i++) {
633 struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[i];
634
635 napi_disable(&rxq->napi);
636 skb_queue_purge(&rxq->skbq);
637 }
638 }
639
hfi1_netdev_open(struct net_device * netdev)640 static int hfi1_netdev_open(struct net_device *netdev)
641 {
642 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
643 int rc;
644
645 mutex_lock(&vinfo->lock);
646 rc = hfi1_vnic_up(vinfo);
647 mutex_unlock(&vinfo->lock);
648 return rc;
649 }
650
hfi1_netdev_close(struct net_device * netdev)651 static int hfi1_netdev_close(struct net_device *netdev)
652 {
653 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
654
655 mutex_lock(&vinfo->lock);
656 if (test_bit(HFI1_VNIC_UP, &vinfo->flags))
657 hfi1_vnic_down(vinfo);
658 mutex_unlock(&vinfo->lock);
659 return 0;
660 }
661
hfi1_vnic_allot_ctxt(struct hfi1_devdata * dd,struct hfi1_ctxtdata ** vnic_ctxt)662 static int hfi1_vnic_allot_ctxt(struct hfi1_devdata *dd,
663 struct hfi1_ctxtdata **vnic_ctxt)
664 {
665 int rc;
666
667 rc = allocate_vnic_ctxt(dd, vnic_ctxt);
668 if (rc) {
669 dd_dev_err(dd, "vnic ctxt alloc failed %d\n", rc);
670 return rc;
671 }
672
673 rc = setup_vnic_ctxt(dd, *vnic_ctxt);
674 if (rc) {
675 dd_dev_err(dd, "vnic ctxt setup failed %d\n", rc);
676 deallocate_vnic_ctxt(dd, *vnic_ctxt);
677 *vnic_ctxt = NULL;
678 }
679
680 return rc;
681 }
682
hfi1_vnic_init(struct hfi1_vnic_vport_info * vinfo)683 static int hfi1_vnic_init(struct hfi1_vnic_vport_info *vinfo)
684 {
685 struct hfi1_devdata *dd = vinfo->dd;
686 int i, rc = 0;
687
688 mutex_lock(&hfi1_mutex);
689 if (!dd->vnic.num_vports) {
690 rc = hfi1_vnic_txreq_init(dd);
691 if (rc)
692 goto txreq_fail;
693
694 dd->vnic.msix_idx = dd->first_dyn_msix_idx;
695 }
696
697 for (i = dd->vnic.num_ctxt; i < vinfo->num_rx_q; i++) {
698 rc = hfi1_vnic_allot_ctxt(dd, &dd->vnic.ctxt[i]);
699 if (rc)
700 break;
701 hfi1_rcd_get(dd->vnic.ctxt[i]);
702 dd->vnic.ctxt[i]->vnic_q_idx = i;
703 }
704
705 if (i < vinfo->num_rx_q) {
706 /*
707 * If required amount of contexts is not
708 * allocated successfully then remaining contexts
709 * are released.
710 */
711 while (i-- > dd->vnic.num_ctxt) {
712 deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
713 hfi1_rcd_put(dd->vnic.ctxt[i]);
714 dd->vnic.ctxt[i] = NULL;
715 }
716 goto alloc_fail;
717 }
718
719 if (dd->vnic.num_ctxt != i) {
720 dd->vnic.num_ctxt = i;
721 hfi1_init_vnic_rsm(dd);
722 }
723
724 dd->vnic.num_vports++;
725 hfi1_vnic_sdma_init(vinfo);
726 alloc_fail:
727 if (!dd->vnic.num_vports)
728 hfi1_vnic_txreq_deinit(dd);
729 txreq_fail:
730 mutex_unlock(&hfi1_mutex);
731 return rc;
732 }
733
hfi1_vnic_deinit(struct hfi1_vnic_vport_info * vinfo)734 static void hfi1_vnic_deinit(struct hfi1_vnic_vport_info *vinfo)
735 {
736 struct hfi1_devdata *dd = vinfo->dd;
737 int i;
738
739 mutex_lock(&hfi1_mutex);
740 if (--dd->vnic.num_vports == 0) {
741 for (i = 0; i < dd->vnic.num_ctxt; i++) {
742 deallocate_vnic_ctxt(dd, dd->vnic.ctxt[i]);
743 hfi1_rcd_put(dd->vnic.ctxt[i]);
744 dd->vnic.ctxt[i] = NULL;
745 }
746 hfi1_deinit_vnic_rsm(dd);
747 dd->vnic.num_ctxt = 0;
748 hfi1_vnic_txreq_deinit(dd);
749 }
750 mutex_unlock(&hfi1_mutex);
751 }
752
hfi1_vnic_set_vesw_id(struct net_device * netdev,int id)753 static void hfi1_vnic_set_vesw_id(struct net_device *netdev, int id)
754 {
755 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
756 bool reopen = false;
757
758 /*
759 * If vesw_id is being changed, and if the vnic port is up,
760 * reset the vnic port to ensure new vesw_id gets picked up
761 */
762 if (id != vinfo->vesw_id) {
763 mutex_lock(&vinfo->lock);
764 if (test_bit(HFI1_VNIC_UP, &vinfo->flags)) {
765 hfi1_vnic_down(vinfo);
766 reopen = true;
767 }
768
769 vinfo->vesw_id = id;
770 if (reopen)
771 hfi1_vnic_up(vinfo);
772
773 mutex_unlock(&vinfo->lock);
774 }
775 }
776
777 /* netdev ops */
778 static const struct net_device_ops hfi1_netdev_ops = {
779 .ndo_open = hfi1_netdev_open,
780 .ndo_stop = hfi1_netdev_close,
781 .ndo_start_xmit = hfi1_netdev_start_xmit,
782 .ndo_select_queue = hfi1_vnic_select_queue,
783 .ndo_get_stats64 = hfi1_vnic_get_stats64,
784 };
785
hfi1_vnic_free_rn(struct net_device * netdev)786 static void hfi1_vnic_free_rn(struct net_device *netdev)
787 {
788 struct hfi1_vnic_vport_info *vinfo = opa_vnic_dev_priv(netdev);
789
790 hfi1_vnic_deinit(vinfo);
791 mutex_destroy(&vinfo->lock);
792 free_netdev(netdev);
793 }
794
/*
 * hfi1_vnic_alloc_rn - allocate a vnic rdma net device.
 * @device: ib device the netdev belongs to
 * @port_num: port number, 1 .. dd->num_pports
 * @type: must be RDMA_NETDEV_OPA_VNIC
 * @name: netdev name, passed to alloc_netdev_mqs()
 * @name_assign_type: passed to alloc_netdev_mqs()
 * @setup: netdev setup callback, passed to alloc_netdev_mqs()
 *
 * The netdev gets one tx queue per sdma engine and one rx queue (with its
 * own napi instance) per vnic context.
 *
 * Return: the new net device, or an ERR_PTR():
 *   -ENOMEM     no vnic contexts are configured, or allocation failed
 *   -EINVAL     @port_num is out of range
 *   -EOPNOTSUPP @type is not RDMA_NETDEV_OPA_VNIC
 *   or the error returned by hfi1_vnic_init().
 */
struct net_device *hfi1_vnic_alloc_rn(struct ib_device *device,
				      u8 port_num,
				      enum rdma_netdev_t type,
				      const char *name,
				      unsigned char name_assign_type,
				      void (*setup)(struct net_device *))
{
	struct hfi1_devdata *dd = dd_from_ibdev(device);
	struct hfi1_vnic_vport_info *vinfo;
	struct net_device *netdev;
	struct rdma_netdev *rn;
	int q, priv_size, rc;

	if (!dd->num_vnic_contexts)
		return ERR_PTR(-ENOMEM);

	if (port_num == 0 || port_num > dd->num_pports)
		return ERR_PTR(-EINVAL);

	if (type != RDMA_NETDEV_OPA_VNIC)
		return ERR_PTR(-EOPNOTSUPP);

	/* private area: the opa_vnic rdma netdev followed by our vport info */
	priv_size = sizeof(struct opa_vnic_rdma_netdev) + sizeof(*vinfo);
	netdev = alloc_netdev_mqs(priv_size, name, name_assign_type, setup,
				  chip_sdma_engines(dd),
				  dd->num_vnic_contexts);
	if (!netdev)
		return ERR_PTR(-ENOMEM);

	rn = netdev_priv(netdev);
	rn->free_rdma_netdev = hfi1_vnic_free_rn;
	rn->set_id = hfi1_vnic_set_vesw_id;

	vinfo = opa_vnic_dev_priv(netdev);
	vinfo->dd = dd;
	vinfo->netdev = netdev;
	vinfo->num_tx_q = chip_sdma_engines(dd);
	vinfo->num_rx_q = dd->num_vnic_contexts;
	mutex_init(&vinfo->lock);

	netdev->features = NETIF_F_HIGHDMA | NETIF_F_SG;
	netdev->hw_features = netdev->features;
	netdev->vlan_features = netdev->features;
	netdev->watchdog_timeo = msecs_to_jiffies(HFI_TX_TIMEOUT_MS);
	netdev->netdev_ops = &hfi1_netdev_ops;

	for (q = 0; q < vinfo->num_rx_q; q++) {
		struct hfi1_vnic_rx_queue *rxq = &vinfo->rxq[q];

		rxq->idx = q;
		rxq->vinfo = vinfo;
		rxq->netdev = netdev;
		netif_napi_add(netdev, &rxq->napi, hfi1_vnic_napi, 64);
	}

	rc = hfi1_vnic_init(vinfo);
	if (!rc)
		return netdev;

	/* resource setup failed: undo the allocation */
	mutex_destroy(&vinfo->lock);
	free_netdev(netdev);
	return ERR_PTR(rc);
}
858