1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c) 2012 - 2015 UNISYS CORPORATION
3  * All rights reserved.
4  */
5 
6 /* This driver lives in a spar partition, and registers to ethernet IO
7  * channels from the visorbus driver. It creates netdev devices and
8  * forwards transmits to the IO channel and accepts rcvs from the IO
9  * Partition via the IO channel.
10  */
11 
12 #include <linux/debugfs.h>
13 #include <linux/etherdevice.h>
14 #include <linux/module.h>
15 #include <linux/netdevice.h>
16 #include <linux/kthread.h>
17 #include <linux/skbuff.h>
18 #include <linux/rtnetlink.h>
19 #include <linux/visorbus.h>
20 
21 #include "iochannel.h"
22 
23 #define VISORNIC_INFINITE_RSP_WAIT 0
24 
25 /* MAX_BUF = 64 lines x 32 MAXVNIC x 80 characters
26  *         = 163840 bytes
27  */
28 #define MAX_BUF 163840
29 #define NAPI_WEIGHT 64
30 
31 /* GUIDs for the vnic channel type supported by this driver. */
32 /* {8cd5994d-c58e-11da-95a9-00e08161165f} */
33 #define VISOR_VNIC_CHANNEL_GUID \
34 	GUID_INIT(0x8cd5994d, 0xc58e, 0x11da, \
35 		0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
36 #define VISOR_VNIC_CHANNEL_GUID_STR \
37 	"8cd5994d-c58e-11da-95a9-00e08161165f"
38 
39 static struct visor_channeltype_descriptor visornic_channel_types[] = {
40 	/* Note that the only channel type we expect to be reported by the
41 	 * bus driver is the VISOR_VNIC channel.
42 	 */
43 	{ VISOR_VNIC_CHANNEL_GUID, "ultravnic", sizeof(struct channel_header),
44 	  VISOR_VNIC_CHANNEL_VERSIONID },
45 	{}
46 };
47 MODULE_DEVICE_TABLE(visorbus, visornic_channel_types);
48 /* FIXME XXX: This next line of code must be fixed and removed before
49  * acceptance into the 'normal' part of the kernel.  It is only here as a place
50  * holder to get module autoloading functionality working for visorbus.  Code
51  * must be added to scripts/mod/file2alias.c, etc., to get this working
52  * properly.
53  */
54 MODULE_ALIAS("visorbus:" VISOR_VNIC_CHANNEL_GUID_STR);
55 
56 struct chanstat {
57 	unsigned long got_rcv;
58 	unsigned long got_enbdisack;
59 	unsigned long got_xmit_done;
60 	unsigned long xmit_fail;
61 	unsigned long sent_enbdis;
62 	unsigned long sent_promisc;
63 	unsigned long sent_post;
64 	unsigned long sent_post_failed;
65 	unsigned long sent_xmit;
66 	unsigned long reject_count;
67 	unsigned long extra_rcvbufs_sent;
68 };
69 
70 /* struct visornic_devdata
71  * @enabled:                        0 = disabled, 1 = enabled to receive.
72  * @enab_dis_acked:                 NET_RCV_ENABLE/DISABLE acked by IOPART.
73  * @struct *dev:
74  * @struct *netdev:
75  * @struct net_stats:
76  * @interrupt_rcvd:
77  * @rsp_queue:
78  * @struct **rcvbuf:
79  * @incarnation_id:                 incarnation_id lets IOPART know about
80  *                                  re-birth.
81  * @old_flags:                      flags as they were prior to
82  *                                  set_multicast_list.
83  * @usage:                          count of users.
84  * @num_rcv_bufs:                   number of rcv buffers the vnic will post.
85  * @num_rcv_bufs_could_not_alloc:
86  * @num_rcvbuf_in_iovm:
87  * @alloc_failed_in_if_needed_cnt:
88  * @alloc_failed_in_repost_rtn_cnt:
89  * @max_outstanding_net_xmits:      absolute max number of outstanding xmits
90  *                                  - should never hit this.
91  * @upper_threshold_net_xmits:      high water mark for calling
92  *                                  netif_stop_queue().
93  * @lower_threshold_net_xmits:      low water mark for calling
94  *                                  netif_wake_queue().
95  * @struct xmitbufhead:             xmitbufhead - head of the xmit buffer list
96  *                                  sent to the IOPART end.
97  * @server_down_complete_func:
98  * @struct timeout_reset:
99  * @struct *cmdrsp_rcv:             cmdrsp_rcv is used for posting/unposting rcv
100  *                                  buffers.
101  * @struct *xmit_cmdrsp:            xmit_cmdrsp - issues NET_XMIT - only one
102  *                                  active xmit at a time.
103  * @server_down:                    IOPART is down.
104  * @server_change_state:            Processing SERVER_CHANGESTATE msg.
105  * @going_away:                     device is being torn down.
106  * @struct *eth_debugfs_dir:
107  * @interrupts_rcvd:
108  * @interrupts_notme:
109  * @interrupts_disabled:
110  * @busy_cnt:
111  * @priv_lock:                      spinlock to access devdata structures.
112  * @flow_control_upper_hits:
113  * @flow_control_lower_hits:
114  * @n_rcv0:                         # rcvs of 0 buffers.
115  * @n_rcv1:                         # rcvs of 1 buffers.
116  * @n_rcv2:                         # rcvs of 2 buffers.
117  * @n_rcvx:                         # rcvs of >2 buffers.
118  * @found_repost_rcvbuf_cnt:        # repost_rcvbuf_cnt.
119  * @repost_found_skb_cnt:           # of times we found the skb.
120  * @n_repost_deficit:               # of lost rcv buffers.
121  * @bad_rcv_buf:                    # of unknown rcv skb not freed.
122  * @n_rcv_packets_not_accepted:     # of bogus rcv packets.
123  * @queuefullmsg_logged:
124  * @struct chstat:
125  * @struct irq_poll_timer:
126  * @struct napi:
127  * @struct cmdrsp:
128  */
129 struct visornic_devdata {
130 	unsigned short enabled;
131 	unsigned short enab_dis_acked;
132 
133 	struct visor_device *dev;
134 	struct net_device *netdev;
135 	struct net_device_stats net_stats;
136 	atomic_t interrupt_rcvd;
137 	wait_queue_head_t rsp_queue;
138 	struct sk_buff **rcvbuf;
139 	u64 incarnation_id;
140 	unsigned short old_flags;
141 	atomic_t usage;
142 
143 	int num_rcv_bufs;
144 	int num_rcv_bufs_could_not_alloc;
145 	atomic_t num_rcvbuf_in_iovm;
146 	unsigned long alloc_failed_in_if_needed_cnt;
147 	unsigned long alloc_failed_in_repost_rtn_cnt;
148 
149 	unsigned long max_outstanding_net_xmits;
150 	unsigned long upper_threshold_net_xmits;
151 	unsigned long lower_threshold_net_xmits;
152 	struct sk_buff_head xmitbufhead;
153 
154 	visorbus_state_complete_func server_down_complete_func;
155 	struct work_struct timeout_reset;
156 	struct uiscmdrsp *cmdrsp_rcv;
157 	struct uiscmdrsp *xmit_cmdrsp;
158 	bool server_down;
159 	bool server_change_state;
160 	bool going_away;
161 	struct dentry *eth_debugfs_dir;
162 	u64 interrupts_rcvd;
163 	u64 interrupts_notme;
164 	u64 interrupts_disabled;
165 	u64 busy_cnt;
166 	/* spinlock to access devdata structures. */
167 	spinlock_t priv_lock;
168 
169 	/* flow control counter */
170 	u64 flow_control_upper_hits;
171 	u64 flow_control_lower_hits;
172 
173 	/* debug counters */
174 	unsigned long n_rcv0;
175 	unsigned long n_rcv1;
176 	unsigned long n_rcv2;
177 	unsigned long n_rcvx;
178 	unsigned long found_repost_rcvbuf_cnt;
179 	unsigned long repost_found_skb_cnt;
180 	unsigned long n_repost_deficit;
181 	unsigned long bad_rcv_buf;
182 	unsigned long n_rcv_packets_not_accepted;
183 
184 	int queuefullmsg_logged;
185 	struct chanstat chstat;
186 	struct timer_list irq_poll_timer;
187 	struct napi_struct napi;
188 	struct uiscmdrsp cmdrsp[SIZEOF_CMDRSP];
189 };
190 
191 /* Returns next non-zero index on success or 0 on failure (i.e. out of room). */
192 static u16 add_physinfo_entries(u64 inp_pfn, u16 inp_off, u16 inp_len,
193 				u16 index, u16 max_pi_arr_entries,
194 				struct phys_info pi_arr[])
195 {
196 	u16 i, len, firstlen;
197 
198 	firstlen = PI_PAGE_SIZE - inp_off;
199 	if (inp_len <= firstlen) {
200 		/* The input entry spans only one page - add as is. */
201 		if (index >= max_pi_arr_entries)
202 			return 0;
203 		pi_arr[index].pi_pfn = inp_pfn;
204 		pi_arr[index].pi_off = (u16)inp_off;
205 		pi_arr[index].pi_len = (u16)inp_len;
206 		return index + 1;
207 	}
208 
209 	/* This entry spans multiple pages. */
210 	for (len = inp_len, i = 0; len;
211 		len -= pi_arr[index + i].pi_len, i++) {
212 		if (index + i >= max_pi_arr_entries)
213 			return 0;
214 		pi_arr[index + i].pi_pfn = inp_pfn + i;
215 		if (i == 0) {
216 			pi_arr[index].pi_off = inp_off;
217 			pi_arr[index].pi_len = firstlen;
218 		} else {
219 			pi_arr[index + i].pi_off = 0;
220 			pi_arr[index + i].pi_len = min_t(u16, len,
221 							 PI_PAGE_SIZE);
222 		}
223 	}
224 	return index + i;
225 }
226 
227 /* visor_copy_fragsinfo_from_skb - copy fragment list in the SKB to a phys_info
228  *				   array that the IOPART understands
229  * @skb:	  Skbuff that we are pulling the frags from.
230  * @firstfraglen: Length of first fragment in skb.
231  * @frags_max:	  Max len of frags array.
232  * @frags:	  Frags array filled in on output.
233  *
234  * Return: Positive integer indicating number of entries filled in frags on
235  *         success, negative integer on error.
236  */
237 static int visor_copy_fragsinfo_from_skb(struct sk_buff *skb,
238 					 unsigned int firstfraglen,
239 					 unsigned int frags_max,
240 					 struct phys_info frags[])
241 {
242 	unsigned int count = 0, frag, size, offset = 0, numfrags;
243 	unsigned int total_count;
244 
245 	numfrags = skb_shinfo(skb)->nr_frags;
246 
247 	/* Compute the number of fragments this skb has; if it is more than
248 	 * the frag array can hold, linearize the skb.
249 	 */
250 	total_count = numfrags + (firstfraglen / PI_PAGE_SIZE);
251 	if (firstfraglen % PI_PAGE_SIZE)
252 		total_count++;
253 
254 	if (total_count > frags_max) {
255 		if (skb_linearize(skb))
256 			return -EINVAL;
257 		numfrags = skb_shinfo(skb)->nr_frags;
258 		firstfraglen = 0;
259 	}
260 
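	/* Walk the linear (first-frag) data page by page, emitting one
	 * phys_info entry per page it touches.
	 */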
261 	while (firstfraglen) {
262 		if (count == frags_max)
263 			return -EINVAL;
264 
265 		frags[count].pi_pfn =
266 			page_to_pfn(virt_to_page(skb->data + offset));
267 		frags[count].pi_off =
268 			(unsigned long)(skb->data + offset) & PI_PAGE_MASK;
269 		size = min_t(unsigned int, firstfraglen,
270 			     PI_PAGE_SIZE - frags[count].pi_off);
271 
272 		/* can take smallest of firstfraglen (what's left) OR
273 		 * bytes left in the page
274 		 */
275 		frags[count].pi_len = size;
276 		firstfraglen -= size;
277 		offset += size;
278 		count++;
279 	}
280 	if (numfrags) {
281 		if ((count + numfrags) > frags_max)
282 			return -EINVAL;
283 
284 		for (frag = 0; frag < numfrags; frag++) {
285 			count = add_physinfo_entries(page_to_pfn(
286 				  skb_frag_page(&skb_shinfo(skb)->frags[frag])),
287 				  skb_frag_off(&skb_shinfo(skb)->frags[frag]),
288 				  skb_frag_size(&skb_shinfo(skb)->frags[frag]),
289 				  count, frags_max, frags);
290 			/* add_physinfo_entries only returns
291 			 * zero if the frags array is out of room
292 			 * That should never happen because we
293 			 * fail above, if count+numfrags > frags_max.
294 			 */
295 			if (!count)
296 				return -EINVAL;
297 		}
298 	}
299 	if (skb_shinfo(skb)->frag_list) {
300 		struct sk_buff *skbinlist;
301 		int c;
302 
303 		for (skbinlist = skb_shinfo(skb)->frag_list; skbinlist;
304 		     skbinlist = skbinlist->next) {
305 			c = visor_copy_fragsinfo_from_skb(skbinlist,
306 							  skbinlist->len -
307 							  skbinlist->data_len,
308 							  frags_max - count,
309 							  &frags[count]);
310 			if (c < 0)
311 				return c;
312 			count += c;
313 		}
314 	}
315 	return count;
316 }
317 
318 static ssize_t enable_ints_write(struct file *file,
319 				 const char __user *buffer,
320 				 size_t count, loff_t *ppos)
321 {
322 	/* Don't want to break the ABI here by having a debugfs
323 	 * file that no longer exists or is no longer writable, so
324 	 * let's just make this a vestigial function.
325 	 */
326 	return count;
327 }
328 
329 static const struct file_operations debugfs_enable_ints_fops = {
330 	.write = enable_ints_write,
331 };
332 
333 /* visornic_serverdown_complete - pause device following IOPART going down
334  * @devdata: Device managed by IOPART.
335  *
336  * The IO partition has gone down, and we need to do some cleanup for when it
337  * comes back. Treat the IO partition as the link being down.
338  */
339 static void visornic_serverdown_complete(struct visornic_devdata *devdata)
340 {
341 	struct net_device *netdev = devdata->netdev;
342 
343 	/* Stop polling for interrupts */
344 	del_timer_sync(&devdata->irq_poll_timer);
345 
346 	rtnl_lock();
347 	dev_close(netdev);
348 	rtnl_unlock();
349 
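	/* The IO Partition is gone, so nothing we posted or sent is
	 * outstanding any more; reset the related counters.
	 */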
350 	atomic_set(&devdata->num_rcvbuf_in_iovm, 0);
351 	devdata->chstat.sent_xmit = 0;
352 	devdata->chstat.got_xmit_done = 0;
353 
354 	if (devdata->server_down_complete_func)
355 		(*devdata->server_down_complete_func)(devdata->dev, 0);
356 
357 	devdata->server_down = true;
358 	devdata->server_change_state = false;
359 	devdata->server_down_complete_func = NULL;
360 }
361 
362 /* visornic_serverdown - Command has notified us that IOPART is down
363  * @devdata:	   Device managed by IOPART.
364  * @complete_func: Function to call when finished.
365  *
366  * Schedule the work needed to handle the server down request. Make sure we
367  * haven't already handled the server change state event.
368  *
369  * Return: 0 if we scheduled the work, negative integer on error.
370  */
371 static int visornic_serverdown(struct visornic_devdata *devdata,
372 			       visorbus_state_complete_func complete_func)
373 {
374 	unsigned long flags;
375 	int err;
376 
377 	spin_lock_irqsave(&devdata->priv_lock, flags);
378 	if (devdata->server_change_state) {
379 		dev_dbg(&devdata->dev->device, "%s changing state\n",
380 			__func__);
381 		err = -EINVAL;
382 		goto err_unlock;
383 	}
384 	if (devdata->server_down) {
385 		dev_dbg(&devdata->dev->device, "%s already down\n",
386 			__func__);
387 		err = -EINVAL;
388 		goto err_unlock;
389 	}
390 	if (devdata->going_away) {
391 		dev_dbg(&devdata->dev->device,
392 			"%s aborting because device removal pending\n",
393 			__func__);
394 		err = -ENODEV;
395 		goto err_unlock;
396 	}
397 	devdata->server_change_state = true;
398 	devdata->server_down_complete_func = complete_func;
399 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
400 
401 	visornic_serverdown_complete(devdata);
402 	return 0;
403 
404 err_unlock:
405 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
406 	return err;
407 }
408 
409 /* alloc_rcv_buf - alloc rcv buffer to be given to the IO Partition
410  * @netdev: Network adapter the rcv bufs are attached to.
411  *
412  * Create an sk_buff (rcv_buf) that will be passed to the IO Partition
413  * so that it can write rcv data into our memory space.
414  *
415  * Return: Pointer to sk_buff.
416  */
417 static struct sk_buff *alloc_rcv_buf(struct net_device *netdev)
418 {
419 	struct sk_buff *skb;
420 
421 	/* NOTE: the first fragment in each rcv buffer is pointed to by
422 	 * rcvskb->data. For now all rcv buffers will be RCVPOST_BUF_SIZE
423 	 * in length, so the first frag is large enough to hold a 1514-byte frame.
424 	 */
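	/* GFP_ATOMIC because this can also be called from the NAPI receive
	 * path when reposting buffers.
	 */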
425 	skb = alloc_skb(RCVPOST_BUF_SIZE, GFP_ATOMIC);
426 	if (!skb)
427 		return NULL;
428 	skb->dev = netdev;
429 	/* current value of mtu doesn't come into play here; large
430 	 * packets will just end up using multiple rcv buffers all of
431 	 * same size.
432 	 */
433 	skb->len = RCVPOST_BUF_SIZE;
434 	/* alloc_skb already zeroes data_len; set it explicitly for clarity. */
435 	skb->data_len = 0;
436 	return skb;
437 }
438 
439 /* post_skb - post a skb to the IO Partition
440  * @cmdrsp:  Cmdrsp packet to be sent to the IO Partition.
441  * @devdata: visornic_devdata to post the skb to.
442  * @skb:     Skb to give to the IO partition.
443  *
444  * Return: 0 on success, negative integer on error.
445  */
446 static int post_skb(struct uiscmdrsp *cmdrsp, struct visornic_devdata *devdata,
447 		    struct sk_buff *skb)
448 {
449 	int err;
450 
451 	cmdrsp->net.buf = skb;
452 	cmdrsp->net.rcvpost.frag.pi_pfn = page_to_pfn(virt_to_page(skb->data));
453 	cmdrsp->net.rcvpost.frag.pi_off =
454 		(unsigned long)skb->data & PI_PAGE_MASK;
455 	cmdrsp->net.rcvpost.frag.pi_len = skb->len;
456 	cmdrsp->net.rcvpost.unique_num = devdata->incarnation_id;
457 
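	/* The rcv buffer is described as a single phys_info fragment, so it
	 * must not cross a page boundary.
	 */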
458 	if ((cmdrsp->net.rcvpost.frag.pi_off + skb->len) > PI_PAGE_SIZE)
459 		return -EINVAL;
460 
461 	cmdrsp->net.type = NET_RCV_POST;
462 	cmdrsp->cmdtype = CMD_NET_TYPE;
463 	err = visorchannel_signalinsert(devdata->dev->visorchannel,
464 					IOCHAN_TO_IOPART,
465 					cmdrsp);
466 	if (err) {
467 		devdata->chstat.sent_post_failed++;
468 		return err;
469 	}
470 
471 	atomic_inc(&devdata->num_rcvbuf_in_iovm);
472 	devdata->chstat.sent_post++;
473 	return 0;
474 }
475 
476 /* send_enbdis - Send NET_RCV_ENBDIS to IO Partition
477  * @netdev:  Netdevice we are enabling/disabling, used as context return value.
478  * @state:   Enable = 1/disable = 0.
479  * @devdata: Visornic device we are enabling/disabling.
480  *
481  * Send the enable/disable message to the IO Partition.
482  *
483  * Return: 0 on success, negative integer on error.
484  */
485 static int send_enbdis(struct net_device *netdev, int state,
486 		       struct visornic_devdata *devdata)
487 {
488 	int err;
489 
490 	devdata->cmdrsp_rcv->net.enbdis.enable = state;
491 	devdata->cmdrsp_rcv->net.enbdis.context = netdev;
492 	devdata->cmdrsp_rcv->net.type = NET_RCV_ENBDIS;
493 	devdata->cmdrsp_rcv->cmdtype = CMD_NET_TYPE;
494 	err = visorchannel_signalinsert(devdata->dev->visorchannel,
495 					IOCHAN_TO_IOPART,
496 					devdata->cmdrsp_rcv);
497 	if (err)
498 		return err;
499 	devdata->chstat.sent_enbdis++;
500 	return 0;
501 }
502 
503 /* visornic_disable_with_timeout - disable network adapter
504  * @netdev:  netdevice to disable.
505  * @timeout: Timeout to wait for disable.
506  *
507  * Disable the network adapter and inform the IO Partition that we are disabled.
508  * Reclaim memory from rcv bufs.
509  *
510  * Return: 0 on success, negative integer if the IO Partition fails to respond.
511  */
512 static int visornic_disable_with_timeout(struct net_device *netdev,
513 					 const int timeout)
514 {
515 	struct visornic_devdata *devdata = netdev_priv(netdev);
516 	int i;
517 	unsigned long flags;
518 	int wait = 0;
519 	int err;
520 
521 	/* send a msg telling the other end we are stopping incoming pkts */
522 	spin_lock_irqsave(&devdata->priv_lock, flags);
523 	devdata->enabled = 0;
524 	/* must wait for ack */
525 	devdata->enab_dis_acked = 0;
526 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
527 
528 	/* send disable and wait for ack -- don't hold lock when sending
529 	 * disable because if the queue is full, insert might sleep.
530 	 * If an error occurs, don't wait for the timeout.
531 	 */
532 	err = send_enbdis(netdev, 0, devdata);
533 	if (err)
534 		return err;
535 
536 	/* wait for ack to arrive before we try to free rcv buffers
537 	 * NOTE: the other end automatically unposts the rcv buffers
538 	 * when it gets a disable.
539 	 */
540 	spin_lock_irqsave(&devdata->priv_lock, flags);
541 	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
542 	       (wait < timeout)) {
543 		if (devdata->enab_dis_acked)
544 			break;
545 		if (devdata->server_down || devdata->server_change_state) {
546 			dev_dbg(&netdev->dev, "%s server went away\n",
547 				__func__);
548 			break;
549 		}
550 		set_current_state(TASK_INTERRUPTIBLE);
551 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
552 		wait += schedule_timeout(msecs_to_jiffies(10));
553 		spin_lock_irqsave(&devdata->priv_lock, flags);
554 	}
555 
556 	/* Wait for usage to go to 1 (no other users) before freeing
557 	 * rcv buffers
558 	 */
559 	if (atomic_read(&devdata->usage) > 1) {
560 		while (1) {
561 			set_current_state(TASK_INTERRUPTIBLE);
562 			spin_unlock_irqrestore(&devdata->priv_lock, flags);
563 			schedule_timeout(msecs_to_jiffies(10));
564 			spin_lock_irqsave(&devdata->priv_lock, flags);
565 			if (atomic_read(&devdata->usage) <= 1)
566 				break;
567 		}
568 	}
569 	/* we've set enabled to 0, so we can give up the lock. */
570 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
571 
572 	/* stop the transmit queue so nothing more can be transmitted */
573 	netif_stop_queue(netdev);
574 
575 	napi_disable(&devdata->napi);
576 
577 	skb_queue_purge(&devdata->xmitbufhead);
578 
579 	/* Free rcv buffers - other end has automatically unposted them on
580 	 * disable
581 	 */
582 	for (i = 0; i < devdata->num_rcv_bufs; i++) {
583 		if (devdata->rcvbuf[i]) {
584 			kfree_skb(devdata->rcvbuf[i]);
585 			devdata->rcvbuf[i] = NULL;
586 		}
587 	}
588 
589 	return 0;
590 }
591 
592 /* init_rcv_bufs - initialize receive buffs and send them to the IO Partition
593  * @netdev:  struct netdevice.
594  * @devdata: visornic_devdata.
595  *
596  * Allocate rcv buffers and post them to the IO Partition.
597  *
598  * Return: 0 on success, negative integer on failure.
599  */
600 static int init_rcv_bufs(struct net_device *netdev,
601 			 struct visornic_devdata *devdata)
602 {
603 	int i, j, count, err;
604 
605 	/* allocate a fixed number of receive buffers to post to uisnic;
606 	 * post the receive buffers after we've allocated the required amount
607 	 */
608 	for (i = 0; i < devdata->num_rcv_bufs; i++) {
609 		devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
610 		/* if we failed to allocate one let us stop */
611 		if (!devdata->rcvbuf[i])
612 			break;
613 	}
614 	/* couldn't even allocate one -- bail out */
615 	if (i == 0)
616 		return -ENOMEM;
617 	count = i;
618 
619 	/* Ensure we can alloc 2/3rd of the requested number of buffers.
620 	 * 2/3 is an arbitrary choice; used also in ndis init.c
621 	 */
622 	if (count < ((2 * devdata->num_rcv_bufs) / 3)) {
623 		/* free receive buffers we did alloc and then bail out */
624 		for (i = 0; i < count; i++) {
625 			kfree_skb(devdata->rcvbuf[i]);
626 			devdata->rcvbuf[i] = NULL;
627 		}
628 		return -ENOMEM;
629 	}
630 
631 	/* post receive buffers to receive incoming input - without holding
632 	 * lock - we've not enabled nor started the queue so there shouldn't
633 	 * be any rcv or xmit activity
634 	 */
635 	for (i = 0; i < count; i++) {
636 		err = post_skb(devdata->cmdrsp_rcv, devdata,
637 			       devdata->rcvbuf[i]);
638 		if (!err)
639 			continue;
640 
641 		/* Error handling -
642 		 * If we posted at least one skb, we should return success,
643 		 * but need to free the resources that we have not successfully
644 		 * posted.
645 		 */
646 		for (j = i; j < count; j++) {
647 			kfree_skb(devdata->rcvbuf[j]);
648 			devdata->rcvbuf[j] = NULL;
649 		}
650 		if (i == 0)
651 			return err;
652 		break;
653 	}
654 
655 	return 0;
656 }
657 
658 /* visornic_enable_with_timeout	- send enable to IO Partition
659  * @netdev:  struct net_device.
660  * @timeout: Time to wait for the ACK from the enable.
661  *
662  * Sends enable to the IOVM, then initializes and posts receive buffers to the
663  * IOVM. Timeout is defined in msecs (timeout of 0 specifies infinite wait).
664  *
665  * Return: 0 on success, negative integer on failure.
666  */
667 static int visornic_enable_with_timeout(struct net_device *netdev,
668 					const int timeout)
669 {
670 	int err = 0;
671 	struct visornic_devdata *devdata = netdev_priv(netdev);
672 	unsigned long flags;
673 	int wait = 0;
674 
675 	napi_enable(&devdata->napi);
676 
677 	/* NOTE: the other end automatically unposts the rcv buffers when it
678 	 * gets a disable.
679 	 */
680 	err = init_rcv_bufs(netdev, devdata);
681 	if (err < 0) {
682 		dev_err(&netdev->dev,
683 			"%s failed to init rcv bufs\n", __func__);
684 		return err;
685 	}
686 
687 	spin_lock_irqsave(&devdata->priv_lock, flags);
688 	devdata->enabled = 1;
689 	devdata->enab_dis_acked = 0;
690 
691 	/* now we're ready, let's send an ENB to uisnic but until we get
692 	 * an ACK back from uisnic, we'll drop the packets
693 	 */
694 	devdata->n_rcv_packets_not_accepted = 0;
695 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
696 
697 	/* send enable and wait for ack -- don't hold lock when sending enable
698 	 * because if the queue is full, insert might sleep. If an error
699 	 * occurs error out.
700 	 */
701 	err = send_enbdis(netdev, 1, devdata);
702 	if (err)
703 		return err;
704 
705 	spin_lock_irqsave(&devdata->priv_lock, flags);
706 	while ((timeout == VISORNIC_INFINITE_RSP_WAIT) ||
707 	       (wait < timeout)) {
708 		if (devdata->enab_dis_acked)
709 			break;
710 		if (devdata->server_down || devdata->server_change_state) {
711 			dev_dbg(&netdev->dev, "%s server went away\n",
712 				__func__);
713 			break;
714 		}
715 		set_current_state(TASK_INTERRUPTIBLE);
716 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
717 		wait += schedule_timeout(msecs_to_jiffies(10));
718 		spin_lock_irqsave(&devdata->priv_lock, flags);
719 	}
720 
721 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
722 
723 	if (!devdata->enab_dis_acked) {
724 		dev_err(&netdev->dev, "%s missing ACK\n", __func__);
725 		return -EIO;
726 	}
727 
728 	netif_start_queue(netdev);
729 	return 0;
730 }
731 
732 /* visornic_timeout_reset - handle xmit timeout resets
733  * @work: Work item that scheduled the work.
734  *
735  * Transmit timeouts are typically handled by resetting the device for our
736  * virtual NIC; we will send a disable and enable to the IOVM. If it doesn't
737  * respond, we will trigger a serverdown.
738  */
739 static void visornic_timeout_reset(struct work_struct *work)
740 {
741 	struct visornic_devdata *devdata;
742 	struct net_device *netdev;
743 	int response = 0;
744 
745 	devdata = container_of(work, struct visornic_devdata, timeout_reset);
746 	netdev = devdata->netdev;
747 
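	/* Hold the RTNL lock so the disable/enable sequence cannot race with
	 * other changes to the interface state.
	 */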
748 	rtnl_lock();
749 	if (!netif_running(netdev)) {
750 		rtnl_unlock();
751 		return;
752 	}
753 
754 	response = visornic_disable_with_timeout(netdev,
755 						 VISORNIC_INFINITE_RSP_WAIT);
756 	if (response)
757 		goto call_serverdown;
758 
759 	response = visornic_enable_with_timeout(netdev,
760 						VISORNIC_INFINITE_RSP_WAIT);
761 	if (response)
762 		goto call_serverdown;
763 
764 	rtnl_unlock();
765 
766 	return;
767 
768 call_serverdown:
769 	visornic_serverdown(devdata, NULL);
770 	rtnl_unlock();
771 }
772 
773 /* visornic_open - enable the visornic device and mark the queue started
774  * @netdev: netdevice to start.
775  *
776  * Enable the device and start the transmit queue.
777  *
778  * Return: 0 on success.
779  */
780 static int visornic_open(struct net_device *netdev)
781 {
782 	visornic_enable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
783 	return 0;
784 }
785 
786 /* visornic_close - disables the visornic device and stops the queues
787  * @netdev: netdevice to stop.
788  *
789  * Disable the device and stop the transmit queue.
790  *
791  * Return: 0 on success.
792  */
793 static int visornic_close(struct net_device *netdev)
794 {
795 	visornic_disable_with_timeout(netdev, VISORNIC_INFINITE_RSP_WAIT);
796 	return 0;
797 }
798 
799 /* devdata_xmits_outstanding - compute outstanding xmits
800  * @devdata: visornic_devdata for device
801  *
802  * Return: Long integer representing the number of outstanding xmits.
803  */
804 static unsigned long devdata_xmits_outstanding(struct visornic_devdata *devdata)
805 {
806 	if (devdata->chstat.sent_xmit >= devdata->chstat.got_xmit_done)
807 		return devdata->chstat.sent_xmit -
808 			devdata->chstat.got_xmit_done;
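	/* sent_xmit has wrapped past got_xmit_done; account for the wrap. */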
809 	return (ULONG_MAX - devdata->chstat.got_xmit_done
810 		+ devdata->chstat.sent_xmit + 1);
811 }
812 
813 /* vnic_hit_high_watermark
814  * @devdata:	    Indicates visornic device we are checking.
815  * @high_watermark: Max num of unacked xmits we will tolerate before we will
816  *		    start throttling.
817  *
818  * Return: True iff the number of unacked xmits sent to the IO Partition is >=
819  *	   high_watermark. False otherwise.
820  */
821 static bool vnic_hit_high_watermark(struct visornic_devdata *devdata,
822 				    ulong high_watermark)
823 {
824 	return (devdata_xmits_outstanding(devdata) >= high_watermark);
825 }
826 
827 /* vnic_hit_low_watermark
828  * @devdata:	   Indicates visornic device we are checking.
829  * @low_watermark: We will wait until the num of unacked xmits drops to this
830  *		   value or lower before we start transmitting again.
831  *
832  * Return: True iff the number of unacked xmits sent to the IO Partition is <=
833  *	   low_watermark.
834  */
835 static bool vnic_hit_low_watermark(struct visornic_devdata *devdata,
836 				   ulong low_watermark)
837 {
838 	return (devdata_xmits_outstanding(devdata) <= low_watermark);
839 }
840 
841 /* visornic_xmit - send a packet to the IO Partition
842  * @skb:    Packet to be sent.
843  * @netdev: Net device the packet is being sent from.
844  *
845  * Convert the skb to a cmdrsp so the IO Partition can understand it, and send
846  * the XMIT command to the IO Partition for processing. This function is
847  * protected from concurrent calls by a spinlock xmit_lock in the net_device
848  * struct. As soon as the function returns, it can be called again.
849  *
850  * Return: NETDEV_TX_OK.
851  */
852 static netdev_tx_t visornic_xmit(struct sk_buff *skb, struct net_device *netdev)
853 {
854 	struct visornic_devdata *devdata;
855 	int len, firstfraglen, padlen;
856 	struct uiscmdrsp *cmdrsp = NULL;
857 	unsigned long flags;
858 	int err;
859 
860 	devdata = netdev_priv(netdev);
861 	spin_lock_irqsave(&devdata->priv_lock, flags);
862 
863 	if (netif_queue_stopped(netdev) || devdata->server_down ||
864 	    devdata->server_change_state) {
865 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
866 		devdata->busy_cnt++;
867 		dev_dbg(&netdev->dev,
868 			"%s busy - queue stopped\n", __func__);
869 		kfree_skb(skb);
870 		return NETDEV_TX_OK;
871 	}
872 
873 	/* sk_buff struct is used to host network data throughout all the
874 	 * linux network subsystems
875 	 */
876 	len = skb->len;
877 
878 	/* skb->len is the FULL length of data (including fragmentary portion)
879 	 * skb->data_len is the length of the fragment portion in frags
880 	 * skb->len - skb->data_len is size of the 1st fragment in skb->data
881 	 * calculate the length of the first fragment that skb->data is
882 	 * pointing to
883 	 */
884 	firstfraglen = skb->len - skb->data_len;
885 	if (firstfraglen < ETH_HLEN) {
886 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
887 		devdata->busy_cnt++;
888 		dev_err(&netdev->dev,
889 			"%s busy - first frag too small (%d)\n",
890 			__func__, firstfraglen);
891 		kfree_skb(skb);
892 		return NETDEV_TX_OK;
893 	}
894 
895 	if (len < ETH_MIN_PACKET_SIZE &&
896 	    ((skb_end_pointer(skb) - skb->data) >= ETH_MIN_PACKET_SIZE)) {
897 		/* pad the packet out to minimum size */
898 		padlen = ETH_MIN_PACKET_SIZE - len;
899 		skb_put_zero(skb, padlen);
900 		len += padlen;
901 		firstfraglen += padlen;
902 	}
903 
904 	cmdrsp = devdata->xmit_cmdrsp;
905 	/* clear cmdrsp */
906 	memset(cmdrsp, 0, SIZEOF_CMDRSP);
907 	cmdrsp->net.type = NET_XMIT;
908 	cmdrsp->cmdtype = CMD_NET_TYPE;
909 
910 	/* save the pointer to skb -- we'll need it for completion */
911 	cmdrsp->net.buf = skb;
912 
913 	if (vnic_hit_high_watermark(devdata,
914 				    devdata->max_outstanding_net_xmits)) {
915 		/* extra NET_XMITs queued over to IOVM - need to wait */
916 		devdata->chstat.reject_count++;
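		/* Remember that we hit the queue-full condition; the flag is
		 * cleared on a later xmit once we are back under the watermark.
		 */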
917 		if (!devdata->queuefullmsg_logged &&
918 		    ((devdata->chstat.reject_count & 0x3ff) == 1))
919 			devdata->queuefullmsg_logged = 1;
920 		netif_stop_queue(netdev);
921 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
922 		devdata->busy_cnt++;
923 		dev_dbg(&netdev->dev,
924 			"%s busy - waiting for iovm to catch up\n",
925 			__func__);
926 		kfree_skb(skb);
927 		return NETDEV_TX_OK;
928 	}
929 	if (devdata->queuefullmsg_logged)
930 		devdata->queuefullmsg_logged = 0;
931 
932 	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
933 		cmdrsp->net.xmt.lincsum.valid = 1;
934 		cmdrsp->net.xmt.lincsum.protocol = skb->protocol;
935 		if (skb_transport_header(skb) > skb->data) {
936 			cmdrsp->net.xmt.lincsum.hrawoff =
937 				skb_transport_header(skb) - skb->data;
938 			cmdrsp->net.xmt.lincsum.hrawoffv = 1;
939 		}
940 		if (skb_network_header(skb) > skb->data) {
941 			cmdrsp->net.xmt.lincsum.nhrawoff =
942 				skb_network_header(skb) - skb->data;
943 			cmdrsp->net.xmt.lincsum.nhrawoffv = 1;
944 		}
945 		cmdrsp->net.xmt.lincsum.csum = skb->csum;
946 	} else {
947 		cmdrsp->net.xmt.lincsum.valid = 0;
948 	}
949 
950 	/* save off the length of the entire data packet */
951 	cmdrsp->net.xmt.len = len;
952 
953 	/* copy the ethernet header from the first frag into cmdrsp
954 	 * - everything else will be passed in frags & DMA'ed
955 	 */
956 	memcpy(cmdrsp->net.xmt.ethhdr, skb->data, ETH_HLEN);
957 
958 	/* copy frags info - from skb->data we need to only provide access
959 	 * beyond eth header
960 	 */
961 	cmdrsp->net.xmt.num_frags =
962 		visor_copy_fragsinfo_from_skb(skb, firstfraglen,
963 					      MAX_PHYS_INFO,
964 					      cmdrsp->net.xmt.frags);
965 	if (cmdrsp->net.xmt.num_frags < 0) {
966 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
967 		devdata->busy_cnt++;
968 		dev_err(&netdev->dev,
969 			"%s busy - copy frags failed\n", __func__);
970 		kfree_skb(skb);
971 		return NETDEV_TX_OK;
972 	}
973 
974 	err = visorchannel_signalinsert(devdata->dev->visorchannel,
975 					IOCHAN_TO_IOPART, cmdrsp);
976 	if (err) {
977 		netif_stop_queue(netdev);
978 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
979 		devdata->busy_cnt++;
980 		dev_dbg(&netdev->dev,
981 			"%s busy - signalinsert failed\n", __func__);
982 		kfree_skb(skb);
983 		return NETDEV_TX_OK;
984 	}
985 
986 	/* Track the skbs that have been sent to the IOVM for XMIT */
987 	skb_queue_head(&devdata->xmitbufhead, skb);
988 
989 	/* update xmt stats */
990 	devdata->net_stats.tx_packets++;
991 	devdata->net_stats.tx_bytes += skb->len;
992 	devdata->chstat.sent_xmit++;
993 
994 	/* check if we have hit the high watermark for netif_stop_queue() */
995 	if (vnic_hit_high_watermark(devdata,
996 				    devdata->upper_threshold_net_xmits)) {
997 		/* extra NET_XMITs queued over to IOVM - need to wait */
998 		/* stop queue - call netif_wake_queue() after lower threshold */
999 		netif_stop_queue(netdev);
1000 		dev_dbg(&netdev->dev,
1001 			"%s busy - invoking iovm flow control\n",
1002 			__func__);
1003 		devdata->flow_control_upper_hits++;
1004 	}
1005 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1006 
1007 	/* skb will be freed when we get back NET_XMIT_DONE */
1008 	return NETDEV_TX_OK;
1009 }
1010 
1011 /* visornic_get_stats - returns net_stats of the visornic device
1012  * @netdev: netdevice.
1013  *
1014  * Return: Pointer to the net_device_stats struct for the device.
1015  */
1016 static struct net_device_stats *visornic_get_stats(struct net_device *netdev)
1017 {
1018 	struct visornic_devdata *devdata = netdev_priv(netdev);
1019 
1020 	return &devdata->net_stats;
1021 }
1022 
1023 /* visornic_change_mtu - changes mtu of device
1024  * @netdev: netdevice.
1025  * @new_mtu: Value of new mtu.
1026  *
1027  * The device's MTU cannot be changed by the system; it must be changed via a
1028  * CONTROLVM message. All vnics and pnics in a switch have to have the same MTU
1029  * for everything to work. Currently not supported.
1030  *
1031  * Return: -EINVAL.
1032  */
1033 static int visornic_change_mtu(struct net_device *netdev, int new_mtu)
1034 {
1035 	return -EINVAL;
1036 }
1037 
1038 /* visornic_set_multi - set visornic device flags
1039  * @netdev: netdevice.
1040  *
1041  * The only flag we currently support is IFF_PROMISC.
1042  */
1043 static void visornic_set_multi(struct net_device *netdev)
1044 {
1045 	struct uiscmdrsp *cmdrsp;
1046 	struct visornic_devdata *devdata = netdev_priv(netdev);
1047 	int err = 0;
1048 
1049 	if (devdata->old_flags == netdev->flags)
1050 		return;
1051 
1052 	if ((netdev->flags & IFF_PROMISC) ==
1053 	    (devdata->old_flags & IFF_PROMISC))
1054 		goto out_save_flags;
1055 
1056 	cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_ATOMIC);
1057 	if (!cmdrsp)
1058 		return;
1059 	cmdrsp->cmdtype = CMD_NET_TYPE;
1060 	cmdrsp->net.type = NET_RCV_PROMISC;
1061 	cmdrsp->net.enbdis.context = netdev;
1062 	cmdrsp->net.enbdis.enable =
1063 		netdev->flags & IFF_PROMISC;
1064 	err = visorchannel_signalinsert(devdata->dev->visorchannel,
1065 					IOCHAN_TO_IOPART,
1066 					cmdrsp);
1067 	kfree(cmdrsp);
1068 	if (err)
1069 		return;
1070 
1071 out_save_flags:
1072 	devdata->old_flags = netdev->flags;
1073 }
1074 
1075 /* visornic_xmit_timeout - request to timeout the xmit
1076  * @netdev: netdevice.
1077  *
1078  * Queue the work and return. Make sure we have not already been informed that
1079  * the IO Partition is gone; if so, we will have already timed-out the xmits.
1080  */
1081 static void visornic_xmit_timeout(struct net_device *netdev, unsigned int txqueue)
1082 {
1083 	struct visornic_devdata *devdata = netdev_priv(netdev);
1084 	unsigned long flags;
1085 
1086 	spin_lock_irqsave(&devdata->priv_lock, flags);
1087 	if (devdata->going_away) {
1088 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
1089 		dev_dbg(&devdata->dev->device,
1090 			"%s aborting because device removal pending\n",
1091 			__func__);
1092 		return;
1093 	}
1094 
1095 	/* Ensure that a ServerDown message hasn't been received */
1096 	if (!devdata->enabled ||
1097 	    (devdata->server_down && !devdata->server_change_state)) {
1098 		dev_dbg(&netdev->dev, "%s no processing\n",
1099 			__func__);
1100 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
1101 		return;
1102 	}
1103 	schedule_work(&devdata->timeout_reset);
1104 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1105 }
1106 
1107 /* repost_return - repost rcv bufs that have come back
1108  * @cmdrsp: IO channel command struct to post.
1109  * @devdata: Visornic devdata for the device.
1110  * @skb: Socket buffer.
1111  * @netdev: netdevice.
1112  *
1113  * Repost rcv buffers that have been returned to us when we are finished
1114  * with them.
1115  *
1116  * Return: 0 for success, negative integer on error.
1117  */
1118 static int repost_return(struct uiscmdrsp *cmdrsp,
1119 			 struct visornic_devdata *devdata,
1120 			 struct sk_buff *skb, struct net_device *netdev)
1121 {
1122 	struct net_pkt_rcv copy;
1123 	int i = 0, cc, numreposted;
1124 	int found_skb = 0;
1125 	int status = 0;
1126 
1127 	copy = cmdrsp->net.rcv;
1128 	switch (copy.numrcvbufs) {
1129 	case 0:
1130 		devdata->n_rcv0++;
1131 		break;
1132 	case 1:
1133 		devdata->n_rcv1++;
1134 		break;
1135 	case 2:
1136 		devdata->n_rcv2++;
1137 		break;
1138 	default:
1139 		devdata->n_rcvx++;
1140 		break;
1141 	}
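	/* For each rcv buffer the IO Partition handed back, find its slot,
	 * allocate a replacement buffer, and repost it.
	 */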
1142 	for (cc = 0, numreposted = 0; cc < copy.numrcvbufs; cc++) {
1143 		for (i = 0; i < devdata->num_rcv_bufs; i++) {
1144 			if (devdata->rcvbuf[i] != copy.rcvbuf[cc])
1145 				continue;
1146 
1147 			if ((skb) && devdata->rcvbuf[i] == skb) {
1148 				devdata->found_repost_rcvbuf_cnt++;
1149 				found_skb = 1;
1150 				devdata->repost_found_skb_cnt++;
1151 			}
1152 			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1153 			if (!devdata->rcvbuf[i]) {
1154 				devdata->num_rcv_bufs_could_not_alloc++;
1155 				devdata->alloc_failed_in_repost_rtn_cnt++;
1156 				status = -ENOMEM;
1157 				break;
1158 			}
1159 			status = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1160 			if (status) {
1161 				kfree_skb(devdata->rcvbuf[i]);
1162 				devdata->rcvbuf[i] = NULL;
1163 				break;
1164 			}
1165 			numreposted++;
1166 			break;
1167 		}
1168 	}
1169 	if (numreposted != copy.numrcvbufs) {
1170 		devdata->n_repost_deficit++;
1171 		status = -EINVAL;
1172 	}
1173 	if (skb) {
1174 		if (found_skb) {
1175 			kfree_skb(skb);
1176 		} else {
1177 			status = -EINVAL;
1178 			devdata->bad_rcv_buf++;
1179 		}
1180 	}
1181 	return status;
1182 }
1183 
1184 /* visornic_rx - handle receive packets coming back from IO Partition
1185  * @cmdrsp: Receive packet returned from IO Partition.
1186  *
1187  * Got a receive packet back from the IO Partition; handle it and send it up
1188  * the stack.
1189  *
1190  * Return: 1 iff an skb was received, otherwise 0.
1191  */
1192 static int visornic_rx(struct uiscmdrsp *cmdrsp)
1193 {
1194 	struct visornic_devdata *devdata;
1195 	struct sk_buff *skb, *prev, *curr;
1196 	struct net_device *netdev;
1197 	int cc, currsize, off;
1198 	struct ethhdr *eth;
1199 	unsigned long flags;
1200 
1201 	/* Post a new rcv buf to the other end using the cmdrsp we have at hand.
1202 	 * Post it without holding the lock - but we'll use the signal lock to
1203 	 * synchronize the queue insert. The cmdrsp that contains the net.rcv
1204 	 * is the one we are using to repost, so copy the info we need from it.
1205 	 */
1206 	skb = cmdrsp->net.buf;
1207 	netdev = skb->dev;
1208 
1209 	devdata = netdev_priv(netdev);
1210 
1211 	spin_lock_irqsave(&devdata->priv_lock, flags);
1212 	atomic_dec(&devdata->num_rcvbuf_in_iovm);
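	/* This buffer has been returned to us and is no longer posted. */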
1213 
1214 	/* set length to how much was ACTUALLY received -
1215 	 * NOTE: rcv_done_len includes actual length of data rcvd
1216 	 * including ethhdr
1217 	 */
1218 	skb->len = cmdrsp->net.rcv.rcv_done_len;
1219 
1220 	/* update rcv stats - call it with priv_lock held */
1221 	devdata->net_stats.rx_packets++;
1222 	devdata->net_stats.rx_bytes += skb->len;
1223 
1224 	/* test enabled while holding lock */
1225 	if (!(devdata->enabled && devdata->enab_dis_acked)) {
1226 		/* don't process it unless we're in enable mode and until
1227 		 * we've gotten an ACK saying the other end got our RCV enable
1228 		 */
1229 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
1230 		repost_return(cmdrsp, devdata, skb, netdev);
1231 		return 0;
1232 	}
1233 
1234 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1235 
1236 	/* When the skb was allocated, skb->dev, skb->data, skb->len and
1237 	 * skb->data_len were set up, AND the data has already been put into
1238 	 * the skb (both the first frag and the frags pages).
1239 	 * NOTE: firstfraglen is the amount of data in skb->data, i.e. that
1240 	 * which is not in nr_frags or frag_list; this is now simply
1241 	 * RCVPOST_BUF_SIZE. Bump tail to show how much data is in the
1242 	 * first frag, set data_len to show the rest, and see if we have to
1243 	 * chain frag_list.
1244 	 */
1245 	/* do PRECAUTIONARY check */
1246 	if (skb->len > RCVPOST_BUF_SIZE) {
1247 		if (cmdrsp->net.rcv.numrcvbufs < 2) {
1248 			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1249 				dev_err(&devdata->netdev->dev,
1250 					"repost_return failed");
1251 			return 0;
1252 		}
1253 		/* length rcvd is greater than firstfrag in this skb rcv buf  */
1254 		/* amount in skb->data */
1255 		skb->tail += RCVPOST_BUF_SIZE;
1256 		/* amount that will be in frag_list */
1257 		skb->data_len = skb->len - RCVPOST_BUF_SIZE;
1258 	} else {
1259 		/* data fits in this skb - no chaining - do
1260 		 * PRECAUTIONARY check
1261 		 */
1262 		/* should be 1 */
1263 		if (cmdrsp->net.rcv.numrcvbufs != 1) {
1264 			if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1265 				dev_err(&devdata->netdev->dev,
1266 					"repost_return failed");
1267 			return 0;
1268 		}
1269 		skb->tail += skb->len;
1270 		/* nothing rcvd in frag_list */
1271 		skb->data_len = 0;
1272 	}
1273 	off = skb_tail_pointer(skb) - skb->data;
1274 
1275 	/* off is the amount we bumped tail by in the head skb. It is used
1276 	 * to calculate the size of each chained skb below, and also to
1277 	 * index into bufline to continue the copy (for chansocktwopc).
1278 	 * If necessary, chain the rcv skbs together.
1279 	 * NOTE: index 0 is the same as cmdrsp->net.rcv.skb; we need to
1281 	 * chain the rest to that one.
1282 	 * - do PRECAUTIONARY check
1283 	 */
1284 	if (cmdrsp->net.rcv.rcvbuf[0] != skb) {
1285 		if (repost_return(cmdrsp, devdata, skb, netdev) < 0)
1286 			dev_err(&devdata->netdev->dev, "repost_return failed");
1287 		return 0;
1288 	}
1289 
1290 	if (cmdrsp->net.rcv.numrcvbufs > 1) {
1291 		/* chain the various rcv buffers into the skb's frag_list. */
1292 		/* Note: off was initialized above  */
1293 		for (cc = 1, prev = NULL;
1294 		     cc < cmdrsp->net.rcv.numrcvbufs; cc++) {
1295 			curr = (struct sk_buff *)cmdrsp->net.rcv.rcvbuf[cc];
1296 			curr->next = NULL;
1297 			/* start of list- set head */
1298 			if (!prev)
1299 				skb_shinfo(skb)->frag_list = curr;
1300 			else
1301 				prev->next = curr;
1302 			prev = curr;
1303 
1304 			/* should we set skb->len and skb->data_len for each
1305 			 * buffer being chained??? can't hurt!
1306 			 */
1307 			currsize = min(skb->len - off,
1308 				       (unsigned int)RCVPOST_BUF_SIZE);
1309 			curr->len = currsize;
1310 			curr->tail += currsize;
1311 			curr->data_len = 0;
1312 			off += currsize;
1313 		}
1314 		/* assert skb->len == off */
1315 		if (skb->len != off) {
1316 			netdev_err(devdata->netdev,
1317 				   "something wrong; skb->len:%d != off:%d\n",
1318 				   skb->len, off);
1319 		}
1320 	}
1321 
1322 	/* set up packet's protocol type using ethernet header - this
1323 	 * sets up skb->pkt_type & it also PULLS out the eth header
1324 	 */
1325 	skb->protocol = eth_type_trans(skb, netdev);
1326 	eth = eth_hdr(skb);
1327 	skb->csum = 0;
1328 	skb->ip_summed = CHECKSUM_NONE;
1329 
1330 	do {
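		/* "break" out of this block accepts the packet; reaching the
		 * bottom of the block drops it.
		 */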
1331 		/* accept all packets */
1332 		if (netdev->flags & IFF_PROMISC)
1333 			break;
1334 		if (skb->pkt_type == PACKET_BROADCAST) {
1335 			/* accept all broadcast packets */
1336 			if (netdev->flags & IFF_BROADCAST)
1337 				break;
1338 		} else if (skb->pkt_type == PACKET_MULTICAST) {
1339 			if ((netdev->flags & IFF_MULTICAST) &&
1340 			    (netdev_mc_count(netdev))) {
1341 				struct netdev_hw_addr *ha;
1342 				int found_mc = 0;
1343 
1344 				/* only accept multicast packets that we can
1345 				 * find in our multicast address list
1346 				 */
1347 				netdev_for_each_mc_addr(ha, netdev) {
1348 					if (ether_addr_equal(eth->h_dest,
1349 							     ha->addr)) {
1350 						found_mc = 1;
1351 						break;
1352 					}
1353 				}
1354 				/* accept pkt, dest matches a multicast addr */
1355 				if (found_mc)
1356 					break;
1357 			}
1358 		/* accept packet, h_dest must match vnic  mac address */
1359 		} else if (skb->pkt_type == PACKET_HOST) {
1360 			break;
1361 		} else if (skb->pkt_type == PACKET_OTHERHOST) {
1362 			/* something is not right */
1363 			dev_err(&devdata->netdev->dev,
1364 				"**** FAILED to deliver rcv packet to OS; name:%s Dest:%pM VNIC:%pM\n",
1365 				netdev->name, eth->h_dest, netdev->dev_addr);
1366 		}
1367 		/* drop packet - don't forward it up to OS */
1368 		devdata->n_rcv_packets_not_accepted++;
1369 		repost_return(cmdrsp, devdata, skb, netdev);
1370 		return 0;
1371 	} while (0);
1372 
1373 	netif_receive_skb(skb);
1374 	/* netif_receive_skb returns various values, but in practice most
1375 	 * drivers ignore the return value.
1376 	 */
1377 
1378 	skb = NULL;
1379 	/* whether the packet got dropped or handled, the skb is freed by
1380 	 * kernel code, so we shouldn't free it. but we should repost a
1381 	 * new rcv buffer.
1382 	 */
1383 	repost_return(cmdrsp, devdata, skb, netdev);
1384 	return 1;
1385 }
1386 
1387 /* devdata_initialize - initialize devdata structure
1388  * @devdata: visornic_devdata structure to initialize.
1389  * @dev:     visorbus_device it belongs to.
1390  *
1391  * Setup initial values for the visornic, based on channel and default values.
1392  *
1393  * Return: A pointer to the devdata structure.
1394  */
1395 static struct visornic_devdata *devdata_initialize(
1396 					struct visornic_devdata *devdata,
1397 					struct visor_device *dev)
1398 {
1399 	devdata->dev = dev;
1400 	devdata->incarnation_id = get_jiffies_64();
1401 	return devdata;
1402 }
1403 
1404 /* devdata_release - free up references in devdata
1405  * @devdata: Struct to clean up.
1406  */
1407 static void devdata_release(struct visornic_devdata *devdata)
1408 {
1409 	kfree(devdata->rcvbuf);
1410 	kfree(devdata->cmdrsp_rcv);
1411 	kfree(devdata->xmit_cmdrsp);
1412 }
1413 
1414 static const struct net_device_ops visornic_dev_ops = {
1415 	.ndo_open = visornic_open,
1416 	.ndo_stop = visornic_close,
1417 	.ndo_start_xmit = visornic_xmit,
1418 	.ndo_get_stats = visornic_get_stats,
1419 	.ndo_change_mtu = visornic_change_mtu,
1420 	.ndo_tx_timeout = visornic_xmit_timeout,
1421 	.ndo_set_rx_mode = visornic_set_multi,
1422 };
1423 
1424 /* DebugFS code */
1425 static ssize_t info_debugfs_read(struct file *file, char __user *buf,
1426 				 size_t len, loff_t *offset)
1427 {
1428 	ssize_t bytes_read = 0;
1429 	int str_pos = 0;
1430 	struct visornic_devdata *devdata;
1431 	struct net_device *dev;
1432 	char *vbuf;
1433 
1434 	if (len > MAX_BUF)
1435 		len = MAX_BUF;
1436 	vbuf = kzalloc(len, GFP_KERNEL);
1437 	if (!vbuf)
1438 		return -ENOMEM;
1439 
1440 	/* for each vnic channel dump out channel specific data */
1441 	rcu_read_lock();
1442 	for_each_netdev_rcu(current->nsproxy->net_ns, dev) {
1443 		/* Only consider netdevs that are visornic, and are open */
1444 		if (dev->netdev_ops != &visornic_dev_ops ||
1445 		    (!netif_queue_stopped(dev)))
1446 			continue;
1447 
1448 		devdata = netdev_priv(dev);
1449 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1450 				     "netdev = %s (0x%p), MAC Addr %pM\n",
1451 				     dev->name,
1452 				     dev,
1453 				     dev->dev_addr);
1454 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1455 				     "VisorNic Dev Info = 0x%p\n", devdata);
1456 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1457 				     " num_rcv_bufs = %d\n",
1458 				     devdata->num_rcv_bufs);
1459 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1460 				     " max_outstanding_next_xmits = %lu\n",
1461 				    devdata->max_outstanding_net_xmits);
1462 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1463 				     " upper_threshold_net_xmits = %lu\n",
1464 				     devdata->upper_threshold_net_xmits);
1465 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1466 				     " lower_threshold_net_xmits = %lu\n",
1467 				     devdata->lower_threshold_net_xmits);
1468 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1469 				     " queuefullmsg_logged = %d\n",
1470 				     devdata->queuefullmsg_logged);
1471 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1472 				     " chstat.got_rcv = %lu\n",
1473 				     devdata->chstat.got_rcv);
1474 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1475 				     " chstat.got_enbdisack = %lu\n",
1476 				     devdata->chstat.got_enbdisack);
1477 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1478 				     " chstat.got_xmit_done = %lu\n",
1479 				     devdata->chstat.got_xmit_done);
1480 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1481 				     " chstat.xmit_fail = %lu\n",
1482 				     devdata->chstat.xmit_fail);
1483 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1484 				     " chstat.sent_enbdis = %lu\n",
1485 				     devdata->chstat.sent_enbdis);
1486 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1487 				     " chstat.sent_promisc = %lu\n",
1488 				     devdata->chstat.sent_promisc);
1489 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1490 				     " chstat.sent_post = %lu\n",
1491 				     devdata->chstat.sent_post);
1492 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1493 				     " chstat.sent_post_failed = %lu\n",
1494 				     devdata->chstat.sent_post_failed);
1495 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1496 				     " chstat.sent_xmit = %lu\n",
1497 				     devdata->chstat.sent_xmit);
1498 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1499 				     " chstat.reject_count = %lu\n",
1500 				     devdata->chstat.reject_count);
1501 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1502 				     " chstat.extra_rcvbufs_sent = %lu\n",
1503 				     devdata->chstat.extra_rcvbufs_sent);
1504 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1505 				     " n_rcv0 = %lu\n", devdata->n_rcv0);
1506 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1507 				     " n_rcv1 = %lu\n", devdata->n_rcv1);
1508 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1509 				     " n_rcv2 = %lu\n", devdata->n_rcv2);
1510 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1511 				     " n_rcvx = %lu\n", devdata->n_rcvx);
1512 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1513 				     " num_rcvbuf_in_iovm = %d\n",
1514 				     atomic_read(&devdata->num_rcvbuf_in_iovm));
1515 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1516 				     " alloc_failed_in_if_needed_cnt = %lu\n",
1517 				     devdata->alloc_failed_in_if_needed_cnt);
1518 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1519 				     " alloc_failed_in_repost_rtn_cnt = %lu\n",
1520 				     devdata->alloc_failed_in_repost_rtn_cnt);
1521 		/* str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1522 		 *		     " inner_loop_limit_reached_cnt = %lu\n",
1523 		 *		     devdata->inner_loop_limit_reached_cnt);
1524 		 */
1525 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1526 				     " found_repost_rcvbuf_cnt = %lu\n",
1527 				     devdata->found_repost_rcvbuf_cnt);
1528 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1529 				     " repost_found_skb_cnt = %lu\n",
1530 				     devdata->repost_found_skb_cnt);
1531 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1532 				     " n_repost_deficit = %lu\n",
1533 				     devdata->n_repost_deficit);
1534 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1535 				     " bad_rcv_buf = %lu\n",
1536 				     devdata->bad_rcv_buf);
1537 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1538 				     " n_rcv_packets_not_accepted = %lu\n",
1539 				     devdata->n_rcv_packets_not_accepted);
1540 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1541 				     " interrupts_rcvd = %llu\n",
1542 				     devdata->interrupts_rcvd);
1543 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1544 				     " interrupts_notme = %llu\n",
1545 				     devdata->interrupts_notme);
1546 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1547 				     " interrupts_disabled = %llu\n",
1548 				     devdata->interrupts_disabled);
1549 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1550 				     " busy_cnt = %llu\n",
1551 				     devdata->busy_cnt);
1552 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1553 				     " flow_control_upper_hits = %llu\n",
1554 				     devdata->flow_control_upper_hits);
1555 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1556 				     " flow_control_lower_hits = %llu\n",
1557 				     devdata->flow_control_lower_hits);
1558 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1559 				     " netif_queue = %s\n",
1560 				     netif_queue_stopped(devdata->netdev) ?
1561 				     "stopped" : "running");
1562 		str_pos += scnprintf(vbuf + str_pos, len - str_pos,
1563 				     " xmits_outstanding = %lu\n",
1564 				     devdata_xmits_outstanding(devdata));
1565 	}
1566 	rcu_read_unlock();
1567 	bytes_read = simple_read_from_buffer(buf, len, offset, vbuf, str_pos);
1568 	kfree(vbuf);
1569 	return bytes_read;
1570 }
1571 
1572 static struct dentry *visornic_debugfs_dir;
1573 static const struct file_operations debugfs_info_fops = {
1574 	.read = info_debugfs_read,
1575 };
1576 
1577 /* send_rcv_posts_if_needed - send receive buffers to the IO Partition.
1578  * @devdata: Visornic device.
1579  */
1580 static void send_rcv_posts_if_needed(struct visornic_devdata *devdata)
1581 {
1582 	int i;
1583 	struct net_device *netdev;
1584 	struct uiscmdrsp *cmdrsp = devdata->cmdrsp_rcv;
1585 	int cur_num_rcv_bufs_to_alloc, rcv_bufs_allocated;
1586 	int err;
1587 
1588 	/* don't do this until vnic is marked ready */
1589 	if (!(devdata->enabled && devdata->enab_dis_acked))
1590 		return;
1591 
1592 	netdev = devdata->netdev;
1593 	rcv_bufs_allocated = 0;
1594 	/* This code tries to avoid getting stuck here forever, but still
1595 	 * retries if we can't allocate all the buffers this time around.
1596 	 */
1597 	cur_num_rcv_bufs_to_alloc = devdata->num_rcv_bufs_could_not_alloc;
1598 	while (cur_num_rcv_bufs_to_alloc > 0) {
1599 		cur_num_rcv_bufs_to_alloc--;
1600 		for (i = 0; i < devdata->num_rcv_bufs; i++) {
1601 			if (devdata->rcvbuf[i])
1602 				continue;
1603 			devdata->rcvbuf[i] = alloc_rcv_buf(netdev);
1604 			if (!devdata->rcvbuf[i]) {
1605 				devdata->alloc_failed_in_if_needed_cnt++;
1606 				break;
1607 			}
1608 			rcv_bufs_allocated++;
1609 			err = post_skb(cmdrsp, devdata, devdata->rcvbuf[i]);
1610 			if (err) {
1611 				kfree_skb(devdata->rcvbuf[i]);
1612 				devdata->rcvbuf[i] = NULL;
1613 				break;
1614 			}
1615 			devdata->chstat.extra_rcvbufs_sent++;
1616 		}
1617 	}
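	/* Account only for the buffers allocated on this pass; any remaining
	 * shortfall stays in num_rcv_bufs_could_not_alloc and is retried the
	 * next time this routine runs.
	 */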
1618 	devdata->num_rcv_bufs_could_not_alloc -= rcv_bufs_allocated;
1619 }
1620 
1621 /* drain_resp_queue - drains and ignores all messages from the resp queue
1622  * @cmdrsp:  IO channel command response message.
1623  * @devdata: Visornic device to drain.
1624  */
1625 static void drain_resp_queue(struct uiscmdrsp *cmdrsp,
1626 			     struct visornic_devdata *devdata)
1627 {
1628 	while (!visorchannel_signalremove(devdata->dev->visorchannel,
1629 					  IOCHAN_FROM_IOPART,
1630 					  cmdrsp))
1631 		;
1632 }
1633 
1634 /* service_resp_queue - drain the response queue
1635  * @cmdrsp:  IO channel command response message.
1636  * @devdata: Visornic device to drain.
1637  * @rx_work_done: Running count of receive work done so far; updated here.
1638  * @budget:       Maximum amount of receive work to do before returning.
1639  *
1640  * Drain the response queue of any responses from the IO Partition. Process the
1641  * responses as we get them.
1642  */
1643 static void service_resp_queue(struct uiscmdrsp *cmdrsp,
1644 			       struct visornic_devdata *devdata,
1645 			       int *rx_work_done, int budget)
1646 {
1647 	unsigned long flags;
1648 	struct net_device *netdev;
1649 
1650 	while (*rx_work_done < budget) {
1651 		/* TODO: CLIENT ACQUIRE -- Don't really need this at the
1652 		 * moment
1653 		 */
1654 		/* queue empty */
1655 		if (visorchannel_signalremove(devdata->dev->visorchannel,
1656 					      IOCHAN_FROM_IOPART,
1657 					      cmdrsp))
1658 			break;
1659 
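		/* Dispatch on the type of response the IO Partition posted. */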
1660 		switch (cmdrsp->net.type) {
1661 		case NET_RCV:
1662 			devdata->chstat.got_rcv++;
1663 			/* process incoming packet */
1664 			*rx_work_done += visornic_rx(cmdrsp);
1665 			break;
1666 		case NET_XMIT_DONE:
1667 			spin_lock_irqsave(&devdata->priv_lock, flags);
1668 			devdata->chstat.got_xmit_done++;
1669 			if (cmdrsp->net.xmtdone.xmt_done_result)
1670 				devdata->chstat.xmit_fail++;
1671 			/* only call queue wake if we stopped it */
1672 			netdev = ((struct sk_buff *)cmdrsp->net.buf)->dev;
1673 			/* ASSERT netdev == vnicinfo->netdev; */
1674 			if (netdev == devdata->netdev &&
1675 			    netif_queue_stopped(netdev)) {
1676 				/* check if we have crossed the lower watermark
1677 				 * for netif_wake_queue()
1678 				 */
1679 				if (vnic_hit_low_watermark
1680 				    (devdata,
1681 				     devdata->lower_threshold_net_xmits)) {
1682 					/* enough NET_XMITs completed
1683 					 * so can restart netif queue
1684 					 */
1685 					netif_wake_queue(netdev);
1686 					devdata->flow_control_lower_hits++;
1687 				}
1688 			}
1689 			skb_unlink(cmdrsp->net.buf, &devdata->xmitbufhead);
1690 			spin_unlock_irqrestore(&devdata->priv_lock, flags);
1691 			kfree_skb(cmdrsp->net.buf);
1692 			break;
1693 		case NET_RCV_ENBDIS_ACK:
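			/* The IO Partition acknowledged a receive
			 * enable/disable request we sent earlier.
			 */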
1694 			devdata->chstat.got_enbdisack++;
1695 			netdev = (struct net_device *)
1696 				cmdrsp->net.enbdis.context;
1697 			spin_lock_irqsave(&devdata->priv_lock, flags);
1698 			devdata->enab_dis_acked = 1;
1699 			spin_unlock_irqrestore(&devdata->priv_lock, flags);
1700 
1701 			if (devdata->server_down &&
1702 			    devdata->server_change_state) {
1703 				/* Inform Linux that the link is up */
1704 				devdata->server_down = false;
1705 				devdata->server_change_state = false;
1706 				netif_wake_queue(netdev);
1707 				netif_carrier_on(netdev);
1708 			}
1709 			break;
1710 		case NET_CONNECT_STATUS:
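			/* The IO Partition reported a link state change;
			 * mirror it onto the netdev queue and carrier state.
			 */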
1711 			netdev = devdata->netdev;
1712 			if (cmdrsp->net.enbdis.enable == 1) {
1713 				spin_lock_irqsave(&devdata->priv_lock, flags);
1714 				devdata->enabled = cmdrsp->net.enbdis.enable;
1715 				spin_unlock_irqrestore(&devdata->priv_lock,
1716 						       flags);
1717 				netif_wake_queue(netdev);
1718 				netif_carrier_on(netdev);
1719 			} else {
1720 				netif_stop_queue(netdev);
1721 				netif_carrier_off(netdev);
1722 				spin_lock_irqsave(&devdata->priv_lock, flags);
1723 				devdata->enabled = cmdrsp->net.enbdis.enable;
1724 				spin_unlock_irqrestore(&devdata->priv_lock,
1725 						       flags);
1726 			}
1727 			break;
1728 		default:
1729 			break;
1730 		}
1731 		/* cmdrsp is now available for reuse  */
1732 	}
1733 }
1734 
1735 static int visornic_poll(struct napi_struct *napi, int budget)
1736 {
1737 	struct visornic_devdata *devdata = container_of(napi,
1738 							struct visornic_devdata,
1739 							napi);
1740 	int rx_count = 0;
1741 
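	/* First repost any receive buffers we owe the IO Partition, then
	 * process responses from the channel, charging receive work against
	 * the NAPI budget.
	 */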
1742 	send_rcv_posts_if_needed(devdata);
1743 	service_resp_queue(devdata->cmdrsp, devdata, &rx_count, budget);
1744 
1745 	/* If there aren't any more packets to receive, stop the poll */
1746 	if (rx_count < budget)
1747 		napi_complete_done(napi, rx_count);
1748 
1749 	return rx_count;
1750 }
1751 
1752 /* poll_for_irq	- checks the status of the response queue
1753  * @t: pointer to the 'struct timer_list' from which we can retrieve
1754  *     the visornic devdata struct.
1755  *
1756  * Timer callback that periodically checks the response queue and schedules
1757  * NAPI to drain it if needed.
1758  */
1759 static void poll_for_irq(struct timer_list *t)
1760 {
1761 	struct visornic_devdata *devdata = from_timer(devdata, t,
1762 						      irq_poll_timer);
1763 
1764 	if (!visorchannel_signalempty(devdata->dev->visorchannel,
1765 				      IOCHAN_FROM_IOPART))
1767 		napi_schedule(&devdata->napi);
1768 
1769 	atomic_set(&devdata->interrupt_rcvd, 0);
1770 
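	/* Re-arm the timer so the response queue keeps getting polled. */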
1771 	mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
1772 }
1773 
1774 /* visornic_probe - probe function for visornic devices
1775  * @dev: The visor device discovered.
1776  *
1777  * Called when visorbus discovers a visornic device on its bus. It creates a new
1778  * visornic ethernet adapter.
1779  *
1780  * Return: 0 on success, or negative integer on error.
1781  */
1782 static int visornic_probe(struct visor_device *dev)
1783 {
1784 	struct visornic_devdata *devdata = NULL;
1785 	struct net_device *netdev = NULL;
1786 	int err;
1787 	int channel_offset = 0;
1788 	u64 features;
1789 
1790 	netdev = alloc_etherdev(sizeof(struct visornic_devdata));
1791 	if (!netdev) {
1792 		dev_err(&dev->device,
1793 			"%s alloc_etherdev failed\n", __func__);
1794 		return -ENOMEM;
1795 	}
1796 
1797 	netdev->netdev_ops = &visornic_dev_ops;
1798 	netdev->watchdog_timeo = 5 * HZ;
1799 	SET_NETDEV_DEV(netdev, &dev->device);
1800 
1801 	/* Get MAC address from channel and read it into the device. */
1802 	netdev->addr_len = ETH_ALEN;
1803 	channel_offset = offsetof(struct visor_io_channel, vnic.macaddr);
1804 	err = visorbus_read_channel(dev, channel_offset, netdev->dev_addr,
1805 				    ETH_ALEN);
1806 	if (err < 0) {
1807 		dev_err(&dev->device,
1808 			"%s failed to get mac addr from chan (%d)\n",
1809 			__func__, err);
1810 		goto cleanup_netdev;
1811 	}
1812 
1813 	devdata = devdata_initialize(netdev_priv(netdev), dev);
1814 	if (!devdata) {
1815 		dev_err(&dev->device,
1816 			"%s devdata_initialize failed\n", __func__);
1817 		err = -ENOMEM;
1818 		goto cleanup_netdev;
1819 	}
1820 	/* don't trust messages lying around in the channel */
1821 	drain_resp_queue(devdata->cmdrsp, devdata);
1822 
1823 	devdata->netdev = netdev;
1824 	dev_set_drvdata(&dev->device, devdata);
1825 	init_waitqueue_head(&devdata->rsp_queue);
1826 	spin_lock_init(&devdata->priv_lock);
1827 	/* not yet */
1828 	devdata->enabled = 0;
1829 	atomic_set(&devdata->usage, 1);
1830 
1831 	/* Setup rcv bufs */
1832 	channel_offset = offsetof(struct visor_io_channel, vnic.num_rcv_bufs);
1833 	err = visorbus_read_channel(dev, channel_offset,
1834 				    &devdata->num_rcv_bufs, 4);
1835 	if (err) {
1836 		dev_err(&dev->device,
1837 			"%s failed to get #rcv bufs from chan (%d)\n",
1838 			__func__, err);
1839 		goto cleanup_netdev;
1840 	}
1841 
1842 	devdata->rcvbuf = kcalloc(devdata->num_rcv_bufs,
1843 				  sizeof(struct sk_buff *), GFP_KERNEL);
1844 	if (!devdata->rcvbuf) {
1845 		err = -ENOMEM;
1846 		goto cleanup_netdev;
1847 	}
1848 
1849 	/* Set the net_xmit outstanding thresholds.
1850 	 * Always leave two slots open, but keep at least 3 outstanding;
1851 	 * note that max_outstanding_net_xmits must be > 0.
1852 	 */
1853 	devdata->max_outstanding_net_xmits =
1854 		max_t(unsigned long, 3, ((devdata->num_rcv_bufs / 3) - 2));
1855 	devdata->upper_threshold_net_xmits =
1856 		max_t(unsigned long,
1857 		      2, (devdata->max_outstanding_net_xmits - 1));
1858 	devdata->lower_threshold_net_xmits =
1859 		max_t(unsigned long,
1860 		      1, (devdata->max_outstanding_net_xmits / 2));
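	/* For example, with num_rcv_bufs == 64 the calculations above yield
	 * max_outstanding_net_xmits == 19, upper_threshold_net_xmits == 18
	 * and lower_threshold_net_xmits == 9.
	 */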
1861 
1862 	skb_queue_head_init(&devdata->xmitbufhead);
1863 
1864 	/* create a cmdrsp we can use to post and unpost rcv buffers */
1865 	devdata->cmdrsp_rcv = kmalloc(SIZEOF_CMDRSP, GFP_KERNEL);
1866 	if (!devdata->cmdrsp_rcv) {
1867 		err = -ENOMEM;
1868 		goto cleanup_rcvbuf;
1869 	}
1870 	devdata->xmit_cmdrsp = kmalloc(SIZEOF_CMDRSP, GFP_KERNEL);
1871 	if (!devdata->xmit_cmdrsp) {
1872 		err = -ENOMEM;
1873 		goto cleanup_cmdrsp_rcv;
1874 	}
1875 	INIT_WORK(&devdata->timeout_reset, visornic_timeout_reset);
1876 	devdata->server_down = false;
1877 	devdata->server_change_state = false;
1878 
1879 	/* set the default MTU */
1880 	channel_offset = offsetof(struct visor_io_channel, vnic.mtu);
1881 	err = visorbus_read_channel(dev, channel_offset, &netdev->mtu, 4);
1882 	if (err) {
1883 		dev_err(&dev->device,
1884 			"%s failed to get mtu from chan (%d)\n",
1885 			__func__, err);
1886 		goto cleanup_xmit_cmdrsp;
1887 	}
1888 
1889 	/* TODO: Setup Interrupt information */
1890 	/* Start NAPI and the poll timer so we can get responses */
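	/* NAPI_WEIGHT caps how much work visornic_poll() may do per pass. */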
1891 	netif_napi_add(netdev, &devdata->napi, visornic_poll, NAPI_WEIGHT);
1892 
1893 	timer_setup(&devdata->irq_poll_timer, poll_for_irq, 0);
1894 	/* Note: This timer has to start running before the device is
1895 	 * brought up because the napi routine is responsible for
1896 	 * setting enab_dis_acked.
1897 	 */
1898 	mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
1899 
1900 	channel_offset = offsetof(struct visor_io_channel,
1901 				  channel_header.features);
1902 	err = visorbus_read_channel(dev, channel_offset, &features, 8);
1903 	if (err) {
1904 		dev_err(&dev->device,
1905 			"%s failed to get features from chan (%d)\n",
1906 			__func__, err);
1907 		goto cleanup_napi_add;
1908 	}
1909 
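	/* Advertise that this driver polls the channel and performs enhanced
	 * receive-buffer checking, then write the feature bits back to the
	 * channel header.
	 */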
1910 	features |= VISOR_CHANNEL_IS_POLLING;
1911 	features |= VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING;
1912 	err = visorbus_write_channel(dev, channel_offset, &features, 8);
1913 	if (err) {
1914 		dev_err(&dev->device,
1915 			"%s failed to set features in chan (%d)\n",
1916 			__func__, err);
1917 		goto cleanup_napi_add;
1918 	}
1919 
1920 	/* Note: Interrupts have to be enabled before the device is
1921 	 * brought up because the napi routine is responsible for
1922 	 * setting enab_dis_acked.
1923 	 */
1924 	visorbus_enable_channel_interrupts(dev);
1925 
1926 	err = register_netdev(netdev);
1927 	if (err) {
1928 		dev_err(&dev->device,
1929 			"%s register_netdev failed (%d)\n", __func__, err);
1930 		goto cleanup_napi_add;
1931 	}
1932 
1933 	/* create the per-device debugfs directory */
1934 	devdata->eth_debugfs_dir = debugfs_create_dir(netdev->name,
1935 						      visornic_debugfs_dir);
1936 	if (!devdata->eth_debugfs_dir) {
1937 		dev_err(&dev->device,
1938 			"%s debugfs_create_dir %s failed\n",
1939 			__func__, netdev->name);
1940 		err = -ENOMEM;
1941 		goto cleanup_register_netdev;
1942 	}
1943 
1944 	dev_info(&dev->device, "%s success netdev=%s\n",
1945 		 __func__, netdev->name);
1946 	return 0;
1947 
1948 cleanup_register_netdev:
1949 	unregister_netdev(netdev);
1950 
1951 cleanup_napi_add:
1952 	del_timer_sync(&devdata->irq_poll_timer);
1953 	netif_napi_del(&devdata->napi);
1954 
1955 cleanup_xmit_cmdrsp:
1956 	kfree(devdata->xmit_cmdrsp);
1957 
1958 cleanup_cmdrsp_rcv:
1959 	kfree(devdata->cmdrsp_rcv);
1960 
1961 cleanup_rcvbuf:
1962 	kfree(devdata->rcvbuf);
1963 
1964 cleanup_netdev:
1965 	free_netdev(netdev);
1966 	return err;
1967 }
1968 
1969 /* host_side_disappeared - IO Partition is gone
1970  * @devdata: Device object.
1971  *
1972  * IO partition servicing this device is gone; do cleanup.
1973  */
1974 static void host_side_disappeared(struct visornic_devdata *devdata)
1975 {
1976 	unsigned long flags;
1977 
1978 	spin_lock_irqsave(&devdata->priv_lock, flags);
1979 	/* indicate device destroyed */
1980 	devdata->dev = NULL;
1981 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
1982 }
1983 
1984 /* visornic_remove - called when visornic dev goes away
1985  * @dev: Visornic device that is being removed.
1986  *
1987  * Called when DEVICE_DESTROY gets called to remove device.
1988  */
1989 static void visornic_remove(struct visor_device *dev)
1990 {
1991 	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
1992 	struct net_device *netdev;
1993 	unsigned long flags;
1994 
1995 	if (!devdata) {
1996 		dev_err(&dev->device, "%s no devdata\n", __func__);
1997 		return;
1998 	}
1999 	spin_lock_irqsave(&devdata->priv_lock, flags);
2000 	if (devdata->going_away) {
2001 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
2002 		dev_err(&dev->device, "%s already being removed\n", __func__);
2003 		return;
2004 	}
2005 	devdata->going_away = true;
2006 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
2007 	netdev = devdata->netdev;
2008 	if (!netdev) {
2009 		dev_err(&dev->device, "%s no net device\n", __func__);
2010 		return;
2011 	}
2012 
2013 	/* going_away prevents new items being added to the workqueues */
2014 	cancel_work_sync(&devdata->timeout_reset);
2015 
2016 	debugfs_remove_recursive(devdata->eth_debugfs_dir);
2017 	/* this will call visornic_close() */
2018 	unregister_netdev(netdev);
2019 
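	/* Stop the poll timer and tear down NAPI now that the netdev is gone. */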
2020 	del_timer_sync(&devdata->irq_poll_timer);
2021 	netif_napi_del(&devdata->napi);
2022 
2023 	dev_set_drvdata(&dev->device, NULL);
2024 	host_side_disappeared(devdata);
2025 	devdata_release(devdata);
2026 	free_netdev(netdev);
2027 }
2028 
2029 /* visornic_pause - called when IO Part disappears
2030  * @dev:	   Visornic device that is being serviced.
2031  * @complete_func: Call when finished.
2032  *
2033  * Called when the IO Partition has gone down. Need to free up resources and
2034  * wait for IO partition to come back. Mark link as down and don't attempt any
2035  * DMA. When we have freed memory, call the complete_func so that Command knows
2036  * we are done. If we don't call complete_func, the IO Partition will never
2037  * come back.
2038  *
2039  * Return: 0 on success.
2040  */
2041 static int visornic_pause(struct visor_device *dev,
2042 			  visorbus_state_complete_func complete_func)
2043 {
2044 	struct visornic_devdata *devdata = dev_get_drvdata(&dev->device);
2045 
2046 	visornic_serverdown(devdata, complete_func);
2047 	return 0;
2048 }
2049 
2050 /* visornic_resume - called when IO Partition has recovered
2051  * @dev:	   Visornic device that is being serviced.
2052  * @complete_func: Call when finished.
2053  *
2054  * Called when the IO partition has recovered. Re-establish connection to the IO
2055  * Partition and set the link up. Okay to do DMA again.
2056  *
2057  * Return: 0 on success, negative integer on error.
2058  */
2059 static int visornic_resume(struct visor_device *dev,
2060 			   visorbus_state_complete_func complete_func)
2061 {
2062 	struct visornic_devdata *devdata;
2063 	struct net_device *netdev;
2064 	unsigned long flags;
2065 
2066 	devdata = dev_get_drvdata(&dev->device);
2067 	if (!devdata) {
2068 		dev_err(&dev->device, "%s no devdata\n", __func__);
2069 		return -EINVAL;
2070 	}
2071 
2072 	netdev = devdata->netdev;
2073 
2074 	spin_lock_irqsave(&devdata->priv_lock, flags);
2075 	if (devdata->server_change_state) {
2076 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
2077 		dev_err(&dev->device, "%s server already changing state\n",
2078 			__func__);
2079 		return -EINVAL;
2080 	}
2081 	if (!devdata->server_down) {
2082 		spin_unlock_irqrestore(&devdata->priv_lock, flags);
2083 		dev_err(&dev->device, "%s server not down\n", __func__);
2084 		complete_func(dev, 0);
2085 		return 0;
2086 	}
2087 	devdata->server_change_state = true;
2088 	spin_unlock_irqrestore(&devdata->priv_lock, flags);
2089 
2090 	/* Must transition channel to ATTACHED state BEFORE
2091 	 * we can start using the device again.
2092 	 * TODO: State transitions
2093 	 */
2094 	mod_timer(&devdata->irq_poll_timer, msecs_to_jiffies(2));
2095 
2096 	rtnl_lock();
2097 	dev_open(netdev, NULL);
2098 	rtnl_unlock();
2099 
2100 	complete_func(dev, 0);
2101 	return 0;
2102 }
2103 
2104 /* This is used to tell the visorbus driver which types of visor devices
2105  * we support, and what functions to call when a visor device that we support
2106  * is attached or removed.
2107  */
2108 static struct visor_driver visornic_driver = {
2109 	.name = "visornic",
2110 	.owner = THIS_MODULE,
2111 	.channel_types = visornic_channel_types,
2112 	.probe = visornic_probe,
2113 	.remove = visornic_remove,
2114 	.pause = visornic_pause,
2115 	.resume = visornic_resume,
2116 	.channel_interrupt = NULL,
2117 };
2118 
2119 /* visornic_init - init function
2120  *
2121  * Init function for the visornic driver. Do initial driver setup and wait
2122  * for devices.
2123  *
2124  * Return: 0 on success, negative integer on error.
2125  */
2126 static int visornic_init(void)
2127 {
2128 	int err;
2129 
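	/* Create the debugfs root first; visornic_probe() creates per-device
	 * directories underneath it.
	 */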
2130 	visornic_debugfs_dir = debugfs_create_dir("visornic", NULL);
2131 
2132 	debugfs_create_file("info", 0400, visornic_debugfs_dir, NULL,
2133 			    &debugfs_info_fops);
2134 	debugfs_create_file("enable_ints", 0200, visornic_debugfs_dir, NULL,
2135 			    &debugfs_enable_ints_fops);
2136 
2137 	err = visorbus_register_visor_driver(&visornic_driver);
2138 	if (err)
2139 		debugfs_remove_recursive(visornic_debugfs_dir);
2140 
2141 	return err;
2142 }
2143 
2144 /* visornic_cleanup - driver exit routine
2145  *
2146  * Unregister driver from the bus and free up memory.
2147  */
2148 static void visornic_cleanup(void)
2149 {
2150 	visorbus_unregister_visor_driver(&visornic_driver);
2151 	debugfs_remove_recursive(visornic_debugfs_dir);
2152 }
2153 
2154 module_init(visornic_init);
2155 module_exit(visornic_cleanup);
2156 
2157 MODULE_AUTHOR("Unisys");
2158 MODULE_LICENSE("GPL");
2159 MODULE_DESCRIPTION("s-Par NIC driver for virtual network devices");
2160