// SPDX-License-Identifier: GPL-2.0-or-later
/* net/sched/sch_teql.c	"True" (or "trivial") link equalizer.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/if_arp.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/skbuff.h>
#include <linux/moduleparam.h>
#include <net/dst.h>
#include <net/neighbour.h>
#include <net/pkt_sched.h>

/*
   How to set it up.
   -----------------

   After loading this module you will find a new device teqlN
   and a new qdisc with the same name. To join a slave to the
   equalizer, simply attach this qdisc as the device's root, e.g.:

   # tc qdisc add dev eth0 root teql0
   # tc qdisc add dev eth1 root teql0

   That's all. Full PnP 8)

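   To pass traffic, the teql0 master must also be brought up and given
   an address like any other interface. A minimal sketch (the address
   below is purely illustrative):

   # ip link set teql0 up
   # ip addr add 10.0.0.1/24 dev teql0
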
   Applicability.
   --------------

   1. Slave devices MUST be active devices, i.e., they must raise the tbusy
      signal and generate EOI events. If you want to equalize virtual devices
      like tunnels, use a normal eql device.
   2. This device puts no limitations on physical slave characteristics,
      e.g. it will equalize a 9600 baud line and 100Mb Ethernet perfectly :-)
      Certainly, a large difference in link speeds will make the resulting
      equalized link unusable, because of huge packet reordering.
      I estimate the upper useful difference at ~10 times.
   3. If the slave requires address resolution, only protocols using the
      neighbour cache (IPv4/IPv6) will work over the equalized link.
      Other protocols may still use the slave device directly, which will
      not break load balancing, though native slave traffic will have
      the highest priority.  */
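
/* To detach a slave, delete the teql qdisc from it again; teql_destroy()
 * below then unlinks the device from the master's slave ring, e.g.
 * (device name illustrative):
 *
 * # tc qdisc del dev eth0 root
 */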

struct teql_master {
	struct Qdisc_ops qops;
	struct net_device *dev;
	struct Qdisc *slaves;
	struct list_head master_list;
	unsigned long	tx_bytes;
	unsigned long	tx_packets;
	unsigned long	tx_errors;
	unsigned long	tx_dropped;
};

struct teql_sched_data {
	struct Qdisc *next;
	struct teql_master *m;
	struct sk_buff_head q;
};

/* Slaves are kept on a circular list linked through teql_sched_data->next. */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)

/* "teql*" qdisc routines */

/* Queue packets locally, bounded by the slave device's tx_queue_len. */
static int
teql_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_sched_data *q = qdisc_priv(sch);

	if (q->q.qlen < dev->tx_queue_len) {
		__skb_queue_tail(&q->q, skb);
		return NET_XMIT_SUCCESS;
	}

	return qdisc_drop(skb, sch, to_free);
}

/* Dequeue from the local queue; once it empties, make this slave the next
 * one tried and wake the master device.
 */
static struct sk_buff *
teql_dequeue(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct netdev_queue *dat_queue;
	struct sk_buff *skb;
	struct Qdisc *q;

	skb = __skb_dequeue(&dat->q);
	dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
	q = rcu_dereference_bh(dat_queue->qdisc);

	if (skb == NULL) {
		struct net_device *m = qdisc_dev(q);

		if (m) {
			dat->m->slaves = sch;
			netif_wake_queue(m);
		}
	} else {
		qdisc_bstats_update(sch, skb);
	}
	sch->q.qlen = dat->q.qlen + q->q.qlen;
	return skb;
}

static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
	/* teql is meant to be used as a root qdisc */
	return NULL;
}

static void
teql_reset(struct Qdisc *sch)
{
	struct teql_sched_data *dat = qdisc_priv(sch);

	skb_queue_purge(&dat->q);
	sch->q.qlen = 0;
}

/* Remove this qdisc from its master's circular slave list; if it was the
 * last slave, reset the master device's own qdisc as well.
 */
static void
teql_destroy(struct Qdisc *sch)
{
	struct Qdisc *q, *prev;
	struct teql_sched_data *dat = qdisc_priv(sch);
	struct teql_master *master = dat->m;

	if (!master)
		return;

	prev = master->slaves;
	if (prev) {
		do {
			q = NEXT_SLAVE(prev);
			if (q == sch) {
				NEXT_SLAVE(prev) = NEXT_SLAVE(q);
				if (q == master->slaves) {
					master->slaves = NEXT_SLAVE(q);
					if (q == master->slaves) {
						struct netdev_queue *txq;
						spinlock_t *root_lock;

						txq = netdev_get_tx_queue(master->dev, 0);
						master->slaves = NULL;

						root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc));
						spin_lock_bh(root_lock);
						qdisc_reset(rtnl_dereference(txq->qdisc));
						spin_unlock_bh(root_lock);
					}
				}
				skb_queue_purge(&dat->q);
				break;
			}

		} while ((prev = q) != master->slaves);
	}
}

static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	struct net_device *dev = qdisc_dev(sch);
	struct teql_master *m = (struct teql_master *)sch->ops;
	struct teql_sched_data *q = qdisc_priv(sch);

	if (dev->hard_header_len > m->dev->hard_header_len)
		return -EINVAL;

	if (m->dev == dev)
		return -ELOOP;

	q->m = m;

	skb_queue_head_init(&q->q);

	if (m->slaves) {
		if (m->dev->flags & IFF_UP) {
			/* Master is running: a new slave may not weaken it. */
			if ((m->dev->flags & IFF_POINTOPOINT &&
			     !(dev->flags & IFF_POINTOPOINT)) ||
			    (m->dev->flags & IFF_BROADCAST &&
			     !(dev->flags & IFF_BROADCAST)) ||
			    (m->dev->flags & IFF_MULTICAST &&
			     !(dev->flags & IFF_MULTICAST)) ||
			    dev->mtu < m->dev->mtu)
				return -EINVAL;
		} else {
			/* Master is down: adapt its flags and MTU to the new slave. */
			if (!(dev->flags&IFF_POINTOPOINT))
				m->dev->flags &= ~IFF_POINTOPOINT;
			if (!(dev->flags&IFF_BROADCAST))
				m->dev->flags &= ~IFF_BROADCAST;
			if (!(dev->flags&IFF_MULTICAST))
				m->dev->flags &= ~IFF_MULTICAST;
			if (dev->mtu < m->dev->mtu)
				m->dev->mtu = dev->mtu;
		}
		q->next = NEXT_SLAVE(m->slaves);
		NEXT_SLAVE(m->slaves) = sch;
	} else {
		q->next = sch;
		m->slaves = sch;
		m->dev->mtu = dev->mtu;
		m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
	}
	return 0;
}

static int
__teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
	       struct net_device *dev, struct netdev_queue *txq,
	       struct dst_entry *dst)
{
	struct neighbour *n;
	int err = 0;

	n = dst_neigh_lookup_skb(dst, skb);
	if (!n)
		return -ENOENT;

	if (dst->dev != dev) {
		struct neighbour *mn;

		mn = __neigh_lookup_errno(n->tbl, n->primary_key, dev);
		neigh_release(n);
		if (IS_ERR(mn))
			return PTR_ERR(mn);
		n = mn;
	}

	if (neigh_event_send(n, skb_res) == 0) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, n, dev);
		err = dev_hard_header(skb, dev, ntohs(skb_protocol(skb, false)),
				      haddr, NULL, skb->len);

		if (err < 0)
			err = -EINVAL;
	} else {
		err = (skb_res == NULL) ? -EAGAIN : 1;
	}
	neigh_release(n);
	return err;
}

static inline int teql_resolve(struct sk_buff *skb,
			       struct sk_buff *skb_res,
			       struct net_device *dev,
			       struct netdev_queue *txq)
{
	struct dst_entry *dst = skb_dst(skb);
	int res;

	if (rcu_access_pointer(txq->qdisc) == &noop_qdisc)
		return -ENODEV;

	if (!dev->header_ops || !dst)
		return 0;

	rcu_read_lock();
	res = __teql_resolve(skb, skb_res, dev, txq, dst);
	rcu_read_unlock();

	return res;
}

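/* Master transmit: walk the circular slave list, starting at the slave
 * after the one used last time, and hand the skb to the first slave that
 * is running, resolvable and whose tx queue can be claimed. This is what
 * round-robins traffic across the slaves.
 */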
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc *start, *q;
	int busy;
	int nores;
	int subq = skb_get_queue_mapping(skb);
	struct sk_buff *skb_res = NULL;

	start = master->slaves;

restart:
	nores = 0;
	busy = 0;

	q = start;
	if (!q)
		goto drop;

	do {
		struct net_device *slave = qdisc_dev(q);
		struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);

		if (slave_txq->qdisc_sleeping != q)
			continue;
		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
		    !netif_running(slave)) {
			busy = 1;
			continue;
		}

		switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
		case 0:
			if (__netif_tx_trylock(slave_txq)) {
				unsigned int length = qdisc_pkt_len(skb);

				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
				    netdev_start_xmit(skb, slave, slave_txq, false) ==
				    NETDEV_TX_OK) {
					__netif_tx_unlock(slave_txq);
					master->slaves = NEXT_SLAVE(q);
					netif_wake_queue(dev);
					master->tx_packets++;
					master->tx_bytes += length;
					return NETDEV_TX_OK;
				}
				__netif_tx_unlock(slave_txq);
			}
			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
				busy = 1;
			break;
		case 1:
			master->slaves = NEXT_SLAVE(q);
			return NETDEV_TX_OK;
		default:
			nores = 1;
			break;
		}
		__skb_pull(skb, skb_network_offset(skb));
	} while ((q = NEXT_SLAVE(q)) != start);

	if (nores && skb_res == NULL) {
		skb_res = skb;
		goto restart;
	}

	if (busy) {
		netif_stop_queue(dev);
		return NETDEV_TX_BUSY;
	}
	master->tx_errors++;

drop:
	master->tx_dropped++;
	dev_kfree_skb(skb);
	return NETDEV_TX_OK;
}

static int teql_master_open(struct net_device *dev)
{
	struct Qdisc *q;
	struct teql_master *m = netdev_priv(dev);
	int mtu = 0xFFFE;
	unsigned int flags = IFF_NOARP | IFF_MULTICAST;

	if (m->slaves == NULL)
		return -EUNATCH;

	flags = FMASK;

	q = m->slaves;
	do {
		struct net_device *slave = qdisc_dev(q);

		if (slave == NULL)
			return -EUNATCH;

		if (slave->mtu < mtu)
			mtu = slave->mtu;
		if (slave->hard_header_len > LL_MAX_HEADER)
			return -EINVAL;

		/* If all the slaves are BROADCAST, master is BROADCAST.
		 * If all the slaves are PtP, master is PtP.
		 * Otherwise, master is NBMA.
		 */
		if (!(slave->flags&IFF_POINTOPOINT))
			flags &= ~IFF_POINTOPOINT;
		if (!(slave->flags&IFF_BROADCAST))
			flags &= ~IFF_BROADCAST;
		if (!(slave->flags&IFF_MULTICAST))
			flags &= ~IFF_MULTICAST;
	} while ((q = NEXT_SLAVE(q)) != m->slaves);

	m->dev->mtu = mtu;
	m->dev->flags = (m->dev->flags&~FMASK) | flags;
	netif_start_queue(m->dev);
	return 0;
}

static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);
	return 0;
}

static void teql_master_stats64(struct net_device *dev,
				struct rtnl_link_stats64 *stats)
{
	struct teql_master *m = netdev_priv(dev);

	stats->tx_packets	= m->tx_packets;
	stats->tx_bytes		= m->tx_bytes;
	stats->tx_errors	= m->tx_errors;
	stats->tx_dropped	= m->tx_dropped;
}

/* The master's MTU may never exceed that of any slave. */
static int teql_master_mtu(struct net_device *dev, int new_mtu)
{
	struct teql_master *m = netdev_priv(dev);
	struct Qdisc *q;

	q = m->slaves;
	if (q) {
		do {
			if (new_mtu > qdisc_dev(q)->mtu)
				return -EINVAL;
		} while ((q = NEXT_SLAVE(q)) != m->slaves);
	}

	dev->mtu = new_mtu;
	return 0;
}

static const struct net_device_ops teql_netdev_ops = {
	.ndo_open	= teql_master_open,
	.ndo_stop	= teql_master_close,
	.ndo_start_xmit	= teql_master_xmit,
	.ndo_get_stats64 = teql_master_stats64,
	.ndo_change_mtu	= teql_master_mtu,
};

static __init void teql_master_setup(struct net_device *dev)
{
	struct teql_master *master = netdev_priv(dev);
	struct Qdisc_ops *ops = &master->qops;

	master->dev	= dev;
	ops->priv_size	= sizeof(struct teql_sched_data);

	ops->enqueue	=	teql_enqueue;
	ops->dequeue	=	teql_dequeue;
	ops->peek	=	teql_peek;
	ops->init	=	teql_qdisc_init;
	ops->reset	=	teql_reset;
	ops->destroy	=	teql_destroy;
	ops->owner	=	THIS_MODULE;

	dev->netdev_ops		= &teql_netdev_ops;
	dev->type		= ARPHRD_VOID;
	dev->mtu		= 1500;
	dev->min_mtu		= 68;
	dev->max_mtu		= 65535;
	dev->tx_queue_len	= 100;
	dev->flags		= IFF_NOARP;
	dev->hard_header_len	= LL_MAX_HEADER;
	netif_keep_dst(dev);
}

static LIST_HEAD(master_dev_list);
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
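
/* One master device (teql0) is created at module load by default; request
 * more via the parameter above at load time, e.g. (count illustrative):
 *
 * # modprobe sch_teql max_equalizers=4
 */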

static int __init teql_init(void)
{
	int i;
	int err = -ENODEV;

	for (i = 0; i < max_equalizers; i++) {
		struct net_device *dev;
		struct teql_master *master;

		dev = alloc_netdev(sizeof(struct teql_master), "teql%d",
				   NET_NAME_UNKNOWN, teql_master_setup);
		if (!dev) {
			err = -ENOMEM;
			break;
		}

		err = register_netdev(dev);
		if (err) {
			free_netdev(dev);
			break;
		}

		master = netdev_priv(dev);

		strlcpy(master->qops.id, dev->name, IFNAMSIZ);
		err = register_qdisc(&master->qops);

		if (err) {
			unregister_netdev(dev);
			free_netdev(dev);
			break;
		}

		list_add_tail(&master->master_list, &master_dev_list);
	}
	return i ? 0 : err;
}

static void __exit teql_exit(void)
{
	struct teql_master *master, *nxt;

	list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
		list_del(&master->master_list);
		unregister_qdisc(&master->qops);
		unregister_netdev(master->dev);
		free_netdev(master->dev);
	}
}

module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");