// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
// Copyright (c) 2018 Mellanox Technologies

#include <linux/mlx5/driver.h>

#include "mlx5_core.h"
#include "lib/eq.h"
#include "lib/mlx5.h"

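/* Pairs a low level mlx5_nb with the events context it serves; handlers
 * recover the context from the embedded nb via mlx5_nb_cof().
 */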
struct mlx5_event_nb {
	struct mlx5_nb  nb;
	void           *ctx;
};

/* General event handlers for the low level mlx5_core driver.
 *
 * Other major feature-specific events, such as clock/eswitch/fpga/FW trace
 * events, are handled elsewhere by the respective mlx5 components, each
 * with its own notifier callbacks.
 */
static int any_notifier(struct notifier_block *, unsigned long, void *);
static int temp_warn(struct notifier_block *, unsigned long, void *);
static int port_module(struct notifier_block *, unsigned long, void *);
static int pcie_core(struct notifier_block *, unsigned long, void *);

/* handler which forwards the event to events->nh, driver notifiers */
static int forward_event(struct notifier_block *, unsigned long, void *);

static struct mlx5_nb events_nbs_ref[] = {
	/* Events to be processed by mlx5_core */
	{.nb.notifier_call = any_notifier,  .event_type = MLX5_EVENT_TYPE_NOTIFY_ANY },
	{.nb.notifier_call = temp_warn,     .event_type = MLX5_EVENT_TYPE_TEMP_WARN_EVENT },
	{.nb.notifier_call = port_module,   .event_type = MLX5_EVENT_TYPE_PORT_MODULE_EVENT },
	{.nb.notifier_call = pcie_core,     .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },

	/* Events to be forwarded (as is) to mlx5 core interfaces (mlx5e/mlx5_ib) */
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PORT_CHANGE },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_GENERAL_EVENT },
	/* QP/WQ resource events to forward */
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_DCT_DRAINED },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_COMM_EST },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SQ_DRAINED },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_LAST_WQE },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_CATAS_ERROR },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_PATH_MIG_FAILED },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_WQ_ACCESS_ERROR },
	/* SRQ events */
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_CATAS_ERROR },
	{.nb.notifier_call = forward_event,   .event_type = MLX5_EVENT_TYPE_SRQ_RQ_LIMIT },
};

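/* Per-device event state: the low level notifiers built from the table
 * above, the driver notifier chain that events are forwarded to, and a
 * workqueue for work deferred out of the EQ notifier context.
 */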
struct mlx5_events {
	struct mlx5_core_dev *dev;
	struct workqueue_struct *wq;
	struct mlx5_event_nb  notifiers[ARRAY_SIZE(events_nbs_ref)];
	/* driver notifier chain */
	struct atomic_notifier_head nh;
	/* port module events stats */
	struct mlx5_pme_stats pme_stats;
	/* pcie_core */
	struct work_struct pcie_core_work;
};

static const char *eqe_type_str(u8 type)
{
	switch (type) {
	case MLX5_EVENT_TYPE_COMP:
		return "MLX5_EVENT_TYPE_COMP";
	case MLX5_EVENT_TYPE_PATH_MIG:
		return "MLX5_EVENT_TYPE_PATH_MIG";
	case MLX5_EVENT_TYPE_COMM_EST:
		return "MLX5_EVENT_TYPE_COMM_EST";
	case MLX5_EVENT_TYPE_SQ_DRAINED:
		return "MLX5_EVENT_TYPE_SQ_DRAINED";
	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
		return "MLX5_EVENT_TYPE_SRQ_LAST_WQE";
	case MLX5_EVENT_TYPE_SRQ_RQ_LIMIT:
		return "MLX5_EVENT_TYPE_SRQ_RQ_LIMIT";
	case MLX5_EVENT_TYPE_CQ_ERROR:
		return "MLX5_EVENT_TYPE_CQ_ERROR";
	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
		return "MLX5_EVENT_TYPE_WQ_CATAS_ERROR";
	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
		return "MLX5_EVENT_TYPE_PATH_MIG_FAILED";
	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
		return "MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR";
	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
		return "MLX5_EVENT_TYPE_WQ_ACCESS_ERROR";
	case MLX5_EVENT_TYPE_SRQ_CATAS_ERROR:
		return "MLX5_EVENT_TYPE_SRQ_CATAS_ERROR";
	case MLX5_EVENT_TYPE_INTERNAL_ERROR:
		return "MLX5_EVENT_TYPE_INTERNAL_ERROR";
	case MLX5_EVENT_TYPE_PORT_CHANGE:
		return "MLX5_EVENT_TYPE_PORT_CHANGE";
	case MLX5_EVENT_TYPE_GPIO_EVENT:
		return "MLX5_EVENT_TYPE_GPIO_EVENT";
	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
	case MLX5_EVENT_TYPE_TEMP_WARN_EVENT:
		return "MLX5_EVENT_TYPE_TEMP_WARN_EVENT";
	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
		return "MLX5_EVENT_TYPE_DB_BF_CONGESTION";
	case MLX5_EVENT_TYPE_STALL_EVENT:
		return "MLX5_EVENT_TYPE_STALL_EVENT";
	case MLX5_EVENT_TYPE_CMD:
		return "MLX5_EVENT_TYPE_CMD";
	case MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED:
		return "MLX5_EVENT_TYPE_ESW_FUNCTIONS_CHANGED";
	case MLX5_EVENT_TYPE_PAGE_REQUEST:
		return "MLX5_EVENT_TYPE_PAGE_REQUEST";
	case MLX5_EVENT_TYPE_PAGE_FAULT:
		return "MLX5_EVENT_TYPE_PAGE_FAULT";
	case MLX5_EVENT_TYPE_PPS_EVENT:
		return "MLX5_EVENT_TYPE_PPS_EVENT";
	case MLX5_EVENT_TYPE_NIC_VPORT_CHANGE:
		return "MLX5_EVENT_TYPE_NIC_VPORT_CHANGE";
	case MLX5_EVENT_TYPE_FPGA_ERROR:
		return "MLX5_EVENT_TYPE_FPGA_ERROR";
	case MLX5_EVENT_TYPE_FPGA_QP_ERROR:
		return "MLX5_EVENT_TYPE_FPGA_QP_ERROR";
	case MLX5_EVENT_TYPE_GENERAL_EVENT:
		return "MLX5_EVENT_TYPE_GENERAL_EVENT";
	case MLX5_EVENT_TYPE_MONITOR_COUNTER:
		return "MLX5_EVENT_TYPE_MONITOR_COUNTER";
	case MLX5_EVENT_TYPE_DEVICE_TRACER:
		return "MLX5_EVENT_TYPE_DEVICE_TRACER";
	default:
		return "Unrecognized event";
	}
}

/* handles all FW events, type == eqe->type */
static int any_notifier(struct notifier_block *nb,
			unsigned long type, void *data)
{
	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	struct mlx5_events   *events   = event_nb->ctx;
	struct mlx5_eqe      *eqe      = data;

	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d)\n",
		      eqe_type_str(eqe->type), eqe->sub_type);
	return NOTIFY_OK;
}

/* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
{
	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	struct mlx5_events   *events   = event_nb->ctx;
	struct mlx5_eqe      *eqe      = data;
	u64 value_lsb;
	u64 value_msb;

	value_lsb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_lsb);
	value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);

	mlx5_core_warn(events->dev,
		       "High temperature on sensors with bit set %llx %llx",
		       value_msb, value_lsb);

	return NOTIFY_OK;
}

/* MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
static const char *mlx5_pme_status_to_string(enum port_module_event_status_type status)
{
	switch (status) {
	case MLX5_MODULE_STATUS_PLUGGED:
		return "Cable plugged";
	case MLX5_MODULE_STATUS_UNPLUGGED:
		return "Cable unplugged";
	case MLX5_MODULE_STATUS_ERROR:
		return "Cable error";
	case MLX5_MODULE_STATUS_DISABLED:
		return "Cable disabled";
	default:
		return "Unknown status";
	}
}

static const char *mlx5_pme_error_to_string(enum port_module_event_error_type error)
{
	switch (error) {
	case MLX5_MODULE_EVENT_ERROR_POWER_BUDGET_EXCEEDED:
		return "Power budget exceeded";
	case MLX5_MODULE_EVENT_ERROR_LONG_RANGE_FOR_NON_MLNX:
		return "Long Range for non MLNX cable";
	case MLX5_MODULE_EVENT_ERROR_BUS_STUCK:
		return "Bus stuck (I2C or data shorted)";
	case MLX5_MODULE_EVENT_ERROR_NO_EEPROM_RETRY_TIMEOUT:
		return "No EEPROM/retry timeout";
	case MLX5_MODULE_EVENT_ERROR_ENFORCE_PART_NUMBER_LIST:
		return "Enforce part number list";
	case MLX5_MODULE_EVENT_ERROR_UNKNOWN_IDENTIFIER:
		return "Unknown identifier";
	case MLX5_MODULE_EVENT_ERROR_HIGH_TEMPERATURE:
		return "High Temperature";
	case MLX5_MODULE_EVENT_ERROR_BAD_CABLE:
		return "Bad or shorted cable/module";
	case MLX5_MODULE_EVENT_ERROR_PCIE_POWER_SLOT_EXCEEDED:
		return "One or more network ports have been powered down due to insufficient/unadvertised power on the PCIe slot";
	default:
		return "Unknown error";
	}
}

/* type == MLX5_EVENT_TYPE_PORT_MODULE_EVENT */
static int port_module(struct notifier_block *nb, unsigned long type, void *data)
{
	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	struct mlx5_events   *events   = event_nb->ctx;
	struct mlx5_eqe      *eqe      = data;

	enum port_module_event_status_type module_status;
	enum port_module_event_error_type error_type;
	struct mlx5_eqe_port_module *module_event_eqe;
	const char *status_str;
	u8 module_num;

	module_event_eqe = &eqe->data.port_module;
	module_status = module_event_eqe->module_status &
			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
	error_type = module_event_eqe->error_type &
		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;

	if (module_status < MLX5_MODULE_STATUS_NUM)
		events->pme_stats.status_counters[module_status]++;

	if (module_status == MLX5_MODULE_STATUS_ERROR)
		if (error_type < MLX5_MODULE_EVENT_ERROR_NUM)
			events->pme_stats.error_counters[error_type]++;

	if (!printk_ratelimit())
		return NOTIFY_OK;

	module_num = module_event_eqe->module;
	status_str = mlx5_pme_status_to_string(module_status);
	if (module_status == MLX5_MODULE_STATUS_ERROR) {
		const char *error_str = mlx5_pme_error_to_string(error_type);

		mlx5_core_err(events->dev,
			      "Port module event[error]: module %u, %s, %s\n",
			      module_num, status_str, error_str);
	} else {
		mlx5_core_info(events->dev,
			       "Port module event: module %u, %s\n",
			       module_num, status_str);
	}

	return NOTIFY_OK;
}

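/* Power status values read from the MPEIN register's pwr_status field in
 * mlx5_pcie_event() below.
 */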
enum {
	MLX5_PCI_POWER_COULD_NOT_BE_READ = 0x0,
	MLX5_PCI_POWER_SUFFICIENT_REPORTED = 0x1,
	MLX5_PCI_POWER_INSUFFICIENT_REPORTED = 0x2,
};

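/* Deferred work for PCI power change events: queries the MPEIN register
 * and logs the power status and wattage reported for the PCIe slot.
 */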
static void mlx5_pcie_event(struct work_struct *work)
{
	u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {0};
	u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {0};
	struct mlx5_events *events;
	struct mlx5_core_dev *dev;
	u8 power_status;
	u16 pci_power;

	events = container_of(work, struct mlx5_events, pcie_core_work);
	dev = events->dev;

	if (!MLX5_CAP_MCAM_FEATURE(dev, pci_status_and_power))
		return;

	mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out),
			     MLX5_REG_MPEIN, 0, 0);
	power_status = MLX5_GET(mpein_reg, out, pwr_status);
	pci_power = MLX5_GET(mpein_reg, out, pci_power);

	switch (power_status) {
	case MLX5_PCI_POWER_COULD_NOT_BE_READ:
		mlx5_core_info_rl(dev,
				  "PCIe slot power capability was not advertised.\n");
		break;
	case MLX5_PCI_POWER_INSUFFICIENT_REPORTED:
		mlx5_core_warn_rl(dev,
				  "Detected insufficient power on the PCIe slot (%uW).\n",
				  pci_power);
		break;
	case MLX5_PCI_POWER_SUFFICIENT_REPORTED:
		mlx5_core_info_rl(dev,
				  "PCIe slot advertised sufficient power (%uW).\n",
				  pci_power);
		break;
	}
}

static int pcie_core(struct notifier_block *nb, unsigned long type, void *data)
{
	struct mlx5_event_nb    *event_nb = mlx5_nb_cof(nb,
							struct mlx5_event_nb,
							nb);
	struct mlx5_events      *events   = event_nb->ctx;
	struct mlx5_eqe         *eqe      = data;

	switch (eqe->sub_type) {
	case MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT:
		queue_work(events->wq, &events->pcie_core_work);
		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_OK;
}

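/* Snapshot the port module event counters for consumers such as the
 * mlx5e ethtool statistics.
 */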
void mlx5_get_pme_stats(struct mlx5_core_dev *dev, struct mlx5_pme_stats *stats)
{
	*stats = dev->priv.events->pme_stats;
}

/* forward event as is to registered interfaces (mlx5e/mlx5_ib) */
static int forward_event(struct notifier_block *nb, unsigned long event, void *data)
{
	struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
	struct mlx5_events   *events   = event_nb->ctx;
	struct mlx5_eqe      *eqe      = data;

	mlx5_core_dbg(events->dev, "Async eqe type %s, subtype (%d) forward to interfaces\n",
		      eqe_type_str(eqe->type), eqe->sub_type);
	atomic_notifier_call_chain(&events->nh, event, data);
	return NOTIFY_OK;
}

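/* Lifecycle: init/cleanup allocate and free the events state and its
 * workqueue; start/stop register and unregister the low level notifiers
 * from events_nbs_ref on the device's event queues.
 */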
int mlx5_events_init(struct mlx5_core_dev *dev)
{
	struct mlx5_events *events = kzalloc(sizeof(*events), GFP_KERNEL);

	if (!events)
		return -ENOMEM;

	ATOMIC_INIT_NOTIFIER_HEAD(&events->nh);
	events->dev = dev;
	dev->priv.events = events;
	events->wq = create_singlethread_workqueue("mlx5_events");
	if (!events->wq) {
		kfree(events);
		return -ENOMEM;
	}
	INIT_WORK(&events->pcie_core_work, mlx5_pcie_event);

	return 0;
}

void mlx5_events_cleanup(struct mlx5_core_dev *dev)
{
	destroy_workqueue(dev->priv.events->wq);
	kvfree(dev->priv.events);
}

void mlx5_events_start(struct mlx5_core_dev *dev)
{
	struct mlx5_events *events = dev->priv.events;
	int i;

	for (i = 0; i < ARRAY_SIZE(events_nbs_ref); i++) {
		events->notifiers[i].nb  = events_nbs_ref[i];
		events->notifiers[i].ctx = events;
		mlx5_eq_notifier_register(dev, &events->notifiers[i].nb);
	}
}

void mlx5_events_stop(struct mlx5_core_dev *dev)
{
	struct mlx5_events *events = dev->priv.events;
	int i;

	for (i = ARRAY_SIZE(events_nbs_ref) - 1; i >= 0; i--)
		mlx5_eq_notifier_unregister(dev, &events->notifiers[i].nb);
	flush_workqueue(events->wq);
}

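/* Driver-facing API: mlx5 interfaces (mlx5e/mlx5_ib) subscribe to the
 * forwarded FW events through the chain below. An illustrative sketch
 * (the handler and helper names are hypothetical, not part of this file):
 *
 *	static int my_handler(struct notifier_block *nb,
 *			      unsigned long event, void *data)
 *	{
 *		struct mlx5_eqe *eqe = data;
 *
 *		if (event == MLX5_EVENT_TYPE_PORT_CHANGE)
 *			my_handle_port_change(eqe);
 *		return NOTIFY_OK;
 *	}
 *
 *	static struct notifier_block my_nb = { .notifier_call = my_handler };
 *	...
 *	mlx5_notifier_register(mdev, &my_nb);
 */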
int mlx5_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
	struct mlx5_events *events = dev->priv.events;

	return atomic_notifier_chain_register(&events->nh, nb);
}
EXPORT_SYMBOL(mlx5_notifier_register);

int mlx5_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb)
{
	struct mlx5_events *events = dev->priv.events;

	return atomic_notifier_chain_unregister(&events->nh, nb);
}
EXPORT_SYMBOL(mlx5_notifier_unregister);

int mlx5_notifier_call_chain(struct mlx5_events *events, unsigned int event, void *data)
{
	return atomic_notifier_call_chain(&events->nh, event, data);
}