1 // SPDX-License-Identifier: GPL-2.0
2 // Copyright (c) 2019 Mellanox Technologies.
3 
4 #include "health.h"
5 #include "lib/eq.h"
6 
/*
 * Open a named object in a devlink formatted message.
 *
 * Starts a pair nest labelled @name on @fmsg and then starts an object nest
 * inside it.
 *
 * Return: 0 on success, otherwise the non-zero error of the first devlink
 * fmsg call that failed.
 */
int mlx5e_reporter_named_obj_nest_start(struct devlink_fmsg *fmsg, char *name)
{
	int ret = devlink_fmsg_pair_nest_start(fmsg, name);

	if (ret)
		return ret;

	/* The object nest is the last step, so its status is the result. */
	return devlink_fmsg_obj_nest_start(fmsg);
}
21 
/*
 * Close a named object in a devlink formatted message.
 *
 * Ends the object nest on @fmsg first and then ends the pair nest.
 *
 * Return: 0 on success, otherwise the non-zero error of the first devlink
 * fmsg call that failed.
 */
int mlx5e_reporter_named_obj_nest_end(struct devlink_fmsg *fmsg)
{
	int ret = devlink_fmsg_obj_nest_end(fmsg);

	if (ret)
		return ret;

	/* Ending the pair nest is the last step, so its status is the result. */
	return devlink_fmsg_pair_nest_end(fmsg);
}
36 
mlx5e_reporter_cq_diagnose(struct mlx5e_cq * cq,struct devlink_fmsg * fmsg)37 int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
38 {
39 	struct mlx5e_priv *priv = cq->channel->priv;
40 	u32 out[MLX5_ST_SZ_DW(query_cq_out)] = {};
41 	u8 hw_status;
42 	void *cqc;
43 	int err;
44 
45 	err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out));
46 	if (err)
47 		return err;
48 
49 	cqc = MLX5_ADDR_OF(query_cq_out, out, cq_context);
50 	hw_status = MLX5_GET(cqc, cqc, status);
51 
52 	err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
53 	if (err)
54 		return err;
55 
56 	err = devlink_fmsg_u32_pair_put(fmsg, "cqn", cq->mcq.cqn);
57 	if (err)
58 		return err;
59 
60 	err = devlink_fmsg_u8_pair_put(fmsg, "HW status", hw_status);
61 	if (err)
62 		return err;
63 
64 	err = mlx5e_reporter_named_obj_nest_end(fmsg);
65 	if (err)
66 		return err;
67 
68 	return 0;
69 }
70 
mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq * cq,struct devlink_fmsg * fmsg)71 int mlx5e_reporter_cq_common_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
72 {
73 	u8 cq_log_stride;
74 	u32 cq_sz;
75 	int err;
76 
77 	cq_sz = mlx5_cqwq_get_size(&cq->wq);
78 	cq_log_stride = mlx5_cqwq_get_log_stride_size(&cq->wq);
79 
80 	err = mlx5e_reporter_named_obj_nest_start(fmsg, "CQ");
81 	if (err)
82 		return err;
83 
84 	err = devlink_fmsg_u64_pair_put(fmsg, "stride size", BIT(cq_log_stride));
85 	if (err)
86 		return err;
87 
88 	err = devlink_fmsg_u32_pair_put(fmsg, "size", cq_sz);
89 	if (err)
90 		return err;
91 
92 	err = mlx5e_reporter_named_obj_nest_end(fmsg);
93 	if (err)
94 		return err;
95 
96 	return 0;
97 }
98 
/*
 * Create the TX reporter and then the RX reporter for @priv.
 *
 * RX creation is not attempted when TX creation fails.
 *
 * Return: 0 on success, otherwise the non-zero error of the creation call
 * that failed.
 */
int mlx5e_health_create_reporters(struct mlx5e_priv *priv)
{
	int ret = mlx5e_reporter_tx_create(priv);

	if (ret)
		return ret;

	return mlx5e_reporter_rx_create(priv);
}
113 
/*
 * Destroy the health reporters of @priv.
 *
 * The RX reporter is destroyed before the TX reporter, i.e. in the reverse
 * of the order used by mlx5e_health_create_reporters().
 */
void mlx5e_health_destroy_reporters(struct mlx5e_priv *priv)
{
	mlx5e_reporter_rx_destroy(priv);
	mlx5e_reporter_tx_destroy(priv);
}
119 
mlx5e_health_channels_update(struct mlx5e_priv * priv)120 void mlx5e_health_channels_update(struct mlx5e_priv *priv)
121 {
122 	if (priv->tx_reporter)
123 		devlink_health_reporter_state_update(priv->tx_reporter,
124 						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
125 	if (priv->rx_reporter)
126 		devlink_health_reporter_state_update(priv->rx_reporter,
127 						     DEVLINK_HEALTH_REPORTER_STATE_HEALTHY);
128 }
129 
/*
 * Move the send queue @sqn of @channel from the error state to ready.
 *
 * Two modify-SQ commands are issued in sequence: ERR -> RST and then
 * RST -> RDY. All other modify parameters are left zeroed. A failure of
 * either step is logged on the channel's netdev and stops the sequence.
 *
 * Return: 0 on success, otherwise the error returned by mlx5e_modify_sq().
 */
int mlx5e_health_sq_to_ready(struct mlx5e_channel *channel, u32 sqn)
{
	struct mlx5e_modify_sq_param err_to_rst = {
		.curr_state = MLX5_SQC_STATE_ERR,
		.next_state = MLX5_SQC_STATE_RST,
	};
	struct mlx5e_modify_sq_param rst_to_rdy = {
		.curr_state = MLX5_SQC_STATE_RST,
		.next_state = MLX5_SQC_STATE_RDY,
	};
	struct net_device *netdev = channel->netdev;
	struct mlx5_core_dev *core_dev = channel->mdev;
	int ret;

	ret = mlx5e_modify_sq(core_dev, sqn, &err_to_rst);
	if (ret) {
		netdev_err(netdev, "Failed to move sq 0x%x to reset\n", sqn);
		return ret;
	}

	ret = mlx5e_modify_sq(core_dev, sqn, &rst_to_rdy);
	if (ret) {
		netdev_err(netdev, "Failed to move sq 0x%x to ready\n", sqn);
		return ret;
	}

	return 0;
}
158 
mlx5e_health_recover_channels(struct mlx5e_priv * priv)159 int mlx5e_health_recover_channels(struct mlx5e_priv *priv)
160 {
161 	int err = 0;
162 
163 	rtnl_lock();
164 	mutex_lock(&priv->state_lock);
165 
166 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
167 		goto out;
168 
169 	err = mlx5e_safe_reopen_channels(priv);
170 
171 out:
172 	mutex_unlock(&priv->state_lock);
173 	rtnl_unlock();
174 
175 	return err;
176 }
177 
mlx5e_health_channel_eq_recover(struct mlx5_eq_comp * eq,struct mlx5e_channel * channel)178 int mlx5e_health_channel_eq_recover(struct mlx5_eq_comp *eq, struct mlx5e_channel *channel)
179 {
180 	u32 eqe_count;
181 
182 	netdev_err(channel->netdev, "EQ 0x%x: Cons = 0x%x, irqn = 0x%x\n",
183 		   eq->core.eqn, eq->core.cons_index, eq->core.irqn);
184 
185 	eqe_count = mlx5_eq_poll_irq_disabled(eq);
186 	if (!eqe_count)
187 		return -EIO;
188 
189 	netdev_err(channel->netdev, "Recovered %d eqes on EQ 0x%x\n",
190 		   eqe_count, eq->core.eqn);
191 
192 	channel->stats->eq_rearm++;
193 	return 0;
194 }
195 
mlx5e_health_report(struct mlx5e_priv * priv,struct devlink_health_reporter * reporter,char * err_str,struct mlx5e_err_ctx * err_ctx)196 int mlx5e_health_report(struct mlx5e_priv *priv,
197 			struct devlink_health_reporter *reporter, char *err_str,
198 			struct mlx5e_err_ctx *err_ctx)
199 {
200 	if (!reporter) {
201 		netdev_err(priv->netdev, err_str);
202 		return err_ctx->recover(&err_ctx->ctx);
203 	}
204 	return devlink_health_report(reporter, err_str, err_ctx);
205 }
206