1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* AFS fileserver probing
3  *
4  * Copyright (C) 2018, 2020 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/sched.h>
9 #include <linux/slab.h>
10 #include "afs_fs.h"
11 #include "internal.h"
12 #include "protocol_yfs.h"
13 
/*
 * Delay, in jiffies, added to a server's probed_at time to work out when it is
 * next due for a probe.  The fast interval is applied to servers on the
 * fs_probe_fast queue (those whose last set of probes got no response) and the
 * slow interval to servers on the fs_probe_slow queue.
 */
static unsigned int afs_fs_probe_fast_poll_interval = 30 * HZ;
static unsigned int afs_fs_probe_slow_poll_interval = 5 * 60 * HZ;
16 
17 /*
18  * Start the probe polling timer.  We have to supply it with an inc on the
19  * outstanding server count.
20  */
afs_schedule_fs_probe(struct afs_net * net,struct afs_server * server,bool fast)21 static void afs_schedule_fs_probe(struct afs_net *net,
22 				  struct afs_server *server, bool fast)
23 {
24 	unsigned long atj;
25 
26 	if (!net->live)
27 		return;
28 
29 	atj = server->probed_at;
30 	atj += fast ? afs_fs_probe_fast_poll_interval : afs_fs_probe_slow_poll_interval;
31 
32 	afs_inc_servers_outstanding(net);
33 	if (timer_reduce(&net->fs_probe_timer, atj))
34 		afs_dec_servers_outstanding(net);
35 }
36 
37 /*
38  * Handle the completion of a set of probes.
39  */
afs_finished_fs_probe(struct afs_net * net,struct afs_server * server)40 static void afs_finished_fs_probe(struct afs_net *net, struct afs_server *server)
41 {
42 	bool responded = server->probe.responded;
43 
44 	write_seqlock(&net->fs_lock);
45 	if (responded) {
46 		list_add_tail(&server->probe_link, &net->fs_probe_slow);
47 	} else {
48 		server->rtt = UINT_MAX;
49 		clear_bit(AFS_SERVER_FL_RESPONDING, &server->flags);
50 		list_add_tail(&server->probe_link, &net->fs_probe_fast);
51 	}
52 	write_sequnlock(&net->fs_lock);
53 
54 	afs_schedule_fs_probe(net, server, !responded);
55 }
56 
57 /*
58  * Handle the completion of a probe.
59  */
afs_done_one_fs_probe(struct afs_net * net,struct afs_server * server)60 static void afs_done_one_fs_probe(struct afs_net *net, struct afs_server *server)
61 {
62 	_enter("");
63 
64 	if (atomic_dec_and_test(&server->probe_outstanding))
65 		afs_finished_fs_probe(net, server);
66 
67 	wake_up_all(&server->probe_wq);
68 }
69 
70 /*
71  * Handle inability to send a probe due to ENOMEM when trying to allocate a
72  * call struct.
73  */
afs_fs_probe_not_done(struct afs_net * net,struct afs_server * server,struct afs_addr_cursor * ac)74 static void afs_fs_probe_not_done(struct afs_net *net,
75 				  struct afs_server *server,
76 				  struct afs_addr_cursor *ac)
77 {
78 	struct afs_addr_list *alist = ac->alist;
79 	unsigned int index = ac->index;
80 
81 	_enter("");
82 
83 	trace_afs_io_error(0, -ENOMEM, afs_io_error_fs_probe_fail);
84 	spin_lock(&server->probe_lock);
85 
86 	server->probe.local_failure = true;
87 	if (server->probe.error == 0)
88 		server->probe.error = -ENOMEM;
89 
90 	set_bit(index, &alist->failed);
91 
92 	spin_unlock(&server->probe_lock);
93 	return afs_done_one_fs_probe(net, server);
94 }
95 
/*
 * Process the result of probing a fileserver.  This is called after successful
 * or failed delivery of an FS.GetCapabilities operation.
 *
 * @call: The completed call; supplies the server, the address list, the index
 *        of the address probed, the error and the abort code.
 *
 * Under the server's probe lock, the outcome is sorted into one of three
 * classes:
 *  - success or a remote abort: the address counts as having responded;
 *  - -ENOMEM/-ENONET: a local failure, which says nothing about the address;
 *  - anything else: the address is marked as failed.
 * The probe is then accounted as complete.
 */
void afs_fileserver_probe_result(struct afs_call *call)
{
	struct afs_addr_list *addrs = call->alist;
	struct afs_server *server = call->server;
	unsigned int addr_ix = call->addr_ix;
	unsigned int srtt_us = 0;
	int error = call->error;

	_enter("%pU,%u", &server->uuid, addr_ix);

	spin_lock(&server->probe_lock);

	switch (error) {
	case 0:
		server->probe.error = 0;
		break;

	case -ECONNABORTED:
		/* An abort is still a response; only record it if nothing
		 * has responded in this set of probes so far.
		 */
		if (!server->probe.responded) {
			server->probe.abort_code = call->abort_code;
			server->probe.error = error;
		}
		break;

	case -ENOMEM:
	case -ENONET:
		clear_bit(addr_ix, &addrs->responded);
		server->probe.local_failure = true;
		trace_afs_io_error(call->debug_id, error, afs_io_error_fs_probe_fail);
		goto unlock;

	case -ECONNRESET: /* Responded, but call expired. */
	case -ERFKILL:
	case -EADDRNOTAVAIL:
	case -ENETUNREACH:
	case -EHOSTUNREACH:
	case -EHOSTDOWN:
	case -ECONNREFUSED:
	case -ETIMEDOUT:
	case -ETIME:
	default:
		clear_bit(addr_ix, &addrs->responded);
		set_bit(addr_ix, &addrs->failed);

		/* This error may only replace no error or a timeout, and only
		 * while nothing has responded.
		 */
		if (!server->probe.responded) {
			switch (server->probe.error) {
			case 0:
			case -ETIMEDOUT:
			case -ETIME:
				server->probe.error = error;
				break;
			default:
				break;
			}
		}
		trace_afs_io_error(call->debug_id, error, afs_io_error_fs_probe_fail);
		goto unlock;
	}

	/* The address responded. */
	clear_bit(addr_ix, &addrs->failed);

	if (call->service_id != YFS_FS_SERVICE) {
		server->probe.not_yfs = true;
		/* Don't undo a YFS determination already made by another
		 * probe in this set.
		 */
		if (!server->probe.is_yfs) {
			clear_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
			addrs->addrs[addr_ix].srx_service = call->service_id;
		}
	} else {
		server->probe.is_yfs = true;
		set_bit(AFS_SERVER_FL_IS_YFS, &server->flags);
		addrs->addrs[addr_ix].srx_service = call->service_id;
	}

	/* Prefer this address if it has the lowest RTT seen in this set. */
	if (rxrpc_kernel_get_srtt(call->net->socket, call->rxcall, &srtt_us) &&
	    srtt_us < server->probe.rtt) {
		server->probe.rtt = srtt_us;
		server->rtt = srtt_us;
		addrs->preferred = addr_ix;
	}

	smp_wmb(); /* Set rtt before responded. */
	server->probe.responded = true;
	set_bit(addr_ix, &addrs->responded);
	set_bit(AFS_SERVER_FL_RESPONDING, &server->flags);

unlock:
	spin_unlock(&server->probe_lock);

	_debug("probe %pU [%u] %pISpc rtt=%u ret=%d",
	       &server->uuid, addr_ix, &addrs->addrs[addr_ix].transport,
	       srtt_us, error);

	afs_done_one_fs_probe(call->net, server);
}
184 
185 /*
186  * Probe one or all of a fileserver's addresses to find out the best route and
187  * to query its capabilities.
188  */
afs_fs_probe_fileserver(struct afs_net * net,struct afs_server * server,struct key * key,bool all)189 void afs_fs_probe_fileserver(struct afs_net *net, struct afs_server *server,
190 			     struct key *key, bool all)
191 {
192 	struct afs_addr_cursor ac = {
193 		.index = 0,
194 	};
195 
196 	_enter("%pU", &server->uuid);
197 
198 	read_lock(&server->fs_lock);
199 	ac.alist = rcu_dereference_protected(server->addresses,
200 					     lockdep_is_held(&server->fs_lock));
201 	afs_get_addrlist(ac.alist);
202 	read_unlock(&server->fs_lock);
203 
204 	server->probed_at = jiffies;
205 	atomic_set(&server->probe_outstanding, all ? ac.alist->nr_addrs : 1);
206 	memset(&server->probe, 0, sizeof(server->probe));
207 	server->probe.rtt = UINT_MAX;
208 
209 	ac.index = ac.alist->preferred;
210 	if (ac.index < 0 || ac.index >= ac.alist->nr_addrs)
211 		all = true;
212 
213 	if (all) {
214 		for (ac.index = 0; ac.index < ac.alist->nr_addrs; ac.index++)
215 			if (!afs_fs_get_capabilities(net, server, &ac, key))
216 				afs_fs_probe_not_done(net, server, &ac);
217 	} else {
218 		if (!afs_fs_get_capabilities(net, server, &ac, key))
219 			afs_fs_probe_not_done(net, server, &ac);
220 	}
221 
222 	afs_put_addrlist(ac.alist);
223 }
224 
225 /*
226  * Wait for the first as-yet untried fileserver to respond.
227  */
afs_wait_for_fs_probes(struct afs_server_list * slist,unsigned long untried)228 int afs_wait_for_fs_probes(struct afs_server_list *slist, unsigned long untried)
229 {
230 	struct wait_queue_entry *waits;
231 	struct afs_server *server;
232 	unsigned int rtt = UINT_MAX, rtt_s;
233 	bool have_responders = false;
234 	int pref = -1, i;
235 
236 	_enter("%u,%lx", slist->nr_servers, untried);
237 
238 	/* Only wait for servers that have a probe outstanding. */
239 	for (i = 0; i < slist->nr_servers; i++) {
240 		if (test_bit(i, &untried)) {
241 			server = slist->servers[i].server;
242 			if (!atomic_read(&server->probe_outstanding))
243 				__clear_bit(i, &untried);
244 			if (server->probe.responded)
245 				have_responders = true;
246 		}
247 	}
248 	if (have_responders || !untried)
249 		return 0;
250 
251 	waits = kmalloc(array_size(slist->nr_servers, sizeof(*waits)), GFP_KERNEL);
252 	if (!waits)
253 		return -ENOMEM;
254 
255 	for (i = 0; i < slist->nr_servers; i++) {
256 		if (test_bit(i, &untried)) {
257 			server = slist->servers[i].server;
258 			init_waitqueue_entry(&waits[i], current);
259 			add_wait_queue(&server->probe_wq, &waits[i]);
260 		}
261 	}
262 
263 	for (;;) {
264 		bool still_probing = false;
265 
266 		set_current_state(TASK_INTERRUPTIBLE);
267 		for (i = 0; i < slist->nr_servers; i++) {
268 			if (test_bit(i, &untried)) {
269 				server = slist->servers[i].server;
270 				if (server->probe.responded)
271 					goto stop;
272 				if (atomic_read(&server->probe_outstanding))
273 					still_probing = true;
274 			}
275 		}
276 
277 		if (!still_probing || signal_pending(current))
278 			goto stop;
279 		schedule();
280 	}
281 
282 stop:
283 	set_current_state(TASK_RUNNING);
284 
285 	for (i = 0; i < slist->nr_servers; i++) {
286 		if (test_bit(i, &untried)) {
287 			server = slist->servers[i].server;
288 			rtt_s = READ_ONCE(server->rtt);
289 			if (test_bit(AFS_SERVER_FL_RESPONDING, &server->flags) &&
290 			    rtt_s < rtt) {
291 				pref = i;
292 				rtt = rtt_s;
293 			}
294 
295 			remove_wait_queue(&server->probe_wq, &waits[i]);
296 		}
297 	}
298 
299 	kfree(waits);
300 
301 	if (pref == -1 && signal_pending(current))
302 		return -ERESTARTSYS;
303 
304 	if (pref >= 0)
305 		slist->preferred = pref;
306 	return 0;
307 }
308 
309 /*
310  * Probe timer.  We have an increment on fs_outstanding that we need to pass
311  * along to the work item.
312  */
afs_fs_probe_timer(struct timer_list * timer)313 void afs_fs_probe_timer(struct timer_list *timer)
314 {
315 	struct afs_net *net = container_of(timer, struct afs_net, fs_probe_timer);
316 
317 	if (!net->live || !queue_work(afs_wq, &net->fs_prober))
318 		afs_dec_servers_outstanding(net);
319 }
320 
321 /*
322  * Dispatch a probe to a server.
323  */
afs_dispatch_fs_probe(struct afs_net * net,struct afs_server * server,bool all)324 static void afs_dispatch_fs_probe(struct afs_net *net, struct afs_server *server, bool all)
325 	__releases(&net->fs_lock)
326 {
327 	struct key *key = NULL;
328 
329 	/* We remove it from the queues here - it will be added back to
330 	 * one of the queues on the completion of the probe.
331 	 */
332 	list_del_init(&server->probe_link);
333 
334 	afs_get_server(server, afs_server_trace_get_probe);
335 	write_sequnlock(&net->fs_lock);
336 
337 	afs_fs_probe_fileserver(net, server, key, all);
338 	afs_put_server(net, server, afs_server_trace_put_probe);
339 }
340 
341 /*
342  * Probe a server immediately without waiting for its due time to come
343  * round.  This is used when all of the addresses have been tried.
344  */
afs_probe_fileserver(struct afs_net * net,struct afs_server * server)345 void afs_probe_fileserver(struct afs_net *net, struct afs_server *server)
346 {
347 	write_seqlock(&net->fs_lock);
348 	if (!list_empty(&server->probe_link))
349 		return afs_dispatch_fs_probe(net, server, true);
350 	write_sequnlock(&net->fs_lock);
351 }
352 
353 /*
354  * Probe dispatcher to regularly dispatch probes to keep NAT alive.
355  */
afs_fs_probe_dispatcher(struct work_struct * work)356 void afs_fs_probe_dispatcher(struct work_struct *work)
357 {
358 	struct afs_net *net = container_of(work, struct afs_net, fs_prober);
359 	struct afs_server *fast, *slow, *server;
360 	unsigned long nowj, timer_at, poll_at;
361 	bool first_pass = true, set_timer = false;
362 
363 	if (!net->live)
364 		return;
365 
366 	_enter("");
367 
368 	if (list_empty(&net->fs_probe_fast) && list_empty(&net->fs_probe_slow)) {
369 		_leave(" [none]");
370 		return;
371 	}
372 
373 again:
374 	write_seqlock(&net->fs_lock);
375 
376 	fast = slow = server = NULL;
377 	nowj = jiffies;
378 	timer_at = nowj + MAX_JIFFY_OFFSET;
379 
380 	if (!list_empty(&net->fs_probe_fast)) {
381 		fast = list_first_entry(&net->fs_probe_fast, struct afs_server, probe_link);
382 		poll_at = fast->probed_at + afs_fs_probe_fast_poll_interval;
383 		if (time_before(nowj, poll_at)) {
384 			timer_at = poll_at;
385 			set_timer = true;
386 			fast = NULL;
387 		}
388 	}
389 
390 	if (!list_empty(&net->fs_probe_slow)) {
391 		slow = list_first_entry(&net->fs_probe_slow, struct afs_server, probe_link);
392 		poll_at = slow->probed_at + afs_fs_probe_slow_poll_interval;
393 		if (time_before(nowj, poll_at)) {
394 			if (time_before(poll_at, timer_at))
395 			    timer_at = poll_at;
396 			set_timer = true;
397 			slow = NULL;
398 		}
399 	}
400 
401 	server = fast ?: slow;
402 	if (server)
403 		_debug("probe %pU", &server->uuid);
404 
405 	if (server && (first_pass || !need_resched())) {
406 		afs_dispatch_fs_probe(net, server, server == fast);
407 		first_pass = false;
408 		goto again;
409 	}
410 
411 	write_sequnlock(&net->fs_lock);
412 
413 	if (server) {
414 		if (!queue_work(afs_wq, &net->fs_prober))
415 			afs_dec_servers_outstanding(net);
416 		_leave(" [requeue]");
417 	} else if (set_timer) {
418 		if (timer_reduce(&net->fs_probe_timer, timer_at))
419 			afs_dec_servers_outstanding(net);
420 		_leave(" [timer]");
421 	} else {
422 		afs_dec_servers_outstanding(net);
423 		_leave(" [quiesce]");
424 	}
425 }
426 
427 /*
428  * Wait for a probe on a particular fileserver to complete for 2s.
429  */
afs_wait_for_one_fs_probe(struct afs_server * server,bool is_intr)430 int afs_wait_for_one_fs_probe(struct afs_server *server, bool is_intr)
431 {
432 	struct wait_queue_entry wait;
433 	unsigned long timo = 2 * HZ;
434 
435 	if (atomic_read(&server->probe_outstanding) == 0)
436 		goto dont_wait;
437 
438 	init_wait_entry(&wait, 0);
439 	for (;;) {
440 		prepare_to_wait_event(&server->probe_wq, &wait,
441 				      is_intr ? TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE);
442 		if (timo == 0 ||
443 		    server->probe.responded ||
444 		    atomic_read(&server->probe_outstanding) == 0 ||
445 		    (is_intr && signal_pending(current)))
446 			break;
447 		timo = schedule_timeout(timo);
448 	}
449 
450 	finish_wait(&server->probe_wq, &wait);
451 
452 dont_wait:
453 	if (server->probe.responded)
454 		return 0;
455 	if (is_intr && signal_pending(current))
456 		return -ERESTARTSYS;
457 	if (timo == 0)
458 		return -ETIME;
459 	return -EDESTADDRREQ;
460 }
461 
462 /*
463  * Clean up the probing when the namespace is killed off.
464  */
afs_fs_probe_cleanup(struct afs_net * net)465 void afs_fs_probe_cleanup(struct afs_net *net)
466 {
467 	if (del_timer_sync(&net->fs_probe_timer))
468 		afs_dec_servers_outstanding(net);
469 }
470