/*
 * User interface for Resource Allocation in Resource Director Technology (RDT)
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Fenghua Yu <fenghua.yu@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * More information about RDT can be found in the Intel (R) x86 Architecture
 * Software Developer Manual.
 */
20 
21 #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
22 
23 #include <linux/cacheinfo.h>
24 #include <linux/cpu.h>
25 #include <linux/debugfs.h>
26 #include <linux/fs.h>
27 #include <linux/sysfs.h>
28 #include <linux/kernfs.h>
29 #include <linux/seq_buf.h>
30 #include <linux/seq_file.h>
31 #include <linux/sched/signal.h>
32 #include <linux/sched/task.h>
33 #include <linux/slab.h>
34 #include <linux/task_work.h>
35 
36 #include <uapi/linux/magic.h>
37 
38 #include <asm/intel_rdt_sched.h>
39 #include "intel_rdt.h"
40 
/* Static branch keys, all defined in the false (disabled) state */
DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
static struct kernfs_root *rdt_root;
/* The default resource group; CLOSID 0 is reserved for it by closid_init() */
struct rdtgroup rdtgroup_default;
/* Resource groups, linked through their rdtgroup_list member */
LIST_HEAD(rdt_all_groups);

/* Kernel fs node for "info" directory under root */
static struct kernfs_node *kn_info;

/* Kernel fs node for "mon_groups" directory under root */
static struct kernfs_node *kn_mongrp;

/* Kernel fs node for "mon_data" directory under root */
static struct kernfs_node *kn_mondata;

/*
 * Status text of the last command. rdt_last_cmd_status_show() prints the
 * first seq_buf_used(&last_cmd_status) bytes of last_cmd_status_buf.
 */
static struct seq_buf last_cmd_status;
static char last_cmd_status_buf[512];

struct dentry *debugfs_resctrl;
61 
rdt_last_cmd_clear(void)62 void rdt_last_cmd_clear(void)
63 {
64 	lockdep_assert_held(&rdtgroup_mutex);
65 	seq_buf_clear(&last_cmd_status);
66 }
67 
rdt_last_cmd_puts(const char * s)68 void rdt_last_cmd_puts(const char *s)
69 {
70 	lockdep_assert_held(&rdtgroup_mutex);
71 	seq_buf_puts(&last_cmd_status, s);
72 }
73 
rdt_last_cmd_printf(const char * fmt,...)74 void rdt_last_cmd_printf(const char *fmt, ...)
75 {
76 	va_list ap;
77 
78 	va_start(ap, fmt);
79 	lockdep_assert_held(&rdtgroup_mutex);
80 	seq_buf_vprintf(&last_cmd_status, fmt, ap);
81 	va_end(ap);
82 }
83 
/*
 * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
 * we can keep a bitmap of free CLOSIDs in a single integer.
 *
 * Using a global CLOSID across all resources has some advantages and
 * some drawbacks:
 * + We can simply set "current->closid" to assign a task to a resource
 *   group.
 * + Context switch code can avoid extra memory references deciding which
 *   CLOSID to load into the PQR_ASSOC MSR
 * - We give up some options in configuring resource groups across multi-socket
 *   systems.
 * - Our choices on how to configure each resource become progressively more
 *   limited as the number of resources grows.
 */
/* Bit n set: CLOSID n is free (see closid_alloc() and closid_free()) */
static int closid_free_map;
/* Number of CLOSIDs covered by the map; set by closid_init() */
static int closid_free_map_len;
101 
closids_supported(void)102 int closids_supported(void)
103 {
104 	return closid_free_map_len;
105 }
106 
closid_init(void)107 static void closid_init(void)
108 {
109 	struct rdt_resource *r;
110 	int rdt_min_closid = 32;
111 
112 	/* Compute rdt_min_closid across all resources */
113 	for_each_alloc_enabled_rdt_resource(r)
114 		rdt_min_closid = min(rdt_min_closid, r->num_closid);
115 
116 	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
117 
118 	/* CLOSID 0 is always reserved for the default group */
119 	closid_free_map &= ~1;
120 	closid_free_map_len = rdt_min_closid;
121 }
122 
closid_alloc(void)123 static int closid_alloc(void)
124 {
125 	u32 closid = ffs(closid_free_map);
126 
127 	if (closid == 0)
128 		return -ENOSPC;
129 	closid--;
130 	closid_free_map &= ~(1 << closid);
131 
132 	return closid;
133 }
134 
closid_free(int closid)135 void closid_free(int closid)
136 {
137 	closid_free_map |= 1 << closid;
138 }
139 
140 /**
141  * closid_allocated - test if provided closid is in use
142  * @closid: closid to be tested
143  *
144  * Return: true if @closid is currently associated with a resource group,
145  * false if @closid is free
146  */
closid_allocated(unsigned int closid)147 static bool closid_allocated(unsigned int closid)
148 {
149 	return (closid_free_map & (1 << closid)) == 0;
150 }
151 
152 /**
153  * rdtgroup_mode_by_closid - Return mode of resource group with closid
154  * @closid: closid if the resource group
155  *
156  * Each resource group is associated with a @closid. Here the mode
157  * of a resource group can be queried by searching for it using its closid.
158  *
159  * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
160  */
rdtgroup_mode_by_closid(int closid)161 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
162 {
163 	struct rdtgroup *rdtgrp;
164 
165 	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
166 		if (rdtgrp->closid == closid)
167 			return rdtgrp->mode;
168 	}
169 
170 	return RDT_NUM_MODES;
171 }
172 
/* Mode names indexed by &enum rdtgrp_mode; read by rdtgroup_mode_str() */
static const char * const rdt_mode_str[] = {
	[RDT_MODE_SHAREABLE]		= "shareable",
	[RDT_MODE_EXCLUSIVE]		= "exclusive",
	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
};
179 
180 /**
181  * rdtgroup_mode_str - Return the string representation of mode
182  * @mode: the resource group mode as &enum rdtgroup_mode
183  *
184  * Return: string representation of valid mode, "unknown" otherwise
185  */
rdtgroup_mode_str(enum rdtgrp_mode mode)186 static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
187 {
188 	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
189 		return "unknown";
190 
191 	return rdt_mode_str[mode];
192 }
193 
194 /* set uid and gid of rdtgroup dirs and files to that of the creator */
rdtgroup_kn_set_ugid(struct kernfs_node * kn)195 static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
196 {
197 	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
198 				.ia_uid = current_fsuid(),
199 				.ia_gid = current_fsgid(), };
200 
201 	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
202 	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
203 		return 0;
204 
205 	return kernfs_setattr(kn, &iattr);
206 }
207 
rdtgroup_add_file(struct kernfs_node * parent_kn,struct rftype * rft)208 static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
209 {
210 	struct kernfs_node *kn;
211 	int ret;
212 
213 	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
214 				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
215 				  0, rft->kf_ops, rft, NULL, NULL);
216 	if (IS_ERR(kn))
217 		return PTR_ERR(kn);
218 
219 	ret = rdtgroup_kn_set_ugid(kn);
220 	if (ret) {
221 		kernfs_remove(kn);
222 		return ret;
223 	}
224 
225 	return 0;
226 }
227 
rdtgroup_seqfile_show(struct seq_file * m,void * arg)228 static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
229 {
230 	struct kernfs_open_file *of = m->private;
231 	struct rftype *rft = of->kn->priv;
232 
233 	if (rft->seq_show)
234 		return rft->seq_show(of, m, arg);
235 	return 0;
236 }
237 
rdtgroup_file_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)238 static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
239 				   size_t nbytes, loff_t off)
240 {
241 	struct rftype *rft = of->kn->priv;
242 
243 	if (rft->write)
244 		return rft->write(of, buf, nbytes, off);
245 
246 	return -EINVAL;
247 }
248 
/*
 * kernfs operations that dispatch reads and writes to the per-file
 * rftype handlers via rdtgroup_seqfile_show() and rdtgroup_file_write().
 */
static struct kernfs_ops rdtgroup_kf_single_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.write			= rdtgroup_file_write,
	.seq_show		= rdtgroup_seqfile_show,
};
254 
/*
 * kernfs operations whose reads are served by rdtgroup_mondata_show();
 * no write handler is installed.
 */
static struct kernfs_ops kf_mondata_ops = {
	.atomic_write_len	= PAGE_SIZE,
	.seq_show		= rdtgroup_mondata_show,
};
259 
is_cpu_list(struct kernfs_open_file * of)260 static bool is_cpu_list(struct kernfs_open_file *of)
261 {
262 	struct rftype *rft = of->kn->priv;
263 
264 	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
265 }
266 
rdtgroup_cpus_show(struct kernfs_open_file * of,struct seq_file * s,void * v)267 static int rdtgroup_cpus_show(struct kernfs_open_file *of,
268 			      struct seq_file *s, void *v)
269 {
270 	struct rdtgroup *rdtgrp;
271 	int ret = 0;
272 
273 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
274 
275 	if (rdtgrp) {
276 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
277 			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
278 				   cpumask_pr_args(&rdtgrp->plr->d->cpu_mask));
279 		else
280 			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
281 				   cpumask_pr_args(&rdtgrp->cpu_mask));
282 	} else {
283 		ret = -ENOENT;
284 	}
285 	rdtgroup_kn_unlock(of->kn);
286 
287 	return ret;
288 }
289 
290 /*
291  * This is safe against intel_rdt_sched_in() called from __switch_to()
292  * because __switch_to() is executed with interrupts disabled. A local call
293  * from update_closid_rmid() is proteced against __switch_to() because
294  * preemption is disabled.
295  */
update_cpu_closid_rmid(void * info)296 static void update_cpu_closid_rmid(void *info)
297 {
298 	struct rdtgroup *r = info;
299 
300 	if (r) {
301 		this_cpu_write(pqr_state.default_closid, r->closid);
302 		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
303 	}
304 
305 	/*
306 	 * We cannot unconditionally write the MSR because the current
307 	 * executing task might have its own closid selected. Just reuse
308 	 * the context switch code.
309 	 */
310 	intel_rdt_sched_in();
311 }
312 
313 /*
314  * Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
315  *
316  * Per task closids/rmids must have been set up before calling this function.
317  */
318 static void
update_closid_rmid(const struct cpumask * cpu_mask,struct rdtgroup * r)319 update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
320 {
321 	int cpu = get_cpu();
322 
323 	if (cpumask_test_cpu(cpu, cpu_mask))
324 		update_cpu_closid_rmid(r);
325 	smp_call_function_many(cpu_mask, update_cpu_closid_rmid, r, 1);
326 	put_cpu();
327 }
328 
/*
 * Apply a new CPU mask to a monitor group.
 *
 * @newmask may only contain CPUs owned by the parent control group. CPUs
 * dropped from the group go back to the parent; CPUs added to the group
 * are taken away from the sibling monitor groups. @tmpmask is scratch
 * space provided by the caller.
 *
 * Returns 0 on success, -EINVAL if @newmask contains CPUs outside the
 * parent's mask.
 */
static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
			  cpumask_var_t tmpmask)
{
	struct rdtgroup *parent = rdtgrp->mon.parent;
	struct rdtgroup *sibling;

	/* Every requested cpu must belong to the parent ctrl group */
	cpumask_andnot(tmpmask, newmask, &parent->cpu_mask);
	if (!cpumask_empty(tmpmask)) {
		rdt_last_cmd_puts("can only add CPUs to mongroup that belong to parent\n");
		return -EINVAL;
	}

	/* Cpus dropped from this group are handed to the parent rdtgroup */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (!cpumask_empty(tmpmask)) {
		cpumask_or(&parent->cpu_mask, &parent->cpu_mask, tmpmask);
		update_closid_rmid(tmpmask, parent);
	}

	/*
	 * Newly added cpus are removed from whichever sibling owned them
	 * before and get their per-cpu rmid updated.
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (!cpumask_empty(tmpmask)) {
		list_for_each_entry(sibling, &parent->mon.crdtgrp_list,
				    mon.crdtgrp_list) {
			if (sibling != rdtgrp)
				cpumask_andnot(&sibling->cpu_mask,
					       &sibling->cpu_mask, tmpmask);
		}
		update_closid_rmid(tmpmask, rdtgrp);
	}

	/* Done pushing/pulling - update this group with new mask */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	return 0;
}
371 
cpumask_rdtgrp_clear(struct rdtgroup * r,struct cpumask * m)372 static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
373 {
374 	struct rdtgroup *crgrp;
375 
376 	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
377 	/* update the child mon group masks as well*/
378 	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
379 		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
380 }
381 
/*
 * Apply a new CPU mask to a control group.
 *
 * CPUs dropped from the group are given to the default group (the default
 * group itself cannot drop CPUs). CPUs added to the group are removed
 * from every other group on rdt_all_groups and from those groups' child
 * monitor groups. Afterwards the masks of this group's own child monitor
 * groups are cleared. @tmpmask and @tmpmask1 are scratch space provided
 * by the caller.
 *
 * Returns 0 on success, -EINVAL when CPUs would be dropped from the
 * default group.
 */
static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
{
	struct rdtgroup *other, *child;

	/* Work out which cpus are dropped from this group */
	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
	if (!cpumask_empty(tmpmask)) {
		/* Can't drop from default group */
		if (rdtgrp == &rdtgroup_default) {
			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
			return -EINVAL;
		}

		/* Dropped cpus fall back to rdtgroup_default */
		cpumask_or(&rdtgroup_default.cpu_mask,
			   &rdtgroup_default.cpu_mask, tmpmask);
		update_closid_rmid(tmpmask, &rdtgroup_default);
	}

	/*
	 * Added cpus are taken from the group that owned them so far (and
	 * from that group's child groups) and get their per-cpu
	 * closid/rmid updated.
	 */
	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
	if (!cpumask_empty(tmpmask)) {
		list_for_each_entry(other, &rdt_all_groups, rdtgroup_list) {
			if (other == rdtgrp)
				continue;

			cpumask_and(tmpmask1, &other->cpu_mask, tmpmask);
			if (!cpumask_empty(tmpmask1))
				cpumask_rdtgrp_clear(other, tmpmask1);
		}
		update_closid_rmid(tmpmask, rdtgrp);
	}

	/* Done pushing/pulling - update this group with new mask */
	cpumask_copy(&rdtgrp->cpu_mask, newmask);

	/*
	 * The parent mask changed: reset the child mon group masks and
	 * update the rmid for the cpus each child gives up.
	 */
	list_for_each_entry(child, &rdtgrp->mon.crdtgrp_list,
			    mon.crdtgrp_list) {
		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &child->cpu_mask);
		update_closid_rmid(tmpmask, rdtgrp);
		cpumask_clear(&child->cpu_mask);
	}

	return 0;
}
436 
rdtgroup_cpus_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)437 static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
438 				   char *buf, size_t nbytes, loff_t off)
439 {
440 	cpumask_var_t tmpmask, newmask, tmpmask1;
441 	struct rdtgroup *rdtgrp;
442 	int ret;
443 
444 	if (!buf)
445 		return -EINVAL;
446 
447 	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
448 		return -ENOMEM;
449 	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
450 		free_cpumask_var(tmpmask);
451 		return -ENOMEM;
452 	}
453 	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
454 		free_cpumask_var(tmpmask);
455 		free_cpumask_var(newmask);
456 		return -ENOMEM;
457 	}
458 
459 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
460 	rdt_last_cmd_clear();
461 	if (!rdtgrp) {
462 		ret = -ENOENT;
463 		rdt_last_cmd_puts("directory was removed\n");
464 		goto unlock;
465 	}
466 
467 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
468 	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
469 		ret = -EINVAL;
470 		rdt_last_cmd_puts("pseudo-locking in progress\n");
471 		goto unlock;
472 	}
473 
474 	if (is_cpu_list(of))
475 		ret = cpulist_parse(buf, newmask);
476 	else
477 		ret = cpumask_parse(buf, newmask);
478 
479 	if (ret) {
480 		rdt_last_cmd_puts("bad cpu list/mask\n");
481 		goto unlock;
482 	}
483 
484 	/* check that user didn't specify any offline cpus */
485 	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
486 	if (cpumask_weight(tmpmask)) {
487 		ret = -EINVAL;
488 		rdt_last_cmd_puts("can only assign online cpus\n");
489 		goto unlock;
490 	}
491 
492 	if (rdtgrp->type == RDTCTRL_GROUP)
493 		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
494 	else if (rdtgrp->type == RDTMON_GROUP)
495 		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
496 	else
497 		ret = -EINVAL;
498 
499 unlock:
500 	rdtgroup_kn_unlock(of->kn);
501 	free_cpumask_var(tmpmask);
502 	free_cpumask_var(newmask);
503 	free_cpumask_var(tmpmask1);
504 
505 	return ret ?: nbytes;
506 }
507 
/*
 * Task work item queued by __rdtgroup_move_task() and consumed (and
 * freed) by move_myself().
 */
struct task_move_callback {
	struct callback_head	work;	/* handed to task_work_add() */
	struct rdtgroup		*rdtgrp; /* group whose waitcount is held */
};
512 
move_myself(struct callback_head * head)513 static void move_myself(struct callback_head *head)
514 {
515 	struct task_move_callback *callback;
516 	struct rdtgroup *rdtgrp;
517 
518 	callback = container_of(head, struct task_move_callback, work);
519 	rdtgrp = callback->rdtgrp;
520 
521 	/*
522 	 * If resource group was deleted before this task work callback
523 	 * was invoked, then assign the task to root group and free the
524 	 * resource group.
525 	 */
526 	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
527 	    (rdtgrp->flags & RDT_DELETED)) {
528 		current->closid = 0;
529 		current->rmid = 0;
530 		kfree(rdtgrp);
531 	}
532 
533 	preempt_disable();
534 	/* update PQR_ASSOC MSR to make resource group go into effect */
535 	intel_rdt_sched_in();
536 	preempt_enable();
537 
538 	kfree(callback);
539 }
540 
__rdtgroup_move_task(struct task_struct * tsk,struct rdtgroup * rdtgrp)541 static int __rdtgroup_move_task(struct task_struct *tsk,
542 				struct rdtgroup *rdtgrp)
543 {
544 	struct task_move_callback *callback;
545 	int ret;
546 
547 	callback = kzalloc(sizeof(*callback), GFP_KERNEL);
548 	if (!callback)
549 		return -ENOMEM;
550 	callback->work.func = move_myself;
551 	callback->rdtgrp = rdtgrp;
552 
553 	/*
554 	 * Take a refcount, so rdtgrp cannot be freed before the
555 	 * callback has been invoked.
556 	 */
557 	atomic_inc(&rdtgrp->waitcount);
558 	ret = task_work_add(tsk, &callback->work, true);
559 	if (ret) {
560 		/*
561 		 * Task is exiting. Drop the refcount and free the callback.
562 		 * No need to check the refcount as the group cannot be
563 		 * deleted before the write function unlocks rdtgroup_mutex.
564 		 */
565 		atomic_dec(&rdtgrp->waitcount);
566 		kfree(callback);
567 		rdt_last_cmd_puts("task exited\n");
568 	} else {
569 		/*
570 		 * For ctrl_mon groups move both closid and rmid.
571 		 * For monitor groups, can move the tasks only from
572 		 * their parent CTRL group.
573 		 */
574 		if (rdtgrp->type == RDTCTRL_GROUP) {
575 			tsk->closid = rdtgrp->closid;
576 			tsk->rmid = rdtgrp->mon.rmid;
577 		} else if (rdtgrp->type == RDTMON_GROUP) {
578 			if (rdtgrp->mon.parent->closid == tsk->closid) {
579 				tsk->rmid = rdtgrp->mon.rmid;
580 			} else {
581 				rdt_last_cmd_puts("Can't move task to different control group\n");
582 				ret = -EINVAL;
583 			}
584 		}
585 	}
586 	return ret;
587 }
588 
589 /**
590  * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
591  * @r: Resource group
592  *
593  * Return: 1 if tasks have been assigned to @r, 0 otherwise
594  */
rdtgroup_tasks_assigned(struct rdtgroup * r)595 int rdtgroup_tasks_assigned(struct rdtgroup *r)
596 {
597 	struct task_struct *p, *t;
598 	int ret = 0;
599 
600 	lockdep_assert_held(&rdtgroup_mutex);
601 
602 	rcu_read_lock();
603 	for_each_process_thread(p, t) {
604 		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
605 		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) {
606 			ret = 1;
607 			break;
608 		}
609 	}
610 	rcu_read_unlock();
611 
612 	return ret;
613 }
614 
rdtgroup_task_write_permission(struct task_struct * task,struct kernfs_open_file * of)615 static int rdtgroup_task_write_permission(struct task_struct *task,
616 					  struct kernfs_open_file *of)
617 {
618 	const struct cred *tcred = get_task_cred(task);
619 	const struct cred *cred = current_cred();
620 	int ret = 0;
621 
622 	/*
623 	 * Even if we're attaching all tasks in the thread group, we only
624 	 * need to check permissions on one of them.
625 	 */
626 	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
627 	    !uid_eq(cred->euid, tcred->uid) &&
628 	    !uid_eq(cred->euid, tcred->suid)) {
629 		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
630 		ret = -EPERM;
631 	}
632 
633 	put_cred(tcred);
634 	return ret;
635 }
636 
/*
 * Move the task identified by @pid into @rdtgrp; a @pid of 0 selects the
 * calling task. The task is pinned with a reference while the permission
 * check and the move are performed.
 *
 * Returns 0 on success, -ESRCH if no task with @pid exists, or the error
 * from the permission check or from __rdtgroup_move_task().
 */
static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
			      struct kernfs_open_file *of)
{
	struct task_struct *tsk;
	int ret;

	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	if (!tsk) {
		rcu_read_unlock();
		rdt_last_cmd_printf("No task %d\n", pid);
		return -ESRCH;
	}
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = rdtgroup_task_write_permission(tsk, of);
	if (ret == 0)
		ret = __rdtgroup_move_task(tsk, rdtgrp);

	put_task_struct(tsk);

	return ret;
}
665 
rdtgroup_tasks_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)666 static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
667 				    char *buf, size_t nbytes, loff_t off)
668 {
669 	struct rdtgroup *rdtgrp;
670 	int ret = 0;
671 	pid_t pid;
672 
673 	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
674 		return -EINVAL;
675 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
676 	if (!rdtgrp) {
677 		rdtgroup_kn_unlock(of->kn);
678 		return -ENOENT;
679 	}
680 	rdt_last_cmd_clear();
681 
682 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
683 	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
684 		ret = -EINVAL;
685 		rdt_last_cmd_puts("pseudo-locking in progress\n");
686 		goto unlock;
687 	}
688 
689 	ret = rdtgroup_move_task(pid, rdtgrp, of);
690 
691 unlock:
692 	rdtgroup_kn_unlock(of->kn);
693 
694 	return ret ?: nbytes;
695 }
696 
show_rdt_tasks(struct rdtgroup * r,struct seq_file * s)697 static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
698 {
699 	struct task_struct *p, *t;
700 
701 	rcu_read_lock();
702 	for_each_process_thread(p, t) {
703 		if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
704 		    (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid))
705 			seq_printf(s, "%d\n", t->pid);
706 	}
707 	rcu_read_unlock();
708 }
709 
rdtgroup_tasks_show(struct kernfs_open_file * of,struct seq_file * s,void * v)710 static int rdtgroup_tasks_show(struct kernfs_open_file *of,
711 			       struct seq_file *s, void *v)
712 {
713 	struct rdtgroup *rdtgrp;
714 	int ret = 0;
715 
716 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
717 	if (rdtgrp)
718 		show_rdt_tasks(rdtgrp, s);
719 	else
720 		ret = -ENOENT;
721 	rdtgroup_kn_unlock(of->kn);
722 
723 	return ret;
724 }
725 
rdt_last_cmd_status_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)726 static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
727 				    struct seq_file *seq, void *v)
728 {
729 	int len;
730 
731 	mutex_lock(&rdtgroup_mutex);
732 	len = seq_buf_used(&last_cmd_status);
733 	if (len)
734 		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
735 	else
736 		seq_puts(seq, "ok\n");
737 	mutex_unlock(&rdtgroup_mutex);
738 	return 0;
739 }
740 
rdt_num_closids_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)741 static int rdt_num_closids_show(struct kernfs_open_file *of,
742 				struct seq_file *seq, void *v)
743 {
744 	struct rdt_resource *r = of->kn->parent->priv;
745 
746 	seq_printf(seq, "%d\n", r->num_closid);
747 	return 0;
748 }
749 
rdt_default_ctrl_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)750 static int rdt_default_ctrl_show(struct kernfs_open_file *of,
751 			     struct seq_file *seq, void *v)
752 {
753 	struct rdt_resource *r = of->kn->parent->priv;
754 
755 	seq_printf(seq, "%x\n", r->default_ctrl);
756 	return 0;
757 }
758 
rdt_min_cbm_bits_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)759 static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
760 			     struct seq_file *seq, void *v)
761 {
762 	struct rdt_resource *r = of->kn->parent->priv;
763 
764 	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
765 	return 0;
766 }
767 
rdt_shareable_bits_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)768 static int rdt_shareable_bits_show(struct kernfs_open_file *of,
769 				   struct seq_file *seq, void *v)
770 {
771 	struct rdt_resource *r = of->kn->parent->priv;
772 
773 	seq_printf(seq, "%x\n", r->cache.shareable_bits);
774 	return 0;
775 }
776 
777 /**
778  * rdt_bit_usage_show - Display current usage of resources
779  *
780  * A domain is a shared resource that can now be allocated differently. Here
781  * we display the current regions of the domain as an annotated bitmask.
782  * For each domain of this resource its allocation bitmask
783  * is annotated as below to indicate the current usage of the corresponding bit:
784  *   0 - currently unused
785  *   X - currently available for sharing and used by software and hardware
786  *   H - currently used by hardware only but available for software use
787  *   S - currently used and shareable by software only
788  *   E - currently used exclusively by one resource group
789  *   P - currently pseudo-locked by one resource group
790  */
rdt_bit_usage_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)791 static int rdt_bit_usage_show(struct kernfs_open_file *of,
792 			      struct seq_file *seq, void *v)
793 {
794 	struct rdt_resource *r = of->kn->parent->priv;
795 	u32 sw_shareable = 0, hw_shareable = 0;
796 	u32 exclusive = 0, pseudo_locked = 0;
797 	struct rdt_domain *dom;
798 	int i, hwb, swb, excl, psl;
799 	enum rdtgrp_mode mode;
800 	bool sep = false;
801 	u32 *ctrl;
802 
803 	mutex_lock(&rdtgroup_mutex);
804 	hw_shareable = r->cache.shareable_bits;
805 	list_for_each_entry(dom, &r->domains, list) {
806 		if (sep)
807 			seq_putc(seq, ';');
808 		ctrl = dom->ctrl_val;
809 		sw_shareable = 0;
810 		exclusive = 0;
811 		seq_printf(seq, "%d=", dom->id);
812 		for (i = 0; i < closids_supported(); i++, ctrl++) {
813 			if (!closid_allocated(i))
814 				continue;
815 			mode = rdtgroup_mode_by_closid(i);
816 			switch (mode) {
817 			case RDT_MODE_SHAREABLE:
818 				sw_shareable |= *ctrl;
819 				break;
820 			case RDT_MODE_EXCLUSIVE:
821 				exclusive |= *ctrl;
822 				break;
823 			case RDT_MODE_PSEUDO_LOCKSETUP:
824 			/*
825 			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
826 			 * here but not included since the CBM
827 			 * associated with this CLOSID in this mode
828 			 * is not initialized and no task or cpu can be
829 			 * assigned this CLOSID.
830 			 */
831 				break;
832 			case RDT_MODE_PSEUDO_LOCKED:
833 			case RDT_NUM_MODES:
834 				WARN(1,
835 				     "invalid mode for closid %d\n", i);
836 				break;
837 			}
838 		}
839 		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
840 			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
841 			hwb = test_bit(i, (unsigned long *)&hw_shareable);
842 			swb = test_bit(i, (unsigned long *)&sw_shareable);
843 			excl = test_bit(i, (unsigned long *)&exclusive);
844 			psl = test_bit(i, (unsigned long *)&pseudo_locked);
845 			if (hwb && swb)
846 				seq_putc(seq, 'X');
847 			else if (hwb && !swb)
848 				seq_putc(seq, 'H');
849 			else if (!hwb && swb)
850 				seq_putc(seq, 'S');
851 			else if (excl)
852 				seq_putc(seq, 'E');
853 			else if (psl)
854 				seq_putc(seq, 'P');
855 			else /* Unused bits remain */
856 				seq_putc(seq, '0');
857 		}
858 		sep = true;
859 	}
860 	seq_putc(seq, '\n');
861 	mutex_unlock(&rdtgroup_mutex);
862 	return 0;
863 }
864 
rdt_min_bw_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)865 static int rdt_min_bw_show(struct kernfs_open_file *of,
866 			     struct seq_file *seq, void *v)
867 {
868 	struct rdt_resource *r = of->kn->parent->priv;
869 
870 	seq_printf(seq, "%u\n", r->membw.min_bw);
871 	return 0;
872 }
873 
rdt_num_rmids_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)874 static int rdt_num_rmids_show(struct kernfs_open_file *of,
875 			      struct seq_file *seq, void *v)
876 {
877 	struct rdt_resource *r = of->kn->parent->priv;
878 
879 	seq_printf(seq, "%d\n", r->num_rmid);
880 
881 	return 0;
882 }
883 
rdt_mon_features_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)884 static int rdt_mon_features_show(struct kernfs_open_file *of,
885 				 struct seq_file *seq, void *v)
886 {
887 	struct rdt_resource *r = of->kn->parent->priv;
888 	struct mon_evt *mevt;
889 
890 	list_for_each_entry(mevt, &r->evt_list, list)
891 		seq_printf(seq, "%s\n", mevt->name);
892 
893 	return 0;
894 }
895 
rdt_bw_gran_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)896 static int rdt_bw_gran_show(struct kernfs_open_file *of,
897 			     struct seq_file *seq, void *v)
898 {
899 	struct rdt_resource *r = of->kn->parent->priv;
900 
901 	seq_printf(seq, "%u\n", r->membw.bw_gran);
902 	return 0;
903 }
904 
rdt_delay_linear_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)905 static int rdt_delay_linear_show(struct kernfs_open_file *of,
906 			     struct seq_file *seq, void *v)
907 {
908 	struct rdt_resource *r = of->kn->parent->priv;
909 
910 	seq_printf(seq, "%u\n", r->membw.delay_linear);
911 	return 0;
912 }
913 
max_threshold_occ_show(struct kernfs_open_file * of,struct seq_file * seq,void * v)914 static int max_threshold_occ_show(struct kernfs_open_file *of,
915 				  struct seq_file *seq, void *v)
916 {
917 	struct rdt_resource *r = of->kn->parent->priv;
918 
919 	seq_printf(seq, "%u\n", intel_cqm_threshold * r->mon_scale);
920 
921 	return 0;
922 }
923 
max_threshold_occ_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)924 static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
925 				       char *buf, size_t nbytes, loff_t off)
926 {
927 	struct rdt_resource *r = of->kn->parent->priv;
928 	unsigned int bytes;
929 	int ret;
930 
931 	ret = kstrtouint(buf, 0, &bytes);
932 	if (ret)
933 		return ret;
934 
935 	if (bytes > (boot_cpu_data.x86_cache_size * 1024))
936 		return -EINVAL;
937 
938 	intel_cqm_threshold = bytes / r->mon_scale;
939 
940 	return nbytes;
941 }
942 
943 /*
944  * rdtgroup_mode_show - Display mode of this resource group
945  */
rdtgroup_mode_show(struct kernfs_open_file * of,struct seq_file * s,void * v)946 static int rdtgroup_mode_show(struct kernfs_open_file *of,
947 			      struct seq_file *s, void *v)
948 {
949 	struct rdtgroup *rdtgrp;
950 
951 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
952 	if (!rdtgrp) {
953 		rdtgroup_kn_unlock(of->kn);
954 		return -ENOENT;
955 	}
956 
957 	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
958 
959 	rdtgroup_kn_unlock(of->kn);
960 	return 0;
961 }
962 
963 /**
964  * rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
965  * @r: Resource to which domain instance @d belongs.
966  * @d: The domain instance for which @closid is being tested.
967  * @cbm: Capacity bitmask being tested.
968  * @closid: Intended closid for @cbm.
969  * @exclusive: Only check if overlaps with exclusive resource groups
970  *
971  * Checks if provided @cbm intended to be used for @closid on domain
972  * @d overlaps with any other closids or other hardware usage associated
973  * with this domain. If @exclusive is true then only overlaps with
974  * resource groups in exclusive mode will be considered. If @exclusive
975  * is false then overlaps with any resource group or hardware entities
976  * will be considered.
977  *
978  * @cbm is unsigned long, even if only 32 bits are used, to make the
979  * bitmap functions work correctly.
980  *
981  * Return: false if CBM does not overlap, true if it does.
982  */
rdtgroup_cbm_overlaps(struct rdt_resource * r,struct rdt_domain * d,unsigned long cbm,int closid,bool exclusive)983 bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
984 			   unsigned long cbm, int closid, bool exclusive)
985 {
986 	enum rdtgrp_mode mode;
987 	unsigned long ctrl_b;
988 	u32 *ctrl;
989 	int i;
990 
991 	/* Check for any overlap with regions used by hardware directly */
992 	if (!exclusive) {
993 		ctrl_b = r->cache.shareable_bits;
994 		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
995 			return true;
996 	}
997 
998 	/* Check for overlap with other resource groups */
999 	ctrl = d->ctrl_val;
1000 	for (i = 0; i < closids_supported(); i++, ctrl++) {
1001 		ctrl_b = *ctrl;
1002 		mode = rdtgroup_mode_by_closid(i);
1003 		if (closid_allocated(i) && i != closid &&
1004 		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1005 			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1006 				if (exclusive) {
1007 					if (mode == RDT_MODE_EXCLUSIVE)
1008 						return true;
1009 					continue;
1010 				}
1011 				return true;
1012 			}
1013 		}
1014 	}
1015 
1016 	return false;
1017 }
1018 
1019 /**
1020  * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1021  *
1022  * An exclusive resource group implies that there should be no sharing of
1023  * its allocated resources. At the time this group is considered to be
1024  * exclusive this test can determine if its current schemata supports this
1025  * setting by testing for overlap with all other resource groups.
1026  *
1027  * Return: true if resource group can be exclusive, false if there is overlap
1028  * with allocations of other resource groups and thus this resource group
1029  * cannot be exclusive.
1030  */
rdtgroup_mode_test_exclusive(struct rdtgroup * rdtgrp)1031 static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1032 {
1033 	int closid = rdtgrp->closid;
1034 	struct rdt_resource *r;
1035 	bool has_cache = false;
1036 	struct rdt_domain *d;
1037 
1038 	for_each_alloc_enabled_rdt_resource(r) {
1039 		if (r->rid == RDT_RESOURCE_MBA)
1040 			continue;
1041 		has_cache = true;
1042 		list_for_each_entry(d, &r->domains, list) {
1043 			if (rdtgroup_cbm_overlaps(r, d, d->ctrl_val[closid],
1044 						  rdtgrp->closid, false)) {
1045 				rdt_last_cmd_puts("schemata overlaps\n");
1046 				return false;
1047 			}
1048 		}
1049 	}
1050 
1051 	if (!has_cache) {
1052 		rdt_last_cmd_puts("cannot be exclusive without CAT/CDP\n");
1053 		return false;
1054 	}
1055 
1056 	return true;
1057 }
1058 
1059 /**
1060  * rdtgroup_mode_write - Modify the resource group's mode
1061  *
1062  */
rdtgroup_mode_write(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)1063 static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1064 				   char *buf, size_t nbytes, loff_t off)
1065 {
1066 	struct rdtgroup *rdtgrp;
1067 	enum rdtgrp_mode mode;
1068 	int ret = 0;
1069 
1070 	/* Valid input requires a trailing newline */
1071 	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1072 		return -EINVAL;
1073 	buf[nbytes - 1] = '\0';
1074 
1075 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1076 	if (!rdtgrp) {
1077 		rdtgroup_kn_unlock(of->kn);
1078 		return -ENOENT;
1079 	}
1080 
1081 	rdt_last_cmd_clear();
1082 
1083 	mode = rdtgrp->mode;
1084 
1085 	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1086 	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1087 	    (!strcmp(buf, "pseudo-locksetup") &&
1088 	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1089 	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1090 		goto out;
1091 
1092 	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1093 		rdt_last_cmd_printf("cannot change pseudo-locked group\n");
1094 		ret = -EINVAL;
1095 		goto out;
1096 	}
1097 
1098 	if (!strcmp(buf, "shareable")) {
1099 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1100 			ret = rdtgroup_locksetup_exit(rdtgrp);
1101 			if (ret)
1102 				goto out;
1103 		}
1104 		rdtgrp->mode = RDT_MODE_SHAREABLE;
1105 	} else if (!strcmp(buf, "exclusive")) {
1106 		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1107 			ret = -EINVAL;
1108 			goto out;
1109 		}
1110 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1111 			ret = rdtgroup_locksetup_exit(rdtgrp);
1112 			if (ret)
1113 				goto out;
1114 		}
1115 		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1116 	} else if (!strcmp(buf, "pseudo-locksetup")) {
1117 		ret = rdtgroup_locksetup_enter(rdtgrp);
1118 		if (ret)
1119 			goto out;
1120 		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1121 	} else {
1122 		rdt_last_cmd_printf("unknown/unsupported mode\n");
1123 		ret = -EINVAL;
1124 	}
1125 
1126 out:
1127 	rdtgroup_kn_unlock(of->kn);
1128 	return ret ?: nbytes;
1129 }
1130 
1131 /**
1132  * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1133  * @r: RDT resource to which @d belongs.
1134  * @d: RDT domain instance.
1135  * @cbm: bitmask for which the size should be computed.
1136  *
1137  * The bitmask provided associated with the RDT domain instance @d will be
1138  * translated into how many bytes it represents. The size in bytes is
1139  * computed by first dividing the total cache size by the CBM length to
1140  * determine how many bytes each bit in the bitmask represents. The result
1141  * is multiplied with the number of bits set in the bitmask.
1142  *
1143  * @cbm is unsigned long, even if only 32 bits are used to make the
1144  * bitmap functions work correctly.
1145  */
rdtgroup_cbm_to_size(struct rdt_resource * r,struct rdt_domain * d,unsigned long cbm)1146 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1147 				  struct rdt_domain *d, unsigned long cbm)
1148 {
1149 	struct cpu_cacheinfo *ci;
1150 	unsigned int size = 0;
1151 	int num_b, i;
1152 
1153 	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1154 	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
1155 	for (i = 0; i < ci->num_leaves; i++) {
1156 		if (ci->info_list[i].level == r->cache_level) {
1157 			size = ci->info_list[i].size / r->cache.cbm_len * num_b;
1158 			break;
1159 		}
1160 	}
1161 
1162 	return size;
1163 }
1164 
1165 /**
1166  * rdtgroup_size_show - Display size in bytes of allocated regions
1167  *
1168  * The "size" file mirrors the layout of the "schemata" file, printing the
1169  * size in bytes of each region instead of the capacity bitmask.
1170  *
1171  */
rdtgroup_size_show(struct kernfs_open_file * of,struct seq_file * s,void * v)1172 static int rdtgroup_size_show(struct kernfs_open_file *of,
1173 			      struct seq_file *s, void *v)
1174 {
1175 	struct rdtgroup *rdtgrp;
1176 	struct rdt_resource *r;
1177 	struct rdt_domain *d;
1178 	unsigned int size;
1179 	bool sep;
1180 	u32 ctrl;
1181 
1182 	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1183 	if (!rdtgrp) {
1184 		rdtgroup_kn_unlock(of->kn);
1185 		return -ENOENT;
1186 	}
1187 
1188 	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1189 		seq_printf(s, "%*s:", max_name_width, rdtgrp->plr->r->name);
1190 		size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
1191 					    rdtgrp->plr->d,
1192 					    rdtgrp->plr->cbm);
1193 		seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
1194 		goto out;
1195 	}
1196 
1197 	for_each_alloc_enabled_rdt_resource(r) {
1198 		sep = false;
1199 		seq_printf(s, "%*s:", max_name_width, r->name);
1200 		list_for_each_entry(d, &r->domains, list) {
1201 			if (sep)
1202 				seq_putc(s, ';');
1203 			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1204 				size = 0;
1205 			} else {
1206 				ctrl = (!is_mba_sc(r) ?
1207 						d->ctrl_val[rdtgrp->closid] :
1208 						d->mbps_val[rdtgrp->closid]);
1209 				if (r->rid == RDT_RESOURCE_MBA)
1210 					size = ctrl;
1211 				else
1212 					size = rdtgroup_cbm_to_size(r, d, ctrl);
1213 			}
1214 			seq_printf(s, "%d=%u", d->id, size);
1215 			sep = true;
1216 		}
1217 		seq_putc(s, '\n');
1218 	}
1219 
1220 out:
1221 	rdtgroup_kn_unlock(of->kn);
1222 
1223 	return 0;
1224 }
1225 
/*
 * Table of all files that can appear in resctrl directories.
 *
 * Each entry names a file, its permission bits, the kernfs operations and
 * the show/write handlers backing it. @fflags describes the kind of
 * directory the file belongs to: rdtgroup_add_files() creates the file in a
 * directory only when every bit of the entry's @fflags is set in the flags
 * requested for that directory. rdtgroup_kn_mode_restore() also consults
 * this table to look up a file's default mode by name.
 */
static struct rftype res_common_files[] = {
	{
		.name		= "last_cmd_status",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_last_cmd_status_show,
		.fflags		= RF_TOP_INFO,
	},
	{
		.name		= "num_closids",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_num_closids_show,
		.fflags		= RF_CTRL_INFO,
	},
	{
		.name		= "mon_features",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_mon_features_show,
		.fflags		= RF_MON_INFO,
	},
	{
		.name		= "num_rmids",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_num_rmids_show,
		.fflags		= RF_MON_INFO,
	},
	{
		.name		= "cbm_mask",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_default_ctrl_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name		= "min_cbm_bits",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_min_cbm_bits_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name		= "shareable_bits",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_shareable_bits_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name		= "bit_usage",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_bit_usage_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name		= "min_bandwidth",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_min_bw_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
	},
	{
		.name		= "bandwidth_gran",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_bw_gran_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
	},
	{
		.name		= "delay_linear",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdt_delay_linear_show,
		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
	},
	{
		.name		= "max_threshold_occupancy",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= max_threshold_occ_write,
		.seq_show	= max_threshold_occ_show,
		.fflags		= RF_MON_INFO | RFTYPE_RES_CACHE,
	},
	{
		.name		= "cpus",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_cpus_write,
		.seq_show	= rdtgroup_cpus_show,
		.fflags		= RFTYPE_BASE,
	},
	{
		.name		= "cpus_list",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_cpus_write,
		.seq_show	= rdtgroup_cpus_show,
		.flags		= RFTYPE_FLAGS_CPUS_LIST,
		.fflags		= RFTYPE_BASE,
	},
	{
		.name		= "tasks",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_tasks_write,
		.seq_show	= rdtgroup_tasks_show,
		.fflags		= RFTYPE_BASE,
	},
	{
		.name		= "schemata",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_schemata_write,
		.seq_show	= rdtgroup_schemata_show,
		.fflags		= RF_CTRL_BASE,
	},
	{
		.name		= "mode",
		.mode		= 0644,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.write		= rdtgroup_mode_write,
		.seq_show	= rdtgroup_mode_show,
		.fflags		= RF_CTRL_BASE,
	},
	{
		.name		= "size",
		.mode		= 0444,
		.kf_ops		= &rdtgroup_kf_single_ops,
		.seq_show	= rdtgroup_size_show,
		.fflags		= RF_CTRL_BASE,
	},

};
1363 
rdtgroup_add_files(struct kernfs_node * kn,unsigned long fflags)1364 static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
1365 {
1366 	struct rftype *rfts, *rft;
1367 	int ret, len;
1368 
1369 	rfts = res_common_files;
1370 	len = ARRAY_SIZE(res_common_files);
1371 
1372 	lockdep_assert_held(&rdtgroup_mutex);
1373 
1374 	for (rft = rfts; rft < rfts + len; rft++) {
1375 		if ((fflags & rft->fflags) == rft->fflags) {
1376 			ret = rdtgroup_add_file(kn, rft);
1377 			if (ret)
1378 				goto error;
1379 		}
1380 	}
1381 
1382 	return 0;
1383 error:
1384 	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
1385 	while (--rft >= rfts) {
1386 		if ((fflags & rft->fflags) == rft->fflags)
1387 			kernfs_remove_by_name(kn, rft->name);
1388 	}
1389 	return ret;
1390 }
1391 
1392 /**
1393  * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
1394  * @r: The resource group with which the file is associated.
1395  * @name: Name of the file
1396  *
1397  * The permissions of named resctrl file, directory, or link are modified
1398  * to not allow read, write, or execute by any user.
1399  *
1400  * WARNING: This function is intended to communicate to the user that the
1401  * resctrl file has been locked down - that it is not relevant to the
1402  * particular state the system finds itself in. It should not be relied
1403  * on to protect from user access because after the file's permissions
1404  * are restricted the user can still change the permissions using chmod
1405  * from the command line.
1406  *
1407  * Return: 0 on success, <0 on failure.
1408  */
rdtgroup_kn_mode_restrict(struct rdtgroup * r,const char * name)1409 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
1410 {
1411 	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1412 	struct kernfs_node *kn;
1413 	int ret = 0;
1414 
1415 	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1416 	if (!kn)
1417 		return -ENOENT;
1418 
1419 	switch (kernfs_type(kn)) {
1420 	case KERNFS_DIR:
1421 		iattr.ia_mode = S_IFDIR;
1422 		break;
1423 	case KERNFS_FILE:
1424 		iattr.ia_mode = S_IFREG;
1425 		break;
1426 	case KERNFS_LINK:
1427 		iattr.ia_mode = S_IFLNK;
1428 		break;
1429 	}
1430 
1431 	ret = kernfs_setattr(kn, &iattr);
1432 	kernfs_put(kn);
1433 	return ret;
1434 }
1435 
1436 /**
1437  * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
1438  * @r: The resource group with which the file is associated.
1439  * @name: Name of the file
1440  * @mask: Mask of permissions that should be restored
1441  *
1442  * Restore the permissions of the named file. If @name is a directory the
1443  * permissions of its parent will be used.
1444  *
1445  * Return: 0 on success, <0 on failure.
1446  */
rdtgroup_kn_mode_restore(struct rdtgroup * r,const char * name,umode_t mask)1447 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
1448 			     umode_t mask)
1449 {
1450 	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1451 	struct kernfs_node *kn, *parent;
1452 	struct rftype *rfts, *rft;
1453 	int ret, len;
1454 
1455 	rfts = res_common_files;
1456 	len = ARRAY_SIZE(res_common_files);
1457 
1458 	for (rft = rfts; rft < rfts + len; rft++) {
1459 		if (!strcmp(rft->name, name))
1460 			iattr.ia_mode = rft->mode & mask;
1461 	}
1462 
1463 	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1464 	if (!kn)
1465 		return -ENOENT;
1466 
1467 	switch (kernfs_type(kn)) {
1468 	case KERNFS_DIR:
1469 		parent = kernfs_get_parent(kn);
1470 		if (parent) {
1471 			iattr.ia_mode |= parent->mode;
1472 			kernfs_put(parent);
1473 		}
1474 		iattr.ia_mode |= S_IFDIR;
1475 		break;
1476 	case KERNFS_FILE:
1477 		iattr.ia_mode |= S_IFREG;
1478 		break;
1479 	case KERNFS_LINK:
1480 		iattr.ia_mode |= S_IFLNK;
1481 		break;
1482 	}
1483 
1484 	ret = kernfs_setattr(kn, &iattr);
1485 	kernfs_put(kn);
1486 	return ret;
1487 }
1488 
rdtgroup_mkdir_info_resdir(struct rdt_resource * r,char * name,unsigned long fflags)1489 static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
1490 				      unsigned long fflags)
1491 {
1492 	struct kernfs_node *kn_subdir;
1493 	int ret;
1494 
1495 	kn_subdir = kernfs_create_dir(kn_info, name,
1496 				      kn_info->mode, r);
1497 	if (IS_ERR(kn_subdir))
1498 		return PTR_ERR(kn_subdir);
1499 
1500 	kernfs_get(kn_subdir);
1501 	ret = rdtgroup_kn_set_ugid(kn_subdir);
1502 	if (ret)
1503 		return ret;
1504 
1505 	ret = rdtgroup_add_files(kn_subdir, fflags);
1506 	if (!ret)
1507 		kernfs_activate(kn_subdir);
1508 
1509 	return ret;
1510 }
1511 
rdtgroup_create_info_dir(struct kernfs_node * parent_kn)1512 static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
1513 {
1514 	struct rdt_resource *r;
1515 	unsigned long fflags;
1516 	char name[32];
1517 	int ret;
1518 
1519 	/* create the directory */
1520 	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
1521 	if (IS_ERR(kn_info))
1522 		return PTR_ERR(kn_info);
1523 	kernfs_get(kn_info);
1524 
1525 	ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
1526 	if (ret)
1527 		goto out_destroy;
1528 
1529 	for_each_alloc_enabled_rdt_resource(r) {
1530 		fflags =  r->fflags | RF_CTRL_INFO;
1531 		ret = rdtgroup_mkdir_info_resdir(r, r->name, fflags);
1532 		if (ret)
1533 			goto out_destroy;
1534 	}
1535 
1536 	for_each_mon_enabled_rdt_resource(r) {
1537 		fflags =  r->fflags | RF_MON_INFO;
1538 		sprintf(name, "%s_MON", r->name);
1539 		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
1540 		if (ret)
1541 			goto out_destroy;
1542 	}
1543 
1544 	/*
1545 	 * This extra ref will be put in kernfs_remove() and guarantees
1546 	 * that @rdtgrp->kn is always accessible.
1547 	 */
1548 	kernfs_get(kn_info);
1549 
1550 	ret = rdtgroup_kn_set_ugid(kn_info);
1551 	if (ret)
1552 		goto out_destroy;
1553 
1554 	kernfs_activate(kn_info);
1555 
1556 	return 0;
1557 
1558 out_destroy:
1559 	kernfs_remove(kn_info);
1560 	return ret;
1561 }
1562 
1563 static int
mongroup_create_dir(struct kernfs_node * parent_kn,struct rdtgroup * prgrp,char * name,struct kernfs_node ** dest_kn)1564 mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
1565 		    char *name, struct kernfs_node **dest_kn)
1566 {
1567 	struct kernfs_node *kn;
1568 	int ret;
1569 
1570 	/* create the directory */
1571 	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
1572 	if (IS_ERR(kn))
1573 		return PTR_ERR(kn);
1574 
1575 	if (dest_kn)
1576 		*dest_kn = kn;
1577 
1578 	/*
1579 	 * This extra ref will be put in kernfs_remove() and guarantees
1580 	 * that @rdtgrp->kn is always accessible.
1581 	 */
1582 	kernfs_get(kn);
1583 
1584 	ret = rdtgroup_kn_set_ugid(kn);
1585 	if (ret)
1586 		goto out_destroy;
1587 
1588 	kernfs_activate(kn);
1589 
1590 	return 0;
1591 
1592 out_destroy:
1593 	kernfs_remove(kn);
1594 	return ret;
1595 }
1596 
l3_qos_cfg_update(void * arg)1597 static void l3_qos_cfg_update(void *arg)
1598 {
1599 	bool *enable = arg;
1600 
1601 	wrmsrl(IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
1602 }
1603 
l2_qos_cfg_update(void * arg)1604 static void l2_qos_cfg_update(void *arg)
1605 {
1606 	bool *enable = arg;
1607 
1608 	wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
1609 }
1610 
is_mba_linear(void)1611 static inline bool is_mba_linear(void)
1612 {
1613 	return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
1614 }
1615 
set_cache_qos_cfg(int level,bool enable)1616 static int set_cache_qos_cfg(int level, bool enable)
1617 {
1618 	void (*update)(void *arg);
1619 	struct rdt_resource *r_l;
1620 	cpumask_var_t cpu_mask;
1621 	struct rdt_domain *d;
1622 	int cpu;
1623 
1624 	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
1625 		return -ENOMEM;
1626 
1627 	if (level == RDT_RESOURCE_L3)
1628 		update = l3_qos_cfg_update;
1629 	else if (level == RDT_RESOURCE_L2)
1630 		update = l2_qos_cfg_update;
1631 	else
1632 		return -EINVAL;
1633 
1634 	r_l = &rdt_resources_all[level];
1635 	list_for_each_entry(d, &r_l->domains, list) {
1636 		/* Pick one CPU from each domain instance to update MSR */
1637 		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
1638 	}
1639 	cpu = get_cpu();
1640 	/* Update QOS_CFG MSR on this cpu if it's in cpu_mask. */
1641 	if (cpumask_test_cpu(cpu, cpu_mask))
1642 		update(&enable);
1643 	/* Update QOS_CFG MSR on all other cpus in cpu_mask. */
1644 	smp_call_function_many(cpu_mask, update, &enable, 1);
1645 	put_cpu();
1646 
1647 	free_cpumask_var(cpu_mask);
1648 
1649 	return 0;
1650 }
1651 
1652 /*
1653  * Enable or disable the MBA software controller
1654  * which helps user specify bandwidth in MBps.
1655  * MBA software controller is supported only if
1656  * MBM is supported and MBA is in linear scale.
1657  */
set_mba_sc(bool mba_sc)1658 static int set_mba_sc(bool mba_sc)
1659 {
1660 	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
1661 	struct rdt_domain *d;
1662 
1663 	if (!is_mbm_enabled() || !is_mba_linear() ||
1664 	    mba_sc == is_mba_sc(r))
1665 		return -EINVAL;
1666 
1667 	r->membw.mba_sc = mba_sc;
1668 	list_for_each_entry(d, &r->domains, list)
1669 		setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
1670 
1671 	return 0;
1672 }
1673 
cdp_enable(int level,int data_type,int code_type)1674 static int cdp_enable(int level, int data_type, int code_type)
1675 {
1676 	struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
1677 	struct rdt_resource *r_lcode = &rdt_resources_all[code_type];
1678 	struct rdt_resource *r_l = &rdt_resources_all[level];
1679 	int ret;
1680 
1681 	if (!r_l->alloc_capable || !r_ldata->alloc_capable ||
1682 	    !r_lcode->alloc_capable)
1683 		return -EINVAL;
1684 
1685 	ret = set_cache_qos_cfg(level, true);
1686 	if (!ret) {
1687 		r_l->alloc_enabled = false;
1688 		r_ldata->alloc_enabled = true;
1689 		r_lcode->alloc_enabled = true;
1690 	}
1691 	return ret;
1692 }
1693 
cdpl3_enable(void)1694 static int cdpl3_enable(void)
1695 {
1696 	return cdp_enable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA,
1697 			  RDT_RESOURCE_L3CODE);
1698 }
1699 
cdpl2_enable(void)1700 static int cdpl2_enable(void)
1701 {
1702 	return cdp_enable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA,
1703 			  RDT_RESOURCE_L2CODE);
1704 }
1705 
cdp_disable(int level,int data_type,int code_type)1706 static void cdp_disable(int level, int data_type, int code_type)
1707 {
1708 	struct rdt_resource *r = &rdt_resources_all[level];
1709 
1710 	r->alloc_enabled = r->alloc_capable;
1711 
1712 	if (rdt_resources_all[data_type].alloc_enabled) {
1713 		rdt_resources_all[data_type].alloc_enabled = false;
1714 		rdt_resources_all[code_type].alloc_enabled = false;
1715 		set_cache_qos_cfg(level, false);
1716 	}
1717 }
1718 
cdpl3_disable(void)1719 static void cdpl3_disable(void)
1720 {
1721 	cdp_disable(RDT_RESOURCE_L3, RDT_RESOURCE_L3DATA, RDT_RESOURCE_L3CODE);
1722 }
1723 
cdpl2_disable(void)1724 static void cdpl2_disable(void)
1725 {
1726 	cdp_disable(RDT_RESOURCE_L2, RDT_RESOURCE_L2DATA, RDT_RESOURCE_L2CODE);
1727 }
1728 
cdp_disable_all(void)1729 static void cdp_disable_all(void)
1730 {
1731 	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
1732 		cdpl3_disable();
1733 	if (rdt_resources_all[RDT_RESOURCE_L2DATA].alloc_enabled)
1734 		cdpl2_disable();
1735 }
1736 
parse_rdtgroupfs_options(char * data)1737 static int parse_rdtgroupfs_options(char *data)
1738 {
1739 	char *token, *o = data;
1740 	int ret = 0;
1741 
1742 	while ((token = strsep(&o, ",")) != NULL) {
1743 		if (!*token) {
1744 			ret = -EINVAL;
1745 			goto out;
1746 		}
1747 
1748 		if (!strcmp(token, "cdp")) {
1749 			ret = cdpl3_enable();
1750 			if (ret)
1751 				goto out;
1752 		} else if (!strcmp(token, "cdpl2")) {
1753 			ret = cdpl2_enable();
1754 			if (ret)
1755 				goto out;
1756 		} else if (!strcmp(token, "mba_MBps")) {
1757 			ret = set_mba_sc(true);
1758 			if (ret)
1759 				goto out;
1760 		} else {
1761 			ret = -EINVAL;
1762 			goto out;
1763 		}
1764 	}
1765 
1766 	return 0;
1767 
1768 out:
1769 	pr_err("Invalid mount option \"%s\"\n", token);
1770 
1771 	return ret;
1772 }
1773 
1774 /*
1775  * We don't allow rdtgroup directories to be created anywhere
1776  * except the root directory. Thus when looking for the rdtgroup
1777  * structure for a kernfs node we are either looking at a directory,
1778  * in which case the rdtgroup structure is pointed at by the "priv"
1779  * field, otherwise we have a file, and need only look to the parent
1780  * to find the rdtgroup.
1781  */
kernfs_to_rdtgroup(struct kernfs_node * kn)1782 static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
1783 {
1784 	if (kernfs_type(kn) == KERNFS_DIR) {
1785 		/*
1786 		 * All the resource directories use "kn->priv"
1787 		 * to point to the "struct rdtgroup" for the
1788 		 * resource. "info" and its subdirectories don't
1789 		 * have rdtgroup structures, so return NULL here.
1790 		 */
1791 		if (kn == kn_info || kn->parent == kn_info)
1792 			return NULL;
1793 		else
1794 			return kn->priv;
1795 	} else {
1796 		return kn->parent->priv;
1797 	}
1798 }
1799 
/*
 * rdtgroup_kn_lock_live - Take rdtgroup_mutex on behalf of a kernfs node
 * @kn: kernfs node of a resctrl file or directory.
 *
 * If @kn maps to no resource group, NULL is returned and nothing is locked.
 * Otherwise the group's waitcount is raised, kernfs active protection on
 * @kn is broken and rdtgroup_mutex is acquired, in that order.
 *
 * Return: the live resource group, or NULL. Note that NULL is also
 * returned, with the mutex and the waitcount still held, when the group
 * was flagged RDT_DELETED while waiting. Callers therefore pair every call
 * with rdtgroup_kn_unlock() on the same @kn, including after a NULL
 * return.
 */
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
{
	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);

	if (!rdtgrp)
		return NULL;

	/* Announce ourselves as a waiter before blocking on the mutex */
	atomic_inc(&rdtgrp->waitcount);
	kernfs_break_active_protection(kn);

	mutex_lock(&rdtgroup_mutex);

	/* Was this group deleted while we waited? */
	if (rdtgrp->flags & RDT_DELETED)
		return NULL;

	return rdtgrp;
}
1818 
/*
 * rdtgroup_kn_unlock - Undo rdtgroup_kn_lock_live() for a kernfs node
 * @kn: The node that was passed to rdtgroup_kn_lock_live().
 *
 * Does nothing if @kn maps to no resource group (the lock side took
 * nothing in that case). Otherwise rdtgroup_mutex is released and the
 * group's waitcount dropped. The waiter that drops the count to zero on a
 * group flagged RDT_DELETED performs the final teardown: pseudo-lock state
 * is removed if the group is in a pseudo-lock mode, the reference on the
 * group's kernfs node is put and the group structure is freed. In every
 * case kernfs active protection on @kn is restored.
 */
void rdtgroup_kn_unlock(struct kernfs_node *kn)
{
	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);

	if (!rdtgrp)
		return;

	mutex_unlock(&rdtgroup_mutex);

	/* Last waiter on a deleted group is responsible for freeing it */
	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
	    (rdtgrp->flags & RDT_DELETED)) {
		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
			rdtgroup_pseudo_lock_remove(rdtgrp);
		kernfs_unbreak_active_protection(kn);
		kernfs_put(rdtgrp->kn);
		kfree(rdtgrp);
	} else {
		kernfs_unbreak_active_protection(kn);
	}
}
1840 
1841 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
1842 			     struct rdtgroup *prgrp,
1843 			     struct kernfs_node **mon_data_kn);
1844 
rdt_mount(struct file_system_type * fs_type,int flags,const char * unused_dev_name,void * data)1845 static struct dentry *rdt_mount(struct file_system_type *fs_type,
1846 				int flags, const char *unused_dev_name,
1847 				void *data)
1848 {
1849 	struct rdt_domain *dom;
1850 	struct rdt_resource *r;
1851 	struct dentry *dentry;
1852 	int ret;
1853 
1854 	cpus_read_lock();
1855 	mutex_lock(&rdtgroup_mutex);
1856 	/*
1857 	 * resctrl file system can only be mounted once.
1858 	 */
1859 	if (static_branch_unlikely(&rdt_enable_key)) {
1860 		dentry = ERR_PTR(-EBUSY);
1861 		goto out;
1862 	}
1863 
1864 	ret = parse_rdtgroupfs_options(data);
1865 	if (ret) {
1866 		dentry = ERR_PTR(ret);
1867 		goto out_cdp;
1868 	}
1869 
1870 	closid_init();
1871 
1872 	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
1873 	if (ret) {
1874 		dentry = ERR_PTR(ret);
1875 		goto out_cdp;
1876 	}
1877 
1878 	if (rdt_mon_capable) {
1879 		ret = mongroup_create_dir(rdtgroup_default.kn,
1880 					  NULL, "mon_groups",
1881 					  &kn_mongrp);
1882 		if (ret) {
1883 			dentry = ERR_PTR(ret);
1884 			goto out_info;
1885 		}
1886 		kernfs_get(kn_mongrp);
1887 
1888 		ret = mkdir_mondata_all(rdtgroup_default.kn,
1889 					&rdtgroup_default, &kn_mondata);
1890 		if (ret) {
1891 			dentry = ERR_PTR(ret);
1892 			goto out_mongrp;
1893 		}
1894 		kernfs_get(kn_mondata);
1895 		rdtgroup_default.mon.mon_data_kn = kn_mondata;
1896 	}
1897 
1898 	ret = rdt_pseudo_lock_init();
1899 	if (ret) {
1900 		dentry = ERR_PTR(ret);
1901 		goto out_mondata;
1902 	}
1903 
1904 	dentry = kernfs_mount(fs_type, flags, rdt_root,
1905 			      RDTGROUP_SUPER_MAGIC, NULL);
1906 	if (IS_ERR(dentry))
1907 		goto out_psl;
1908 
1909 	if (rdt_alloc_capable)
1910 		static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
1911 	if (rdt_mon_capable)
1912 		static_branch_enable_cpuslocked(&rdt_mon_enable_key);
1913 
1914 	if (rdt_alloc_capable || rdt_mon_capable)
1915 		static_branch_enable_cpuslocked(&rdt_enable_key);
1916 
1917 	if (is_mbm_enabled()) {
1918 		r = &rdt_resources_all[RDT_RESOURCE_L3];
1919 		list_for_each_entry(dom, &r->domains, list)
1920 			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
1921 	}
1922 
1923 	goto out;
1924 
1925 out_psl:
1926 	rdt_pseudo_lock_release();
1927 out_mondata:
1928 	if (rdt_mon_capable)
1929 		kernfs_remove(kn_mondata);
1930 out_mongrp:
1931 	if (rdt_mon_capable)
1932 		kernfs_remove(kn_mongrp);
1933 out_info:
1934 	kernfs_remove(kn_info);
1935 out_cdp:
1936 	cdp_disable_all();
1937 out:
1938 	rdt_last_cmd_clear();
1939 	mutex_unlock(&rdtgroup_mutex);
1940 	cpus_read_unlock();
1941 
1942 	return dentry;
1943 }
1944 
reset_all_ctrls(struct rdt_resource * r)1945 static int reset_all_ctrls(struct rdt_resource *r)
1946 {
1947 	struct msr_param msr_param;
1948 	cpumask_var_t cpu_mask;
1949 	struct rdt_domain *d;
1950 	int i, cpu;
1951 
1952 	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
1953 		return -ENOMEM;
1954 
1955 	msr_param.res = r;
1956 	msr_param.low = 0;
1957 	msr_param.high = r->num_closid;
1958 
1959 	/*
1960 	 * Disable resource control for this resource by setting all
1961 	 * CBMs in all domains to the maximum mask value. Pick one CPU
1962 	 * from each domain to update the MSRs below.
1963 	 */
1964 	list_for_each_entry(d, &r->domains, list) {
1965 		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
1966 
1967 		for (i = 0; i < r->num_closid; i++)
1968 			d->ctrl_val[i] = r->default_ctrl;
1969 	}
1970 	cpu = get_cpu();
1971 	/* Update CBM on this cpu if it's in cpu_mask. */
1972 	if (cpumask_test_cpu(cpu, cpu_mask))
1973 		rdt_ctrl_update(&msr_param);
1974 	/* Update CBM on all other cpus in cpu_mask. */
1975 	smp_call_function_many(cpu_mask, rdt_ctrl_update, &msr_param, 1);
1976 	put_cpu();
1977 
1978 	free_cpumask_var(cpu_mask);
1979 
1980 	return 0;
1981 }
1982 
is_closid_match(struct task_struct * t,struct rdtgroup * r)1983 static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
1984 {
1985 	return (rdt_alloc_capable &&
1986 		(r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
1987 }
1988 
is_rmid_match(struct task_struct * t,struct rdtgroup * r)1989 static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
1990 {
1991 	return (rdt_mon_capable &&
1992 		(r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
1993 }
1994 
1995 /*
1996  * Move tasks from one to the other group. If @from is NULL, then all tasks
1997  * in the systems are moved unconditionally (used for teardown).
1998  *
1999  * If @mask is not NULL the cpus on which moved tasks are running are set
2000  * in that mask so the update smp function call is restricted to affected
2001  * cpus.
2002  */
rdt_move_group_tasks(struct rdtgroup * from,struct rdtgroup * to,struct cpumask * mask)2003 static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2004 				 struct cpumask *mask)
2005 {
2006 	struct task_struct *p, *t;
2007 
2008 	read_lock(&tasklist_lock);
2009 	for_each_process_thread(p, t) {
2010 		if (!from || is_closid_match(t, from) ||
2011 		    is_rmid_match(t, from)) {
2012 			t->closid = to->closid;
2013 			t->rmid = to->mon.rmid;
2014 
2015 #ifdef CONFIG_SMP
2016 			/*
2017 			 * This is safe on x86 w/o barriers as the ordering
2018 			 * of writing to task_cpu() and t->on_cpu is
2019 			 * reverse to the reading here. The detection is
2020 			 * inaccurate as tasks might move or schedule
2021 			 * before the smp function call takes place. In
2022 			 * such a case the function call is pointless, but
2023 			 * there is no other side effect.
2024 			 */
2025 			if (mask && t->on_cpu)
2026 				cpumask_set_cpu(task_cpu(t), mask);
2027 #endif
2028 		}
2029 	}
2030 	read_unlock(&tasklist_lock);
2031 }
2032 
free_all_child_rdtgrp(struct rdtgroup * rdtgrp)2033 static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2034 {
2035 	struct rdtgroup *sentry, *stmp;
2036 	struct list_head *head;
2037 
2038 	head = &rdtgrp->mon.crdtgrp_list;
2039 	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
2040 		free_rmid(sentry->mon.rmid);
2041 		list_del(&sentry->mon.crdtgrp_list);
2042 		kfree(sentry);
2043 	}
2044 }
2045 
/*
 * Forcibly remove all of subdirectories under root.
 *
 * All tasks are first moved back to the default group. Then every group on
 * rdt_all_groups has its child monitor groups freed and, unless it is the
 * default group, has its pseudo-lock state removed (when in a pseudo-lock
 * mode), its CPUs handed to the default group, its RMID freed, its kernfs
 * directory removed and its structure freed. Finally the online CPUs are
 * updated for the default group and the "info", "mon_groups" and "mon_data"
 * nodes under root are removed.
 */
static void rmdir_all_sub(void)
{
	struct rdtgroup *rdtgrp, *tmp;

	/* Move all tasks to the default resource group */
	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);

	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
		/* Free any child rmids (this is done for the default group too) */
		free_all_child_rdtgrp(rdtgrp);

		/* Remove each rdtgroup other than root */
		if (rdtgrp == &rdtgroup_default)
			continue;

		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
			rdtgroup_pseudo_lock_remove(rdtgrp);

		/*
		 * Give any CPUs back to the default group. We cannot copy
		 * cpu_online_mask because a CPU might have executed the
		 * offline callback already, but is still marked online.
		 */
		cpumask_or(&rdtgroup_default.cpu_mask,
			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);

		free_rmid(rdtgrp->mon.rmid);

		kernfs_remove(rdtgrp->kn);
		list_del(&rdtgrp->rdtgroup_list);
		kfree(rdtgrp);
	}
	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
	update_closid_rmid(cpu_online_mask, &rdtgroup_default);

	/* Remove the fixed directories that live directly under root */
	kernfs_remove(kn_info);
	kernfs_remove(kn_mongrp);
	kernfs_remove(kn_mondata);
}
2089 
/*
 * kill_sb callback of the resctrl filesystem (see rdt_fs_type).
 *
 * With the CPU hotplug read lock and rdtgroup_mutex held: switch off the
 * MBA software controller, reset the controls of every allocation enabled
 * resource, disable CDP, remove all groups, release pseudo-lock state, put
 * the default group back into shareable mode, disable the RDT static keys
 * and finally let kernfs kill the super block.
 */
static void rdt_kill_sb(struct super_block *sb)
{
	struct rdt_resource *r;

	cpus_read_lock();
	mutex_lock(&rdtgroup_mutex);

	set_mba_sc(false);

	/* Put everything back to default values. */
	for_each_alloc_enabled_rdt_resource(r)
		reset_all_ctrls(r);
	cdp_disable_all();
	rmdir_all_sub();
	rdt_pseudo_lock_release();
	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
	/* The _cpuslocked variants are used because cpus_read_lock() is held */
	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
	static_branch_disable_cpuslocked(&rdt_enable_key);
	kernfs_kill_sb(sb);
	mutex_unlock(&rdtgroup_mutex);
	cpus_read_unlock();
}
2113 
/*
 * Filesystem type for "resctrl": mounting goes through rdt_mount() and
 * super block teardown through rdt_kill_sb().
 */
static struct file_system_type rdt_fs_type = {
	.name    = "resctrl",
	.mount   = rdt_mount,
	.kill_sb = rdt_kill_sb,
};
2119 
mon_addfile(struct kernfs_node * parent_kn,const char * name,void * priv)2120 static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
2121 		       void *priv)
2122 {
2123 	struct kernfs_node *kn;
2124 	int ret = 0;
2125 
2126 	kn = __kernfs_create_file(parent_kn, name, 0444,
2127 				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
2128 				  &kf_mondata_ops, priv, NULL, NULL);
2129 	if (IS_ERR(kn))
2130 		return PTR_ERR(kn);
2131 
2132 	ret = rdtgroup_kn_set_ugid(kn);
2133 	if (ret) {
2134 		kernfs_remove(kn);
2135 		return ret;
2136 	}
2137 
2138 	return ret;
2139 }
2140 
2141 /*
2142  * Remove all subdirectories of mon_data of ctrl_mon groups
2143  * and monitor groups with given domain id.
2144  */
rmdir_mondata_subdir_allrdtgrp(struct rdt_resource * r,unsigned int dom_id)2145 void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id)
2146 {
2147 	struct rdtgroup *prgrp, *crgrp;
2148 	char name[32];
2149 
2150 	if (!r->mon_enabled)
2151 		return;
2152 
2153 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2154 		sprintf(name, "mon_%s_%02d", r->name, dom_id);
2155 		kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
2156 
2157 		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
2158 			kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
2159 	}
2160 }
2161 
mkdir_mondata_subdir(struct kernfs_node * parent_kn,struct rdt_domain * d,struct rdt_resource * r,struct rdtgroup * prgrp)2162 static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
2163 				struct rdt_domain *d,
2164 				struct rdt_resource *r, struct rdtgroup *prgrp)
2165 {
2166 	union mon_data_bits priv;
2167 	struct kernfs_node *kn;
2168 	struct mon_evt *mevt;
2169 	struct rmid_read rr;
2170 	char name[32];
2171 	int ret;
2172 
2173 	sprintf(name, "mon_%s_%02d", r->name, d->id);
2174 	/* create the directory */
2175 	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2176 	if (IS_ERR(kn))
2177 		return PTR_ERR(kn);
2178 
2179 	/*
2180 	 * This extra ref will be put in kernfs_remove() and guarantees
2181 	 * that kn is always accessible.
2182 	 */
2183 	kernfs_get(kn);
2184 	ret = rdtgroup_kn_set_ugid(kn);
2185 	if (ret)
2186 		goto out_destroy;
2187 
2188 	if (WARN_ON(list_empty(&r->evt_list))) {
2189 		ret = -EPERM;
2190 		goto out_destroy;
2191 	}
2192 
2193 	priv.u.rid = r->rid;
2194 	priv.u.domid = d->id;
2195 	list_for_each_entry(mevt, &r->evt_list, list) {
2196 		priv.u.evtid = mevt->evtid;
2197 		ret = mon_addfile(kn, mevt->name, priv.priv);
2198 		if (ret)
2199 			goto out_destroy;
2200 
2201 		if (is_mbm_event(mevt->evtid))
2202 			mon_event_read(&rr, d, prgrp, mevt->evtid, true);
2203 	}
2204 	kernfs_activate(kn);
2205 	return 0;
2206 
2207 out_destroy:
2208 	kernfs_remove(kn);
2209 	return ret;
2210 }
2211 
2212 /*
2213  * Add all subdirectories of mon_data for "ctrl_mon" groups
2214  * and "monitor" groups with given domain id.
2215  */
mkdir_mondata_subdir_allrdtgrp(struct rdt_resource * r,struct rdt_domain * d)2216 void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
2217 				    struct rdt_domain *d)
2218 {
2219 	struct kernfs_node *parent_kn;
2220 	struct rdtgroup *prgrp, *crgrp;
2221 	struct list_head *head;
2222 
2223 	if (!r->mon_enabled)
2224 		return;
2225 
2226 	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2227 		parent_kn = prgrp->mon.mon_data_kn;
2228 		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
2229 
2230 		head = &prgrp->mon.crdtgrp_list;
2231 		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
2232 			parent_kn = crgrp->mon.mon_data_kn;
2233 			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
2234 		}
2235 	}
2236 }
2237 
mkdir_mondata_subdir_alldom(struct kernfs_node * parent_kn,struct rdt_resource * r,struct rdtgroup * prgrp)2238 static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
2239 				       struct rdt_resource *r,
2240 				       struct rdtgroup *prgrp)
2241 {
2242 	struct rdt_domain *dom;
2243 	int ret;
2244 
2245 	list_for_each_entry(dom, &r->domains, list) {
2246 		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
2247 		if (ret)
2248 			return ret;
2249 	}
2250 
2251 	return 0;
2252 }
2253 
2254 /*
2255  * This creates a directory mon_data which contains the monitored data.
2256  *
2257  * mon_data has one directory for each domain whic are named
2258  * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
2259  * with L3 domain looks as below:
2260  * ./mon_data:
2261  * mon_L3_00
2262  * mon_L3_01
2263  * mon_L3_02
2264  * ...
2265  *
2266  * Each domain directory has one file per event:
2267  * ./mon_L3_00/:
2268  * llc_occupancy
2269  *
2270  */
mkdir_mondata_all(struct kernfs_node * parent_kn,struct rdtgroup * prgrp,struct kernfs_node ** dest_kn)2271 static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2272 			     struct rdtgroup *prgrp,
2273 			     struct kernfs_node **dest_kn)
2274 {
2275 	struct rdt_resource *r;
2276 	struct kernfs_node *kn;
2277 	int ret;
2278 
2279 	/*
2280 	 * Create the mon_data directory first.
2281 	 */
2282 	ret = mongroup_create_dir(parent_kn, NULL, "mon_data", &kn);
2283 	if (ret)
2284 		return ret;
2285 
2286 	if (dest_kn)
2287 		*dest_kn = kn;
2288 
2289 	/*
2290 	 * Create the subdirectories for each domain. Note that all events
2291 	 * in a domain like L3 are grouped into a resource whose domain is L3
2292 	 */
2293 	for_each_mon_enabled_rdt_resource(r) {
2294 		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
2295 		if (ret)
2296 			goto out_destroy;
2297 	}
2298 
2299 	return 0;
2300 
2301 out_destroy:
2302 	kernfs_remove(kn);
2303 	return ret;
2304 }
2305 
2306 /**
2307  * cbm_ensure_valid - Enforce validity on provided CBM
2308  * @_val:	Candidate CBM
2309  * @r:		RDT resource to which the CBM belongs
2310  *
2311  * The provided CBM represents all cache portions available for use. This
2312  * may be represented by a bitmap that does not consist of contiguous ones
2313  * and thus be an invalid CBM.
2314  * Here the provided CBM is forced to be a valid CBM by only considering
2315  * the first set of contiguous bits as valid and clearing all bits.
2316  * The intention here is to provide a valid default CBM with which a new
2317  * resource group is initialized. The user can follow this with a
2318  * modification to the CBM if the default does not satisfy the
2319  * requirements.
2320  */
cbm_ensure_valid(u32 * _val,struct rdt_resource * r)2321 static void cbm_ensure_valid(u32 *_val, struct rdt_resource *r)
2322 {
2323 	/*
2324 	 * Convert the u32 _val to an unsigned long required by all the bit
2325 	 * operations within this function. No more than 32 bits of this
2326 	 * converted value can be accessed because all bit operations are
2327 	 * additionally provided with cbm_len that is initialized during
2328 	 * hardware enumeration using five bits from the EAX register and
2329 	 * thus never can exceed 32 bits.
2330 	 */
2331 	unsigned long *val = (unsigned long *)_val;
2332 	unsigned int cbm_len = r->cache.cbm_len;
2333 	unsigned long first_bit, zero_bit;
2334 
2335 	if (*val == 0)
2336 		return;
2337 
2338 	first_bit = find_first_bit(val, cbm_len);
2339 	zero_bit = find_next_zero_bit(val, cbm_len, first_bit);
2340 
2341 	/* Clear any remaining bits to ensure contiguous region */
2342 	bitmap_clear(val, zero_bit, cbm_len - zero_bit);
2343 }
2344 
2345 /**
2346  * rdtgroup_init_alloc - Initialize the new RDT group's allocations
2347  *
2348  * A new RDT group is being created on an allocation capable (CAT)
2349  * supporting system. Set this group up to start off with all usable
2350  * allocations. That is, all shareable and unused bits.
2351  *
2352  * All-zero CBM is invalid. If there are no more shareable bits available
2353  * on any domain then the entire allocation will fail.
2354  */
rdtgroup_init_alloc(struct rdtgroup * rdtgrp)2355 static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
2356 {
2357 	u32 used_b = 0, unused_b = 0;
2358 	u32 closid = rdtgrp->closid;
2359 	struct rdt_resource *r;
2360 	unsigned long tmp_cbm;
2361 	enum rdtgrp_mode mode;
2362 	struct rdt_domain *d;
2363 	int i, ret;
2364 	u32 *ctrl;
2365 
2366 	for_each_alloc_enabled_rdt_resource(r) {
2367 		/*
2368 		 * Only initialize default allocations for CBM cache
2369 		 * resources
2370 		 */
2371 		if (r->rid == RDT_RESOURCE_MBA)
2372 			continue;
2373 		list_for_each_entry(d, &r->domains, list) {
2374 			d->have_new_ctrl = false;
2375 			d->new_ctrl = r->cache.shareable_bits;
2376 			used_b = r->cache.shareable_bits;
2377 			ctrl = d->ctrl_val;
2378 			for (i = 0; i < closids_supported(); i++, ctrl++) {
2379 				if (closid_allocated(i) && i != closid) {
2380 					mode = rdtgroup_mode_by_closid(i);
2381 					if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
2382 						break;
2383 					used_b |= *ctrl;
2384 					if (mode == RDT_MODE_SHAREABLE)
2385 						d->new_ctrl |= *ctrl;
2386 				}
2387 			}
2388 			if (d->plr && d->plr->cbm > 0)
2389 				used_b |= d->plr->cbm;
2390 			unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
2391 			unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
2392 			d->new_ctrl |= unused_b;
2393 			/*
2394 			 * Force the initial CBM to be valid, user can
2395 			 * modify the CBM based on system availability.
2396 			 */
2397 			cbm_ensure_valid(&d->new_ctrl, r);
2398 			/*
2399 			 * Assign the u32 CBM to an unsigned long to ensure
2400 			 * that bitmap_weight() does not access out-of-bound
2401 			 * memory.
2402 			 */
2403 			tmp_cbm = d->new_ctrl;
2404 			if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) <
2405 			    r->cache.min_cbm_bits) {
2406 				rdt_last_cmd_printf("no space on %s:%d\n",
2407 						    r->name, d->id);
2408 				return -ENOSPC;
2409 			}
2410 			d->have_new_ctrl = true;
2411 		}
2412 	}
2413 
2414 	for_each_alloc_enabled_rdt_resource(r) {
2415 		/*
2416 		 * Only initialize default allocations for CBM cache
2417 		 * resources
2418 		 */
2419 		if (r->rid == RDT_RESOURCE_MBA)
2420 			continue;
2421 		ret = update_domains(r, rdtgrp->closid);
2422 		if (ret < 0) {
2423 			rdt_last_cmd_puts("failed to initialize allocations\n");
2424 			return ret;
2425 		}
2426 		rdtgrp->mode = RDT_MODE_SHAREABLE;
2427 	}
2428 
2429 	return 0;
2430 }
2431 
/*
 * Common part of creating a resource group directory.
 *
 * @parent_kn:	kernfs directory the new directory is created in
 * @prgrp_kn:	kernfs node of the parent resource group; it is locked here
 *		via rdtgroup_kn_lock_live()
 * @name:	name of the new directory
 * @mode:	mode of the new directory
 * @rtype:	type of the group to create
 * @r:		receives the newly allocated group; only meaningful when 0
 *		is returned
 *
 * Allocates the group, creates its kernfs directory, fills in the files for
 * @rtype and, on monitoring capable systems, allocates an RMID and creates
 * the mon_data hierarchy.
 *
 * Return: 0 on success, in which case @prgrp_kn is still locked and the
 * caller has to unlock it. On failure a negative errno is returned,
 * everything set up so far is undone and @prgrp_kn is unlocked.
 */
static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
			     struct kernfs_node *prgrp_kn,
			     const char *name, umode_t mode,
			     enum rdt_group_type rtype, struct rdtgroup **r)
{
	struct rdtgroup *prdtgrp, *rdtgrp;
	struct kernfs_node *kn;
	uint files = 0;
	int ret;

	prdtgrp = rdtgroup_kn_lock_live(prgrp_kn);
	rdt_last_cmd_clear();
	if (!prdtgrp) {
		ret = -ENODEV;
		rdt_last_cmd_puts("directory was removed\n");
		goto out_unlock;
	}

	if (rtype == RDTMON_GROUP &&
	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
		ret = -EINVAL;
		rdt_last_cmd_puts("pseudo-locking in progress\n");
		goto out_unlock;
	}

	/* allocate the rdtgroup. */
	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
	if (!rdtgrp) {
		ret = -ENOSPC;
		rdt_last_cmd_puts("kernel out of memory\n");
		goto out_unlock;
	}
	*r = rdtgrp;
	rdtgrp->mon.parent = prdtgrp;
	rdtgrp->type = rtype;
	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);

	/* kernfs creates the directory for rdtgrp */
	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
	if (IS_ERR(kn)) {
		ret = PTR_ERR(kn);
		rdt_last_cmd_puts("kernfs create error\n");
		goto out_free_rgrp;
	}
	rdtgrp->kn = kn;

	ret = rdtgroup_kn_set_ugid(kn);
	if (ret) {
		rdt_last_cmd_puts("kernfs perm error\n");
		goto out_destroy;
	}

	files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
	ret = rdtgroup_add_files(kn, files);
	if (ret) {
		rdt_last_cmd_puts("kernfs fill error\n");
		goto out_destroy;
	}

	if (rdt_mon_capable) {
		ret = alloc_rmid();
		if (ret < 0) {
			rdt_last_cmd_puts("out of RMIDs\n");
			goto out_destroy;
		}
		rdtgrp->mon.rmid = ret;

		ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
		if (ret) {
			rdt_last_cmd_puts("kernfs subdir error\n");
			goto out_idfree;
		}
	}

	/*
	 * kernfs_remove() will drop the reference count on "kn" which
	 * will free it. But we still need it to stick around for the
	 * rdtgroup_kn_unlock(kn) call made when the group is deleted.
	 * Take one extra reference, which will be dropped inside
	 * rdtgroup_kn_unlock().
	 *
	 * The reference is only taken now that nothing can fail anymore:
	 * the error paths below just call kernfs_remove() and would
	 * otherwise leave the extra reference, and with it the node,
	 * behind.
	 */
	kernfs_get(kn);
	kernfs_activate(kn);

	/*
	 * The caller unlocks the prgrp_kn upon success.
	 */
	return 0;

out_idfree:
	free_rmid(rdtgrp->mon.rmid);
out_destroy:
	kernfs_remove(rdtgrp->kn);
out_free_rgrp:
	kfree(rdtgrp);
out_unlock:
	rdtgroup_kn_unlock(prgrp_kn);
	return ret;
}
2531 
mkdir_rdt_prepare_clean(struct rdtgroup * rgrp)2532 static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
2533 {
2534 	kernfs_remove(rgrp->kn);
2535 	free_rmid(rgrp->mon.rmid);
2536 	kfree(rgrp);
2537 }
2538 
2539 /*
2540  * Create a monitor group under "mon_groups" directory of a control
2541  * and monitor group(ctrl_mon). This is a resource group
2542  * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
2543  */
rdtgroup_mkdir_mon(struct kernfs_node * parent_kn,struct kernfs_node * prgrp_kn,const char * name,umode_t mode)2544 static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
2545 			      struct kernfs_node *prgrp_kn,
2546 			      const char *name,
2547 			      umode_t mode)
2548 {
2549 	struct rdtgroup *rdtgrp, *prgrp;
2550 	int ret;
2551 
2552 	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTMON_GROUP,
2553 				&rdtgrp);
2554 	if (ret)
2555 		return ret;
2556 
2557 	prgrp = rdtgrp->mon.parent;
2558 	rdtgrp->closid = prgrp->closid;
2559 
2560 	/*
2561 	 * Add the rdtgrp to the list of rdtgrps the parent
2562 	 * ctrl_mon group has to track.
2563 	 */
2564 	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
2565 
2566 	rdtgroup_kn_unlock(prgrp_kn);
2567 	return ret;
2568 }
2569 
2570 /*
2571  * These are rdtgroups created under the root directory. Can be used
2572  * to allocate and monitor resources.
2573  */
rdtgroup_mkdir_ctrl_mon(struct kernfs_node * parent_kn,struct kernfs_node * prgrp_kn,const char * name,umode_t mode)2574 static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
2575 				   struct kernfs_node *prgrp_kn,
2576 				   const char *name, umode_t mode)
2577 {
2578 	struct rdtgroup *rdtgrp;
2579 	struct kernfs_node *kn;
2580 	u32 closid;
2581 	int ret;
2582 
2583 	ret = mkdir_rdt_prepare(parent_kn, prgrp_kn, name, mode, RDTCTRL_GROUP,
2584 				&rdtgrp);
2585 	if (ret)
2586 		return ret;
2587 
2588 	kn = rdtgrp->kn;
2589 	ret = closid_alloc();
2590 	if (ret < 0) {
2591 		rdt_last_cmd_puts("out of CLOSIDs\n");
2592 		goto out_common_fail;
2593 	}
2594 	closid = ret;
2595 	ret = 0;
2596 
2597 	rdtgrp->closid = closid;
2598 	ret = rdtgroup_init_alloc(rdtgrp);
2599 	if (ret < 0)
2600 		goto out_id_free;
2601 
2602 	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
2603 
2604 	if (rdt_mon_capable) {
2605 		/*
2606 		 * Create an empty mon_groups directory to hold the subset
2607 		 * of tasks and cpus to monitor.
2608 		 */
2609 		ret = mongroup_create_dir(kn, NULL, "mon_groups", NULL);
2610 		if (ret) {
2611 			rdt_last_cmd_puts("kernfs subdir error\n");
2612 			goto out_del_list;
2613 		}
2614 	}
2615 
2616 	goto out_unlock;
2617 
2618 out_del_list:
2619 	list_del(&rdtgrp->rdtgroup_list);
2620 out_id_free:
2621 	closid_free(closid);
2622 out_common_fail:
2623 	mkdir_rdt_prepare_clean(rdtgrp);
2624 out_unlock:
2625 	rdtgroup_kn_unlock(prgrp_kn);
2626 	return ret;
2627 }
2628 
2629 /*
2630  * We allow creating mon groups only with in a directory called "mon_groups"
2631  * which is present in every ctrl_mon group. Check if this is a valid
2632  * "mon_groups" directory.
2633  *
2634  * 1. The directory should be named "mon_groups".
2635  * 2. The mon group itself should "not" be named "mon_groups".
2636  *   This makes sure "mon_groups" directory always has a ctrl_mon group
2637  *   as parent.
2638  */
is_mon_groups(struct kernfs_node * kn,const char * name)2639 static bool is_mon_groups(struct kernfs_node *kn, const char *name)
2640 {
2641 	return (!strcmp(kn->name, "mon_groups") &&
2642 		strcmp(name, "mon_groups"));
2643 }
2644 
/*
 * mkdir handler of the resctrl filesystem.
 *
 * Return: -EINVAL for a name containing '\n', the result of creating a
 * ctrl_mon or monitor group where one is allowed, -EPERM everywhere else.
 */
static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
			  umode_t mode)
{
	int ret = -EPERM;

	if (strchr(name, '\n')) {
		/* A '\n' in the name would lead to an unparsable situation */
		ret = -EINVAL;
	} else if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn) {
		/*
		 * Directly below root with RDT allocation supported: add a
		 * control and monitoring subdirectory.
		 */
		ret = rdtgroup_mkdir_ctrl_mon(parent_kn, parent_kn, name, mode);
	} else if (rdt_mon_capable && is_mon_groups(parent_kn, name)) {
		/*
		 * Inside a valid "mon_groups" directory with RDT monitoring
		 * supported: add a monitoring subdirectory.
		 */
		ret = rdtgroup_mkdir_mon(parent_kn, parent_kn->parent, name, mode);
	}

	return ret;
}
2669 
/*
 * Remove monitor group @rdtgrp.
 *
 * @kn:		kernfs node the rmdir was issued on
 * @rdtgrp:	monitor group to remove
 * @tmpmask:	scratch cpumask supplied by the caller; collects the CPUs
 *		whose MSR has to be updated
 *
 * The tasks and CPUs of the group go back to the parent ctrl_mon group,
 * the group is marked RDT_DELETED, its RMID is freed, it is unlinked from
 * the parent's child list and its kernfs directory is removed.
 *
 * Return: always 0.
 */
static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
			      cpumask_var_t tmpmask)
{
	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
	int cpu;

	/* Give any tasks back to the parent group */
	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);

	/* Update per cpu rmid of the moved CPUs first */
	for_each_cpu(cpu, &rdtgrp->cpu_mask)
		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
	/*
	 * Update the MSR on moved CPUs and CPUs which have moved
	 * task running on them.
	 */
	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
	update_closid_rmid(tmpmask, NULL);

	rdtgrp->flags = RDT_DELETED;
	free_rmid(rdtgrp->mon.rmid);

	/*
	 * Remove the rdtgrp from the parent ctrl_mon group's list
	 */
	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
	list_del(&rdtgrp->mon.crdtgrp_list);

	/*
	 * No kernfs_get() here. The extra reference taken when the group
	 * was created in mkdir_rdt_prepare() keeps the node around until
	 * the group is freed in rdtgroup_kn_unlock(). A second reference
	 * taken at this point would have no matching kernfs_put() and the
	 * node would never be freed.
	 */
	kernfs_remove(rdtgrp->kn);

	return 0;
}
2707 
rdtgroup_ctrl_remove(struct kernfs_node * kn,struct rdtgroup * rdtgrp)2708 static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
2709 				struct rdtgroup *rdtgrp)
2710 {
2711 	rdtgrp->flags = RDT_DELETED;
2712 	list_del(&rdtgrp->rdtgroup_list);
2713 
2714 	/*
2715 	 * one extra hold on this, will drop when we kfree(rdtgrp)
2716 	 * in rdtgroup_kn_unlock()
2717 	 */
2718 	kernfs_get(kn);
2719 	kernfs_remove(rdtgrp->kn);
2720 	return 0;
2721 }
2722 
/*
 * Remove control group @rdtgrp.
 *
 * The tasks and CPUs of the group are given back to the default group, the
 * per cpu defaults and the MSRs of the affected CPUs are updated, the
 * CLOSID and RMID of the group are freed, all child monitor groups are
 * freed and finally the group itself is removed via rdtgroup_ctrl_remove().
 * @tmpmask is a scratch cpumask supplied by the caller.
 *
 * Return: always 0.
 */
static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
			       cpumask_var_t tmpmask)
{
	int cpu;

	/* Give any tasks back to the default group */
	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);

	/* Give any CPUs back to the default group */
	cpumask_or(&rdtgroup_default.cpu_mask,
		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);

	/* Update per cpu closid and rmid of the moved CPUs first */
	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
	}

	/*
	 * Update the MSR on moved CPUs and CPUs which have moved
	 * task running on them.
	 */
	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
	update_closid_rmid(tmpmask, NULL);

	/* The ids of the group itself are no longer in use */
	closid_free(rdtgrp->closid);
	free_rmid(rdtgrp->mon.rmid);

	/*
	 * Free all the child monitor group rmids.
	 */
	free_all_child_rdtgrp(rdtgrp);

	rdtgroup_ctrl_remove(kn, rdtgrp);

	return 0;
}
2760 
rdtgroup_rmdir(struct kernfs_node * kn)2761 static int rdtgroup_rmdir(struct kernfs_node *kn)
2762 {
2763 	struct kernfs_node *parent_kn = kn->parent;
2764 	struct rdtgroup *rdtgrp;
2765 	cpumask_var_t tmpmask;
2766 	int ret = 0;
2767 
2768 	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
2769 		return -ENOMEM;
2770 
2771 	rdtgrp = rdtgroup_kn_lock_live(kn);
2772 	if (!rdtgrp) {
2773 		ret = -EPERM;
2774 		goto out;
2775 	}
2776 
2777 	/*
2778 	 * If the rdtgroup is a ctrl_mon group and parent directory
2779 	 * is the root directory, remove the ctrl_mon group.
2780 	 *
2781 	 * If the rdtgroup is a mon group and parent directory
2782 	 * is a valid "mon_groups" directory, remove the mon group.
2783 	 */
2784 	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) {
2785 		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2786 		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
2787 			ret = rdtgroup_ctrl_remove(kn, rdtgrp);
2788 		} else {
2789 			ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
2790 		}
2791 	} else if (rdtgrp->type == RDTMON_GROUP &&
2792 		 is_mon_groups(parent_kn, kn->name)) {
2793 		ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
2794 	} else {
2795 		ret = -EPERM;
2796 	}
2797 
2798 out:
2799 	rdtgroup_kn_unlock(kn);
2800 	free_cpumask_var(tmpmask);
2801 	return ret;
2802 }
2803 
rdtgroup_show_options(struct seq_file * seq,struct kernfs_root * kf)2804 static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
2805 {
2806 	if (rdt_resources_all[RDT_RESOURCE_L3DATA].alloc_enabled)
2807 		seq_puts(seq, ",cdp");
2808 	return 0;
2809 }
2810 
/*
 * kernfs syscall operations of the resctrl root: directory creation and
 * removal plus the mount options shown for the filesystem.
 */
static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
	.mkdir		= rdtgroup_mkdir,
	.rmdir		= rdtgroup_rmdir,
	.show_options	= rdtgroup_show_options,
};
2816 
rdtgroup_setup_root(void)2817 static int __init rdtgroup_setup_root(void)
2818 {
2819 	int ret;
2820 
2821 	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
2822 				      KERNFS_ROOT_CREATE_DEACTIVATED |
2823 				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
2824 				      &rdtgroup_default);
2825 	if (IS_ERR(rdt_root))
2826 		return PTR_ERR(rdt_root);
2827 
2828 	mutex_lock(&rdtgroup_mutex);
2829 
2830 	rdtgroup_default.closid = 0;
2831 	rdtgroup_default.mon.rmid = 0;
2832 	rdtgroup_default.type = RDTCTRL_GROUP;
2833 	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
2834 
2835 	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
2836 
2837 	ret = rdtgroup_add_files(rdt_root->kn, RF_CTRL_BASE);
2838 	if (ret) {
2839 		kernfs_destroy_root(rdt_root);
2840 		goto out;
2841 	}
2842 
2843 	rdtgroup_default.kn = rdt_root->kn;
2844 	kernfs_activate(rdtgroup_default.kn);
2845 
2846 out:
2847 	mutex_unlock(&rdtgroup_mutex);
2848 
2849 	return ret;
2850 }
2851 
/*
 * rdtgroup_init - rdtgroup initialization
 *
 * Setup resctrl file system including set up root, create mount point,
 * register rdtgroup filesystem, and initialize files under root directory.
 * The "resctrl" debugfs directory is created here as well.
 *
 * On failure the steps already carried out are undone in reverse order.
 *
 * Return: 0 on success or -errno
 */
int __init rdtgroup_init(void)
{
	int ret = 0;

	/* Back the last command status with its static buffer */
	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
		     sizeof(last_cmd_status_buf));

	ret = rdtgroup_setup_root();
	if (ret)
		return ret;

	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
	if (ret)
		goto cleanup_root;

	ret = register_filesystem(&rdt_fs_type);
	if (ret)
		goto cleanup_mountpoint;

	/*
	 * Adding the resctrl debugfs directory here may not be ideal since
	 * it would let the resctrl debugfs directory appear on the debugfs
	 * filesystem before the resctrl filesystem is mounted.
	 * It may also be ok since that would enable debugging of RDT before
	 * resctrl is mounted.
	 * The reason why the debugfs directory is created here and not in
	 * rdt_mount() is that rdt_mount() takes rdtgroup_mutex and the
	 * debugfs directory creation additionally takes
	 * &sb->s_type->i_mutex_key (the lockdep class of inode->i_rwsem).
	 * Other filesystem interactions (eg. SyS_getdents) have the lock
	 * ordering:
	 * &sb->s_type->i_mutex_key --> &mm->mmap_sem
	 * During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex
	 * is taken, thus creating the dependency:
	 * &mm->mmap_sem --> rdtgroup_mutex
	 * which can cause issues considering the other two lock
	 * dependencies.
	 * By creating the debugfs directory here we avoid a dependency
	 * that may cause deadlock (even though file operations cannot
	 * occur until the filesystem is mounted, but I do not know how to
	 * tell lockdep that).
	 */
	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);

	return 0;

cleanup_mountpoint:
	sysfs_remove_mount_point(fs_kobj, "resctrl");
cleanup_root:
	kernfs_destroy_root(rdt_root);

	return ret;
}
2911 
/*
 * rdtgroup_exit - undo rdtgroup_init()
 *
 * Removes the "resctrl" debugfs directory, unregisters the filesystem,
 * removes the sysfs mount point and destroys the kernfs root.
 */
void __exit rdtgroup_exit(void)
{
	debugfs_remove_recursive(debugfs_resctrl);
	unregister_filesystem(&rdt_fs_type);
	sysfs_remove_mount_point(fs_kobj, "resctrl");
	kernfs_destroy_root(rdt_root);
}
2919