1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *	Routines to identify caches on Intel CPU.
4  *
5  *	Changes:
6  *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
7  *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
8  *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
9  */
10 
11 #include <linux/slab.h>
12 #include <linux/cacheinfo.h>
13 #include <linux/cpu.h>
14 #include <linux/sched.h>
15 #include <linux/capability.h>
16 #include <linux/sysfs.h>
17 #include <linux/pci.h>
18 
19 #include <asm/cpufeature.h>
20 #include <asm/cacheinfo.h>
21 #include <asm/amd_nb.h>
22 #include <asm/smp.h>
23 
24 #include "cpu.h"
25 
/* Kinds of cache a CPUID(2) descriptor byte can stand for. */
enum {
	LVL_1_INST	= 1,
	LVL_1_DATA	= 2,
	LVL_2		= 3,
	LVL_3		= 4,
	LVL_TRACE	= 5,
};

/* One CPUID(2) descriptor byte together with the cache it describes. */
struct _cache_table {
	unsigned char descriptor;	/* descriptor byte value */
	char cache_type;		/* one of the LVL_* constants */
	short size;			/* size, in the KB units MB() produces */
};

/* Express a megabyte count in the table's KB units. */
#define MB(x)	((x) * 1024)

/*
 * The CPUID(2) cache descriptors we care about (no TLB entries).
 * The list is terminated by an all-zero entry.
 */
static const struct _cache_table cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way, 32B line */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way, 32B line */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way, 64B line */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2-way, 32B line */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way, 32B line */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way, 64B line */
	{ 0x0e, LVL_1_DATA, 24 },	/* 6-way, 64B line */
	{ 0x21, LVL_2,      256 },	/* 8-way, 64B line */
	{ 0x22, LVL_3,      512 },	/* 4-way, sectored, 64B line */
	{ 0x23, LVL_3,      MB(1) },	/* 8-way, sectored, 64B line */
	{ 0x25, LVL_3,      MB(2) },	/* 8-way, sectored, 64B line */
	{ 0x29, LVL_3,      MB(4) },	/* 8-way, sectored, 64B line */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way, 64B line */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way, 64B line */
	{ 0x39, LVL_2,      128 },	/* 4-way, sectored, 64B line */
	{ 0x3a, LVL_2,      192 },	/* 6-way, sectored, 64B line */
	{ 0x3b, LVL_2,      128 },	/* 2-way, sectored, 64B line */
	{ 0x3c, LVL_2,      256 },	/* 4-way, sectored, 64B line */
	{ 0x3d, LVL_2,      384 },	/* 6-way, sectored, 64B line */
	{ 0x3e, LVL_2,      512 },	/* 4-way, sectored, 64B line */
	{ 0x3f, LVL_2,      256 },	/* 2-way, 64B line */
	{ 0x41, LVL_2,      128 },	/* 4-way, 32B line */
	{ 0x42, LVL_2,      256 },	/* 4-way, 32B line */
	{ 0x43, LVL_2,      512 },	/* 4-way, 32B line */
	{ 0x44, LVL_2,      MB(1) },	/* 4-way, 32B line */
	{ 0x45, LVL_2,      MB(2) },	/* 4-way, 32B line */
	{ 0x46, LVL_3,      MB(4) },	/* 4-way, 64B line */
	{ 0x47, LVL_3,      MB(8) },	/* 8-way, 64B line */
	{ 0x48, LVL_2,      MB(3) },	/* 12-way, 64B line */
	{ 0x49, LVL_3,      MB(4) },	/* 16-way, 64B line */
	{ 0x4a, LVL_3,      MB(6) },	/* 12-way, 64B line */
	{ 0x4b, LVL_3,      MB(8) },	/* 16-way, 64B line */
	{ 0x4c, LVL_3,      MB(12) },	/* 12-way, 64B line */
	{ 0x4d, LVL_3,      MB(16) },	/* 16-way, 64B line */
	{ 0x4e, LVL_2,      MB(6) },	/* 24-way, 64B line */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way, sectored, 64B line */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way, sectored, 64B line */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way, sectored, 64B line */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way, sectored, 64B line */
	{ 0x70, LVL_TRACE,  12 },	/* 8-way */
	{ 0x71, LVL_TRACE,  16 },	/* 8-way */
	{ 0x72, LVL_TRACE,  32 },	/* 8-way */
	{ 0x73, LVL_TRACE,  64 },	/* 8-way */
	{ 0x78, LVL_2,      MB(1) },	/* 4-way, 64B line */
	{ 0x79, LVL_2,      128 },	/* 8-way, sectored, 64B line */
	{ 0x7a, LVL_2,      256 },	/* 8-way, sectored, 64B line */
	{ 0x7b, LVL_2,      512 },	/* 8-way, sectored, 64B line */
	{ 0x7c, LVL_2,      MB(1) },	/* 8-way, sectored, 64B line */
	{ 0x7d, LVL_2,      MB(2) },	/* 8-way, 64B line */
	{ 0x7f, LVL_2,      512 },	/* 2-way, 64B line */
	{ 0x80, LVL_2,      512 },	/* 8-way, 64B line */
	{ 0x82, LVL_2,      256 },	/* 8-way, 32B line */
	{ 0x83, LVL_2,      512 },	/* 8-way, 32B line */
	{ 0x84, LVL_2,      MB(1) },	/* 8-way, 32B line */
	{ 0x85, LVL_2,      MB(2) },	/* 8-way, 32B line */
	{ 0x86, LVL_2,      512 },	/* 4-way, 64B line */
	{ 0x87, LVL_2,      MB(1) },	/* 8-way, 64B line */
	{ 0xd0, LVL_3,      512 },	/* 4-way, 64B line */
	{ 0xd1, LVL_3,      MB(1) },	/* 4-way, 64B line */
	{ 0xd2, LVL_3,      MB(2) },	/* 4-way, 64B line */
	{ 0xd6, LVL_3,      MB(1) },	/* 8-way, 64B line */
	{ 0xd7, LVL_3,      MB(2) },	/* 8-way, 64B line */
	{ 0xd8, LVL_3,      MB(4) },	/* 12-way, 64B line */
	{ 0xdc, LVL_3,      MB(2) },	/* 12-way, 64B line */
	{ 0xdd, LVL_3,      MB(4) },	/* 12-way, 64B line */
	{ 0xde, LVL_3,      MB(8) },	/* 12-way, 64B line */
	{ 0xe2, LVL_3,      MB(2) },	/* 16-way, 64B line */
	{ 0xe3, LVL_3,      MB(4) },	/* 16-way, 64B line */
	{ 0xe4, LVL_3,      MB(8) },	/* 16-way, 64B line */
	{ 0xea, LVL_3,      MB(12) },	/* 24-way, 64B line */
	{ 0xeb, LVL_3,      MB(18) },	/* 24-way, 64B line */
	{ 0xec, LVL_3,      MB(24) },	/* 24-way, 64B line */
	{ 0x00, 0, 0}
};
119 
120 
/*
 * Cache type as carried in the "type" field of union _cpuid4_leaf_eax.
 * CTYPE_NULL marks the end of the leaf enumeration.
 */
enum _cache_type {
	CTYPE_NULL,		/* 0 */
	CTYPE_DATA,		/* 1 */
	CTYPE_INST,		/* 2 */
	CTYPE_UNIFIED,		/* 3 */
};
127 
128 union _cpuid4_leaf_eax {
129 	struct {
130 		enum _cache_type	type:5;
131 		unsigned int		level:3;
132 		unsigned int		is_self_initializing:1;
133 		unsigned int		is_fully_associative:1;
134 		unsigned int		reserved:4;
135 		unsigned int		num_threads_sharing:12;
136 		unsigned int		num_cores_on_die:6;
137 	} split;
138 	u32 full;
139 };
140 
141 union _cpuid4_leaf_ebx {
142 	struct {
143 		unsigned int		coherency_line_size:12;
144 		unsigned int		physical_line_partition:10;
145 		unsigned int		ways_of_associativity:10;
146 	} split;
147 	u32 full;
148 };
149 
150 union _cpuid4_leaf_ecx {
151 	struct {
152 		unsigned int		number_of_sets:32;
153 	} split;
154 	u32 full;
155 };
156 
157 struct _cpuid4_info_regs {
158 	union _cpuid4_leaf_eax eax;
159 	union _cpuid4_leaf_ebx ebx;
160 	union _cpuid4_leaf_ecx ecx;
161 	unsigned int id;
162 	unsigned long size;
163 	struct amd_northbridge *nb;
164 };
165 
166 static unsigned short num_cache_leaves;
167 
168 /* AMD doesn't have CPUID4. Emulate it here to report the same
169    information to the user.  This makes some assumptions about the machine:
170    L2 not shared, no SMT etc. that is currently true on AMD CPUs.
171 
172    In theory the TLBs could be reported as fake type (they are in "dummy").
173    Maybe later */
/* ECX (L1d) or EDX (L1i) of CPUID(0x80000005), as read by amd_cpuid4(). */
union l1_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:8;
		unsigned int assoc:8;
		unsigned int size_in_kb:8;
	};
	unsigned int val;
};

/* ECX of CPUID(0x80000006), as read by amd_cpuid4(). */
union l2_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded; decode via assocs[] */
		unsigned int size_in_kb:16;
	};
	unsigned int val;
};

/* EDX of CPUID(0x80000006), as read by amd_cpuid4(). */
union l3_cache {
	struct {
		unsigned int line_size:8;
		unsigned int lines_per_tag:4;
		unsigned int assoc:4;		/* encoded; decode via assocs[] */
		unsigned int res:2;
		unsigned int size_encoded:14;	/* in units of 512 KB */
	};
	unsigned int val;
};
204 
/*
 * Decode the 4-bit L2/L3 associativity code of CPUID(0x80000006) into a
 * number of ways.  Codes without an entry (0: cache disabled, 7 and 9:
 * not a way count) decode to 0.
 *
 * The 3-way (0x3) and 6-way (0x5) encodings defined by the AMD manual were
 * missing and therefore decoded as 0 ways.
 */
static const unsigned short assocs[] = {
	[1] = 1,
	[2] = 2,
	[3] = 3,
	[4] = 4,
	[5] = 6,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};
218 
/*
 * Cache level and cache type reported by amd_cpuid4() for each emulated
 * leaf index (0: L1d, 1: L1i, 2: L2, 3: L3).  The type values are the
 * numeric values of enum _cache_type.
 */
static const unsigned char levels[] = {
	[0] = 1,
	[1] = 1,
	[2] = 2,
	[3] = 3,
};
static const unsigned char types[] = {
	[0] = 1,
	[1] = 2,
	[2] = 3,
	[3] = 3,
};
221 
222 static const enum cache_type cache_type_map[] = {
223 	[CTYPE_NULL] = CACHE_TYPE_NOCACHE,
224 	[CTYPE_DATA] = CACHE_TYPE_DATA,
225 	[CTYPE_INST] = CACHE_TYPE_INST,
226 	[CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED,
227 };
228 
229 static void
amd_cpuid4(int leaf,union _cpuid4_leaf_eax * eax,union _cpuid4_leaf_ebx * ebx,union _cpuid4_leaf_ecx * ecx)230 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
231 		     union _cpuid4_leaf_ebx *ebx,
232 		     union _cpuid4_leaf_ecx *ecx)
233 {
234 	unsigned dummy;
235 	unsigned line_size, lines_per_tag, assoc, size_in_kb;
236 	union l1_cache l1i, l1d;
237 	union l2_cache l2;
238 	union l3_cache l3;
239 	union l1_cache *l1 = &l1d;
240 
241 	eax->full = 0;
242 	ebx->full = 0;
243 	ecx->full = 0;
244 
245 	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
246 	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
247 
248 	switch (leaf) {
249 	case 1:
250 		l1 = &l1i;
251 		fallthrough;
252 	case 0:
253 		if (!l1->val)
254 			return;
255 		assoc = assocs[l1->assoc];
256 		line_size = l1->line_size;
257 		lines_per_tag = l1->lines_per_tag;
258 		size_in_kb = l1->size_in_kb;
259 		break;
260 	case 2:
261 		if (!l2.val)
262 			return;
263 		assoc = assocs[l2.assoc];
264 		line_size = l2.line_size;
265 		lines_per_tag = l2.lines_per_tag;
266 		/* cpu_data has errata corrections for K7 applied */
267 		size_in_kb = __this_cpu_read(cpu_info.x86_cache_size);
268 		break;
269 	case 3:
270 		if (!l3.val)
271 			return;
272 		assoc = assocs[l3.assoc];
273 		line_size = l3.line_size;
274 		lines_per_tag = l3.lines_per_tag;
275 		size_in_kb = l3.size_encoded * 512;
276 		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
277 			size_in_kb = size_in_kb >> 1;
278 			assoc = assoc >> 1;
279 		}
280 		break;
281 	default:
282 		return;
283 	}
284 
285 	eax->split.is_self_initializing = 1;
286 	eax->split.type = types[leaf];
287 	eax->split.level = levels[leaf];
288 	eax->split.num_threads_sharing = 0;
289 	eax->split.num_cores_on_die = __this_cpu_read(cpu_info.x86_max_cores) - 1;
290 
291 
292 	if (assoc == 0xffff)
293 		eax->split.is_fully_associative = 1;
294 	ebx->split.coherency_line_size = line_size - 1;
295 	ebx->split.ways_of_associativity = assoc - 1;
296 	ebx->split.physical_line_partition = lines_per_tag - 1;
297 	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
298 		(ebx->split.ways_of_associativity + 1) - 1;
299 }
300 
301 #if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS)
302 
303 /*
304  * L3 cache descriptors
305  */
amd_calc_l3_indices(struct amd_northbridge * nb)306 static void amd_calc_l3_indices(struct amd_northbridge *nb)
307 {
308 	struct amd_l3_cache *l3 = &nb->l3_cache;
309 	unsigned int sc0, sc1, sc2, sc3;
310 	u32 val = 0;
311 
312 	pci_read_config_dword(nb->misc, 0x1C4, &val);
313 
314 	/* calculate subcache sizes */
315 	l3->subcaches[0] = sc0 = !(val & BIT(0));
316 	l3->subcaches[1] = sc1 = !(val & BIT(4));
317 
318 	if (boot_cpu_data.x86 == 0x15) {
319 		l3->subcaches[0] = sc0 += !(val & BIT(1));
320 		l3->subcaches[1] = sc1 += !(val & BIT(5));
321 	}
322 
323 	l3->subcaches[2] = sc2 = !(val & BIT(8))  + !(val & BIT(9));
324 	l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
325 
326 	l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1;
327 }
328 
329 /*
330  * check whether a slot used for disabling an L3 index is occupied.
331  * @l3: L3 cache descriptor
332  * @slot: slot number (0..1)
333  *
334  * @returns: the disabled index if used or negative value if slot free.
335  */
amd_get_l3_disable_slot(struct amd_northbridge * nb,unsigned slot)336 static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot)
337 {
338 	unsigned int reg = 0;
339 
340 	pci_read_config_dword(nb->misc, 0x1BC + slot * 4, ®);
341 
342 	/* check whether this slot is activated already */
343 	if (reg & (3UL << 30))
344 		return reg & 0xfff;
345 
346 	return -1;
347 }
348 
show_cache_disable(struct cacheinfo * this_leaf,char * buf,unsigned int slot)349 static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf,
350 				  unsigned int slot)
351 {
352 	int index;
353 	struct amd_northbridge *nb = this_leaf->priv;
354 
355 	index = amd_get_l3_disable_slot(nb, slot);
356 	if (index >= 0)
357 		return sprintf(buf, "%d\n", index);
358 
359 	return sprintf(buf, "FREE\n");
360 }
361 
362 #define SHOW_CACHE_DISABLE(slot)					\
363 static ssize_t								\
364 cache_disable_##slot##_show(struct device *dev,				\
365 			    struct device_attribute *attr, char *buf)	\
366 {									\
367 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
368 	return show_cache_disable(this_leaf, buf, slot);		\
369 }
370 SHOW_CACHE_DISABLE(0)
371 SHOW_CACHE_DISABLE(1)
372 
amd_l3_disable_index(struct amd_northbridge * nb,int cpu,unsigned slot,unsigned long idx)373 static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu,
374 				 unsigned slot, unsigned long idx)
375 {
376 	int i;
377 
378 	idx |= BIT(30);
379 
380 	/*
381 	 *  disable index in all 4 subcaches
382 	 */
383 	for (i = 0; i < 4; i++) {
384 		u32 reg = idx | (i << 20);
385 
386 		if (!nb->l3_cache.subcaches[i])
387 			continue;
388 
389 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
390 
391 		/*
392 		 * We need to WBINVD on a core on the node containing the L3
393 		 * cache which indices we disable therefore a simple wbinvd()
394 		 * is not sufficient.
395 		 */
396 		wbinvd_on_cpu(cpu);
397 
398 		reg |= BIT(31);
399 		pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg);
400 	}
401 }
402 
403 /*
404  * disable a L3 cache index by using a disable-slot
405  *
406  * @l3:    L3 cache descriptor
407  * @cpu:   A CPU on the node containing the L3 cache
408  * @slot:  slot number (0..1)
409  * @index: index to disable
410  *
411  * @return: 0 on success, error status on failure
412  */
amd_set_l3_disable_slot(struct amd_northbridge * nb,int cpu,unsigned slot,unsigned long index)413 static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu,
414 			    unsigned slot, unsigned long index)
415 {
416 	int ret = 0;
417 
418 	/*  check if @slot is already used or the index is already disabled */
419 	ret = amd_get_l3_disable_slot(nb, slot);
420 	if (ret >= 0)
421 		return -EEXIST;
422 
423 	if (index > nb->l3_cache.indices)
424 		return -EINVAL;
425 
426 	/* check whether the other slot has disabled the same index already */
427 	if (index == amd_get_l3_disable_slot(nb, !slot))
428 		return -EEXIST;
429 
430 	amd_l3_disable_index(nb, cpu, slot, index);
431 
432 	return 0;
433 }
434 
store_cache_disable(struct cacheinfo * this_leaf,const char * buf,size_t count,unsigned int slot)435 static ssize_t store_cache_disable(struct cacheinfo *this_leaf,
436 				   const char *buf, size_t count,
437 				   unsigned int slot)
438 {
439 	unsigned long val = 0;
440 	int cpu, err = 0;
441 	struct amd_northbridge *nb = this_leaf->priv;
442 
443 	if (!capable(CAP_SYS_ADMIN))
444 		return -EPERM;
445 
446 	cpu = cpumask_first(&this_leaf->shared_cpu_map);
447 
448 	if (kstrtoul(buf, 10, &val) < 0)
449 		return -EINVAL;
450 
451 	err = amd_set_l3_disable_slot(nb, cpu, slot, val);
452 	if (err) {
453 		if (err == -EEXIST)
454 			pr_warn("L3 slot %d in use/index already disabled!\n",
455 				   slot);
456 		return err;
457 	}
458 	return count;
459 }
460 
461 #define STORE_CACHE_DISABLE(slot)					\
462 static ssize_t								\
463 cache_disable_##slot##_store(struct device *dev,			\
464 			     struct device_attribute *attr,		\
465 			     const char *buf, size_t count)		\
466 {									\
467 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);		\
468 	return store_cache_disable(this_leaf, buf, count, slot);	\
469 }
470 STORE_CACHE_DISABLE(0)
471 STORE_CACHE_DISABLE(1)
472 
subcaches_show(struct device * dev,struct device_attribute * attr,char * buf)473 static ssize_t subcaches_show(struct device *dev,
474 			      struct device_attribute *attr, char *buf)
475 {
476 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
477 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
478 
479 	return sprintf(buf, "%x\n", amd_get_subcaches(cpu));
480 }
481 
subcaches_store(struct device * dev,struct device_attribute * attr,const char * buf,size_t count)482 static ssize_t subcaches_store(struct device *dev,
483 			       struct device_attribute *attr,
484 			       const char *buf, size_t count)
485 {
486 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
487 	int cpu = cpumask_first(&this_leaf->shared_cpu_map);
488 	unsigned long val;
489 
490 	if (!capable(CAP_SYS_ADMIN))
491 		return -EPERM;
492 
493 	if (kstrtoul(buf, 16, &val) < 0)
494 		return -EINVAL;
495 
496 	if (amd_set_subcaches(cpu, val))
497 		return -EINVAL;
498 
499 	return count;
500 }
501 
/*
 * Device attributes backed by the cache_disable_{0,1}_{show,store} and
 * subcaches_{show,store} handlers defined in this file.
 */
static DEVICE_ATTR_RW(cache_disable_0);
static DEVICE_ATTR_RW(cache_disable_1);
static DEVICE_ATTR_RW(subcaches);
505 
506 static umode_t
cache_private_attrs_is_visible(struct kobject * kobj,struct attribute * attr,int unused)507 cache_private_attrs_is_visible(struct kobject *kobj,
508 			       struct attribute *attr, int unused)
509 {
510 	struct device *dev = kobj_to_dev(kobj);
511 	struct cacheinfo *this_leaf = dev_get_drvdata(dev);
512 	umode_t mode = attr->mode;
513 
514 	if (!this_leaf->priv)
515 		return 0;
516 
517 	if ((attr == &dev_attr_subcaches.attr) &&
518 	    amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
519 		return mode;
520 
521 	if ((attr == &dev_attr_cache_disable_0.attr ||
522 	     attr == &dev_attr_cache_disable_1.attr) &&
523 	    amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
524 		return mode;
525 
526 	return 0;
527 }
528 
529 static struct attribute_group cache_private_group = {
530 	.is_visible = cache_private_attrs_is_visible,
531 };
532 
init_amd_l3_attrs(void)533 static void init_amd_l3_attrs(void)
534 {
535 	int n = 1;
536 	static struct attribute **amd_l3_attrs;
537 
538 	if (amd_l3_attrs) /* already initialized */
539 		return;
540 
541 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE))
542 		n += 2;
543 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
544 		n += 1;
545 
546 	amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL);
547 	if (!amd_l3_attrs)
548 		return;
549 
550 	n = 0;
551 	if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) {
552 		amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr;
553 		amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr;
554 	}
555 	if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING))
556 		amd_l3_attrs[n++] = &dev_attr_subcaches.attr;
557 
558 	cache_private_group.attrs = amd_l3_attrs;
559 }
560 
561 const struct attribute_group *
cache_get_priv_group(struct cacheinfo * this_leaf)562 cache_get_priv_group(struct cacheinfo *this_leaf)
563 {
564 	struct amd_northbridge *nb = this_leaf->priv;
565 
566 	if (this_leaf->level < 3 || !nb)
567 		return NULL;
568 
569 	if (nb && nb->l3_cache.indices)
570 		init_amd_l3_attrs();
571 
572 	return &cache_private_group;
573 }
574 
amd_init_l3_cache(struct _cpuid4_info_regs * this_leaf,int index)575 static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index)
576 {
577 	int node;
578 
579 	/* only for L3, and not in virtualized environments */
580 	if (index < 3)
581 		return;
582 
583 	node = amd_get_nb_id(smp_processor_id());
584 	this_leaf->nb = node_to_amd_nb(node);
585 	if (this_leaf->nb && !this_leaf->nb->l3_cache.indices)
586 		amd_calc_l3_indices(this_leaf->nb);
587 }
588 #else
#define amd_init_l3_cache(x, y)	do { } while (0)
590 #endif  /* CONFIG_AMD_NB && CONFIG_SYSFS */
591 
592 static int
cpuid4_cache_lookup_regs(int index,struct _cpuid4_info_regs * this_leaf)593 cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf)
594 {
595 	union _cpuid4_leaf_eax	eax;
596 	union _cpuid4_leaf_ebx	ebx;
597 	union _cpuid4_leaf_ecx	ecx;
598 	unsigned		edx;
599 
600 	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
601 		if (boot_cpu_has(X86_FEATURE_TOPOEXT))
602 			cpuid_count(0x8000001d, index, &eax.full,
603 				    &ebx.full, &ecx.full, &edx);
604 		else
605 			amd_cpuid4(index, &eax, &ebx, &ecx);
606 		amd_init_l3_cache(this_leaf, index);
607 	} else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
608 		cpuid_count(0x8000001d, index, &eax.full,
609 			    &ebx.full, &ecx.full, &edx);
610 		amd_init_l3_cache(this_leaf, index);
611 	} else {
612 		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
613 	}
614 
615 	if (eax.split.type == CTYPE_NULL)
616 		return -EIO; /* better error ? */
617 
618 	this_leaf->eax = eax;
619 	this_leaf->ebx = ebx;
620 	this_leaf->ecx = ecx;
621 	this_leaf->size = (ecx.split.number_of_sets          + 1) *
622 			  (ebx.split.coherency_line_size     + 1) *
623 			  (ebx.split.physical_line_partition + 1) *
624 			  (ebx.split.ways_of_associativity   + 1);
625 	return 0;
626 }
627 
find_num_cache_leaves(struct cpuinfo_x86 * c)628 static int find_num_cache_leaves(struct cpuinfo_x86 *c)
629 {
630 	unsigned int		eax, ebx, ecx, edx, op;
631 	union _cpuid4_leaf_eax	cache_eax;
632 	int 			i = -1;
633 
634 	if (c->x86_vendor == X86_VENDOR_AMD ||
635 	    c->x86_vendor == X86_VENDOR_HYGON)
636 		op = 0x8000001d;
637 	else
638 		op = 4;
639 
640 	do {
641 		++i;
642 		/* Do cpuid(op) loop to find out num_cache_leaves */
643 		cpuid_count(op, i, &eax, &ebx, &ecx, &edx);
644 		cache_eax.full = eax;
645 	} while (cache_eax.split.type != CTYPE_NULL);
646 	return i;
647 }
648 
/*
 * Set cpu_llc_id for @cpu on AMD.  Nothing is done when
 * CPUID(0x80000006).EDX is zero, i.e. no L3 is reported.
 */
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
{
	u32 eax, ebx, ecx, edx, llc_index, num_sharing_cache;

	/*
	 * Multiple LLCs are only possible with an L3, so look at the L3
	 * cache CPUID leaf first.
	 */
	if (!cpuid_edx(0x80000006))
		return;

	if (c->x86 < 0x17) {
		/* LLC is at the node level. */
		per_cpu(cpu_llc_id, cpu) = node_id;
		return;
	}

	if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
		/*
		 * LLC is at the core complex level.
		 * Core complex ID is ApicId[3] for these processors.
		 */
		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
		return;
	}

	/*
	 * Otherwise the LLC ID follows from the number of threads sharing
	 * the last enumerated cache leaf.
	 */
	llc_index = find_num_cache_leaves(c) - 1;

	cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
	if (!eax)
		return;

	num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
	per_cpu(cpu_llc_id, cpu) =
		c->apicid >> get_count_order(num_sharing_cache);
}
686 
/*
 * Set cpu_llc_id for @cpu on Hygon.  Nothing is done when
 * CPUID(0x80000006).EDX is zero, i.e. no L3 is reported.
 */
void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
{
	/*
	 * With an L3 present the LLC is at the core complex level, and
	 * the core complex ID is ApicId[3] for these processors.
	 */
	if (cpuid_edx(0x80000006))
		per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
}
702 
init_amd_cacheinfo(struct cpuinfo_x86 * c)703 void init_amd_cacheinfo(struct cpuinfo_x86 *c)
704 {
705 
706 	if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
707 		num_cache_leaves = find_num_cache_leaves(c);
708 	} else if (c->extended_cpuid_level >= 0x80000006) {
709 		if (cpuid_edx(0x80000006) & 0xf000)
710 			num_cache_leaves = 4;
711 		else
712 			num_cache_leaves = 3;
713 	}
714 }
715 
init_hygon_cacheinfo(struct cpuinfo_x86 * c)716 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
717 {
718 	num_cache_leaves = find_num_cache_leaves(c);
719 }
720 
init_intel_cacheinfo(struct cpuinfo_x86 * c)721 void init_intel_cacheinfo(struct cpuinfo_x86 *c)
722 {
723 	/* Cache sizes */
724 	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
725 	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
726 	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
727 	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
728 #ifdef CONFIG_SMP
729 	unsigned int cpu = c->cpu_index;
730 #endif
731 
732 	if (c->cpuid_level > 3) {
733 		static int is_initialized;
734 
735 		if (is_initialized == 0) {
736 			/* Init num_cache_leaves from boot CPU */
737 			num_cache_leaves = find_num_cache_leaves(c);
738 			is_initialized++;
739 		}
740 
741 		/*
742 		 * Whenever possible use cpuid(4), deterministic cache
743 		 * parameters cpuid leaf to find the cache details
744 		 */
745 		for (i = 0; i < num_cache_leaves; i++) {
746 			struct _cpuid4_info_regs this_leaf = {};
747 			int retval;
748 
749 			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
750 			if (retval < 0)
751 				continue;
752 
753 			switch (this_leaf.eax.split.level) {
754 			case 1:
755 				if (this_leaf.eax.split.type == CTYPE_DATA)
756 					new_l1d = this_leaf.size/1024;
757 				else if (this_leaf.eax.split.type == CTYPE_INST)
758 					new_l1i = this_leaf.size/1024;
759 				break;
760 			case 2:
761 				new_l2 = this_leaf.size/1024;
762 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
763 				index_msb = get_count_order(num_threads_sharing);
764 				l2_id = c->apicid & ~((1 << index_msb) - 1);
765 				break;
766 			case 3:
767 				new_l3 = this_leaf.size/1024;
768 				num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
769 				index_msb = get_count_order(num_threads_sharing);
770 				l3_id = c->apicid & ~((1 << index_msb) - 1);
771 				break;
772 			default:
773 				break;
774 			}
775 		}
776 	}
777 	/*
778 	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
779 	 * trace cache
780 	 */
781 	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
782 		/* supports eax=2  call */
783 		int j, n;
784 		unsigned int regs[4];
785 		unsigned char *dp = (unsigned char *)regs;
786 		int only_trace = 0;
787 
788 		if (num_cache_leaves != 0 && c->x86 == 15)
789 			only_trace = 1;
790 
791 		/* Number of times to iterate */
792 		n = cpuid_eax(2) & 0xFF;
793 
794 		for (i = 0 ; i < n ; i++) {
795 			cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]);
796 
797 			/* If bit 31 is set, this is an unknown format */
798 			for (j = 0 ; j < 3 ; j++)
799 				if (regs[j] & (1 << 31))
800 					regs[j] = 0;
801 
802 			/* Byte 0 is level count, not a descriptor */
803 			for (j = 1 ; j < 16 ; j++) {
804 				unsigned char des = dp[j];
805 				unsigned char k = 0;
806 
807 				/* look up this descriptor in the table */
808 				while (cache_table[k].descriptor != 0) {
809 					if (cache_table[k].descriptor == des) {
810 						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
811 							break;
812 						switch (cache_table[k].cache_type) {
813 						case LVL_1_INST:
814 							l1i += cache_table[k].size;
815 							break;
816 						case LVL_1_DATA:
817 							l1d += cache_table[k].size;
818 							break;
819 						case LVL_2:
820 							l2 += cache_table[k].size;
821 							break;
822 						case LVL_3:
823 							l3 += cache_table[k].size;
824 							break;
825 						case LVL_TRACE:
826 							trace += cache_table[k].size;
827 							break;
828 						}
829 
830 						break;
831 					}
832 
833 					k++;
834 				}
835 			}
836 		}
837 	}
838 
839 	if (new_l1d)
840 		l1d = new_l1d;
841 
842 	if (new_l1i)
843 		l1i = new_l1i;
844 
845 	if (new_l2) {
846 		l2 = new_l2;
847 #ifdef CONFIG_SMP
848 		per_cpu(cpu_llc_id, cpu) = l2_id;
849 #endif
850 	}
851 
852 	if (new_l3) {
853 		l3 = new_l3;
854 #ifdef CONFIG_SMP
855 		per_cpu(cpu_llc_id, cpu) = l3_id;
856 #endif
857 	}
858 
859 #ifdef CONFIG_SMP
860 	/*
861 	 * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
862 	 * turns means that the only possibility is SMT (as indicated in
863 	 * cpuid1). Since cpuid2 doesn't specify shared caches, and we know
864 	 * that SMT shares all caches, we can unconditionally set cpu_llc_id to
865 	 * c->phys_proc_id.
866 	 */
867 	if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
868 		per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
869 #endif
870 
871 	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
872 
873 	if (!l2)
874 		cpu_detect_cache_sizes(c);
875 }
876 
__cache_amd_cpumap_setup(unsigned int cpu,int index,struct _cpuid4_info_regs * base)877 static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
878 				    struct _cpuid4_info_regs *base)
879 {
880 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
881 	struct cacheinfo *this_leaf;
882 	int i, sibling;
883 
884 	/*
885 	 * For L3, always use the pre-calculated cpu_llc_shared_mask
886 	 * to derive shared_cpu_map.
887 	 */
888 	if (index == 3) {
889 		for_each_cpu(i, cpu_llc_shared_mask(cpu)) {
890 			this_cpu_ci = get_cpu_cacheinfo(i);
891 			if (!this_cpu_ci->info_list)
892 				continue;
893 			this_leaf = this_cpu_ci->info_list + index;
894 			for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) {
895 				if (!cpu_online(sibling))
896 					continue;
897 				cpumask_set_cpu(sibling,
898 						&this_leaf->shared_cpu_map);
899 			}
900 		}
901 	} else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
902 		unsigned int apicid, nshared, first, last;
903 
904 		nshared = base->eax.split.num_threads_sharing + 1;
905 		apicid = cpu_data(cpu).apicid;
906 		first = apicid - (apicid % nshared);
907 		last = first + nshared - 1;
908 
909 		for_each_online_cpu(i) {
910 			this_cpu_ci = get_cpu_cacheinfo(i);
911 			if (!this_cpu_ci->info_list)
912 				continue;
913 
914 			apicid = cpu_data(i).apicid;
915 			if ((apicid < first) || (apicid > last))
916 				continue;
917 
918 			this_leaf = this_cpu_ci->info_list + index;
919 
920 			for_each_online_cpu(sibling) {
921 				apicid = cpu_data(sibling).apicid;
922 				if ((apicid < first) || (apicid > last))
923 					continue;
924 				cpumask_set_cpu(sibling,
925 						&this_leaf->shared_cpu_map);
926 			}
927 		}
928 	} else
929 		return 0;
930 
931 	return 1;
932 }
933 
__cache_cpumap_setup(unsigned int cpu,int index,struct _cpuid4_info_regs * base)934 static void __cache_cpumap_setup(unsigned int cpu, int index,
935 				 struct _cpuid4_info_regs *base)
936 {
937 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
938 	struct cacheinfo *this_leaf, *sibling_leaf;
939 	unsigned long num_threads_sharing;
940 	int index_msb, i;
941 	struct cpuinfo_x86 *c = &cpu_data(cpu);
942 
943 	if (c->x86_vendor == X86_VENDOR_AMD ||
944 	    c->x86_vendor == X86_VENDOR_HYGON) {
945 		if (__cache_amd_cpumap_setup(cpu, index, base))
946 			return;
947 	}
948 
949 	this_leaf = this_cpu_ci->info_list + index;
950 	num_threads_sharing = 1 + base->eax.split.num_threads_sharing;
951 
952 	cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map);
953 	if (num_threads_sharing == 1)
954 		return;
955 
956 	index_msb = get_count_order(num_threads_sharing);
957 
958 	for_each_online_cpu(i)
959 		if (cpu_data(i).apicid >> index_msb == c->apicid >> index_msb) {
960 			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
961 
962 			if (i == cpu || !sib_cpu_ci->info_list)
963 				continue;/* skip if itself or no cacheinfo */
964 			sibling_leaf = sib_cpu_ci->info_list + index;
965 			cpumask_set_cpu(i, &this_leaf->shared_cpu_map);
966 			cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map);
967 		}
968 }
969 
ci_leaf_init(struct cacheinfo * this_leaf,struct _cpuid4_info_regs * base)970 static void ci_leaf_init(struct cacheinfo *this_leaf,
971 			 struct _cpuid4_info_regs *base)
972 {
973 	this_leaf->id = base->id;
974 	this_leaf->attributes = CACHE_ID;
975 	this_leaf->level = base->eax.split.level;
976 	this_leaf->type = cache_type_map[base->eax.split.type];
977 	this_leaf->coherency_line_size =
978 				base->ebx.split.coherency_line_size + 1;
979 	this_leaf->ways_of_associativity =
980 				base->ebx.split.ways_of_associativity + 1;
981 	this_leaf->size = base->size;
982 	this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1;
983 	this_leaf->physical_line_partition =
984 				base->ebx.split.physical_line_partition + 1;
985 	this_leaf->priv = base->nb;
986 }
987 
__init_cache_level(unsigned int cpu)988 static int __init_cache_level(unsigned int cpu)
989 {
990 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
991 
992 	if (!num_cache_leaves)
993 		return -ENOENT;
994 	if (!this_cpu_ci)
995 		return -EINVAL;
996 	this_cpu_ci->num_levels = 3;
997 	this_cpu_ci->num_leaves = num_cache_leaves;
998 	return 0;
999 }
1000 
1001 /*
1002  * The max shared threads number comes from CPUID.4:EAX[25-14] with input
1003  * ECX as cache index. Then right shift apicid by the number's order to get
1004  * cache id for this cache node.
1005  */
get_cache_id(int cpu,struct _cpuid4_info_regs * id4_regs)1006 static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
1007 {
1008 	struct cpuinfo_x86 *c = &cpu_data(cpu);
1009 	unsigned long num_threads_sharing;
1010 	int index_msb;
1011 
1012 	num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing;
1013 	index_msb = get_count_order(num_threads_sharing);
1014 	id4_regs->id = c->apicid >> index_msb;
1015 }
1016 
__populate_cache_leaves(unsigned int cpu)1017 static int __populate_cache_leaves(unsigned int cpu)
1018 {
1019 	unsigned int idx, ret;
1020 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
1021 	struct cacheinfo *this_leaf = this_cpu_ci->info_list;
1022 	struct _cpuid4_info_regs id4_regs = {};
1023 
1024 	for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) {
1025 		ret = cpuid4_cache_lookup_regs(idx, &id4_regs);
1026 		if (ret)
1027 			return ret;
1028 		get_cache_id(cpu, &id4_regs);
1029 		ci_leaf_init(this_leaf++, &id4_regs);
1030 		__cache_cpumap_setup(cpu, idx, &id4_regs);
1031 	}
1032 	this_cpu_ci->cpu_map_populated = true;
1033 
1034 	return 0;
1035 }
1036 
1037 DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
1038 DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
1039