1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2022 Intel Corporation. All rights reserved. */
3 #include <linux/memregion.h>
4 #include <linux/genalloc.h>
5 #include <linux/device.h>
6 #include <linux/module.h>
7 #include <linux/slab.h>
8 #include <linux/uuid.h>
9 #include <linux/idr.h>
10 #include <cxlmem.h>
11 #include <cxl.h>
12 #include "core.h"
13
14 /**
15 * DOC: cxl core region
16 *
17 * CXL Regions represent mapped memory capacity in system physical address
18 * space. Whereas the CXL Root Decoders identify the bounds of potential CXL
19 * Memory ranges, Regions represent the active mapped capacity by the HDM
20 * Decoder Capability structures throughout the Host Bridges, Switches, and
21 * Endpoints in the topology.
22 *
23 * Region configuration has ordering constraints. UUID may be set at any time
24 * but is only visible for persistent regions.
25 * 1. Interleave granularity
26 * 2. Interleave ways
27 * 3. Decoder targets
28 */
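
/*
 * Illustrative configuration flow (a sketch of the sysfs sequence this file
 * implements; the decoder and region names below are hypothetical examples):
 *
 *   region=$(cat /sys/bus/cxl/devices/decoder0.0/create_pmem_region)
 *   echo $region > /sys/bus/cxl/devices/decoder0.0/create_pmem_region
 *   echo 256 > /sys/bus/cxl/devices/$region/interleave_granularity
 *   echo 2 > /sys/bus/cxl/devices/$region/interleave_ways
 *   uuidgen > /sys/bus/cxl/devices/$region/uuid
 *   echo $((512 << 20)) > /sys/bus/cxl/devices/$region/size
 *   echo decoder2.0 > /sys/bus/cxl/devices/$region/target0
 *   echo decoder3.0 > /sys/bus/cxl/devices/$region/target1
 *   echo 1 > /sys/bus/cxl/devices/$region/commit
 */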
29
30 /*
31 * All changes to the interleave configuration occur with this lock held
32 * for write.
33 */
34 static DECLARE_RWSEM(cxl_region_rwsem);
35
36 static struct cxl_region *to_cxl_region(struct device *dev);
37
38 static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
39 char *buf)
40 {
41 struct cxl_region *cxlr = to_cxl_region(dev);
42 struct cxl_region_params *p = &cxlr->params;
43 ssize_t rc;
44
45 rc = down_read_interruptible(&cxl_region_rwsem);
46 if (rc)
47 return rc;
48 rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
49 up_read(&cxl_region_rwsem);
50
51 return rc;
52 }
53
54 static int is_dup(struct device *match, void *data)
55 {
56 struct cxl_region_params *p;
57 struct cxl_region *cxlr;
58 uuid_t *uuid = data;
59
60 if (!is_cxl_region(match))
61 return 0;
62
63 lockdep_assert_held(&cxl_region_rwsem);
64 cxlr = to_cxl_region(match);
65 p = &cxlr->params;
66
67 if (uuid_equal(&p->uuid, uuid)) {
68 dev_dbg(match, "already has uuid: %pUb\n", uuid);
69 return -EBUSY;
70 }
71
72 return 0;
73 }
74
75 static ssize_t uuid_store(struct device *dev, struct device_attribute *attr,
76 const char *buf, size_t len)
77 {
78 struct cxl_region *cxlr = to_cxl_region(dev);
79 struct cxl_region_params *p = &cxlr->params;
80 uuid_t temp;
81 ssize_t rc;
82
83 if (len != UUID_STRING_LEN + 1)
84 return -EINVAL;
85
86 rc = uuid_parse(buf, &temp);
87 if (rc)
88 return rc;
89
90 if (uuid_is_null(&temp))
91 return -EINVAL;
92
93 rc = down_write_killable(&cxl_region_rwsem);
94 if (rc)
95 return rc;
96
97 if (uuid_equal(&p->uuid, &temp))
98 goto out;
99
100 rc = -EBUSY;
101 if (p->state >= CXL_CONFIG_ACTIVE)
102 goto out;
103
104 rc = bus_for_each_dev(&cxl_bus_type, NULL, &temp, is_dup);
105 if (rc < 0)
106 goto out;
107
108 uuid_copy(&p->uuid, &temp);
109 out:
110 up_write(&cxl_region_rwsem);
111
112 if (rc)
113 return rc;
114 return len;
115 }
116 static DEVICE_ATTR_RW(uuid);
117
118 static struct cxl_region_ref *cxl_rr_load(struct cxl_port *port,
119 struct cxl_region *cxlr)
120 {
121 return xa_load(&port->regions, (unsigned long)cxlr);
122 }
123
124 static int cxl_region_decode_reset(struct cxl_region *cxlr, int count)
125 {
126 struct cxl_region_params *p = &cxlr->params;
127 int i;
128
129 for (i = count - 1; i >= 0; i--) {
130 struct cxl_endpoint_decoder *cxled = p->targets[i];
131 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
132 struct cxl_port *iter = cxled_to_port(cxled);
133 struct cxl_ep *ep;
134 int rc;
135
136 while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
137 iter = to_cxl_port(iter->dev.parent);
138
139 for (ep = cxl_ep_load(iter, cxlmd); iter;
140 iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
141 struct cxl_region_ref *cxl_rr;
142 struct cxl_decoder *cxld;
143
144 cxl_rr = cxl_rr_load(iter, cxlr);
145 cxld = cxl_rr->decoder;
146 rc = cxld->reset(cxld);
147 if (rc)
148 return rc;
149 }
150
151 rc = cxled->cxld.reset(&cxled->cxld);
152 if (rc)
153 return rc;
154 }
155
156 return 0;
157 }
158
159 static int cxl_region_decode_commit(struct cxl_region *cxlr)
160 {
161 struct cxl_region_params *p = &cxlr->params;
162 int i, rc = 0;
163
164 for (i = 0; i < p->nr_targets; i++) {
165 struct cxl_endpoint_decoder *cxled = p->targets[i];
166 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
167 struct cxl_region_ref *cxl_rr;
168 struct cxl_decoder *cxld;
169 struct cxl_port *iter;
170 struct cxl_ep *ep;
171
172 /* commit bottom up */
173 for (iter = cxled_to_port(cxled); !is_cxl_root(iter);
174 iter = to_cxl_port(iter->dev.parent)) {
175 cxl_rr = cxl_rr_load(iter, cxlr);
176 cxld = cxl_rr->decoder;
177 if (cxld->commit)
178 rc = cxld->commit(cxld);
179 if (rc)
180 break;
181 }
182
183 if (rc) {
184 /* programming @iter failed, teardown */
185 for (ep = cxl_ep_load(iter, cxlmd); ep && iter;
186 iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
187 cxl_rr = cxl_rr_load(iter, cxlr);
188 cxld = cxl_rr->decoder;
189 cxld->reset(cxld);
190 }
191
192 cxled->cxld.reset(&cxled->cxld);
193 goto err;
194 }
195 }
196
197 return 0;
198
199 err:
200 /* undo the targets that were successfully committed */
201 cxl_region_decode_reset(cxlr, i);
202 return rc;
203 }
204
205 static ssize_t commit_store(struct device *dev, struct device_attribute *attr,
206 const char *buf, size_t len)
207 {
208 struct cxl_region *cxlr = to_cxl_region(dev);
209 struct cxl_region_params *p = &cxlr->params;
210 bool commit;
211 ssize_t rc;
212
213 rc = kstrtobool(buf, &commit);
214 if (rc)
215 return rc;
216
217 rc = down_write_killable(&cxl_region_rwsem);
218 if (rc)
219 return rc;
220
221 /* Already in the requested state? */
222 if (commit && p->state >= CXL_CONFIG_COMMIT)
223 goto out;
224 if (!commit && p->state < CXL_CONFIG_COMMIT)
225 goto out;
226
227 /* Not ready to commit? */
228 if (commit && p->state < CXL_CONFIG_ACTIVE) {
229 rc = -ENXIO;
230 goto out;
231 }
232
233 if (commit)
234 rc = cxl_region_decode_commit(cxlr);
235 else {
236 p->state = CXL_CONFIG_RESET_PENDING;
237 up_write(&cxl_region_rwsem);
238 device_release_driver(&cxlr->dev);
239 down_write(&cxl_region_rwsem);
240
241 /*
242 * The lock was dropped, so need to revalidate that the reset is
243 * still pending.
244 */
245 if (p->state == CXL_CONFIG_RESET_PENDING)
246 rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
247 }
248
249 if (rc)
250 goto out;
251
252 if (commit)
253 p->state = CXL_CONFIG_COMMIT;
254 else if (p->state == CXL_CONFIG_RESET_PENDING)
255 p->state = CXL_CONFIG_ACTIVE;
256
257 out:
258 up_write(&cxl_region_rwsem);
259
260 if (rc)
261 return rc;
262 return len;
263 }
264
265 static ssize_t commit_show(struct device *dev, struct device_attribute *attr,
266 char *buf)
267 {
268 struct cxl_region *cxlr = to_cxl_region(dev);
269 struct cxl_region_params *p = &cxlr->params;
270 ssize_t rc;
271
272 rc = down_read_interruptible(&cxl_region_rwsem);
273 if (rc)
274 return rc;
275 rc = sysfs_emit(buf, "%d\n", p->state >= CXL_CONFIG_COMMIT);
276 up_read(&cxl_region_rwsem);
277
278 return rc;
279 }
280 static DEVICE_ATTR_RW(commit);
281
282 static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
283 int n)
284 {
285 struct device *dev = kobj_to_dev(kobj);
286 struct cxl_region *cxlr = to_cxl_region(dev);
287
288 if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
289 return 0;
290 return a->mode;
291 }
292
293 static ssize_t interleave_ways_show(struct device *dev,
294 struct device_attribute *attr, char *buf)
295 {
296 struct cxl_region *cxlr = to_cxl_region(dev);
297 struct cxl_region_params *p = &cxlr->params;
298 ssize_t rc;
299
300 rc = down_read_interruptible(&cxl_region_rwsem);
301 if (rc)
302 return rc;
303 rc = sysfs_emit(buf, "%d\n", p->interleave_ways);
304 up_read(&cxl_region_rwsem);
305
306 return rc;
307 }
308
309 static const struct attribute_group *get_cxl_region_target_group(void);
310
311 static ssize_t interleave_ways_store(struct device *dev,
312 struct device_attribute *attr,
313 const char *buf, size_t len)
314 {
315 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
316 struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
317 struct cxl_region *cxlr = to_cxl_region(dev);
318 struct cxl_region_params *p = &cxlr->params;
319 unsigned int val, save;
320 int rc;
321 u8 iw;
322
323 rc = kstrtouint(buf, 0, &val);
324 if (rc)
325 return rc;
326
327 rc = ways_to_cxl(val, &iw);
328 if (rc)
329 return rc;
330
331 /*
332 * Even for x3, x6, and x12 interleaves the region interleave must be a
333 * power of 2 multiple of the host bridge interleave.
334 */
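/*
 * For example, a x3 host bridge interleave allows x3, x6, or x12 regions
 * (val / 3 is 1, 2, or 4), while a x2 host bridge interleave allows x2,
 * x4, x8, or x16 but rejects x6 since 6 / 2 = 3 is not a power of 2.
 */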
335 if (!is_power_of_2(val / cxld->interleave_ways) ||
336 (val % cxld->interleave_ways)) {
337 dev_dbg(&cxlr->dev, "invalid interleave: %d\n", val);
338 return -EINVAL;
339 }
340
341 rc = down_write_killable(&cxl_region_rwsem);
342 if (rc)
343 return rc;
344 if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
345 rc = -EBUSY;
346 goto out;
347 }
348
349 save = p->interleave_ways;
350 p->interleave_ways = val;
351 rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_target_group());
352 if (rc)
353 p->interleave_ways = save;
354 out:
355 up_write(&cxl_region_rwsem);
356 if (rc)
357 return rc;
358 return len;
359 }
360 static DEVICE_ATTR_RW(interleave_ways);
361
362 static ssize_t interleave_granularity_show(struct device *dev,
363 struct device_attribute *attr,
364 char *buf)
365 {
366 struct cxl_region *cxlr = to_cxl_region(dev);
367 struct cxl_region_params *p = &cxlr->params;
368 ssize_t rc;
369
370 rc = down_read_interruptible(&cxl_region_rwsem);
371 if (rc)
372 return rc;
373 rc = sysfs_emit(buf, "%d\n", p->interleave_granularity);
374 up_read(&cxl_region_rwsem);
375
376 return rc;
377 }
378
379 static ssize_t interleave_granularity_store(struct device *dev,
380 struct device_attribute *attr,
381 const char *buf, size_t len)
382 {
383 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
384 struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
385 struct cxl_region *cxlr = to_cxl_region(dev);
386 struct cxl_region_params *p = &cxlr->params;
387 int rc, val;
388 u16 ig;
389
390 rc = kstrtoint(buf, 0, &val);
391 if (rc)
392 return rc;
393
394 rc = granularity_to_cxl(val, &ig);
395 if (rc)
396 return rc;
397
398 /*
399 * When the host-bridge is interleaved, disallow region granularity !=
400 * root granularity. Regions with a granularity less than the root
401 * interleave result in needing multiple endpoints to support a single
402 * slot in the interleave (possible to support in the future). Regions
403 * with a granularity greater than the root interleave result in invalid
404 * DPA translations (invalid to support).
405 */
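/*
 * For example, if the root decoder interleaves x2 at 256 bytes, only a
 * 256 byte region granularity is accepted here; 128 or 512 would fail.
 */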
406 if (cxld->interleave_ways > 1 && val != cxld->interleave_granularity)
407 return -EINVAL;
408
409 rc = down_write_killable(&cxl_region_rwsem);
410 if (rc)
411 return rc;
412 if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE) {
413 rc = -EBUSY;
414 goto out;
415 }
416
417 p->interleave_granularity = val;
418 out:
419 up_write(&cxl_region_rwsem);
420 if (rc)
421 return rc;
422 return len;
423 }
424 static DEVICE_ATTR_RW(interleave_granularity);
425
426 static ssize_t resource_show(struct device *dev, struct device_attribute *attr,
427 char *buf)
428 {
429 struct cxl_region *cxlr = to_cxl_region(dev);
430 struct cxl_region_params *p = &cxlr->params;
431 u64 resource = -1ULL;
432 ssize_t rc;
433
434 rc = down_read_interruptible(&cxl_region_rwsem);
435 if (rc)
436 return rc;
437 if (p->res)
438 resource = p->res->start;
439 rc = sysfs_emit(buf, "%#llx\n", resource);
440 up_read(&cxl_region_rwsem);
441
442 return rc;
443 }
444 static DEVICE_ATTR_RO(resource);
445
446 static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
447 {
448 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
449 struct cxl_region_params *p = &cxlr->params;
450 struct resource *res;
451 u32 remainder = 0;
452
453 lockdep_assert_held_write(&cxl_region_rwsem);
454
455 /* Nothing to do... */
456 if (p->res && resource_size(p->res) == size)
457 return 0;
458
459 /* To change size the old size must be freed first */
460 if (p->res)
461 return -EBUSY;
462
463 if (p->state >= CXL_CONFIG_INTERLEAVE_ACTIVE)
464 return -EBUSY;
465
466 /* ways, granularity and uuid (if PMEM) need to be set before HPA */
467 if (!p->interleave_ways || !p->interleave_granularity ||
468 (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
469 return -ENXIO;
470
471 div_u64_rem(size, SZ_256M * p->interleave_ways, &remainder);
472 if (remainder)
473 return -EINVAL;
474
475 res = alloc_free_mem_region(cxlrd->res, size, SZ_256M,
476 dev_name(&cxlr->dev));
477 if (IS_ERR(res)) {
478 dev_dbg(&cxlr->dev, "failed to allocate HPA: %ld\n",
479 PTR_ERR(res));
480 return PTR_ERR(res);
481 }
482
483 p->res = res;
484 p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
485
486 return 0;
487 }
488
489 static void cxl_region_iomem_release(struct cxl_region *cxlr)
490 {
491 struct cxl_region_params *p = &cxlr->params;
492
493 if (device_is_registered(&cxlr->dev))
494 lockdep_assert_held_write(&cxl_region_rwsem);
495 if (p->res) {
496 remove_resource(p->res);
497 kfree(p->res);
498 p->res = NULL;
499 }
500 }
501
502 static int free_hpa(struct cxl_region *cxlr)
503 {
504 struct cxl_region_params *p = &cxlr->params;
505
506 lockdep_assert_held_write(&cxl_region_rwsem);
507
508 if (!p->res)
509 return 0;
510
511 if (p->state >= CXL_CONFIG_ACTIVE)
512 return -EBUSY;
513
514 cxl_region_iomem_release(cxlr);
515 p->state = CXL_CONFIG_IDLE;
516 return 0;
517 }
518
519 static ssize_t size_store(struct device *dev, struct device_attribute *attr,
520 const char *buf, size_t len)
521 {
522 struct cxl_region *cxlr = to_cxl_region(dev);
523 u64 val;
524 int rc;
525
526 rc = kstrtou64(buf, 0, &val);
527 if (rc)
528 return rc;
529
530 rc = down_write_killable(&cxl_region_rwsem);
531 if (rc)
532 return rc;
533
534 if (val)
535 rc = alloc_hpa(cxlr, val);
536 else
537 rc = free_hpa(cxlr);
538 up_write(&cxl_region_rwsem);
539
540 if (rc)
541 return rc;
542
543 return len;
544 }
545
546 static ssize_t size_show(struct device *dev, struct device_attribute *attr,
547 char *buf)
548 {
549 struct cxl_region *cxlr = to_cxl_region(dev);
550 struct cxl_region_params *p = &cxlr->params;
551 u64 size = 0;
552 ssize_t rc;
553
554 rc = down_read_interruptible(&cxl_region_rwsem);
555 if (rc)
556 return rc;
557 if (p->res)
558 size = resource_size(p->res);
559 rc = sysfs_emit(buf, "%#llx\n", size);
560 up_read(&cxl_region_rwsem);
561
562 return rc;
563 }
564 static DEVICE_ATTR_RW(size);
565
566 static struct attribute *cxl_region_attrs[] = {
567 &dev_attr_uuid.attr,
568 &dev_attr_commit.attr,
569 &dev_attr_interleave_ways.attr,
570 &dev_attr_interleave_granularity.attr,
571 &dev_attr_resource.attr,
572 &dev_attr_size.attr,
573 NULL,
574 };
575
576 static const struct attribute_group cxl_region_group = {
577 .attrs = cxl_region_attrs,
578 .is_visible = cxl_region_visible,
579 };
580
581 static size_t show_targetN(struct cxl_region *cxlr, char *buf, int pos)
582 {
583 struct cxl_region_params *p = &cxlr->params;
584 struct cxl_endpoint_decoder *cxled;
585 int rc;
586
587 rc = down_read_interruptible(&cxl_region_rwsem);
588 if (rc)
589 return rc;
590
591 if (pos >= p->interleave_ways) {
592 dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
593 p->interleave_ways);
594 rc = -ENXIO;
595 goto out;
596 }
597
598 cxled = p->targets[pos];
599 if (!cxled)
600 rc = sysfs_emit(buf, "\n");
601 else
602 rc = sysfs_emit(buf, "%s\n", dev_name(&cxled->cxld.dev));
603 out:
604 up_read(&cxl_region_rwsem);
605
606 return rc;
607 }
608
609 static int match_free_decoder(struct device *dev, void *data)
610 {
611 struct cxl_decoder *cxld;
612 int *id = data;
613
614 if (!is_switch_decoder(dev))
615 return 0;
616
617 cxld = to_cxl_decoder(dev);
618
619 /* enforce ordered allocation */
620 if (cxld->id != *id)
621 return 0;
622
623 if (!cxld->region)
624 return 1;
625
626 (*id)++;
627
628 return 0;
629 }
630
631 static struct cxl_decoder *cxl_region_find_decoder(struct cxl_port *port,
632 struct cxl_region *cxlr)
633 {
634 struct device *dev;
635 int id = 0;
636
637 dev = device_find_child(&port->dev, &id, match_free_decoder);
638 if (!dev)
639 return NULL;
640 /*
641 * This decoder is pinned as long as the endpoint decoder is
642 * registered, and endpoint decoder unregistration holds the
643 * cxl_region_rwsem over unregister events, so no need to hold on to
644 * this extra reference.
645 */
646 put_device(dev);
647 return to_cxl_decoder(dev);
648 }
649
650 static struct cxl_region_ref *alloc_region_ref(struct cxl_port *port,
651 struct cxl_region *cxlr)
652 {
653 struct cxl_region_params *p = &cxlr->params;
654 struct cxl_region_ref *cxl_rr, *iter;
655 unsigned long index;
656 int rc;
657
658 xa_for_each(&port->regions, index, iter) {
659 struct cxl_region_params *ip = &iter->region->params;
660
661 if (!ip->res)
662 continue;
663
664 if (ip->res->start > p->res->start) {
665 dev_dbg(&cxlr->dev,
666 "%s: HPA order violation %s:%pr vs %pr\n",
667 dev_name(&port->dev),
668 dev_name(&iter->region->dev), ip->res, p->res);
669 return ERR_PTR(-EBUSY);
670 }
671 }
672
673 cxl_rr = kzalloc(sizeof(*cxl_rr), GFP_KERNEL);
674 if (!cxl_rr)
675 return ERR_PTR(-ENOMEM);
676 cxl_rr->port = port;
677 cxl_rr->region = cxlr;
678 cxl_rr->nr_targets = 1;
679 xa_init(&cxl_rr->endpoints);
680
681 rc = xa_insert(&port->regions, (unsigned long)cxlr, cxl_rr, GFP_KERNEL);
682 if (rc) {
683 dev_dbg(&cxlr->dev,
684 "%s: failed to track region reference: %d\n",
685 dev_name(&port->dev), rc);
686 kfree(cxl_rr);
687 return ERR_PTR(rc);
688 }
689
690 return cxl_rr;
691 }
692
693 static void cxl_rr_free_decoder(struct cxl_region_ref *cxl_rr)
694 {
695 struct cxl_region *cxlr = cxl_rr->region;
696 struct cxl_decoder *cxld = cxl_rr->decoder;
697
698 if (!cxld)
699 return;
700
701 dev_WARN_ONCE(&cxlr->dev, cxld->region != cxlr, "region mismatch\n");
702 if (cxld->region == cxlr) {
703 cxld->region = NULL;
704 put_device(&cxlr->dev);
705 }
706 }
707
708 static void free_region_ref(struct cxl_region_ref *cxl_rr)
709 {
710 struct cxl_port *port = cxl_rr->port;
711 struct cxl_region *cxlr = cxl_rr->region;
712
713 cxl_rr_free_decoder(cxl_rr);
714 xa_erase(&port->regions, (unsigned long)cxlr);
715 xa_destroy(&cxl_rr->endpoints);
716 kfree(cxl_rr);
717 }
718
719 static int cxl_rr_ep_add(struct cxl_region_ref *cxl_rr,
720 struct cxl_endpoint_decoder *cxled)
721 {
722 int rc;
723 struct cxl_port *port = cxl_rr->port;
724 struct cxl_region *cxlr = cxl_rr->region;
725 struct cxl_decoder *cxld = cxl_rr->decoder;
726 struct cxl_ep *ep = cxl_ep_load(port, cxled_to_memdev(cxled));
727
728 if (ep) {
729 rc = xa_insert(&cxl_rr->endpoints, (unsigned long)cxled, ep,
730 GFP_KERNEL);
731 if (rc)
732 return rc;
733 }
734 cxl_rr->nr_eps++;
735
736 if (!cxld->region) {
737 cxld->region = cxlr;
738 get_device(&cxlr->dev);
739 }
740
741 return 0;
742 }
743
744 static int cxl_rr_alloc_decoder(struct cxl_port *port, struct cxl_region *cxlr,
745 struct cxl_endpoint_decoder *cxled,
746 struct cxl_region_ref *cxl_rr)
747 {
748 struct cxl_decoder *cxld;
749
750 if (port == cxled_to_port(cxled))
751 cxld = &cxled->cxld;
752 else
753 cxld = cxl_region_find_decoder(port, cxlr);
754 if (!cxld) {
755 dev_dbg(&cxlr->dev, "%s: no decoder available\n",
756 dev_name(&port->dev));
757 return -EBUSY;
758 }
759
760 if (cxld->region) {
761 dev_dbg(&cxlr->dev, "%s: %s already attached to %s\n",
762 dev_name(&port->dev), dev_name(&cxld->dev),
763 dev_name(&cxld->region->dev));
764 return -EBUSY;
765 }
766
767 cxl_rr->decoder = cxld;
768 return 0;
769 }
770
771 /**
772 * cxl_port_attach_region() - track a region's interest in a port by endpoint
773 * @port: port to add a new region reference 'struct cxl_region_ref'
774 * @cxlr: region to attach to @port
775 * @cxled: endpoint decoder used to create or further pin a region reference
776 * @pos: interleave position of @cxled in @cxlr
777 *
778 * The attach event is an opportunity to validate CXL decode setup
779 * constraints and record metadata needed for programming HDM decoders,
780 * in particular decoder target lists.
781 *
782 * The steps are:
783 *
784 * - validate that there are no other regions with a higher HPA already
785 * associated with @port
786 * - establish a region reference if one is not already present
787 *
788 * - additionally allocate a decoder instance that will host @cxlr on
789 * @port
790 *
791 * - pin the region reference by the endpoint
792 * - account for how many entries in @port's target list are needed to
793 * cover all of the added endpoints.
794 */
795 static int cxl_port_attach_region(struct cxl_port *port,
796 struct cxl_region *cxlr,
797 struct cxl_endpoint_decoder *cxled, int pos)
798 {
799 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
800 struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
801 struct cxl_region_ref *cxl_rr;
802 bool nr_targets_inc = false;
803 struct cxl_decoder *cxld;
804 unsigned long index;
805 int rc = -EBUSY;
806
807 lockdep_assert_held_write(&cxl_region_rwsem);
808
809 cxl_rr = cxl_rr_load(port, cxlr);
810 if (cxl_rr) {
811 struct cxl_ep *ep_iter;
812 int found = 0;
813
814 /*
815 * Walk the existing endpoints that have been attached to
816 * @cxlr at @port and see if they share the same 'next' port
817 * in the downstream direction. I.e. endpoints that share a common
818 * upstream switch.
819 */
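/*
 * For example, two endpoints reached through the same downstream switch
 * share one entry in this port's target list, while an endpoint arriving
 * via a different dport adds a new entry (accounted below).
 */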
820 xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
821 if (ep_iter == ep)
822 continue;
823 if (ep_iter->next == ep->next) {
824 found++;
825 break;
826 }
827 }
828
829 /*
830 * New target port, or @port is an endpoint port that always
831 * accounts its own local decode as a target.
832 */
833 if (!found || !ep->next) {
834 cxl_rr->nr_targets++;
835 nr_targets_inc = true;
836 }
837 } else {
838 cxl_rr = alloc_region_ref(port, cxlr);
839 if (IS_ERR(cxl_rr)) {
840 dev_dbg(&cxlr->dev,
841 "%s: failed to allocate region reference\n",
842 dev_name(&port->dev));
843 return PTR_ERR(cxl_rr);
844 }
845 nr_targets_inc = true;
846
847 rc = cxl_rr_alloc_decoder(port, cxlr, cxled, cxl_rr);
848 if (rc)
849 goto out_erase;
850 }
851 cxld = cxl_rr->decoder;
852
853 rc = cxl_rr_ep_add(cxl_rr, cxled);
854 if (rc) {
855 dev_dbg(&cxlr->dev,
856 "%s: failed to track endpoint %s:%s reference\n",
857 dev_name(&port->dev), dev_name(&cxlmd->dev),
858 dev_name(&cxld->dev));
859 goto out_erase;
860 }
861
862 dev_dbg(&cxlr->dev,
863 "%s:%s %s add: %s:%s @ %d next: %s nr_eps: %d nr_targets: %d\n",
864 dev_name(port->uport), dev_name(&port->dev),
865 dev_name(&cxld->dev), dev_name(&cxlmd->dev),
866 dev_name(&cxled->cxld.dev), pos,
867 ep ? ep->next ? dev_name(ep->next->uport) :
868 dev_name(&cxlmd->dev) :
869 "none",
870 cxl_rr->nr_eps, cxl_rr->nr_targets);
871
872 return 0;
873 out_erase:
874 if (nr_targets_inc)
875 cxl_rr->nr_targets--;
876 if (cxl_rr->nr_eps == 0)
877 free_region_ref(cxl_rr);
878 return rc;
879 }
880
881 static void cxl_port_detach_region(struct cxl_port *port,
882 struct cxl_region *cxlr,
883 struct cxl_endpoint_decoder *cxled)
884 {
885 struct cxl_region_ref *cxl_rr;
886 struct cxl_ep *ep = NULL;
887
888 lockdep_assert_held_write(&cxl_region_rwsem);
889
890 cxl_rr = cxl_rr_load(port, cxlr);
891 if (!cxl_rr)
892 return;
893
894 /*
895 * Endpoint ports do not carry cxl_ep references, and they
896 * never target more than one endpoint by definition
897 */
898 if (cxl_rr->decoder == &cxled->cxld)
899 cxl_rr->nr_eps--;
900 else
901 ep = xa_erase(&cxl_rr->endpoints, (unsigned long)cxled);
902 if (ep) {
903 struct cxl_ep *ep_iter;
904 unsigned long index;
905 int found = 0;
906
907 cxl_rr->nr_eps--;
908 xa_for_each(&cxl_rr->endpoints, index, ep_iter) {
909 if (ep_iter->next == ep->next) {
910 found++;
911 break;
912 }
913 }
914 if (!found)
915 cxl_rr->nr_targets--;
916 }
917
918 if (cxl_rr->nr_eps == 0)
919 free_region_ref(cxl_rr);
920 }
921
922 static int check_last_peer(struct cxl_endpoint_decoder *cxled,
923 struct cxl_ep *ep, struct cxl_region_ref *cxl_rr,
924 int distance)
925 {
926 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
927 struct cxl_region *cxlr = cxl_rr->region;
928 struct cxl_region_params *p = &cxlr->params;
929 struct cxl_endpoint_decoder *cxled_peer;
930 struct cxl_port *port = cxl_rr->port;
931 struct cxl_memdev *cxlmd_peer;
932 struct cxl_ep *ep_peer;
933 int pos = cxled->pos;
934
935 /*
936 * If this position wants to share a dport with the last endpoint mapped
937 * then that endpoint, at index 'position - distance', must also be
938 * mapped by this dport.
939 */
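/*
 * For example, in a 4-way region decoded through a port with 2 targets,
 * distance is 4 / 2 = 2: position 2 must share its dport with position 0,
 * and position 3 with position 1.
 */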
940 if (pos < distance) {
941 dev_dbg(&cxlr->dev, "%s:%s: cannot host %s:%s at %d\n",
942 dev_name(port->uport), dev_name(&port->dev),
943 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
944 return -ENXIO;
945 }
946 cxled_peer = p->targets[pos - distance];
947 cxlmd_peer = cxled_to_memdev(cxled_peer);
948 ep_peer = cxl_ep_load(port, cxlmd_peer);
949 if (ep->dport != ep_peer->dport) {
950 dev_dbg(&cxlr->dev,
951 "%s:%s: %s:%s pos %d mismatched peer %s:%s\n",
952 dev_name(port->uport), dev_name(&port->dev),
953 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos,
954 dev_name(&cxlmd_peer->dev),
955 dev_name(&cxled_peer->cxld.dev));
956 return -ENXIO;
957 }
958
959 return 0;
960 }
961
962 static int cxl_port_setup_targets(struct cxl_port *port,
963 struct cxl_region *cxlr,
964 struct cxl_endpoint_decoder *cxled)
965 {
966 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
967 int parent_iw, parent_ig, ig, iw, rc, inc = 0, pos = cxled->pos;
968 struct cxl_port *parent_port = to_cxl_port(port->dev.parent);
969 struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
970 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
971 struct cxl_ep *ep = cxl_ep_load(port, cxlmd);
972 struct cxl_region_params *p = &cxlr->params;
973 struct cxl_decoder *cxld = cxl_rr->decoder;
974 struct cxl_switch_decoder *cxlsd;
975 u16 eig, peig;
976 u8 eiw, peiw;
977
978 /*
979 * While root level decoders support x3, x6, x12, switch level
980 * decoders only support powers of 2 up to x16.
981 */
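/*
 * For example, a x6 region split across a x3 root interleave leaves two
 * targets per switch port (valid), whereas all six targets behind a
 * single switch port would be rejected here.
 */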
982 if (!is_power_of_2(cxl_rr->nr_targets)) {
983 dev_dbg(&cxlr->dev, "%s:%s: invalid target count %d\n",
984 dev_name(port->uport), dev_name(&port->dev),
985 cxl_rr->nr_targets);
986 return -EINVAL;
987 }
988
989 cxlsd = to_cxl_switch_decoder(&cxld->dev);
990 if (cxl_rr->nr_targets_set) {
991 int i, distance;
992
993 /*
994 * Passthrough ports impose no distance requirements between
995 * peers
996 */
997 if (port->nr_dports == 1)
998 distance = 0;
999 else
1000 distance = p->nr_targets / cxl_rr->nr_targets;
1001 for (i = 0; i < cxl_rr->nr_targets_set; i++)
1002 if (ep->dport == cxlsd->target[i]) {
1003 rc = check_last_peer(cxled, ep, cxl_rr,
1004 distance);
1005 if (rc)
1006 return rc;
1007 goto out_target_set;
1008 }
1009 goto add_target;
1010 }
1011
1012 if (is_cxl_root(parent_port)) {
1013 parent_ig = cxlrd->cxlsd.cxld.interleave_granularity;
1014 parent_iw = cxlrd->cxlsd.cxld.interleave_ways;
1015 /*
1016 * For purposes of address bit routing, use power-of-2 math for
1017 * switch ports.
1018 */
1019 if (!is_power_of_2(parent_iw))
1020 parent_iw /= 3;
1021 } else {
1022 struct cxl_region_ref *parent_rr;
1023 struct cxl_decoder *parent_cxld;
1024
1025 parent_rr = cxl_rr_load(parent_port, cxlr);
1026 parent_cxld = parent_rr->decoder;
1027 parent_ig = parent_cxld->interleave_granularity;
1028 parent_iw = parent_cxld->interleave_ways;
1029 }
1030
1031 rc = granularity_to_cxl(parent_ig, &peig);
1032 if (rc) {
1033 dev_dbg(&cxlr->dev, "%s:%s: invalid parent granularity: %d\n",
1034 dev_name(parent_port->uport),
1035 dev_name(&parent_port->dev), parent_ig);
1036 return rc;
1037 }
1038
1039 rc = ways_to_cxl(parent_iw, &peiw);
1040 if (rc) {
1041 dev_dbg(&cxlr->dev, "%s:%s: invalid parent interleave: %d\n",
1042 dev_name(parent_port->uport),
1043 dev_name(&parent_port->dev), parent_iw);
1044 return rc;
1045 }
1046
1047 iw = cxl_rr->nr_targets;
1048 rc = ways_to_cxl(iw, &eiw);
1049 if (rc) {
1050 dev_dbg(&cxlr->dev, "%s:%s: invalid port interleave: %d\n",
1051 dev_name(port->uport), dev_name(&port->dev), iw);
1052 return rc;
1053 }
1054
1055 /*
1056 * If @parent_port is masking address bits, pick the next unused address
1057 * bit to route @port's targets.
1058 */
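/*
 * For example, if the parent routes between two targets on HPA bit 8
 * (256 byte granularity, x2), a two-target decode at this port keys off
 * the next bit up, bit 9, i.e. a 512 byte granularity.
 */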
1059 if (parent_iw > 1 && cxl_rr->nr_targets > 1) {
1060 u32 address_bit = max(peig + peiw, eiw + peig);
1061
1062 eig = address_bit - eiw + 1;
1063 } else {
1064 eiw = peiw;
1065 eig = peig;
1066 }
1067
1068 rc = cxl_to_granularity(eig, &ig);
1069 if (rc) {
1070 dev_dbg(&cxlr->dev, "%s:%s: invalid interleave: %d\n",
1071 dev_name(port->uport), dev_name(&port->dev),
1072 256 << eig);
1073 return rc;
1074 }
1075
1076 cxld->interleave_ways = iw;
1077 cxld->interleave_granularity = ig;
1078 cxld->hpa_range = (struct range) {
1079 .start = p->res->start,
1080 .end = p->res->end,
1081 };
1082 dev_dbg(&cxlr->dev, "%s:%s iw: %d ig: %d\n", dev_name(port->uport),
1083 dev_name(&port->dev), iw, ig);
1084 add_target:
1085 if (cxl_rr->nr_targets_set == cxl_rr->nr_targets) {
1086 dev_dbg(&cxlr->dev,
1087 "%s:%s: targets full trying to add %s:%s at %d\n",
1088 dev_name(port->uport), dev_name(&port->dev),
1089 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1090 return -ENXIO;
1091 }
1092 cxlsd->target[cxl_rr->nr_targets_set] = ep->dport;
1093 inc = 1;
1094 out_target_set:
1095 cxl_rr->nr_targets_set += inc;
1096 dev_dbg(&cxlr->dev, "%s:%s target[%d] = %s for %s:%s @ %d\n",
1097 dev_name(port->uport), dev_name(&port->dev),
1098 cxl_rr->nr_targets_set - 1, dev_name(ep->dport->dport),
1099 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev), pos);
1100
1101 return 0;
1102 }
1103
1104 static void cxl_port_reset_targets(struct cxl_port *port,
1105 struct cxl_region *cxlr)
1106 {
1107 struct cxl_region_ref *cxl_rr = cxl_rr_load(port, cxlr);
1108 struct cxl_decoder *cxld;
1109
1110 /*
1111 * After the last endpoint has been detached the entire cxl_rr may now
1112 * be gone.
1113 */
1114 if (!cxl_rr)
1115 return;
1116 cxl_rr->nr_targets_set = 0;
1117
1118 cxld = cxl_rr->decoder;
1119 cxld->hpa_range = (struct range) {
1120 .start = 0,
1121 .end = -1,
1122 };
1123 }
1124
1125 static void cxl_region_teardown_targets(struct cxl_region *cxlr)
1126 {
1127 struct cxl_region_params *p = &cxlr->params;
1128 struct cxl_endpoint_decoder *cxled;
1129 struct cxl_memdev *cxlmd;
1130 struct cxl_port *iter;
1131 struct cxl_ep *ep;
1132 int i;
1133
1134 for (i = 0; i < p->nr_targets; i++) {
1135 cxled = p->targets[i];
1136 cxlmd = cxled_to_memdev(cxled);
1137
1138 iter = cxled_to_port(cxled);
1139 while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1140 iter = to_cxl_port(iter->dev.parent);
1141
1142 for (ep = cxl_ep_load(iter, cxlmd); iter;
1143 iter = ep->next, ep = cxl_ep_load(iter, cxlmd))
1144 cxl_port_reset_targets(iter, cxlr);
1145 }
1146 }
1147
1148 static int cxl_region_setup_targets(struct cxl_region *cxlr)
1149 {
1150 struct cxl_region_params *p = &cxlr->params;
1151 struct cxl_endpoint_decoder *cxled;
1152 struct cxl_memdev *cxlmd;
1153 struct cxl_port *iter;
1154 struct cxl_ep *ep;
1155 int i, rc;
1156
1157 for (i = 0; i < p->nr_targets; i++) {
1158 cxled = p->targets[i];
1159 cxlmd = cxled_to_memdev(cxled);
1160
1161 iter = cxled_to_port(cxled);
1162 while (!is_cxl_root(to_cxl_port(iter->dev.parent)))
1163 iter = to_cxl_port(iter->dev.parent);
1164
1165 /*
1166 * Descend the topology tree programming targets while
1167 * looking for conflicts.
1168 */
1169 for (ep = cxl_ep_load(iter, cxlmd); iter;
1170 iter = ep->next, ep = cxl_ep_load(iter, cxlmd)) {
1171 rc = cxl_port_setup_targets(iter, cxlr, cxled);
1172 if (rc) {
1173 cxl_region_teardown_targets(cxlr);
1174 return rc;
1175 }
1176 }
1177 }
1178
1179 return 0;
1180 }
1181
1182 static int cxl_region_attach(struct cxl_region *cxlr,
1183 struct cxl_endpoint_decoder *cxled, int pos)
1184 {
1185 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
1186 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1187 struct cxl_port *ep_port, *root_port, *iter;
1188 struct cxl_region_params *p = &cxlr->params;
1189 struct cxl_dport *dport;
1190 int i, rc = -ENXIO;
1191
1192 if (cxled->mode == CXL_DECODER_DEAD) {
1193 dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
1194 return -ENODEV;
1195 }
1196
1197 /* all full of members, or interleave config not established? */
1198 if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
1199 dev_dbg(&cxlr->dev, "region already active\n");
1200 return -EBUSY;
1201 } else if (p->state < CXL_CONFIG_INTERLEAVE_ACTIVE) {
1202 dev_dbg(&cxlr->dev, "interleave config missing\n");
1203 return -ENXIO;
1204 }
1205
1206 if (pos < 0 || pos >= p->interleave_ways) {
1207 dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1208 p->interleave_ways);
1209 return -ENXIO;
1210 }
1211
1212 if (p->targets[pos] == cxled)
1213 return 0;
1214
1215 if (p->targets[pos]) {
1216 struct cxl_endpoint_decoder *cxled_target = p->targets[pos];
1217 struct cxl_memdev *cxlmd_target = cxled_to_memdev(cxled_target);
1218
1219 dev_dbg(&cxlr->dev, "position %d already assigned to %s:%s\n",
1220 pos, dev_name(&cxlmd_target->dev),
1221 dev_name(&cxled_target->cxld.dev));
1222 return -EBUSY;
1223 }
1224
1225 for (i = 0; i < p->interleave_ways; i++) {
1226 struct cxl_endpoint_decoder *cxled_target;
1227 struct cxl_memdev *cxlmd_target;
1228
1229 cxled_target = p->targets[i];
1230 if (!cxled_target)
1231 continue;
1232
1233 cxlmd_target = cxled_to_memdev(cxled_target);
1234 if (cxlmd_target == cxlmd) {
1235 dev_dbg(&cxlr->dev,
1236 "%s already specified at position %d via: %s\n",
1237 dev_name(&cxlmd->dev), pos,
1238 dev_name(&cxled_target->cxld.dev));
1239 return -EBUSY;
1240 }
1241 }
1242
1243 ep_port = cxled_to_port(cxled);
1244 root_port = cxlrd_to_port(cxlrd);
1245 dport = cxl_find_dport_by_dev(root_port, ep_port->host_bridge);
1246 if (!dport) {
1247 dev_dbg(&cxlr->dev, "%s:%s invalid target for %s\n",
1248 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1249 dev_name(cxlr->dev.parent));
1250 return -ENXIO;
1251 }
1252
1253 if (cxlrd->calc_hb(cxlrd, pos) != dport) {
1254 dev_dbg(&cxlr->dev, "%s:%s invalid target position for %s\n",
1255 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1256 dev_name(&cxlrd->cxlsd.cxld.dev));
1257 return -ENXIO;
1258 }
1259
1260 if (cxled->cxld.target_type != cxlr->type) {
1261 dev_dbg(&cxlr->dev, "%s:%s type mismatch: %d vs %d\n",
1262 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1263 cxled->cxld.target_type, cxlr->type);
1264 return -ENXIO;
1265 }
1266
1267 if (!cxled->dpa_res) {
1268 dev_dbg(&cxlr->dev, "%s:%s: missing DPA allocation.\n",
1269 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev));
1270 return -ENXIO;
1271 }
1272
1273 if (resource_size(cxled->dpa_res) * p->interleave_ways !=
1274 resource_size(p->res)) {
1275 dev_dbg(&cxlr->dev,
1276 "%s:%s: decoder-size-%#llx * ways-%d != region-size-%#llx\n",
1277 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1278 (u64)resource_size(cxled->dpa_res), p->interleave_ways,
1279 (u64)resource_size(p->res));
1280 return -EINVAL;
1281 }
1282
1283 for (iter = ep_port; !is_cxl_root(iter);
1284 iter = to_cxl_port(iter->dev.parent)) {
1285 rc = cxl_port_attach_region(iter, cxlr, cxled, pos);
1286 if (rc)
1287 goto err;
1288 }
1289
1290 p->targets[pos] = cxled;
1291 cxled->pos = pos;
1292 p->nr_targets++;
1293
1294 if (p->nr_targets == p->interleave_ways) {
1295 rc = cxl_region_setup_targets(cxlr);
1296 if (rc)
1297 goto err_decrement;
1298 p->state = CXL_CONFIG_ACTIVE;
1299 }
1300
1301 cxled->cxld.interleave_ways = p->interleave_ways;
1302 cxled->cxld.interleave_granularity = p->interleave_granularity;
1303 cxled->cxld.hpa_range = (struct range) {
1304 .start = p->res->start,
1305 .end = p->res->end,
1306 };
1307
1308 return 0;
1309
1310 err_decrement:
1311 p->nr_targets--;
1312 err:
1313 for (iter = ep_port; !is_cxl_root(iter);
1314 iter = to_cxl_port(iter->dev.parent))
1315 cxl_port_detach_region(iter, cxlr, cxled);
1316 return rc;
1317 }
1318
1319 static int cxl_region_detach(struct cxl_endpoint_decoder *cxled)
1320 {
1321 struct cxl_port *iter, *ep_port = cxled_to_port(cxled);
1322 struct cxl_region *cxlr = cxled->cxld.region;
1323 struct cxl_region_params *p;
1324 int rc = 0;
1325
1326 lockdep_assert_held_write(&cxl_region_rwsem);
1327
1328 if (!cxlr)
1329 return 0;
1330
1331 p = &cxlr->params;
1332 get_device(&cxlr->dev);
1333
1334 if (p->state > CXL_CONFIG_ACTIVE) {
1335 /*
1336 * TODO: tear down all impacted regions if a device is
1337 * removed out of order
1338 */
1339 rc = cxl_region_decode_reset(cxlr, p->interleave_ways);
1340 if (rc)
1341 goto out;
1342 p->state = CXL_CONFIG_ACTIVE;
1343 }
1344
1345 for (iter = ep_port; !is_cxl_root(iter);
1346 iter = to_cxl_port(iter->dev.parent))
1347 cxl_port_detach_region(iter, cxlr, cxled);
1348
1349 if (cxled->pos < 0 || cxled->pos >= p->interleave_ways ||
1350 p->targets[cxled->pos] != cxled) {
1351 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1352
1353 dev_WARN_ONCE(&cxlr->dev, 1, "expected %s:%s at position %d\n",
1354 dev_name(&cxlmd->dev), dev_name(&cxled->cxld.dev),
1355 cxled->pos);
1356 goto out;
1357 }
1358
1359 if (p->state == CXL_CONFIG_ACTIVE) {
1360 p->state = CXL_CONFIG_INTERLEAVE_ACTIVE;
1361 cxl_region_teardown_targets(cxlr);
1362 }
1363 p->targets[cxled->pos] = NULL;
1364 p->nr_targets--;
1365 cxled->cxld.hpa_range = (struct range) {
1366 .start = 0,
1367 .end = -1,
1368 };
1369
1370 /* notify the region driver that one of its targets has departed */
1371 up_write(&cxl_region_rwsem);
1372 device_release_driver(&cxlr->dev);
1373 down_write(&cxl_region_rwsem);
1374 out:
1375 put_device(&cxlr->dev);
1376 return rc;
1377 }
1378
1379 void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
1380 {
1381 down_write(&cxl_region_rwsem);
1382 cxled->mode = CXL_DECODER_DEAD;
1383 cxl_region_detach(cxled);
1384 up_write(&cxl_region_rwsem);
1385 }
1386
1387 static int attach_target(struct cxl_region *cxlr, const char *decoder, int pos)
1388 {
1389 struct device *dev;
1390 int rc;
1391
1392 dev = bus_find_device_by_name(&cxl_bus_type, NULL, decoder);
1393 if (!dev)
1394 return -ENODEV;
1395
1396 if (!is_endpoint_decoder(dev)) {
1397 put_device(dev);
1398 return -EINVAL;
1399 }
1400
1401 rc = down_write_killable(&cxl_region_rwsem);
1402 if (rc)
1403 goto out;
1404 down_read(&cxl_dpa_rwsem);
1405 rc = cxl_region_attach(cxlr, to_cxl_endpoint_decoder(dev), pos);
1406 up_read(&cxl_dpa_rwsem);
1407 up_write(&cxl_region_rwsem);
1408 out:
1409 put_device(dev);
1410 return rc;
1411 }
1412
1413 static int detach_target(struct cxl_region *cxlr, int pos)
1414 {
1415 struct cxl_region_params *p = &cxlr->params;
1416 int rc;
1417
1418 rc = down_write_killable(&cxl_region_rwsem);
1419 if (rc)
1420 return rc;
1421
1422 if (pos >= p->interleave_ways) {
1423 dev_dbg(&cxlr->dev, "position %d out of range %d\n", pos,
1424 p->interleave_ways);
1425 rc = -ENXIO;
1426 goto out;
1427 }
1428
1429 if (!p->targets[pos]) {
1430 rc = 0;
1431 goto out;
1432 }
1433
1434 rc = cxl_region_detach(p->targets[pos]);
1435 out:
1436 up_write(&cxl_region_rwsem);
1437 return rc;
1438 }
1439
1440 static size_t store_targetN(struct cxl_region *cxlr, const char *buf, int pos,
1441 size_t len)
1442 {
1443 int rc;
1444
1445 if (sysfs_streq(buf, "\n"))
1446 rc = detach_target(cxlr, pos);
1447 else
1448 rc = attach_target(cxlr, buf, pos);
1449
1450 if (rc < 0)
1451 return rc;
1452 return len;
1453 }
1454
1455 #define TARGET_ATTR_RW(n) \
1456 static ssize_t target##n##_show( \
1457 struct device *dev, struct device_attribute *attr, char *buf) \
1458 { \
1459 return show_targetN(to_cxl_region(dev), buf, (n)); \
1460 } \
1461 static ssize_t target##n##_store(struct device *dev, \
1462 struct device_attribute *attr, \
1463 const char *buf, size_t len) \
1464 { \
1465 return store_targetN(to_cxl_region(dev), buf, (n), len); \
1466 } \
1467 static DEVICE_ATTR_RW(target##n)
1468
1469 TARGET_ATTR_RW(0);
1470 TARGET_ATTR_RW(1);
1471 TARGET_ATTR_RW(2);
1472 TARGET_ATTR_RW(3);
1473 TARGET_ATTR_RW(4);
1474 TARGET_ATTR_RW(5);
1475 TARGET_ATTR_RW(6);
1476 TARGET_ATTR_RW(7);
1477 TARGET_ATTR_RW(8);
1478 TARGET_ATTR_RW(9);
1479 TARGET_ATTR_RW(10);
1480 TARGET_ATTR_RW(11);
1481 TARGET_ATTR_RW(12);
1482 TARGET_ATTR_RW(13);
1483 TARGET_ATTR_RW(14);
1484 TARGET_ATTR_RW(15);
1485
1486 static struct attribute *target_attrs[] = {
1487 &dev_attr_target0.attr,
1488 &dev_attr_target1.attr,
1489 &dev_attr_target2.attr,
1490 &dev_attr_target3.attr,
1491 &dev_attr_target4.attr,
1492 &dev_attr_target5.attr,
1493 &dev_attr_target6.attr,
1494 &dev_attr_target7.attr,
1495 &dev_attr_target8.attr,
1496 &dev_attr_target9.attr,
1497 &dev_attr_target10.attr,
1498 &dev_attr_target11.attr,
1499 &dev_attr_target12.attr,
1500 &dev_attr_target13.attr,
1501 &dev_attr_target14.attr,
1502 &dev_attr_target15.attr,
1503 NULL,
1504 };
1505
1506 static umode_t cxl_region_target_visible(struct kobject *kobj,
1507 struct attribute *a, int n)
1508 {
1509 struct device *dev = kobj_to_dev(kobj);
1510 struct cxl_region *cxlr = to_cxl_region(dev);
1511 struct cxl_region_params *p = &cxlr->params;
1512
1513 if (n < p->interleave_ways)
1514 return a->mode;
1515 return 0;
1516 }
1517
1518 static const struct attribute_group cxl_region_target_group = {
1519 .attrs = target_attrs,
1520 .is_visible = cxl_region_target_visible,
1521 };
1522
1523 static const struct attribute_group *get_cxl_region_target_group(void)
1524 {
1525 return &cxl_region_target_group;
1526 }
1527
1528 static const struct attribute_group *region_groups[] = {
1529 &cxl_base_attribute_group,
1530 &cxl_region_group,
1531 &cxl_region_target_group,
1532 NULL,
1533 };
1534
1535 static void cxl_region_release(struct device *dev)
1536 {
1537 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev->parent);
1538 struct cxl_region *cxlr = to_cxl_region(dev);
1539 int id = atomic_read(&cxlrd->region_id);
1540
1541 /*
1542 * Try to reuse the recently idled id rather than the cached
1543 * next id to prevent the region id space from increasing
1544 * unnecessarily.
1545 */
1546 if (cxlr->id < id)
1547 if (atomic_try_cmpxchg(&cxlrd->region_id, &id, cxlr->id)) {
1548 memregion_free(id);
1549 goto out;
1550 }
1551
1552 memregion_free(cxlr->id);
1553 out:
1554 put_device(dev->parent);
1555 kfree(cxlr);
1556 }
1557
1558 const struct device_type cxl_region_type = {
1559 .name = "cxl_region",
1560 .release = cxl_region_release,
1561 .groups = region_groups
1562 };
1563
1564 bool is_cxl_region(struct device *dev)
1565 {
1566 return dev->type == &cxl_region_type;
1567 }
1568 EXPORT_SYMBOL_NS_GPL(is_cxl_region, CXL);
1569
1570 static struct cxl_region *to_cxl_region(struct device *dev)
1571 {
1572 if (dev_WARN_ONCE(dev, dev->type != &cxl_region_type,
1573 "not a cxl_region device\n"))
1574 return NULL;
1575
1576 return container_of(dev, struct cxl_region, dev);
1577 }
1578
1579 static void unregister_region(void *dev)
1580 {
1581 struct cxl_region *cxlr = to_cxl_region(dev);
1582 struct cxl_region_params *p = &cxlr->params;
1583 int i;
1584
1585 device_del(dev);
1586
1587 /*
1588 * Now that region sysfs is shut down, the parameter block is
1589 * read-only, so no need to hold the region rwsem to access the
1590 * region parameters.
1591 */
1592 for (i = 0; i < p->interleave_ways; i++)
1593 detach_target(cxlr, i);
1594
1595 cxl_region_iomem_release(cxlr);
1596 put_device(dev);
1597 }
1598
1599 static struct lock_class_key cxl_region_key;
1600
1601 static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int id)
1602 {
1603 struct cxl_region *cxlr;
1604 struct device *dev;
1605
1606 cxlr = kzalloc(sizeof(*cxlr), GFP_KERNEL);
1607 if (!cxlr) {
1608 memregion_free(id);
1609 return ERR_PTR(-ENOMEM);
1610 }
1611
1612 dev = &cxlr->dev;
1613 device_initialize(dev);
1614 lockdep_set_class(&dev->mutex, &cxl_region_key);
1615 dev->parent = &cxlrd->cxlsd.cxld.dev;
1616 /*
1617 * Keep root decoder pinned through cxl_region_release to fixup
1618 * region id allocations
1619 */
1620 get_device(dev->parent);
1621 device_set_pm_not_required(dev);
1622 dev->bus = &cxl_bus_type;
1623 dev->type = &cxl_region_type;
1624 cxlr->id = id;
1625
1626 return cxlr;
1627 }
1628
1629 /**
1630 * devm_cxl_add_region - Adds a region to a decoder
1631 * @cxlrd: root decoder
1632 * @id: memregion id to create, or memregion_free() on failure
1633 * @mode: mode for the endpoint decoders of this region
1634 * @type: select whether this is an expander or accelerator (type-2 or type-3)
1635 *
1636 * This is the second step of region initialization. Regions exist within an
1637 * address space which is mapped by a @cxlrd.
1638 *
1639 * Return: the new region on success, or an ERR_PTR() encoded error code. The
1640 * region will be named "regionZ" where Z is the unique region number.
1641 */
1642 static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
1643 int id,
1644 enum cxl_decoder_mode mode,
1645 enum cxl_decoder_type type)
1646 {
1647 struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
1648 struct cxl_region *cxlr;
1649 struct device *dev;
1650 int rc;
1651
1652 cxlr = cxl_region_alloc(cxlrd, id);
1653 if (IS_ERR(cxlr))
1654 return cxlr;
1655 cxlr->mode = mode;
1656 cxlr->type = type;
1657
1658 dev = &cxlr->dev;
1659 rc = dev_set_name(dev, "region%d", id);
1660 if (rc)
1661 goto err;
1662
1663 rc = device_add(dev);
1664 if (rc)
1665 goto err;
1666
1667 rc = devm_add_action_or_reset(port->uport, unregister_region, cxlr);
1668 if (rc)
1669 return ERR_PTR(rc);
1670
1671 dev_dbg(port->uport, "%s: created %s\n",
1672 dev_name(&cxlrd->cxlsd.cxld.dev), dev_name(dev));
1673 return cxlr;
1674
1675 err:
1676 put_device(dev);
1677 return ERR_PTR(rc);
1678 }
1679
1680 static ssize_t create_pmem_region_show(struct device *dev,
1681 struct device_attribute *attr, char *buf)
1682 {
1683 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
1684
1685 return sysfs_emit(buf, "region%u\n", atomic_read(&cxlrd->region_id));
1686 }
1687
1688 static ssize_t create_pmem_region_store(struct device *dev,
1689 struct device_attribute *attr,
1690 const char *buf, size_t len)
1691 {
1692 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
1693 struct cxl_region *cxlr;
1694 int id, rc;
1695
1696 rc = sscanf(buf, "region%d\n", &id);
1697 if (rc != 1)
1698 return -EINVAL;
1699
1700 rc = memregion_alloc(GFP_KERNEL);
1701 if (rc < 0)
1702 return rc;
1703
1704 if (atomic_cmpxchg(&cxlrd->region_id, id, rc) != id) {
1705 memregion_free(rc);
1706 return -EBUSY;
1707 }
1708
1709 cxlr = devm_cxl_add_region(cxlrd, id, CXL_DECODER_PMEM,
1710 CXL_DECODER_EXPANDER);
1711 if (IS_ERR(cxlr))
1712 return PTR_ERR(cxlr);
1713
1714 return len;
1715 }
1716 DEVICE_ATTR_RW(create_pmem_region);
1717
1718 static ssize_t region_show(struct device *dev, struct device_attribute *attr,
1719 char *buf)
1720 {
1721 struct cxl_decoder *cxld = to_cxl_decoder(dev);
1722 ssize_t rc;
1723
1724 rc = down_read_interruptible(&cxl_region_rwsem);
1725 if (rc)
1726 return rc;
1727
1728 if (cxld->region)
1729 rc = sysfs_emit(buf, "%s\n", dev_name(&cxld->region->dev));
1730 else
1731 rc = sysfs_emit(buf, "\n");
1732 up_read(&cxl_region_rwsem);
1733
1734 return rc;
1735 }
1736 DEVICE_ATTR_RO(region);
1737
1738 static struct cxl_region *
1739 cxl_find_region_by_name(struct cxl_root_decoder *cxlrd, const char *name)
1740 {
1741 struct cxl_decoder *cxld = &cxlrd->cxlsd.cxld;
1742 struct device *region_dev;
1743
1744 region_dev = device_find_child_by_name(&cxld->dev, name);
1745 if (!region_dev)
1746 return ERR_PTR(-ENODEV);
1747
1748 return to_cxl_region(region_dev);
1749 }
1750
1751 static ssize_t delete_region_store(struct device *dev,
1752 struct device_attribute *attr,
1753 const char *buf, size_t len)
1754 {
1755 struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
1756 struct cxl_port *port = to_cxl_port(dev->parent);
1757 struct cxl_region *cxlr;
1758
1759 cxlr = cxl_find_region_by_name(cxlrd, buf);
1760 if (IS_ERR(cxlr))
1761 return PTR_ERR(cxlr);
1762
1763 devm_release_action(port->uport, unregister_region, cxlr);
1764 put_device(&cxlr->dev);
1765
1766 return len;
1767 }
1768 DEVICE_ATTR_WO(delete_region);
1769
1770 static void cxl_pmem_region_release(struct device *dev)
1771 {
1772 struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
1773 int i;
1774
1775 for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
1776 struct cxl_memdev *cxlmd = cxlr_pmem->mapping[i].cxlmd;
1777
1778 put_device(&cxlmd->dev);
1779 }
1780
1781 kfree(cxlr_pmem);
1782 }
1783
1784 static const struct attribute_group *cxl_pmem_region_attribute_groups[] = {
1785 &cxl_base_attribute_group,
1786 NULL,
1787 };
1788
1789 const struct device_type cxl_pmem_region_type = {
1790 .name = "cxl_pmem_region",
1791 .release = cxl_pmem_region_release,
1792 .groups = cxl_pmem_region_attribute_groups,
1793 };
1794
1795 bool is_cxl_pmem_region(struct device *dev)
1796 {
1797 return dev->type == &cxl_pmem_region_type;
1798 }
1799 EXPORT_SYMBOL_NS_GPL(is_cxl_pmem_region, CXL);
1800
1801 struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev)
1802 {
1803 if (dev_WARN_ONCE(dev, !is_cxl_pmem_region(dev),
1804 "not a cxl_pmem_region device\n"))
1805 return NULL;
1806 return container_of(dev, struct cxl_pmem_region, dev);
1807 }
1808 EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, CXL);
1809
1810 static struct lock_class_key cxl_pmem_region_key;
1811
1812 static struct cxl_pmem_region *cxl_pmem_region_alloc(struct cxl_region *cxlr)
1813 {
1814 struct cxl_region_params *p = &cxlr->params;
1815 struct cxl_pmem_region *cxlr_pmem;
1816 struct device *dev;
1817 int i;
1818
1819 down_read(&cxl_region_rwsem);
1820 if (p->state != CXL_CONFIG_COMMIT) {
1821 cxlr_pmem = ERR_PTR(-ENXIO);
1822 goto out;
1823 }
1824
1825 cxlr_pmem = kzalloc(struct_size(cxlr_pmem, mapping, p->nr_targets),
1826 GFP_KERNEL);
1827 if (!cxlr_pmem) {
1828 cxlr_pmem = ERR_PTR(-ENOMEM);
1829 goto out;
1830 }
1831
1832 cxlr_pmem->hpa_range.start = p->res->start;
1833 cxlr_pmem->hpa_range.end = p->res->end;
1834
1835 /* Snapshot the region configuration underneath the cxl_region_rwsem */
1836 cxlr_pmem->nr_mappings = p->nr_targets;
1837 for (i = 0; i < p->nr_targets; i++) {
1838 struct cxl_endpoint_decoder *cxled = p->targets[i];
1839 struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
1840 struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
1841
1842 m->cxlmd = cxlmd;
1843 get_device(&cxlmd->dev);
1844 m->start = cxled->dpa_res->start;
1845 m->size = resource_size(cxled->dpa_res);
1846 m->position = i;
1847 }
1848
1849 dev = &cxlr_pmem->dev;
1850 cxlr_pmem->cxlr = cxlr;
1851 device_initialize(dev);
1852 lockdep_set_class(&dev->mutex, &cxl_pmem_region_key);
1853 device_set_pm_not_required(dev);
1854 dev->parent = &cxlr->dev;
1855 dev->bus = &cxl_bus_type;
1856 dev->type = &cxl_pmem_region_type;
1857 out:
1858 up_read(&cxl_region_rwsem);
1859
1860 return cxlr_pmem;
1861 }
1862
1863 static void cxlr_pmem_unregister(void *dev)
1864 {
1865 device_unregister(dev);
1866 }
1867
1868 /**
1869 * devm_cxl_add_pmem_region() - add a cxl_region-to-nd_region bridge
1870 * @cxlr: parent CXL region for this pmem region bridge device
1871 *
1872 * Return: 0 on success, negative error code on failure.
1873 */
1874 static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
1875 {
1876 struct cxl_pmem_region *cxlr_pmem;
1877 struct device *dev;
1878 int rc;
1879
1880 cxlr_pmem = cxl_pmem_region_alloc(cxlr);
1881 if (IS_ERR(cxlr_pmem))
1882 return PTR_ERR(cxlr_pmem);
1883
1884 dev = &cxlr_pmem->dev;
1885 rc = dev_set_name(dev, "pmem_region%d", cxlr->id);
1886 if (rc)
1887 goto err;
1888
1889 rc = device_add(dev);
1890 if (rc)
1891 goto err;
1892
1893 dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
1894 dev_name(dev));
1895
1896 return devm_add_action_or_reset(&cxlr->dev, cxlr_pmem_unregister, dev);
1897
1898 err:
1899 put_device(dev);
1900 return rc;
1901 }
1902
1903 static int cxl_region_probe(struct device *dev)
1904 {
1905 struct cxl_region *cxlr = to_cxl_region(dev);
1906 struct cxl_region_params *p = &cxlr->params;
1907 int rc;
1908
1909 rc = down_read_interruptible(&cxl_region_rwsem);
1910 if (rc) {
1911 dev_dbg(&cxlr->dev, "probe interrupted\n");
1912 return rc;
1913 }
1914
1915 if (p->state < CXL_CONFIG_COMMIT) {
1916 dev_dbg(&cxlr->dev, "config state: %d\n", p->state);
1917 rc = -ENXIO;
1918 }
1919
1920 /*
1921 * From this point on any path that changes the region's state away from
1922 * CXL_CONFIG_COMMIT is also responsible for releasing the driver.
1923 */
1924 up_read(&cxl_region_rwsem);
1925
1926 switch (cxlr->mode) {
1927 case CXL_DECODER_PMEM:
1928 return devm_cxl_add_pmem_region(cxlr);
1929 default:
1930 dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
1931 cxlr->mode);
1932 return -ENXIO;
1933 }
1934 }
1935
1936 static struct cxl_driver cxl_region_driver = {
1937 .name = "cxl_region",
1938 .probe = cxl_region_probe,
1939 .id = CXL_DEVICE_REGION,
1940 };
1941
1942 int cxl_region_init(void)
1943 {
1944 return cxl_driver_register(&cxl_region_driver);
1945 }
1946
1947 void cxl_region_exit(void)
1948 {
1949 cxl_driver_unregister(&cxl_region_driver);
1950 }
1951
1952 MODULE_IMPORT_NS(CXL);
1953 MODULE_ALIAS_CXL(CXL_DEVICE_REGION);
1954