1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright 2020-21 IBM Corp.
4 */
5
6 #define pr_fmt(fmt) "vas: " fmt
7
8 #include <linux/module.h>
9 #include <linux/kernel.h>
10 #include <linux/export.h>
11 #include <linux/types.h>
12 #include <linux/delay.h>
13 #include <linux/slab.h>
14 #include <linux/interrupt.h>
15 #include <linux/irqdomain.h>
16 #include <asm/machdep.h>
17 #include <asm/hvcall.h>
18 #include <asm/plpar_wrappers.h>
19 #include <asm/firmware.h>
20 #include <asm/vphn.h>
21 #include <asm/vas.h>
22 #include "vas.h"
23
24 #define VAS_INVALID_WIN_ADDRESS 0xFFFFFFFFFFFFFFFFul
25 #define VAS_DEFAULT_DOMAIN_ID 0xFFFFFFFFFFFFFFFFul
26 /* The hypervisor allows one credit per window right now */
27 #define DEF_WIN_CREDS 1
28
29 static struct vas_all_caps caps_all;
30 static bool copypaste_feat;
31 static struct hv_vas_cop_feat_caps hv_cop_caps;
32
33 static struct vas_caps vascaps[VAS_MAX_FEAT_TYPE];
34 static DEFINE_MUTEX(vas_pseries_mutex);
35 static bool migration_in_progress;
36
hcall_return_busy_check(long rc)37 static long hcall_return_busy_check(long rc)
38 {
39 /* Check if we are stalled for some time */
40 if (H_IS_LONG_BUSY(rc)) {
41 msleep(get_longbusy_msecs(rc));
42 rc = H_BUSY;
43 } else if (rc == H_BUSY) {
44 cond_resched();
45 }
46
47 return rc;
48 }
49
50 /*
51 * Allocate VAS window hcall
52 */
h_allocate_vas_window(struct pseries_vas_window * win,u64 * domain,u8 wintype,u16 credits)53 static int h_allocate_vas_window(struct pseries_vas_window *win, u64 *domain,
54 u8 wintype, u16 credits)
55 {
56 long retbuf[PLPAR_HCALL9_BUFSIZE] = {0};
57 long rc;
58
59 do {
60 rc = plpar_hcall9(H_ALLOCATE_VAS_WINDOW, retbuf, wintype,
61 credits, domain[0], domain[1], domain[2],
62 domain[3], domain[4], domain[5]);
63
64 rc = hcall_return_busy_check(rc);
65 } while (rc == H_BUSY);
66
67 if (rc == H_SUCCESS) {
68 if (win->win_addr == VAS_INVALID_WIN_ADDRESS) {
69 pr_err("H_ALLOCATE_VAS_WINDOW: COPY/PASTE is not supported\n");
70 return -ENOTSUPP;
71 }
72 win->vas_win.winid = retbuf[0];
73 win->win_addr = retbuf[1];
74 win->complete_irq = retbuf[2];
75 win->fault_irq = retbuf[3];
76 return 0;
77 }
78
79 pr_err("H_ALLOCATE_VAS_WINDOW error: %ld, wintype: %u, credits: %u\n",
80 rc, wintype, credits);
81
82 return -EIO;
83 }
84
85 /*
86 * Deallocate VAS window hcall.
87 */
h_deallocate_vas_window(u64 winid)88 static int h_deallocate_vas_window(u64 winid)
89 {
90 long rc;
91
92 do {
93 rc = plpar_hcall_norets(H_DEALLOCATE_VAS_WINDOW, winid);
94
95 rc = hcall_return_busy_check(rc);
96 } while (rc == H_BUSY);
97
98 if (rc == H_SUCCESS)
99 return 0;
100
101 pr_err("H_DEALLOCATE_VAS_WINDOW error: %ld, winid: %llu\n",
102 rc, winid);
103 return -EIO;
104 }
105
106 /*
107 * Modify VAS window.
108 * After the window is opened with allocate window hcall, configure it
109 * with flags and LPAR PID before using.
110 */
h_modify_vas_window(struct pseries_vas_window * win)111 static int h_modify_vas_window(struct pseries_vas_window *win)
112 {
113 long rc;
114
115 /*
116 * AMR value is not supported in Linux VAS implementation.
117 * The hypervisor ignores it if 0 is passed.
118 */
119 do {
120 rc = plpar_hcall_norets(H_MODIFY_VAS_WINDOW,
121 win->vas_win.winid, win->pid, 0,
122 VAS_MOD_WIN_FLAGS, 0);
123
124 rc = hcall_return_busy_check(rc);
125 } while (rc == H_BUSY);
126
127 if (rc == H_SUCCESS)
128 return 0;
129
130 pr_err("H_MODIFY_VAS_WINDOW error: %ld, winid %u pid %u\n",
131 rc, win->vas_win.winid, win->pid);
132 return -EIO;
133 }
134
135 /*
136 * This hcall is used to determine the capabilities from the hypervisor.
137 * @hcall: H_QUERY_VAS_CAPABILITIES or H_QUERY_NX_CAPABILITIES
138 * @query_type: If 0 is passed, the hypervisor returns the overall
139 * capabilities which provides all feature(s) that are
140 * available. Then query the hypervisor to get the
141 * corresponding capabilities for the specific feature.
142 * Example: H_QUERY_VAS_CAPABILITIES provides VAS GZIP QoS
143 * and VAS GZIP Default capabilities.
144 * H_QUERY_NX_CAPABILITIES provides NX GZIP
145 * capabilities.
146 * @result: Return buffer to save capabilities.
147 */
h_query_vas_capabilities(const u64 hcall,u8 query_type,u64 result)148 int h_query_vas_capabilities(const u64 hcall, u8 query_type, u64 result)
149 {
150 long rc;
151
152 rc = plpar_hcall_norets(hcall, query_type, result);
153
154 if (rc == H_SUCCESS)
155 return 0;
156
157 /* H_FUNCTION means HV does not support VAS so don't print an error */
158 if (rc != H_FUNCTION) {
159 pr_err("%s error %ld, query_type %u, result buffer 0x%llx\n",
160 (hcall == H_QUERY_VAS_CAPABILITIES) ?
161 "H_QUERY_VAS_CAPABILITIES" :
162 "H_QUERY_NX_CAPABILITIES",
163 rc, query_type, result);
164 }
165
166 return -EIO;
167 }
168 EXPORT_SYMBOL_GPL(h_query_vas_capabilities);
169
170 /*
171 * hcall to get fault CRB from the hypervisor.
172 */
h_get_nx_fault(u32 winid,u64 buffer)173 static int h_get_nx_fault(u32 winid, u64 buffer)
174 {
175 long rc;
176
177 rc = plpar_hcall_norets(H_GET_NX_FAULT, winid, buffer);
178
179 if (rc == H_SUCCESS)
180 return 0;
181
182 pr_err("H_GET_NX_FAULT error: %ld, winid %u, buffer 0x%llx\n",
183 rc, winid, buffer);
184 return -EIO;
185
186 }
187
188 /*
189 * Handle the fault interrupt.
190 * When the fault interrupt is received for each window, query the
191 * hypervisor to get the fault CRB on the specific fault. Then
192 * process the CRB by updating CSB or send signal if the user space
193 * CSB is invalid.
194 * Note: The hypervisor forwards an interrupt for each fault request.
195 * So one fault CRB to process for each H_GET_NX_FAULT hcall.
196 */
pseries_vas_fault_thread_fn(int irq,void * data)197 static irqreturn_t pseries_vas_fault_thread_fn(int irq, void *data)
198 {
199 struct pseries_vas_window *txwin = data;
200 struct coprocessor_request_block crb;
201 struct vas_user_win_ref *tsk_ref;
202 int rc;
203
204 while (atomic_read(&txwin->pending_faults)) {
205 rc = h_get_nx_fault(txwin->vas_win.winid, (u64)virt_to_phys(&crb));
206 if (!rc) {
207 tsk_ref = &txwin->vas_win.task_ref;
208 vas_dump_crb(&crb);
209 vas_update_csb(&crb, tsk_ref);
210 }
211 atomic_dec(&txwin->pending_faults);
212 }
213
214 return IRQ_HANDLED;
215 }
216
217 /*
218 * irq_default_primary_handler() can be used only with IRQF_ONESHOT
219 * which disables IRQ before executing the thread handler and enables
220 * it after. But this disabling interrupt sets the VAS IRQ OFF
221 * state in the hypervisor. If the NX generates fault interrupt
222 * during this window, the hypervisor will not deliver this
223 * interrupt to the LPAR. So use VAS specific IRQ handler instead
224 * of calling the default primary handler.
225 */
pseries_vas_irq_handler(int irq,void * data)226 static irqreturn_t pseries_vas_irq_handler(int irq, void *data)
227 {
228 struct pseries_vas_window *txwin = data;
229
230 /*
231 * The thread hanlder will process this interrupt if it is
232 * already running.
233 */
234 atomic_inc(&txwin->pending_faults);
235
236 return IRQ_WAKE_THREAD;
237 }
238
239 /*
240 * Allocate window and setup IRQ mapping.
241 */
allocate_setup_window(struct pseries_vas_window * txwin,u64 * domain,u8 wintype)242 static int allocate_setup_window(struct pseries_vas_window *txwin,
243 u64 *domain, u8 wintype)
244 {
245 int rc;
246
247 rc = h_allocate_vas_window(txwin, domain, wintype, DEF_WIN_CREDS);
248 if (rc)
249 return rc;
250 /*
251 * On PowerVM, the hypervisor setup and forwards the fault
252 * interrupt per window. So the IRQ setup and fault handling
253 * will be done for each open window separately.
254 */
255 txwin->fault_virq = irq_create_mapping(NULL, txwin->fault_irq);
256 if (!txwin->fault_virq) {
257 pr_err("Failed irq mapping %d\n", txwin->fault_irq);
258 rc = -EINVAL;
259 goto out_win;
260 }
261
262 txwin->name = kasprintf(GFP_KERNEL, "vas-win-%d",
263 txwin->vas_win.winid);
264 if (!txwin->name) {
265 rc = -ENOMEM;
266 goto out_irq;
267 }
268
269 rc = request_threaded_irq(txwin->fault_virq,
270 pseries_vas_irq_handler,
271 pseries_vas_fault_thread_fn, 0,
272 txwin->name, txwin);
273 if (rc) {
274 pr_err("VAS-Window[%d]: Request IRQ(%u) failed with %d\n",
275 txwin->vas_win.winid, txwin->fault_virq, rc);
276 goto out_free;
277 }
278
279 txwin->vas_win.wcreds_max = DEF_WIN_CREDS;
280
281 return 0;
282 out_free:
283 kfree(txwin->name);
284 out_irq:
285 irq_dispose_mapping(txwin->fault_virq);
286 out_win:
287 h_deallocate_vas_window(txwin->vas_win.winid);
288 return rc;
289 }
290
free_irq_setup(struct pseries_vas_window * txwin)291 static inline void free_irq_setup(struct pseries_vas_window *txwin)
292 {
293 free_irq(txwin->fault_virq, txwin);
294 kfree(txwin->name);
295 irq_dispose_mapping(txwin->fault_virq);
296 }
297
/*
 * Open and configure a VAS window for user space.
 *
 * @vas_id:   -1 requests a window on the VAS instance where the calling
 *            process is executing; the domain values are then taken
 *            from H_HOME_NODE_ASSOCIATIVITY.
 * @flags:    VAS_TX_WIN_FLAG_QOS_CREDIT selects the QoS credit type,
 *            otherwise default credits are used.
 * @cop_type: not used by this function.
 *
 * Returns the new window on success or an ERR_PTR() carrying a negative
 * errno: -ENOMEM, -EINVAL when no credit is left, -EBUSY while a
 * migration is in progress or closed windows are waiting to be
 * reopened, -EIO when the associativity hcall fails, or the error of
 * the window setup helpers.
 */
static struct vas_window *vas_allocate_window(int vas_id, u64 flags,
					      enum vas_cop_type cop_type)
{
	long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
	struct vas_cop_feat_caps *cop_feat_caps;
	struct vas_caps *caps;
	struct pseries_vas_window *txwin;
	long hrc;
	int rc;

	txwin = kzalloc(sizeof(*txwin), GFP_KERNEL);
	if (!txwin)
		return ERR_PTR(-ENOMEM);

	/*
	 * A VAS window can have many credits which means that many
	 * requests can be issued simultaneously. But the hypervisor
	 * restricts one credit per window.
	 * The hypervisor introduces 2 different types of credits:
	 * Default credit type (Uses normal priority FIFO):
	 *	A limited number of credits are assigned to partitions
	 *	based on processor entitlement. But these credits may be
	 *	over-committed on a system depends on whether the CPUs
	 *	are in shared or dedicated modes - that is, more requests
	 *	may be issued across the system than NX can service at
	 *	once which can result in paste command failure (RMA_busy).
	 *	Then the process has to resend requests or fall-back to
	 *	SW compression.
	 * Quality of Service (QoS) credit type (Uses high priority FIFO):
	 *	To avoid NX HW contention, the system admins can assign
	 *	QoS credits for each LPAR so that this partition is
	 *	guaranteed access to NX resources. These credits are
	 *	assigned to partitions via the HMC.
	 *	Refer PAPR for more information.
	 *
	 * Allocate window with QoS credits if user requested. Otherwise
	 * default credits are used.
	 */
	if (flags & VAS_TX_WIN_FLAG_QOS_CREDIT)
		caps = &vascaps[VAS_GZIP_QOS_FEAT_TYPE];
	else
		caps = &vascaps[VAS_GZIP_DEF_FEAT_TYPE];

	cop_feat_caps = &caps->caps;

	/* Take the credit first; every failure path below gives it back */
	if (atomic_inc_return(&cop_feat_caps->nr_used_credits) >
			atomic_read(&cop_feat_caps->nr_total_credits)) {
		pr_err("Credits are not available to allocate window\n");
		rc = -EINVAL;
		goto out;
	}

	if (vas_id == -1) {
		/*
		 * The user space is requesting to allocate a window on
		 * a VAS instance where the process is executing.
		 * On PowerVM, domain values are passed to the hypervisor
		 * to select VAS instance. Useful if the process is
		 * affinity to NUMA node.
		 * The hypervisor selects VAS instance if
		 * VAS_DEFAULT_DOMAIN_ID (-1) is passed for domain values.
		 * The h_allocate_vas_window hcall is defined to take a
		 * domain values as specified by h_home_node_associativity,
		 * So no unpacking needs to be done.
		 */
		hrc = plpar_hcall9(H_HOME_NODE_ASSOCIATIVITY, domain,
				   VPHN_FLAG_VCPU, hard_smp_processor_id());
		if (hrc != H_SUCCESS) {
			pr_err("H_HOME_NODE_ASSOCIATIVITY error: %ld\n", hrc);
			/*
			 * Never hand the raw hcall status to ERR_PTR():
			 * it is not an errno, and a positive status
			 * would not even be recognized by IS_ERR().
			 */
			rc = -EIO;
			goto out;
		}
	}

	txwin->pid = mfspr(SPRN_PID);

	/*
	 * Allocate / Deallocate window hcalls and setup / free IRQs
	 * have to be protected with mutex.
	 * Open VAS window: Allocate window hcall and setup IRQ
	 * Close VAS window: Deallocate window hcall and free IRQ
	 *	The hypervisor waits until all NX requests are
	 *	completed before closing the window. So expects OS
	 *	to handle NX faults, means IRQ can be freed only
	 *	after the deallocate window hcall is returned.
	 * So once the window is closed with deallocate hcall before
	 * the IRQ is freed, it can be assigned to new allocate
	 * hcall with the same fault IRQ by the hypervisor. It can
	 * result in setup IRQ fail for the new window since the
	 * same fault IRQ is not freed by the OS before.
	 */
	mutex_lock(&vas_pseries_mutex);
	if (migration_in_progress)
		rc = -EBUSY;
	else
		rc = allocate_setup_window(txwin, (u64 *)&domain[0],
					   cop_feat_caps->win_type);
	mutex_unlock(&vas_pseries_mutex);
	if (rc)
		goto out;

	/*
	 * Modify window and it is ready to use.
	 */
	rc = h_modify_vas_window(txwin);
	if (!rc)
		rc = get_vas_user_win_ref(&txwin->vas_win.task_ref);
	if (rc)
		goto out_free;

	txwin->win_type = cop_feat_caps->win_type;
	mutex_lock(&vas_pseries_mutex);
	/*
	 * Possible to lose the acquired credit with DLPAR core
	 * removal after the window is opened. So if there are any
	 * closed windows (means with lost credits), do not give new
	 * window to user space. New windows will be opened only
	 * after the existing windows are reopened when credits are
	 * available.
	 */
	if (!caps->nr_close_wins) {
		list_add(&txwin->win_list, &caps->list);
		caps->nr_open_windows++;
		mutex_unlock(&vas_pseries_mutex);
		vas_user_win_add_mm_context(&txwin->vas_win.task_ref);
		return &txwin->vas_win;
	}
	mutex_unlock(&vas_pseries_mutex);

	put_vas_user_win_ref(&txwin->vas_win.task_ref);
	rc = -EBUSY;
	pr_err("No credit is available to allocate window\n");

out_free:
	/*
	 * Window is not operational. Free IRQ before closing
	 * window so that do not have to hold mutex.
	 */
	free_irq_setup(txwin);
	h_deallocate_vas_window(txwin->vas_win.winid);
out:
	atomic_dec(&cop_feat_caps->nr_used_credits);
	kfree(txwin);
	return ERR_PTR(rc);
}
441
vas_paste_address(struct vas_window * vwin)442 static u64 vas_paste_address(struct vas_window *vwin)
443 {
444 struct pseries_vas_window *win;
445
446 win = container_of(vwin, struct pseries_vas_window, vas_win);
447 return win->win_addr;
448 }
449
deallocate_free_window(struct pseries_vas_window * win)450 static int deallocate_free_window(struct pseries_vas_window *win)
451 {
452 int rc = 0;
453
454 /*
455 * The hypervisor waits for all requests including faults
456 * are processed before closing the window - Means all
457 * credits have to be returned. In the case of fault
458 * request, a credit is returned after OS issues
459 * H_GET_NX_FAULT hcall.
460 * So free IRQ after executing H_DEALLOCATE_VAS_WINDOW
461 * hcall.
462 */
463 rc = h_deallocate_vas_window(win->vas_win.winid);
464 if (!rc)
465 free_irq_setup(win);
466
467 return rc;
468 }
469
vas_deallocate_window(struct vas_window * vwin)470 static int vas_deallocate_window(struct vas_window *vwin)
471 {
472 struct pseries_vas_window *win;
473 struct vas_cop_feat_caps *caps;
474 int rc = 0;
475
476 if (!vwin)
477 return -EINVAL;
478
479 win = container_of(vwin, struct pseries_vas_window, vas_win);
480
481 /* Should not happen */
482 if (win->win_type >= VAS_MAX_FEAT_TYPE) {
483 pr_err("Window (%u): Invalid window type %u\n",
484 vwin->winid, win->win_type);
485 return -EINVAL;
486 }
487
488 caps = &vascaps[win->win_type].caps;
489 mutex_lock(&vas_pseries_mutex);
490 /*
491 * VAS window is already closed in the hypervisor when
492 * lost the credit or with migration. So just remove the entry
493 * from the list, remove task references and free vas_window
494 * struct.
495 */
496 if (!(win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
497 !(win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
498 rc = deallocate_free_window(win);
499 if (rc) {
500 mutex_unlock(&vas_pseries_mutex);
501 return rc;
502 }
503 } else
504 vascaps[win->win_type].nr_close_wins--;
505
506 list_del(&win->win_list);
507 atomic_dec(&caps->nr_used_credits);
508 vascaps[win->win_type].nr_open_windows--;
509 mutex_unlock(&vas_pseries_mutex);
510
511 mm_context_remove_vas_window(vwin->task_ref.mm);
512 put_vas_user_win_ref(&vwin->task_ref);
513
514 kfree(win);
515 return 0;
516 }
517
518 static const struct vas_user_win_ops vops_pseries = {
519 .open_win = vas_allocate_window, /* Open and configure window */
520 .paste_addr = vas_paste_address, /* To do copy/paste */
521 .close_win = vas_deallocate_window, /* Close window */
522 };
523
524 /*
525 * Supporting only nx-gzip coprocessor type now, but this API code
526 * extended to other coprocessor types later.
527 */
vas_register_api_pseries(struct module * mod,enum vas_cop_type cop_type,const char * name)528 int vas_register_api_pseries(struct module *mod, enum vas_cop_type cop_type,
529 const char *name)
530 {
531 if (!copypaste_feat)
532 return -ENOTSUPP;
533
534 return vas_register_coproc_api(mod, cop_type, name, &vops_pseries);
535 }
536 EXPORT_SYMBOL_GPL(vas_register_api_pseries);
537
/* Counterpart of vas_register_api_pseries(): unregister the coproc API */
void vas_unregister_api_pseries(void)
{
	vas_unregister_coproc_api();
}
EXPORT_SYMBOL_GPL(vas_unregister_api_pseries);
543
544 /*
545 * Get the specific capabilities based on the feature type.
546 * Right now supports GZIP default and GZIP QoS capabilities.
547 */
get_vas_capabilities(u8 feat,enum vas_cop_feat_type type,struct hv_vas_cop_feat_caps * hv_caps)548 static int __init get_vas_capabilities(u8 feat, enum vas_cop_feat_type type,
549 struct hv_vas_cop_feat_caps *hv_caps)
550 {
551 struct vas_cop_feat_caps *caps;
552 struct vas_caps *vcaps;
553 int rc = 0;
554
555 vcaps = &vascaps[type];
556 memset(vcaps, 0, sizeof(*vcaps));
557 INIT_LIST_HEAD(&vcaps->list);
558
559 vcaps->feat = feat;
560 caps = &vcaps->caps;
561
562 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, feat,
563 (u64)virt_to_phys(hv_caps));
564 if (rc)
565 return rc;
566
567 caps->user_mode = hv_caps->user_mode;
568 if (!(caps->user_mode & VAS_COPY_PASTE_USER_MODE)) {
569 pr_err("User space COPY/PASTE is not supported\n");
570 return -ENOTSUPP;
571 }
572
573 caps->descriptor = be64_to_cpu(hv_caps->descriptor);
574 caps->win_type = hv_caps->win_type;
575 if (caps->win_type >= VAS_MAX_FEAT_TYPE) {
576 pr_err("Unsupported window type %u\n", caps->win_type);
577 return -EINVAL;
578 }
579 caps->max_lpar_creds = be16_to_cpu(hv_caps->max_lpar_creds);
580 caps->max_win_creds = be16_to_cpu(hv_caps->max_win_creds);
581 atomic_set(&caps->nr_total_credits,
582 be16_to_cpu(hv_caps->target_lpar_creds));
583 if (feat == VAS_GZIP_DEF_FEAT) {
584 caps->def_lpar_creds = be16_to_cpu(hv_caps->def_lpar_creds);
585
586 if (caps->max_win_creds < DEF_WIN_CREDS) {
587 pr_err("Window creds(%u) > max allowed window creds(%u)\n",
588 DEF_WIN_CREDS, caps->max_win_creds);
589 return -EINVAL;
590 }
591 }
592
593 rc = sysfs_add_vas_caps(caps);
594 if (rc)
595 return rc;
596
597 copypaste_feat = true;
598
599 return 0;
600 }
601
602 /*
603 * VAS windows can be closed due to lost credits when the core is
604 * removed. So reopen them if credits are available due to DLPAR
605 * core add and set the window active status. When NX sees the page
606 * fault on the unmapped paste address, the kernel handles the fault
607 * by setting the remapping to new paste address if the window is
608 * active.
609 */
reconfig_open_windows(struct vas_caps * vcaps,int creds,bool migrate)610 static int reconfig_open_windows(struct vas_caps *vcaps, int creds,
611 bool migrate)
612 {
613 long domain[PLPAR_HCALL9_BUFSIZE] = {VAS_DEFAULT_DOMAIN_ID};
614 struct vas_cop_feat_caps *caps = &vcaps->caps;
615 struct pseries_vas_window *win = NULL, *tmp;
616 int rc, mv_ents = 0;
617 int flag;
618
619 /*
620 * Nothing to do if there are no closed windows.
621 */
622 if (!vcaps->nr_close_wins)
623 return 0;
624
625 /*
626 * For the core removal, the hypervisor reduces the credits
627 * assigned to the LPAR and the kernel closes VAS windows
628 * in the hypervisor depends on reduced credits. The kernel
629 * uses LIFO (the last windows that are opened will be closed
630 * first) and expects to open in the same order when credits
631 * are available.
632 * For example, 40 windows are closed when the LPAR lost 2 cores
633 * (dedicated). If 1 core is added, this LPAR can have 20 more
634 * credits. It means the kernel can reopen 20 windows. So move
635 * 20 entries in the VAS windows lost and reopen next 20 windows.
636 * For partition migration, reopen all windows that are closed
637 * during resume.
638 */
639 if ((vcaps->nr_close_wins > creds) && !migrate)
640 mv_ents = vcaps->nr_close_wins - creds;
641
642 list_for_each_entry_safe(win, tmp, &vcaps->list, win_list) {
643 if (!mv_ents)
644 break;
645
646 mv_ents--;
647 }
648
649 /*
650 * Open windows if they are closed only with migration or
651 * DLPAR (lost credit) before.
652 */
653 if (migrate)
654 flag = VAS_WIN_MIGRATE_CLOSE;
655 else
656 flag = VAS_WIN_NO_CRED_CLOSE;
657
658 list_for_each_entry_safe_from(win, tmp, &vcaps->list, win_list) {
659 /*
660 * This window is closed with DLPAR and migration events.
661 * So reopen the window with the last event.
662 * The user space is not suspended with the current
663 * migration notifier. So the user space can issue DLPAR
664 * CPU hotplug while migration in progress. In this case
665 * this window will be opened with the last event.
666 */
667 if ((win->vas_win.status & VAS_WIN_NO_CRED_CLOSE) &&
668 (win->vas_win.status & VAS_WIN_MIGRATE_CLOSE)) {
669 win->vas_win.status &= ~flag;
670 continue;
671 }
672
673 /*
674 * Nothing to do on this window if it is not closed
675 * with this flag
676 */
677 if (!(win->vas_win.status & flag))
678 continue;
679
680 rc = allocate_setup_window(win, (u64 *)&domain[0],
681 caps->win_type);
682 if (rc)
683 return rc;
684
685 rc = h_modify_vas_window(win);
686 if (rc)
687 goto out;
688
689 mutex_lock(&win->vas_win.task_ref.mmap_mutex);
690 /*
691 * Set window status to active
692 */
693 win->vas_win.status &= ~flag;
694 mutex_unlock(&win->vas_win.task_ref.mmap_mutex);
695 win->win_type = caps->win_type;
696 if (!--vcaps->nr_close_wins)
697 break;
698 }
699
700 return 0;
701 out:
702 /*
703 * Window modify HCALL failed. So close the window to the
704 * hypervisor and return.
705 */
706 free_irq_setup(win);
707 h_deallocate_vas_window(win->vas_win.winid);
708 return rc;
709 }
710
711 /*
712 * The hypervisor reduces the available credits if the LPAR lost core. It
713 * means the excessive windows should not be active and the user space
714 * should not be using these windows to send compression requests to NX.
715 * So the kernel closes the excessive windows and unmap the paste address
716 * such that the user space receives paste instruction failure. Then up to
717 * the user space to fall back to SW compression and manage with the
718 * existing windows.
719 */
reconfig_close_windows(struct vas_caps * vcap,int excess_creds,bool migrate)720 static int reconfig_close_windows(struct vas_caps *vcap, int excess_creds,
721 bool migrate)
722 {
723 struct pseries_vas_window *win, *tmp;
724 struct vas_user_win_ref *task_ref;
725 struct vm_area_struct *vma;
726 int rc = 0, flag;
727
728 if (migrate)
729 flag = VAS_WIN_MIGRATE_CLOSE;
730 else
731 flag = VAS_WIN_NO_CRED_CLOSE;
732
733 list_for_each_entry_safe(win, tmp, &vcap->list, win_list) {
734 /*
735 * This window is already closed due to lost credit
736 * or for migration before. Go for next window.
737 * For migration, nothing to do since this window
738 * closed for DLPAR and will be reopened even on
739 * the destination system with other DLPAR operation.
740 */
741 if ((win->vas_win.status & VAS_WIN_MIGRATE_CLOSE) ||
742 (win->vas_win.status & VAS_WIN_NO_CRED_CLOSE)) {
743 win->vas_win.status |= flag;
744 continue;
745 }
746
747 task_ref = &win->vas_win.task_ref;
748 /*
749 * VAS mmap (coproc_mmap()) and its fault handler
750 * (vas_mmap_fault()) are called after holding mmap lock.
751 * So hold mmap mutex after mmap_lock to avoid deadlock.
752 */
753 mmap_write_lock(task_ref->mm);
754 mutex_lock(&task_ref->mmap_mutex);
755 vma = task_ref->vma;
756 /*
757 * Number of available credits are reduced, So select
758 * and close windows.
759 */
760 win->vas_win.status |= flag;
761
762 /*
763 * vma is set in the original mapping. But this mapping
764 * is done with mmap() after the window is opened with ioctl.
765 * so we may not see the original mapping if the core remove
766 * is done before the original mmap() and after the ioctl.
767 */
768 if (vma)
769 zap_vma_pages(vma);
770
771 mutex_unlock(&task_ref->mmap_mutex);
772 mmap_write_unlock(task_ref->mm);
773 /*
774 * Close VAS window in the hypervisor, but do not
775 * free vas_window struct since it may be reused
776 * when the credit is available later (DLPAR with
777 * adding cores). This struct will be used
778 * later when the process issued with close(FD).
779 */
780 rc = deallocate_free_window(win);
781 /*
782 * This failure is from the hypervisor.
783 * No way to stop migration for these failures.
784 * So ignore error and continue closing other windows.
785 */
786 if (rc && !migrate)
787 return rc;
788
789 vcap->nr_close_wins++;
790
791 /*
792 * For migration, do not depend on lpar_creds in case if
793 * mismatch with the hypervisor value (should not happen).
794 * So close all active windows in the list and will be
795 * reopened windows based on the new lpar_creds on the
796 * destination system during resume.
797 */
798 if (!migrate && !--excess_creds)
799 break;
800 }
801
802 return 0;
803 }
804
805 /*
806 * Get new VAS capabilities when the core add/removal configuration
807 * changes. Reconfig window configurations based on the credits
808 * availability from this new capabilities.
809 */
vas_reconfig_capabilties(u8 type,int new_nr_creds)810 int vas_reconfig_capabilties(u8 type, int new_nr_creds)
811 {
812 struct vas_cop_feat_caps *caps;
813 int old_nr_creds;
814 struct vas_caps *vcaps;
815 int rc = 0, nr_active_wins;
816
817 if (type >= VAS_MAX_FEAT_TYPE) {
818 pr_err("Invalid credit type %d\n", type);
819 return -EINVAL;
820 }
821
822 vcaps = &vascaps[type];
823 caps = &vcaps->caps;
824
825 mutex_lock(&vas_pseries_mutex);
826
827 old_nr_creds = atomic_read(&caps->nr_total_credits);
828
829 atomic_set(&caps->nr_total_credits, new_nr_creds);
830 /*
831 * The total number of available credits may be decreased or
832 * increased with DLPAR operation. Means some windows have to be
833 * closed / reopened. Hold the vas_pseries_mutex so that the
834 * user space can not open new windows.
835 */
836 if (old_nr_creds < new_nr_creds) {
837 /*
838 * If the existing target credits is less than the new
839 * target, reopen windows if they are closed due to
840 * the previous DLPAR (core removal).
841 */
842 rc = reconfig_open_windows(vcaps, new_nr_creds - old_nr_creds,
843 false);
844 } else {
845 /*
846 * # active windows is more than new LPAR available
847 * credits. So close the excessive windows.
848 * On pseries, each window will have 1 credit.
849 */
850 nr_active_wins = vcaps->nr_open_windows - vcaps->nr_close_wins;
851 if (nr_active_wins > new_nr_creds)
852 rc = reconfig_close_windows(vcaps,
853 nr_active_wins - new_nr_creds,
854 false);
855 }
856
857 mutex_unlock(&vas_pseries_mutex);
858 return rc;
859 }
860
pseries_vas_dlpar_cpu(void)861 int pseries_vas_dlpar_cpu(void)
862 {
863 int new_nr_creds, rc;
864
865 /*
866 * NX-GZIP is not enabled. Nothing to do for DLPAR event
867 */
868 if (!copypaste_feat)
869 return 0;
870
871
872 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
873 vascaps[VAS_GZIP_DEF_FEAT_TYPE].feat,
874 (u64)virt_to_phys(&hv_cop_caps));
875 if (!rc) {
876 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
877 rc = vas_reconfig_capabilties(VAS_GZIP_DEF_FEAT_TYPE, new_nr_creds);
878 }
879
880 if (rc)
881 pr_err("Failed reconfig VAS capabilities with DLPAR\n");
882
883 return rc;
884 }
885
886 /*
887 * Total number of default credits available (target_credits)
888 * in LPAR depends on number of cores configured. It varies based on
889 * whether processors are in shared mode or dedicated mode.
890 * Get the notifier when CPU configuration is changed with DLPAR
891 * operation so that get the new target_credits (vas default capabilities)
892 * and then update the existing windows usage if needed.
893 */
pseries_vas_notifier(struct notifier_block * nb,unsigned long action,void * data)894 static int pseries_vas_notifier(struct notifier_block *nb,
895 unsigned long action, void *data)
896 {
897 struct of_reconfig_data *rd = data;
898 struct device_node *dn = rd->dn;
899 const __be32 *intserv = NULL;
900 int len;
901
902 /*
903 * For shared CPU partition, the hypervisor assigns total credits
904 * based on entitled core capacity. So updating VAS windows will
905 * be called from lparcfg_write().
906 */
907 if (is_shared_processor())
908 return NOTIFY_OK;
909
910 if ((action == OF_RECONFIG_ATTACH_NODE) ||
911 (action == OF_RECONFIG_DETACH_NODE))
912 intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s",
913 &len);
914 /*
915 * Processor config is not changed
916 */
917 if (!intserv)
918 return NOTIFY_OK;
919
920 return pseries_vas_dlpar_cpu();
921 }
922
923 static struct notifier_block pseries_vas_nb = {
924 .notifier_call = pseries_vas_notifier,
925 };
926
927 /*
928 * For LPM, all windows have to be closed on the source partition
929 * before migration and reopen them on the destination partition
930 * after migration. So closing windows during suspend and
931 * reopen them during resume.
932 */
vas_migration_handler(int action)933 int vas_migration_handler(int action)
934 {
935 struct vas_cop_feat_caps *caps;
936 int old_nr_creds, new_nr_creds = 0;
937 struct vas_caps *vcaps;
938 int i, rc = 0;
939
940 /*
941 * NX-GZIP is not enabled. Nothing to do for migration.
942 */
943 if (!copypaste_feat)
944 return rc;
945
946 mutex_lock(&vas_pseries_mutex);
947
948 if (action == VAS_SUSPEND)
949 migration_in_progress = true;
950 else
951 migration_in_progress = false;
952
953 for (i = 0; i < VAS_MAX_FEAT_TYPE; i++) {
954 vcaps = &vascaps[i];
955 caps = &vcaps->caps;
956 old_nr_creds = atomic_read(&caps->nr_total_credits);
957
958 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES,
959 vcaps->feat,
960 (u64)virt_to_phys(&hv_cop_caps));
961 if (!rc) {
962 new_nr_creds = be16_to_cpu(hv_cop_caps.target_lpar_creds);
963 /*
964 * Should not happen. But incase print messages, close
965 * all windows in the list during suspend and reopen
966 * windows based on new lpar_creds on the destination
967 * system.
968 */
969 if (old_nr_creds != new_nr_creds) {
970 pr_err("Target credits mismatch with the hypervisor\n");
971 pr_err("state(%d): lpar creds: %d HV lpar creds: %d\n",
972 action, old_nr_creds, new_nr_creds);
973 pr_err("Used creds: %d, Active creds: %d\n",
974 atomic_read(&caps->nr_used_credits),
975 vcaps->nr_open_windows - vcaps->nr_close_wins);
976 }
977 } else {
978 pr_err("state(%d): Get VAS capabilities failed with %d\n",
979 action, rc);
980 /*
981 * We can not stop migration with the current lpm
982 * implementation. So continue closing all windows in
983 * the list (during suspend) and return without
984 * opening windows (during resume) if VAS capabilities
985 * HCALL failed.
986 */
987 if (action == VAS_RESUME)
988 goto out;
989 }
990
991 switch (action) {
992 case VAS_SUSPEND:
993 rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows,
994 true);
995 break;
996 case VAS_RESUME:
997 atomic_set(&caps->nr_total_credits, new_nr_creds);
998 rc = reconfig_open_windows(vcaps, new_nr_creds, true);
999 break;
1000 default:
1001 /* should not happen */
1002 pr_err("Invalid migration action %d\n", action);
1003 rc = -EINVAL;
1004 goto out;
1005 }
1006
1007 /*
1008 * Ignore errors during suspend and return for resume.
1009 */
1010 if (rc && (action == VAS_RESUME))
1011 goto out;
1012 }
1013
1014 out:
1015 mutex_unlock(&vas_pseries_mutex);
1016 return rc;
1017 }
1018
pseries_vas_init(void)1019 static int __init pseries_vas_init(void)
1020 {
1021 struct hv_vas_all_caps *hv_caps;
1022 int rc = 0;
1023
1024 /*
1025 * Linux supports user space COPY/PASTE only with Radix
1026 */
1027 if (!radix_enabled()) {
1028 copypaste_feat = false;
1029 pr_err("API is supported only with radix page tables\n");
1030 return -ENOTSUPP;
1031 }
1032
1033 hv_caps = kmalloc(sizeof(*hv_caps), GFP_KERNEL);
1034 if (!hv_caps)
1035 return -ENOMEM;
1036 /*
1037 * Get VAS overall capabilities by passing 0 to feature type.
1038 */
1039 rc = h_query_vas_capabilities(H_QUERY_VAS_CAPABILITIES, 0,
1040 (u64)virt_to_phys(hv_caps));
1041 if (rc)
1042 goto out;
1043
1044 caps_all.descriptor = be64_to_cpu(hv_caps->descriptor);
1045 caps_all.feat_type = be64_to_cpu(hv_caps->feat_type);
1046
1047 sysfs_pseries_vas_init(&caps_all);
1048
1049 /*
1050 * QOS capabilities available
1051 */
1052 if (caps_all.feat_type & VAS_GZIP_QOS_FEAT_BIT) {
1053 rc = get_vas_capabilities(VAS_GZIP_QOS_FEAT,
1054 VAS_GZIP_QOS_FEAT_TYPE, &hv_cop_caps);
1055
1056 if (rc)
1057 goto out;
1058 }
1059 /*
1060 * Default capabilities available
1061 */
1062 if (caps_all.feat_type & VAS_GZIP_DEF_FEAT_BIT)
1063 rc = get_vas_capabilities(VAS_GZIP_DEF_FEAT,
1064 VAS_GZIP_DEF_FEAT_TYPE, &hv_cop_caps);
1065
1066 if (!rc && copypaste_feat) {
1067 if (firmware_has_feature(FW_FEATURE_LPAR))
1068 of_reconfig_notifier_register(&pseries_vas_nb);
1069
1070 pr_info("GZIP feature is available\n");
1071 } else {
1072 /*
1073 * Should not happen, but only when get default
1074 * capabilities HCALL failed. So disable copy paste
1075 * feature.
1076 */
1077 copypaste_feat = false;
1078 }
1079
1080 out:
1081 kfree(hv_caps);
1082 return rc;
1083 }
1084 machine_device_initcall(pseries, pseries_vas_init);
1085