1 // SPDX-License-Identifier: GPL-2.0 OR MIT
2 /**************************************************************************
3 *
4 * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27 #include "vmwgfx_drv.h"
28
/*
 * Strategies used to find out which pages of a buffer object were written:
 * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
 * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record
 * write accesses in the VM mkwrite() callback
 */
enum vmw_bo_dirty_method {
	VMW_BO_DIRTY_PAGETABLE = 0,
	VMW_BO_DIRTY_MKWRITE = 1,
};
39
/*
 * A scan that finds no dirtied pages is a trigger for moving to the
 * _MKWRITE method, and a scan that finds more than a certain percentage
 * of dirty pages is a trigger for moving to the _PAGETABLE method. The
 * method is only changed once the number of consecutive triggers exceeds
 * this value.
 */
#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2

/* Dirty-page percentage that counts as a trigger towards _PAGETABLE */
#define VMW_DIRTY_PERCENTAGE 10
50
51 /**
52 * struct vmw_bo_dirty - Dirty information for buffer objects
53 * @start: First currently dirty bit
54 * @end: Last currently dirty bit + 1
55 * @method: The currently used dirty method
56 * @change_count: Number of consecutive method change triggers
57 * @ref_count: Reference count for this structure
58 * @bitmap_size: The size of the bitmap in bits. Typically equal to the
59 * nuber of pages in the bo.
60 * @size: The accounting size for this struct.
61 * @bitmap: A bitmap where each bit represents a page. A set bit means a
62 * dirty page.
63 */
64 struct vmw_bo_dirty {
65 unsigned long start;
66 unsigned long end;
67 enum vmw_bo_dirty_method method;
68 unsigned int change_count;
69 unsigned int ref_count;
70 unsigned long bitmap_size;
71 size_t size;
72 unsigned long bitmap[];
73 };
74
75 /**
76 * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits
77 * @vbo: The buffer object to scan
78 *
79 * Scans the pagetable for dirty bits. Clear those bits and modify the
80 * dirty structure with the results. This function may change the
81 * dirty-tracking method.
82 */
vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object * vbo)83 static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo)
84 {
85 struct vmw_bo_dirty *dirty = vbo->dirty;
86 pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
87 struct address_space *mapping = vbo->base.bdev->dev_mapping;
88 pgoff_t num_marked;
89
90 num_marked = clean_record_shared_mapping_range
91 (mapping,
92 offset, dirty->bitmap_size,
93 offset, &dirty->bitmap[0],
94 &dirty->start, &dirty->end);
95 if (num_marked == 0)
96 dirty->change_count++;
97 else
98 dirty->change_count = 0;
99
100 if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
101 dirty->change_count = 0;
102 dirty->method = VMW_BO_DIRTY_MKWRITE;
103 wp_shared_mapping_range(mapping,
104 offset, dirty->bitmap_size);
105 clean_record_shared_mapping_range(mapping,
106 offset, dirty->bitmap_size,
107 offset, &dirty->bitmap[0],
108 &dirty->start, &dirty->end);
109 }
110 }
111
112 /**
113 * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method
114 * @vbo: The buffer object to scan
115 *
116 * Write-protect pages written to so that consecutive write accesses will
117 * trigger a call to mkwrite.
118 *
119 * This function may change the dirty-tracking method.
120 */
vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object * vbo)121 static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
122 {
123 struct vmw_bo_dirty *dirty = vbo->dirty;
124 unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
125 struct address_space *mapping = vbo->base.bdev->dev_mapping;
126 pgoff_t num_marked;
127
128 if (dirty->end <= dirty->start)
129 return;
130
131 num_marked = wp_shared_mapping_range(vbo->base.bdev->dev_mapping,
132 dirty->start + offset,
133 dirty->end - dirty->start);
134
135 if (100UL * num_marked / dirty->bitmap_size >
136 VMW_DIRTY_PERCENTAGE) {
137 dirty->change_count++;
138 } else {
139 dirty->change_count = 0;
140 }
141
142 if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
143 pgoff_t start = 0;
144 pgoff_t end = dirty->bitmap_size;
145
146 dirty->method = VMW_BO_DIRTY_PAGETABLE;
147 clean_record_shared_mapping_range(mapping, offset, end, offset,
148 &dirty->bitmap[0],
149 &start, &end);
150 bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size);
151 if (dirty->start < dirty->end)
152 bitmap_set(&dirty->bitmap[0], dirty->start,
153 dirty->end - dirty->start);
154 dirty->change_count = 0;
155 }
156 }
157
158 /**
159 * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
160 * tracking structure
161 * @vbo: The buffer object to scan
162 *
163 * This function may change the dirty tracking method.
164 */
vmw_bo_dirty_scan(struct vmw_buffer_object * vbo)165 void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
166 {
167 struct vmw_bo_dirty *dirty = vbo->dirty;
168
169 if (dirty->method == VMW_BO_DIRTY_PAGETABLE)
170 vmw_bo_dirty_scan_pagetable(vbo);
171 else
172 vmw_bo_dirty_scan_mkwrite(vbo);
173 }
174
175 /**
176 * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
177 * an unmap_mapping_range operation.
178 * @vbo: The buffer object,
179 * @start: First page of the range within the buffer object.
180 * @end: Last page of the range within the buffer object + 1.
181 *
182 * If we're using the _PAGETABLE scan method, we may leak dirty pages
183 * when calling unmap_mapping_range(). This function makes sure we pick
184 * up all dirty pages.
185 */
vmw_bo_dirty_pre_unmap(struct vmw_buffer_object * vbo,pgoff_t start,pgoff_t end)186 static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
187 pgoff_t start, pgoff_t end)
188 {
189 struct vmw_bo_dirty *dirty = vbo->dirty;
190 unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
191 struct address_space *mapping = vbo->base.bdev->dev_mapping;
192
193 if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
194 return;
195
196 wp_shared_mapping_range(mapping, start + offset, end - start);
197 clean_record_shared_mapping_range(mapping, start + offset,
198 end - start, offset,
199 &dirty->bitmap[0], &dirty->start,
200 &dirty->end);
201 }
202
203 /**
204 * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
205 * @vbo: The buffer object,
206 * @start: First page of the range within the buffer object.
207 * @end: Last page of the range within the buffer object + 1.
208 *
209 * This is similar to ttm_bo_unmap_virtual() except it takes a subrange.
210 */
vmw_bo_dirty_unmap(struct vmw_buffer_object * vbo,pgoff_t start,pgoff_t end)211 void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
212 pgoff_t start, pgoff_t end)
213 {
214 unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
215 struct address_space *mapping = vbo->base.bdev->dev_mapping;
216
217 vmw_bo_dirty_pre_unmap(vbo, start, end);
218 unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
219 (loff_t) (end - start) << PAGE_SHIFT);
220 }
221
222 /**
223 * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
224 * @vbo: The buffer object
225 *
226 * This function registers a dirty-tracking user to a buffer object.
227 * A user can be for example a resource or a vma in a special user-space
228 * mapping.
229 *
230 * Return: Zero on success, -ENOMEM on memory allocation failure.
231 */
vmw_bo_dirty_add(struct vmw_buffer_object * vbo)232 int vmw_bo_dirty_add(struct vmw_buffer_object *vbo)
233 {
234 struct vmw_bo_dirty *dirty = vbo->dirty;
235 pgoff_t num_pages = vbo->base.resource->num_pages;
236 size_t size, acc_size;
237 int ret;
238 static struct ttm_operation_ctx ctx = {
239 .interruptible = false,
240 .no_wait_gpu = false
241 };
242
243 if (dirty) {
244 dirty->ref_count++;
245 return 0;
246 }
247
248 size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long);
249 acc_size = ttm_round_pot(size);
250 ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx);
251 if (ret) {
252 VMW_DEBUG_USER("Out of graphics memory for buffer object "
253 "dirty tracker.\n");
254 return ret;
255 }
256 dirty = kvzalloc(size, GFP_KERNEL);
257 if (!dirty) {
258 ret = -ENOMEM;
259 goto out_no_dirty;
260 }
261
262 dirty->size = acc_size;
263 dirty->bitmap_size = num_pages;
264 dirty->start = dirty->bitmap_size;
265 dirty->end = 0;
266 dirty->ref_count = 1;
267 if (num_pages < PAGE_SIZE / sizeof(pte_t)) {
268 dirty->method = VMW_BO_DIRTY_PAGETABLE;
269 } else {
270 struct address_space *mapping = vbo->base.bdev->dev_mapping;
271 pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
272
273 dirty->method = VMW_BO_DIRTY_MKWRITE;
274
275 /* Write-protect and then pick up already dirty bits */
276 wp_shared_mapping_range(mapping, offset, num_pages);
277 clean_record_shared_mapping_range(mapping, offset, num_pages,
278 offset,
279 &dirty->bitmap[0],
280 &dirty->start, &dirty->end);
281 }
282
283 vbo->dirty = dirty;
284
285 return 0;
286
287 out_no_dirty:
288 ttm_mem_global_free(&ttm_mem_glob, acc_size);
289 return ret;
290 }
291
292 /**
293 * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
294 * @vbo: The buffer object
295 *
296 * This function releases a dirty-tracking user from a buffer object.
297 * If the reference count reaches zero, then the dirty-tracking object is
298 * freed and the pointer to it cleared.
299 *
300 * Return: Zero on success, -ENOMEM on memory allocation failure.
301 */
vmw_bo_dirty_release(struct vmw_buffer_object * vbo)302 void vmw_bo_dirty_release(struct vmw_buffer_object *vbo)
303 {
304 struct vmw_bo_dirty *dirty = vbo->dirty;
305
306 if (dirty && --dirty->ref_count == 0) {
307 size_t acc_size = dirty->size;
308
309 kvfree(dirty);
310 ttm_mem_global_free(&ttm_mem_glob, acc_size);
311 vbo->dirty = NULL;
312 }
313 }
314
315 /**
316 * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from
317 * its backing mob.
318 * @res: The resource
319 *
320 * This function will pick up all dirty ranges affecting the resource from
321 * it's backup mob, and call vmw_resource_dirty_update() once for each
322 * range. The transferred ranges will be cleared from the backing mob's
323 * dirty tracking.
324 */
vmw_bo_dirty_transfer_to_res(struct vmw_resource * res)325 void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res)
326 {
327 struct vmw_buffer_object *vbo = res->backup;
328 struct vmw_bo_dirty *dirty = vbo->dirty;
329 pgoff_t start, cur, end;
330 unsigned long res_start = res->backup_offset;
331 unsigned long res_end = res->backup_offset + res->backup_size;
332
333 WARN_ON_ONCE(res_start & ~PAGE_MASK);
334 res_start >>= PAGE_SHIFT;
335 res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
336
337 if (res_start >= dirty->end || res_end <= dirty->start)
338 return;
339
340 cur = max(res_start, dirty->start);
341 res_end = max(res_end, dirty->end);
342 while (cur < res_end) {
343 unsigned long num;
344
345 start = find_next_bit(&dirty->bitmap[0], res_end, cur);
346 if (start >= res_end)
347 break;
348
349 end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1);
350 cur = end + 1;
351 num = end - start;
352 bitmap_clear(&dirty->bitmap[0], start, num);
353 vmw_resource_dirty_update(res, start, end);
354 }
355
356 if (res_start <= dirty->start && res_end > dirty->start)
357 dirty->start = res_end;
358 if (res_start < dirty->end && res_end >= dirty->end)
359 dirty->end = res_start;
360 }
361
362 /**
363 * vmw_bo_dirty_clear_res - Clear a resource's dirty region from
364 * its backing mob.
365 * @res: The resource
366 *
367 * This function will clear all dirty ranges affecting the resource from
368 * it's backup mob's dirty tracking.
369 */
vmw_bo_dirty_clear_res(struct vmw_resource * res)370 void vmw_bo_dirty_clear_res(struct vmw_resource *res)
371 {
372 unsigned long res_start = res->backup_offset;
373 unsigned long res_end = res->backup_offset + res->backup_size;
374 struct vmw_buffer_object *vbo = res->backup;
375 struct vmw_bo_dirty *dirty = vbo->dirty;
376
377 res_start >>= PAGE_SHIFT;
378 res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
379
380 if (res_start >= dirty->end || res_end <= dirty->start)
381 return;
382
383 res_start = max(res_start, dirty->start);
384 res_end = min(res_end, dirty->end);
385 bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start);
386
387 if (res_start <= dirty->start && res_end > dirty->start)
388 dirty->start = res_end;
389 if (res_start < dirty->end && res_end >= dirty->end)
390 dirty->end = res_start;
391 }
392
vmw_bo_vm_mkwrite(struct vm_fault * vmf)393 vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf)
394 {
395 struct vm_area_struct *vma = vmf->vma;
396 struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
397 vma->vm_private_data;
398 vm_fault_t ret;
399 unsigned long page_offset;
400 unsigned int save_flags;
401 struct vmw_buffer_object *vbo =
402 container_of(bo, typeof(*vbo), base);
403
404 /*
405 * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly.
406 * So make sure the TTM helpers are aware.
407 */
408 save_flags = vmf->flags;
409 vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
410 ret = ttm_bo_vm_reserve(bo, vmf);
411 vmf->flags = save_flags;
412 if (ret)
413 return ret;
414
415 page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node);
416 if (unlikely(page_offset >= bo->resource->num_pages)) {
417 ret = VM_FAULT_SIGBUS;
418 goto out_unlock;
419 }
420
421 if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE &&
422 !test_bit(page_offset, &vbo->dirty->bitmap[0])) {
423 struct vmw_bo_dirty *dirty = vbo->dirty;
424
425 __set_bit(page_offset, &dirty->bitmap[0]);
426 dirty->start = min(dirty->start, page_offset);
427 dirty->end = max(dirty->end, page_offset + 1);
428 }
429
430 out_unlock:
431 dma_resv_unlock(bo->base.resv);
432 return ret;
433 }
434
vmw_bo_vm_fault(struct vm_fault * vmf)435 vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
436 {
437 struct vm_area_struct *vma = vmf->vma;
438 struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
439 vma->vm_private_data;
440 struct vmw_buffer_object *vbo =
441 container_of(bo, struct vmw_buffer_object, base);
442 pgoff_t num_prefault;
443 pgprot_t prot;
444 vm_fault_t ret;
445
446 ret = ttm_bo_vm_reserve(bo, vmf);
447 if (ret)
448 return ret;
449
450 num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
451 TTM_BO_VM_NUM_PREFAULT;
452
453 if (vbo->dirty) {
454 pgoff_t allowed_prefault;
455 unsigned long page_offset;
456
457 page_offset = vmf->pgoff -
458 drm_vma_node_start(&bo->base.vma_node);
459 if (page_offset >= bo->resource->num_pages ||
460 vmw_resources_clean(vbo, page_offset,
461 page_offset + PAGE_SIZE,
462 &allowed_prefault)) {
463 ret = VM_FAULT_SIGBUS;
464 goto out_unlock;
465 }
466
467 num_prefault = min(num_prefault, allowed_prefault);
468 }
469
470 /*
471 * If we don't track dirty using the MKWRITE method, make sure
472 * sure the page protection is write-enabled so we don't get
473 * a lot of unnecessary write faults.
474 */
475 if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
476 prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
477 else
478 prot = vm_get_page_prot(vma->vm_flags);
479
480 ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault, 1);
481 if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
482 return ret;
483
484 out_unlock:
485 dma_resv_unlock(bo->base.resv);
486
487 return ret;
488 }
489
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
 * vmw_bo_vm_huge_fault - Huge page fault handler for vmwgfx buffer objects.
 *
 * Handles PMD-sized (and, where configured, PUD-sized) faults. Write faults
 * on dirty-tracked buffer objects or COW mappings fall back to PTE level.
 * For dirty-tracked buffer objects the huge entry is inserted
 * write-protected.
 */
vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
				enum page_entry_size pe_size)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = vma->vm_private_data;
	struct vmw_buffer_object *vbo =
		container_of(bo, struct vmw_buffer_object, base);
	bool is_write = vmf->flags & FAULT_FLAG_WRITE;
	pgoff_t fault_page_size;
	pgprot_t prot;
	vm_fault_t ret;

	/* Translate the entry size into a number of base pages. */
	switch (pe_size) {
	case PE_SIZE_PMD:
		fault_page_size = HPAGE_PMD_SIZE >> PAGE_SHIFT;
		break;
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
	case PE_SIZE_PUD:
		fault_page_size = HPAGE_PUD_SIZE >> PAGE_SHIFT;
		break;
#endif
	default:
		WARN_ON_ONCE(1);
		return VM_FAULT_FALLBACK;
	}

	/* Always do write dirty-tracking and COW on PTE level. */
	if (is_write &&
	    (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags)))
		return VM_FAULT_FALLBACK;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

	if (!vbo->dirty) {
		prot = vm_get_page_prot(vma->vm_flags);
	} else {
		pgoff_t allowed_prefault;
		unsigned long page_offset;

		page_offset = vmf->pgoff -
			drm_vma_node_start(&bo->base.vma_node);
		if (page_offset >= bo->resource->num_pages ||
		    vmw_resources_clean(vbo, page_offset,
					page_offset + PAGE_SIZE,
					&allowed_prefault)) {
			ret = VM_FAULT_SIGBUS;
			goto out_unlock;
		}

		/*
		 * Write protect, so we get a new fault on write, and can
		 * split.
		 */
		prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
	}

	ret = ttm_bo_vm_fault_reserved(vmf, prot, 1, fault_page_size);

	/* On this retry path the function returns without unlocking. */
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;

out_unlock:
	dma_resv_unlock(bo->base.resv);

	return ret;
}
#endif
559