1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2021 Intel Corporation
4  */
5 
6 #include <drm/ttm/ttm_bo_driver.h>
7 
8 #include "i915_deps.h"
9 #include "i915_drv.h"
10 #include "intel_memory_region.h"
11 #include "intel_region_ttm.h"
12 
13 #include "gem/i915_gem_object.h"
14 #include "gem/i915_gem_region.h"
15 #include "gem/i915_gem_ttm.h"
16 #include "gem/i915_gem_ttm_move.h"
17 
18 #include "gt/intel_engine_pm.h"
19 #include "gt/intel_gt.h"
20 #include "gt/intel_migrate.h"
21 
22 /**
23  * DOC: Selftest failure modes for failsafe migration:
24  *
25  * For fail_gpu_migration, the gpu blit scheduled is always a clear blit
26  * rather than a copy blit, and then we force the failure paths as if
27  * the blit fence returned an error.
28  *
29  * For fail_work_allocation we fail the kmalloc of the async worker, we
30  * sync the gpu blit. If it then fails, or fail_gpu_migration is set to
31  * true, then a memcpy operation is performed sync.
32  */
33 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
/* Selftest knob: when set, GPU migration is forced down its failure paths. */
static bool fail_gpu_migration;
/* Selftest knob: when set, the async memcpy worker allocation is skipped. */
static bool fail_work_allocation;
/* Selftest knob: when set, i915_ttm_memcpy_allowed() reports false. */
static bool ban_memcpy;
37 
/*
 * Selftest hook selecting which migration failure modes to simulate:
 * @gpu_migration is stored in fail_gpu_migration and @work_allocation in
 * fail_work_allocation.
 */
void i915_ttm_migrate_set_failure_modes(bool gpu_migration,
					bool work_allocation)
{
	fail_work_allocation = work_allocation;
	fail_gpu_migration = gpu_migration;
}
44 
/* Selftest hook: record @ban in the ban_memcpy flag. */
void i915_ttm_migrate_set_ban_memcpy(bool ban)
{
	ban_memcpy = ban;
}
49 #endif
50 
51 static enum i915_cache_level
i915_ttm_cache_level(struct drm_i915_private * i915,struct ttm_resource * res,struct ttm_tt * ttm)52 i915_ttm_cache_level(struct drm_i915_private *i915, struct ttm_resource *res,
53 		     struct ttm_tt *ttm)
54 {
55 	return ((HAS_LLC(i915) || HAS_SNOOP(i915)) &&
56 		!i915_ttm_gtt_binds_lmem(res) &&
57 		ttm->caching == ttm_cached) ? I915_CACHE_LLC :
58 		I915_CACHE_NONE;
59 }
60 
61 static struct intel_memory_region *
i915_ttm_region(struct ttm_device * bdev,int ttm_mem_type)62 i915_ttm_region(struct ttm_device *bdev, int ttm_mem_type)
63 {
64 	struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
65 
66 	/* There's some room for optimization here... */
67 	GEM_BUG_ON(ttm_mem_type != I915_PL_SYSTEM &&
68 		   ttm_mem_type < I915_PL_LMEM0);
69 	if (ttm_mem_type == I915_PL_SYSTEM)
70 		return intel_memory_region_lookup(i915, INTEL_MEMORY_SYSTEM,
71 						  0);
72 
73 	return intel_memory_region_lookup(i915, INTEL_MEMORY_LOCAL,
74 					  ttm_mem_type - I915_PL_LMEM0);
75 }
76 
77 /**
78  * i915_ttm_adjust_domains_after_move - Adjust the GEM domains after a
79  * TTM move
80  * @obj: The gem object
81  */
i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object * obj)82 void i915_ttm_adjust_domains_after_move(struct drm_i915_gem_object *obj)
83 {
84 	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
85 
86 	if (i915_ttm_cpu_maps_iomem(bo->resource) || bo->ttm->caching != ttm_cached) {
87 		obj->write_domain = I915_GEM_DOMAIN_WC;
88 		obj->read_domains = I915_GEM_DOMAIN_WC;
89 	} else {
90 		obj->write_domain = I915_GEM_DOMAIN_CPU;
91 		obj->read_domains = I915_GEM_DOMAIN_CPU;
92 	}
93 }
94 
95 /**
96  * i915_ttm_adjust_gem_after_move - Adjust the GEM state after a TTM move
97  * @obj: The gem object
98  *
99  * Adjusts the GEM object's region, mem_flags and cache coherency after a
100  * TTM move.
101  */
i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object * obj)102 void i915_ttm_adjust_gem_after_move(struct drm_i915_gem_object *obj)
103 {
104 	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
105 	unsigned int cache_level;
106 	unsigned int i;
107 
108 	/*
109 	 * If object was moved to an allowable region, update the object
110 	 * region to consider it migrated. Note that if it's currently not
111 	 * in an allowable region, it's evicted and we don't update the
112 	 * object region.
113 	 */
114 	if (intel_region_to_ttm_type(obj->mm.region) != bo->resource->mem_type) {
115 		for (i = 0; i < obj->mm.n_placements; ++i) {
116 			struct intel_memory_region *mr = obj->mm.placements[i];
117 
118 			if (intel_region_to_ttm_type(mr) == bo->resource->mem_type &&
119 			    mr != obj->mm.region) {
120 				i915_gem_object_release_memory_region(obj);
121 				i915_gem_object_init_memory_region(obj, mr);
122 				break;
123 			}
124 		}
125 	}
126 
127 	obj->mem_flags &= ~(I915_BO_FLAG_STRUCT_PAGE | I915_BO_FLAG_IOMEM);
128 
129 	obj->mem_flags |= i915_ttm_cpu_maps_iomem(bo->resource) ? I915_BO_FLAG_IOMEM :
130 		I915_BO_FLAG_STRUCT_PAGE;
131 
132 	cache_level = i915_ttm_cache_level(to_i915(bo->base.dev), bo->resource,
133 					   bo->ttm);
134 	i915_gem_object_set_cache_coherency(obj, cache_level);
135 }
136 
137 /**
138  * i915_ttm_move_notify - Prepare an object for move
139  * @bo: The ttm buffer object.
140  *
141  * This function prepares an object for move by removing all GPU bindings,
142  * removing all CPU mapings and finally releasing the pages sg-table.
143  *
144  * Return: 0 if successful, negative error code on error.
145  */
i915_ttm_move_notify(struct ttm_buffer_object * bo)146 int i915_ttm_move_notify(struct ttm_buffer_object *bo)
147 {
148 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
149 	int ret;
150 
151 	/*
152 	 * Note: The async unbinding here will actually transform the
153 	 * blocking wait for unbind into a wait before finally submitting
154 	 * evict / migration blit and thus stall the migration timeline
155 	 * which may not be good for overall throughput. We should make
156 	 * sure we await the unbind fences *after* the migration blit
157 	 * instead of *before* as we currently do.
158 	 */
159 	ret = i915_gem_object_unbind(obj, I915_GEM_OBJECT_UNBIND_ACTIVE |
160 				     I915_GEM_OBJECT_UNBIND_ASYNC);
161 	if (ret)
162 		return ret;
163 
164 	ret = __i915_gem_object_put_pages(obj);
165 	if (ret)
166 		return ret;
167 
168 	return 0;
169 }
170 
/*
 * i915_ttm_accel_move - Try to schedule a GPU blit for a move.
 * @bo: The ttm buffer object being moved; its current resource is the source.
 * @clear: Schedule a clear of the destination instead of a copy.
 * @dst_mem: The destination ttm resource.
 * @dst_ttm: The ttm used to derive the destination cache level.
 * @dst_st: The sg-table of the destination memory.
 * @deps: Dependencies handed to the migrate helpers.
 *
 * Return: The fence of the submitted request on success, or an ERR_PTR():
 * -EINVAL when there is no migrate context, the GT is wedged, or a clear is
 * requested for a kernel bo outside the fail_gpu_migration selftest mode;
 * otherwise the error reported by the helpers called below.
 */
static struct dma_fence *i915_ttm_accel_move(struct ttm_buffer_object *bo,
					     bool clear,
					     struct ttm_resource *dst_mem,
					     struct ttm_tt *dst_ttm,
					     struct sg_table *dst_st,
					     const struct i915_deps *deps)
{
	struct drm_i915_private *i915 = container_of(bo->bdev, typeof(*i915),
						     bdev);
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	struct i915_request *rq;
	struct ttm_tt *src_ttm = bo->ttm;
	enum i915_cache_level src_level, dst_level;
	int ret;

	/* No migrate context, or a wedged GT: no accelerated move possible. */
	if (!to_gt(i915)->migrate.context || intel_gt_is_wedged(to_gt(i915)))
		return ERR_PTR(-EINVAL);

	/* With fail_gpu_migration, we always perform a GPU clear. */
	if (I915_SELFTEST_ONLY(fail_gpu_migration))
		clear = true;

	dst_level = i915_ttm_cache_level(i915, dst_mem, dst_ttm);
	if (clear) {
		/* Kernel bos are only cleared when the selftest forces it. */
		if (bo->type == ttm_bo_type_kernel &&
		    !I915_SELFTEST_ONLY(fail_gpu_migration))
			return ERR_PTR(-EINVAL);

		/* Paired with the intel_engine_pm_put() after this if/else. */
		intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
		ret = intel_context_migrate_clear(to_gt(i915)->migrate.context, deps,
						  dst_st->sgl, dst_level,
						  i915_ttm_gtt_binds_lmem(dst_mem),
						  0, &rq);
	} else {
		struct i915_refct_sgt *src_rsgt =
			i915_ttm_resource_get_st(obj, bo->resource);

		/* Bail out before taking the engine pm reference. */
		if (IS_ERR(src_rsgt))
			return ERR_CAST(src_rsgt);

		src_level = i915_ttm_cache_level(i915, bo->resource, src_ttm);
		intel_engine_pm_get(to_gt(i915)->migrate.context->engine);
		ret = intel_context_migrate_copy(to_gt(i915)->migrate.context,
						 deps, src_rsgt->table.sgl,
						 src_level,
						 i915_ttm_gtt_binds_lmem(bo->resource),
						 dst_st->sgl, dst_level,
						 i915_ttm_gtt_binds_lmem(dst_mem),
						 &rq);

		/* Drop the source sg-table reference taken above. */
		i915_refct_sgt_put(src_rsgt);
	}

	intel_engine_pm_put(to_gt(i915)->migrate.context->engine);

	/*
	 * The migrate call failed but still handed back a request: wait for
	 * it and drop the reference before reporting the error.
	 *
	 * NOTE(review): rq is not initialised in this function, so this test
	 * relies on the migrate helpers always storing to &rq, even on
	 * failure - confirm against their implementation.
	 */
	if (ret && rq) {
		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
		i915_request_put(rq);
	}

	return ret ? ERR_PTR(ret) : &rq->fence;
}
233 
/**
 * struct i915_ttm_memcpy_arg - argument for the bo memcpy functionality.
 * @_dst_iter: Storage space for the destination kmap iterator.
 * @_src_iter: Storage space for the source kmap iterator.
 * @dst_iter: Pointer to the destination kmap iterator.
 * @src_iter: Pointer to the source kmap iterator.
 * @num_pages: Number of pages to copy or clear.
 * @clear: Whether to clear instead of copy.
 * @src_rsgt: Refcounted scatter-gather list of source memory.
 * @dst_rsgt: Refcounted scatter-gather list of destination memory.
 */
struct i915_ttm_memcpy_arg {
	/* Each iterator is either a ttm (tt) or an iomap (io) iterator. */
	union {
		struct ttm_kmap_iter_tt tt;
		struct ttm_kmap_iter_iomap io;
	} _dst_iter,
	_src_iter;
	struct ttm_kmap_iter *dst_iter;
	struct ttm_kmap_iter *src_iter;
	unsigned long num_pages;
	bool clear;
	struct i915_refct_sgt *src_rsgt;
	struct i915_refct_sgt *dst_rsgt;
};
257 
/**
 * struct i915_ttm_memcpy_work - Async memcpy worker under a dma-fence.
 * @fence: The dma-fence.
 * @work: The work struct used for the memcpy work.
 * @lock: The fence lock. Not used to protect anything else ATM.
 * @irq_work: Low latency worker to signal the fence since it can't be done
 * from the callback for lockdep reasons.
 * @cb: Callback for the accelerated migration fence.
 * @arg: The argument for the memcpy functionality.
 * @i915: The i915 pointer.
 * @obj: The GEM object.
 * @memcpy_allowed: Whether falling back to memcpy or memset is permitted.
 * If false, instead of processing the @arg, we wedge the device and set the
 * @obj unknown_state, to prevent further access to the object with the CPU
 * or GPU.  On some devices we might only be permitted to use the blitter
 * engine for such operations.
 */
struct i915_ttm_memcpy_work {
	struct dma_fence fence;
	struct work_struct work;
	spinlock_t lock;
	struct irq_work irq_work;
	struct dma_fence_cb cb;
	struct i915_ttm_memcpy_arg arg;
	struct drm_i915_private *i915;
	struct drm_i915_gem_object *obj;
	bool memcpy_allowed;
};
285 
i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg * arg)286 static void i915_ttm_move_memcpy(struct i915_ttm_memcpy_arg *arg)
287 {
288 	ttm_move_memcpy(arg->clear, arg->num_pages,
289 			arg->dst_iter, arg->src_iter);
290 }
291 
/*
 * Fill in @arg for a memcpy (or clear) from @bo's current resource into
 * @dst_mem. For each side an iomap iterator is set up when the CPU maps
 * that resource as iomem, a ttm iterator otherwise. A reference to
 * @dst_rsgt is taken; unless @clear is set, the source sg-table is looked
 * up as well. The stored sg-tables are put by i915_ttm_memcpy_release().
 */
static void i915_ttm_memcpy_init(struct i915_ttm_memcpy_arg *arg,
				 struct ttm_buffer_object *bo, bool clear,
				 struct ttm_resource *dst_mem,
				 struct ttm_tt *dst_ttm,
				 struct i915_refct_sgt *dst_rsgt)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	struct intel_memory_region *src_reg, *dst_reg;

	dst_reg = i915_ttm_region(bo->bdev, dst_mem->mem_type);
	src_reg = i915_ttm_region(bo->bdev, bo->resource->mem_type);
	GEM_BUG_ON(!dst_reg || !src_reg);

	if (i915_ttm_cpu_maps_iomem(dst_mem))
		arg->dst_iter = ttm_kmap_iter_iomap_init(&arg->_dst_iter.io,
							 &dst_reg->iomap,
							 &dst_rsgt->table,
							 dst_reg->region.start);
	else
		arg->dst_iter = ttm_kmap_iter_tt_init(&arg->_dst_iter.tt,
						      dst_ttm);

	if (i915_ttm_cpu_maps_iomem(bo->resource))
		arg->src_iter = ttm_kmap_iter_iomap_init(&arg->_src_iter.io,
							 &src_reg->iomap,
							 &obj->ttm.cached_io_rsgt->table,
							 src_reg->region.start);
	else
		arg->src_iter = ttm_kmap_iter_tt_init(&arg->_src_iter.tt,
						      bo->ttm);

	arg->clear = clear;
	arg->num_pages = bo->base.size >> PAGE_SHIFT;

	arg->dst_rsgt = i915_refct_sgt_get(dst_rsgt);
	if (clear)
		arg->src_rsgt = NULL;
	else
		arg->src_rsgt = i915_ttm_resource_get_st(obj, bo->resource);
}
322 
i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg * arg)323 static void i915_ttm_memcpy_release(struct i915_ttm_memcpy_arg *arg)
324 {
325 	i915_refct_sgt_put(arg->src_rsgt);
326 	i915_refct_sgt_put(arg->dst_rsgt);
327 }
328 
/*
 * Worker queued by __memcpy_cb() when the migration fence carries an error
 * (or the fail_gpu_migration selftest mode is set). It either performs the
 * memcpy fallback or, when memcpy is not allowed, wedges all GTs and marks
 * the object as being in an unknown state. It then signals the work fence
 * and drops the sg-table, object and fence references.
 */
static void __memcpy_work(struct work_struct *work)
{
	struct i915_ttm_memcpy_work *copy_work =
		container_of(work, typeof(*copy_work), work);
	struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
	bool cookie;

	/*
	 * FIXME: We need to take a closer look here. We should be able to plonk
	 * this into the fence critical section.
	 */
	if (!copy_work->memcpy_allowed) {
		struct intel_gt *gt;
		unsigned int id;

		for_each_gt(gt, copy_work->i915, id)
			intel_gt_set_wedged(gt);
	}

	/* Everything up to dma_fence_end_signalling() is the fence critical section. */
	cookie = dma_fence_begin_signalling();

	if (copy_work->memcpy_allowed) {
		i915_ttm_move_memcpy(arg);
	} else {
		/*
		 * Prevent further use of the object. Any future GTT binding or
		 * CPU access is not allowed once we signal the fence. Outside
		 * of the fence critical section, we also wedge the gpu
		 * to indicate the device is not functional.
		 *
		 * The below dma_fence_signal() is our write-memory-barrier.
		 */
		copy_work->obj->mm.unknown_state = true;
	}

	dma_fence_end_signalling(cookie);

	dma_fence_signal(&copy_work->fence);

	/* Release what the work item holds: sg-tables, object and fence. */
	i915_ttm_memcpy_release(arg);
	i915_gem_object_put(copy_work->obj);
	dma_fence_put(&copy_work->fence);
}
372 
__memcpy_irq_work(struct irq_work * irq_work)373 static void __memcpy_irq_work(struct irq_work *irq_work)
374 {
375 	struct i915_ttm_memcpy_work *copy_work =
376 		container_of(irq_work, typeof(*copy_work), irq_work);
377 	struct i915_ttm_memcpy_arg *arg = &copy_work->arg;
378 
379 	dma_fence_signal(&copy_work->fence);
380 	i915_ttm_memcpy_release(arg);
381 	i915_gem_object_put(copy_work->obj);
382 	dma_fence_put(&copy_work->fence);
383 }
384 
__memcpy_cb(struct dma_fence * fence,struct dma_fence_cb * cb)385 static void __memcpy_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
386 {
387 	struct i915_ttm_memcpy_work *copy_work =
388 		container_of(cb, typeof(*copy_work), cb);
389 
390 	if (unlikely(fence->error || I915_SELFTEST_ONLY(fail_gpu_migration))) {
391 		INIT_WORK(&copy_work->work, __memcpy_work);
392 		queue_work(system_unbound_wq, &copy_work->work);
393 	} else {
394 		init_irq_work(&copy_work->irq_work, __memcpy_irq_work);
395 		irq_work_queue(&copy_work->irq_work);
396 	}
397 }
398 
/* dma_fence_ops.get_driver_name: fixed name, @fence is not inspected. */
static const char *get_driver_name(struct dma_fence *fence)
{
	return "i915_ttm_memcpy_work";
}
403 
/* dma_fence_ops.get_timeline_name: fixed name, @fence is not inspected. */
static const char *get_timeline_name(struct dma_fence *fence)
{
	return "unbound";
}
408 
/* Fence ops for the memcpy work fence; only the name callbacks are set. */
static const struct dma_fence_ops dma_fence_memcpy_ops = {
	.get_driver_name = get_driver_name,
	.get_timeline_name = get_timeline_name,
};
413 
414 static struct dma_fence *
i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work * work,struct dma_fence * dep)415 i915_ttm_memcpy_work_arm(struct i915_ttm_memcpy_work *work,
416 			 struct dma_fence *dep)
417 {
418 	int ret;
419 
420 	spin_lock_init(&work->lock);
421 	dma_fence_init(&work->fence, &dma_fence_memcpy_ops, &work->lock, 0, 0);
422 	dma_fence_get(&work->fence);
423 	ret = dma_fence_add_callback(dep, &work->cb, __memcpy_cb);
424 	if (ret) {
425 		if (ret != -ENOENT)
426 			dma_fence_wait(dep, false);
427 
428 		return ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ? -EINVAL :
429 			       dep->error);
430 	}
431 
432 	return &work->fence;
433 }
434 
i915_ttm_memcpy_allowed(struct ttm_buffer_object * bo,struct ttm_resource * dst_mem)435 static bool i915_ttm_memcpy_allowed(struct ttm_buffer_object *bo,
436 				    struct ttm_resource *dst_mem)
437 {
438 	if (i915_gem_object_needs_ccs_pages(i915_ttm_to_gem(bo)))
439 		return false;
440 
441 	if (!(i915_ttm_resource_mappable(bo->resource) &&
442 	      i915_ttm_resource_mappable(dst_mem)))
443 		return false;
444 
445 	return I915_SELFTEST_ONLY(ban_memcpy) ? false : true;
446 }
447 
/*
 * __i915_ttm_move - Move or clear bo contents, preferring the GPU.
 * @bo: The source ttm buffer object.
 * @ctx: The ttm operation context, used when syncing @move_deps.
 * @clear: Whether to clear the destination instead of copying.
 * @dst_mem: The destination ttm resource.
 * @dst_ttm: The destination ttm.
 * @dst_rsgt: Refcounted sg-table of the destination memory.
 * @allow_accel: Whether a GPU blit may be attempted.
 * @move_deps: Dependencies of the move, may be NULL.
 *
 * If a blit is scheduled, an error-intercepting memcpy work is armed on its
 * fence where needed. If no blit could be scheduled, or the intercept could
 * not be armed, the move falls back to a synchronous memcpy when allowed.
 *
 * Return: A fence to wait for, NULL if the operation completed
 * synchronously, or an ERR_PTR() on failure (-EIO when the fallback is
 * needed but memcpy is not allowed).
 */
static struct dma_fence *
__i915_ttm_move(struct ttm_buffer_object *bo,
		const struct ttm_operation_ctx *ctx, bool clear,
		struct ttm_resource *dst_mem, struct ttm_tt *dst_ttm,
		struct i915_refct_sgt *dst_rsgt, bool allow_accel,
		const struct i915_deps *move_deps)
{
	const bool memcpy_allowed = i915_ttm_memcpy_allowed(bo, dst_mem);
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	struct drm_i915_private *i915 = to_i915(bo->base.dev);
	struct i915_ttm_memcpy_work *copy_work = NULL;
	/* arg points at on-stack storage unless a copy_work is allocated. */
	struct i915_ttm_memcpy_arg _arg, *arg = &_arg;
	/* Without acceleration, start out as if the blit had failed. */
	struct dma_fence *fence = ERR_PTR(-EINVAL);

	if (allow_accel) {
		fence = i915_ttm_accel_move(bo, clear, dst_mem, dst_ttm,
					    &dst_rsgt->table, move_deps);

		/*
		 * We only need to intercept the error when moving to lmem.
		 * When moving to system, TTM or shmem will provide us with
		 * cleared pages.
		 */
		if (!IS_ERR(fence) && !i915_ttm_gtt_binds_lmem(dst_mem) &&
		    !I915_SELFTEST_ONLY(fail_gpu_migration ||
					fail_work_allocation))
			goto out;
	}

	/* If we've scheduled gpu migration. Try to arm error intercept. */
	if (!IS_ERR(fence)) {
		struct dma_fence *dep = fence;

		if (!I915_SELFTEST_ONLY(fail_work_allocation))
			copy_work = kzalloc(sizeof(*copy_work), GFP_KERNEL);

		if (copy_work) {
			copy_work->i915 = i915;
			copy_work->memcpy_allowed = memcpy_allowed;
			/* Object reference held by the work item. */
			copy_work->obj = i915_gem_object_get(obj);
			arg = &copy_work->arg;
			if (memcpy_allowed)
				i915_ttm_memcpy_init(arg, bo, clear, dst_mem,
						     dst_ttm, dst_rsgt);

			fence = i915_ttm_memcpy_work_arm(copy_work, dep);
		} else {
			/*
			 * No work item: wait for the blit here. fence still
			 * aliases dep at this point, so fence->error is the
			 * blit's error; ERR_PTR(0) is NULL.
			 */
			dma_fence_wait(dep, false);
			fence = ERR_PTR(I915_SELFTEST_ONLY(fail_gpu_migration) ?
					-EINVAL : fence->error);
		}
		/* Done with the blit fence itself. */
		dma_fence_put(dep);

		if (!IS_ERR(fence))
			goto out;
	} else {
		int err = PTR_ERR(fence);

		/* Hand these errors straight back instead of falling back to memcpy. */
		if (err == -EINTR || err == -ERESTARTSYS || err == -EAGAIN)
			return fence;

		/* No blit was scheduled: sync the dependencies before the memcpy. */
		if (move_deps) {
			err = i915_deps_sync(move_deps, ctx);
			if (err)
				return ERR_PTR(err);
		}
	}

	/* Error intercept failed or no accelerated migration to start with */

	if (memcpy_allowed) {
		/* With a copy_work, arg was already initialised above. */
		if (!copy_work)
			i915_ttm_memcpy_init(arg, bo, clear, dst_mem, dst_ttm,
					     dst_rsgt);
		i915_ttm_move_memcpy(arg);
		i915_ttm_memcpy_release(arg);
	}
	if (copy_work)
		i915_gem_object_put(copy_work->obj);
	kfree(copy_work);

	return memcpy_allowed ? NULL : ERR_PTR(-EIO);
out:
	/*
	 * A NULL fence with a copy_work means arming returned ERR_PTR(0):
	 * the callback was not installed and the dependency had no error,
	 * so no worker will release the work item - do it here.
	 *
	 * NOTE(review): i915_ttm_memcpy_release() runs here even when
	 * memcpy_allowed is false and arg was never initialised; this relies
	 * on kzalloc() zeroing and on i915_refct_sgt_put() accepting NULL -
	 * confirm.
	 */
	if (!fence && copy_work) {
		i915_ttm_memcpy_release(arg);
		i915_gem_object_put(copy_work->obj);
		kfree(copy_work);
	}

	return fence;
}
539 
/**
 * i915_ttm_move - The TTM move callback used by i915.
 * @bo: The buffer object.
 * @evict: Whether this is an eviction.
 * @ctx: The ttm operation context, used when populating the ttm and when
 * collecting and waiting for dependencies.
 * @dst_mem: The destination ttm resource.
 * @hop: If we need multihop, what temporary memory type to move to.
 *
 * Return: 0 if successful, negative error code otherwise.
 */
int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
		  struct ttm_operation_ctx *ctx,
		  struct ttm_resource *dst_mem,
		  struct ttm_place *hop)
{
	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
	struct ttm_resource_manager *dst_man =
		ttm_manager_type(bo->bdev, dst_mem->mem_type);
	struct dma_fence *migration_fence = NULL;
	struct ttm_tt *ttm = bo->ttm;
	struct i915_refct_sgt *dst_rsgt;
	bool clear;
	int ret;

	/* A bo without a GEM object is unexpected: warn and do a null move. */
	if (GEM_WARN_ON(!obj)) {
		ttm_bo_move_null(bo, dst_mem);
		return 0;
	}

	ret = i915_ttm_move_notify(bo);
	if (ret)
		return ret;

	/* Objects not marked WILLNEED are purged rather than moved. */
	if (obj->mm.madv != I915_MADV_WILLNEED) {
		i915_ttm_purge(obj);
		ttm_resource_free(bo, &dst_mem);
		return 0;
	}

	/* Populate ttm with pages if needed. Typically system memory. */
	if (ttm && (dst_man->use_tt || (ttm->page_flags & TTM_TT_FLAG_SWAPPED))) {
		ret = ttm_tt_populate(bo->bdev, ttm, ctx);
		if (ret)
			return ret;
	}

	dst_rsgt = i915_ttm_resource_get_st(obj, dst_mem);
	if (IS_ERR(dst_rsgt))
		return PTR_ERR(dst_rsgt);

	/*
	 * There is nothing to copy when the source is neither iomem nor a
	 * populated ttm. The move is skipped entirely (migration_fence stays
	 * NULL) only when such a clear has a ttm that lacks
	 * TTM_TT_FLAG_ZERO_ALLOC.
	 */
	clear = !i915_ttm_cpu_maps_iomem(bo->resource) && (!ttm || !ttm_tt_is_populated(ttm));
	if (!(clear && ttm && !(ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC))) {
		struct i915_deps deps;

		i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
		ret = i915_deps_add_resv(&deps, bo->base.resv, ctx);
		if (ret) {
			i915_refct_sgt_put(dst_rsgt);
			return ret;
		}

		migration_fence = __i915_ttm_move(bo, ctx, clear, dst_mem, ttm,
						  dst_rsgt, true, &deps);
		i915_deps_fini(&deps);
	}

	/* We can possibly get an -ERESTARTSYS here */
	if (IS_ERR(migration_fence)) {
		i915_refct_sgt_put(dst_rsgt);
		return PTR_ERR(migration_fence);
	}

	if (migration_fence) {
		if (I915_SELFTEST_ONLY(evict && fail_gpu_migration))
			ret = -EIO; /* never feed non-migrate fences into ttm */
		else
			ret = ttm_bo_move_accel_cleanup(bo, migration_fence, evict,
							true, dst_mem);
		/* Accel cleanup failed or was skipped: wait, then clean up synchronously. */
		if (ret) {
			dma_fence_wait(migration_fence, false);
			ttm_bo_move_sync_cleanup(bo, dst_mem);
		}
		dma_fence_put(migration_fence);
	} else {
		ttm_bo_move_sync_cleanup(bo, dst_mem);
	}

	i915_ttm_adjust_domains_after_move(obj);
	i915_ttm_free_cached_io_rsgt(obj);

	/* Cache the destination sg-table for lmem / iomem, otherwise drop it. */
	if (i915_ttm_gtt_binds_lmem(dst_mem) || i915_ttm_cpu_maps_iomem(dst_mem)) {
		obj->ttm.cached_io_rsgt = dst_rsgt;
		obj->ttm.get_io_page.sg_pos = dst_rsgt->table.sgl;
		obj->ttm.get_io_page.sg_idx = 0;
	} else {
		i915_refct_sgt_put(dst_rsgt);
	}

	i915_ttm_adjust_lru(obj);
	i915_ttm_adjust_gem_after_move(obj);
	return 0;
}
641 
642 /**
643  * i915_gem_obj_copy_ttm - Copy the contents of one ttm-based gem object to
644  * another
645  * @dst: The destination object
646  * @src: The source object
647  * @allow_accel: Allow using the blitter. Otherwise TTM memcpy is used.
648  * @intr: Whether to perform waits interruptible:
649  *
650  * Note: The caller is responsible for assuring that the underlying
651  * TTM objects are populated if needed and locked.
652  *
653  * Return: Zero on success. Negative error code on error. If @intr == true,
654  * then it may return -ERESTARTSYS or -EINTR.
655  */
i915_gem_obj_copy_ttm(struct drm_i915_gem_object * dst,struct drm_i915_gem_object * src,bool allow_accel,bool intr)656 int i915_gem_obj_copy_ttm(struct drm_i915_gem_object *dst,
657 			  struct drm_i915_gem_object *src,
658 			  bool allow_accel, bool intr)
659 {
660 	struct ttm_buffer_object *dst_bo = i915_gem_to_ttm(dst);
661 	struct ttm_buffer_object *src_bo = i915_gem_to_ttm(src);
662 	struct ttm_operation_ctx ctx = {
663 		.interruptible = intr,
664 	};
665 	struct i915_refct_sgt *dst_rsgt;
666 	struct dma_fence *copy_fence;
667 	struct i915_deps deps;
668 	int ret;
669 
670 	assert_object_held(dst);
671 	assert_object_held(src);
672 	i915_deps_init(&deps, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
673 
674 	ret = dma_resv_reserve_fences(src_bo->base.resv, 1);
675 	if (ret)
676 		return ret;
677 
678 	ret = dma_resv_reserve_fences(dst_bo->base.resv, 1);
679 	if (ret)
680 		return ret;
681 
682 	ret = i915_deps_add_resv(&deps, dst_bo->base.resv, &ctx);
683 	if (ret)
684 		return ret;
685 
686 	ret = i915_deps_add_resv(&deps, src_bo->base.resv, &ctx);
687 	if (ret)
688 		return ret;
689 
690 	dst_rsgt = i915_ttm_resource_get_st(dst, dst_bo->resource);
691 	copy_fence = __i915_ttm_move(src_bo, &ctx, false, dst_bo->resource,
692 				     dst_bo->ttm, dst_rsgt, allow_accel,
693 				     &deps);
694 
695 	i915_deps_fini(&deps);
696 	i915_refct_sgt_put(dst_rsgt);
697 	if (IS_ERR_OR_NULL(copy_fence))
698 		return PTR_ERR_OR_ZERO(copy_fence);
699 
700 	dma_resv_add_fence(dst_bo->base.resv, copy_fence, DMA_RESV_USAGE_WRITE);
701 	dma_resv_add_fence(src_bo->base.resv, copy_fence, DMA_RESV_USAGE_READ);
702 	dma_fence_put(copy_fence);
703 
704 	return 0;
705 }
706