1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2019 Intel Corporation
4  */
5 
6 #include "i915_drv.h"
7 #include "gt/intel_context.h"
8 #include "gt/intel_engine_pm.h"
9 #include "gt/intel_gt.h"
10 #include "gt/intel_gt_buffer_pool.h"
11 #include "gt/intel_ring.h"
12 #include "i915_gem_clflush.h"
13 #include "i915_gem_object_blt.h"
14 
/**
 * intel_emit_vma_fill_blt - build a blitter batch that fills @vma with @value
 * @ce: context providing the engine and the address space (ce->vm) the batch
 *      is bound into; the engine must not be virtual (GEM_BUG_ON)
 * @vma: range to fill, described by vma->node.start and vma->size
 * @ww: ww context used to lock the pool object and to pin the batch
 * @value: dword emitted as the last operand of each XY_COLOR_BLT command
 *
 * The range is processed in blocks of at most SZ_8M. Every block emits one
 * XY_COLOR_BLT command followed by MI_ARB_CHECK, and the batch is closed with
 * MI_BATCH_BUFFER_END. The commands are written through a WC mapping of an
 * object obtained from the GT buffer pool.
 *
 * Returns: the pinned batch vma on success. The buffer pool node is stored in
 * batch->private and the engine PM reference taken here is still held;
 * intel_emit_vma_release() drops the pin, the pool node and the PM reference.
 * On failure an ERR_PTR() is returned after unpinning the batch (if it was
 * pinned), putting the pool node (if obtained) and the engine PM reference.
 */
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 struct i915_gem_ww_ctx *ww,
					 u32 value)
{
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct drm_i915_private *i915 = ce->vm->i915;
	struct intel_gt_buffer_pool_node *pool;
	struct i915_vma *batch;
	u64 nblocks, remaining, addr;
	u32 batch_size;
	bool gen8_plus;
	u32 *cs;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	/* At most 8 dwords per block, plus one for MI_BATCH_BUFFER_END. */
	nblocks = div_u64(round_up(vma->size, block_size), block_size);
	batch_size = (1 + 8 * nblocks) * sizeof(u32);
	batch_size = round_up(batch_size, PAGE_SIZE);

	pool = intel_gt_get_buffer_pool(ce->engine->gt, batch_size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto err_pm;
	}

	err = i915_gem_object_lock(pool->obj, ww);
	if (err)
		goto err_pool;

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_pool;
	}

	err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
	if (unlikely(err))
		goto err_pool;

	cs = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_unpin;
	}

	gen8_plus = INTEL_GEN(i915) >= 8;
	addr = vma->node.start;
	remaining = vma->size;

	do {
		const u32 chunk = min_t(u64, remaining, block_size);

		GEM_BUG_ON((chunk >> PAGE_SHIFT) > S16_MAX);

		/*
		 * The gen8+ form carries the upper address dword as well,
		 * making the command one dword longer than the older form.
		 */
		*cs++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA |
			(gen8_plus ? 7 - 2 : 6 - 2);
		*cs++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
		*cs++ = 0;
		*cs++ = ((chunk >> PAGE_SHIFT) << 16) | (PAGE_SIZE / 4);
		*cs++ = lower_32_bits(addr);
		if (gen8_plus)
			*cs++ = upper_32_bits(addr);
		*cs++ = value;

		/* Allow ourselves to be preempted in between blocks. */
		*cs++ = MI_ARB_CHECK;

		addr += chunk;
		remaining -= chunk;
	} while (remaining);

	*cs = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(pool->obj);
	i915_gem_object_unpin_map(pool->obj);

	intel_gt_chipset_flush(ce->vm->gt);

	batch->private = pool;
	return batch;

err_unpin:
	i915_vma_unpin(batch);
err_pool:
	intel_gt_buffer_pool_put(pool);
err_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}
113 
intel_emit_vma_mark_active(struct i915_vma * vma,struct i915_request * rq)114 int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
115 {
116 	int err;
117 
118 	err = i915_request_await_object(rq, vma->obj, false);
119 	if (err == 0)
120 		err = i915_vma_move_to_active(vma, rq, 0);
121 	if (unlikely(err))
122 		return err;
123 
124 	return intel_gt_buffer_pool_mark_active(vma->private, rq);
125 }
126 
intel_emit_vma_release(struct intel_context * ce,struct i915_vma * vma)127 void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
128 {
129 	i915_vma_unpin(vma);
130 	intel_gt_buffer_pool_put(vma->private);
131 	intel_engine_pm_put(ce->engine);
132 }
133 
134 static int
move_obj_to_gpu(struct drm_i915_gem_object * obj,struct i915_request * rq,bool write)135 move_obj_to_gpu(struct drm_i915_gem_object *obj,
136 		struct i915_request *rq,
137 		bool write)
138 {
139 	if (obj->cache_dirty & ~obj->cache_coherent)
140 		i915_gem_clflush_object(obj, 0);
141 
142 	return i915_request_await_object(rq, obj, write);
143 }
144 
/**
 * i915_gem_object_fill_blt - fill an object with a value using the blitter
 * @obj: object to fill
 * @ce: context to submit the fill on; the object is bound into ce->vm
 * @value: 32-bit fill value passed to intel_emit_vma_fill_blt()
 *
 * Locks @obj, pins @ce and the object's vma under a ww context, builds the
 * fill batch, and submits it in a new request. Once a request has been
 * created it is always added, carrying the error (set once) if a later step
 * failed. If any step reports -EDEADLK the ww context is backed off and the
 * whole sequence is retried from the object lock.
 *
 * Returns: 0 on success or a negative error code.
 */
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	struct i915_vma *target;
	struct i915_vma *bb;
	int ret;

	target = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(target))
		return PTR_ERR(target);

	i915_gem_ww_ctx_init(&ww, true);
	intel_engine_pm_get(ce->engine);

again:
	ret = i915_gem_object_lock(obj, &ww);
	if (ret)
		goto backoff;

	ret = intel_context_pin_ww(ce, &ww);
	if (ret)
		goto backoff;

	ret = i915_vma_pin_ww(target, &ww, 0, 0, PIN_USER);
	if (ret)
		goto unpin_ctx;

	bb = intel_emit_vma_fill_blt(ce, target, &ww, value);
	if (IS_ERR(bb)) {
		ret = PTR_ERR(bb);
		goto unpin_target;
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto release_bb;
	}

	ret = intel_emit_vma_mark_active(bb, rq);
	if (unlikely(ret))
		goto add_request;

	ret = move_obj_to_gpu(target->obj, rq, true);
	if (unlikely(ret))
		goto add_request;

	ret = i915_vma_move_to_active(target, rq, EXEC_OBJECT_WRITE);
	if (unlikely(ret))
		goto add_request;

	if (ce->engine->emit_init_breadcrumb) {
		ret = ce->engine->emit_init_breadcrumb(rq);
		if (unlikely(ret))
			goto add_request;
	}

	ret = ce->engine->emit_bb_start(rq,
					bb->node.start, bb->node.size,
					0);

add_request:
	/* The request is added regardless; a failure is recorded on it. */
	if (unlikely(ret))
		i915_request_set_error_once(rq, ret);

	i915_request_add(rq);
release_bb:
	intel_emit_vma_release(ce, bb);
unpin_target:
	i915_vma_unpin(target);
unpin_ctx:
	intel_context_unpin(ce);
backoff:
	if (ret == -EDEADLK) {
		ret = i915_gem_ww_ctx_backoff(&ww);
		if (!ret)
			goto again;
	}

	i915_gem_ww_ctx_fini(&ww);
	intel_engine_pm_put(ce->engine);
	return ret;
}
225 
226 /* Wa_1209644611:icl,ehl */
wa_1209644611_applies(struct drm_i915_private * i915,u32 size)227 static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
228 {
229 	u32 height = size >> PAGE_SHIFT;
230 
231 	if (!IS_GEN(i915, 11))
232 		return false;
233 
234 	return height % 4 == 3 && height <= 8;
235 }
236 
intel_emit_vma_copy_blt(struct intel_context * ce,struct i915_gem_ww_ctx * ww,struct i915_vma * src,struct i915_vma * dst)237 struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
238 					 struct i915_gem_ww_ctx *ww,
239 					 struct i915_vma *src,
240 					 struct i915_vma *dst)
241 {
242 	struct drm_i915_private *i915 = ce->vm->i915;
243 	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
244 	struct intel_gt_buffer_pool_node *pool;
245 	struct i915_vma *batch;
246 	u64 src_offset, dst_offset;
247 	u64 count, rem;
248 	u32 size, *cmd;
249 	int err;
250 
251 	GEM_BUG_ON(src->size != dst->size);
252 
253 	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
254 	intel_engine_pm_get(ce->engine);
255 
256 	count = div_u64(round_up(dst->size, block_size), block_size);
257 	size = (1 + 11 * count) * sizeof(u32);
258 	size = round_up(size, PAGE_SIZE);
259 	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
260 	if (IS_ERR(pool)) {
261 		err = PTR_ERR(pool);
262 		goto out_pm;
263 	}
264 
265 	err = i915_gem_object_lock(pool->obj, ww);
266 	if (err)
267 		goto out_put;
268 
269 	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
270 	if (IS_ERR(batch)) {
271 		err = PTR_ERR(batch);
272 		goto out_put;
273 	}
274 
275 	err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
276 	if (unlikely(err))
277 		goto out_put;
278 
279 	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
280 	if (IS_ERR(cmd)) {
281 		err = PTR_ERR(cmd);
282 		goto out_unpin;
283 	}
284 
285 	rem = src->size;
286 	src_offset = src->node.start;
287 	dst_offset = dst->node.start;
288 
289 	do {
290 		size = min_t(u64, rem, block_size);
291 		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);
292 
293 		if (INTEL_GEN(i915) >= 9 &&
294 		    !wa_1209644611_applies(i915, size)) {
295 			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
296 			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
297 			*cmd++ = 0;
298 			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
299 			*cmd++ = lower_32_bits(dst_offset);
300 			*cmd++ = upper_32_bits(dst_offset);
301 			*cmd++ = 0;
302 			*cmd++ = PAGE_SIZE;
303 			*cmd++ = lower_32_bits(src_offset);
304 			*cmd++ = upper_32_bits(src_offset);
305 		} else if (INTEL_GEN(i915) >= 8) {
306 			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
307 			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
308 			*cmd++ = 0;
309 			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
310 			*cmd++ = lower_32_bits(dst_offset);
311 			*cmd++ = upper_32_bits(dst_offset);
312 			*cmd++ = 0;
313 			*cmd++ = PAGE_SIZE;
314 			*cmd++ = lower_32_bits(src_offset);
315 			*cmd++ = upper_32_bits(src_offset);
316 		} else {
317 			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
318 			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
319 			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
320 			*cmd++ = dst_offset;
321 			*cmd++ = PAGE_SIZE;
322 			*cmd++ = src_offset;
323 		}
324 
325 		/* Allow ourselves to be preempted in between blocks. */
326 		*cmd++ = MI_ARB_CHECK;
327 
328 		src_offset += size;
329 		dst_offset += size;
330 		rem -= size;
331 	} while (rem);
332 
333 	*cmd = MI_BATCH_BUFFER_END;
334 
335 	i915_gem_object_flush_map(pool->obj);
336 	i915_gem_object_unpin_map(pool->obj);
337 
338 	intel_gt_chipset_flush(ce->vm->gt);
339 	batch->private = pool;
340 	return batch;
341 
342 out_unpin:
343 	i915_vma_unpin(batch);
344 out_put:
345 	intel_gt_buffer_pool_put(pool);
346 out_pm:
347 	intel_engine_pm_put(ce->engine);
348 	return ERR_PTR(err);
349 }
350 
i915_gem_object_copy_blt(struct drm_i915_gem_object * src,struct drm_i915_gem_object * dst,struct intel_context * ce)351 int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
352 			     struct drm_i915_gem_object *dst,
353 			     struct intel_context *ce)
354 {
355 	struct i915_address_space *vm = ce->vm;
356 	struct i915_vma *vma[2], *batch;
357 	struct i915_gem_ww_ctx ww;
358 	struct i915_request *rq;
359 	int err, i;
360 
361 	vma[0] = i915_vma_instance(src, vm, NULL);
362 	if (IS_ERR(vma[0]))
363 		return PTR_ERR(vma[0]);
364 
365 	vma[1] = i915_vma_instance(dst, vm, NULL);
366 	if (IS_ERR(vma[1]))
367 		return PTR_ERR(vma[1]);
368 
369 	i915_gem_ww_ctx_init(&ww, true);
370 	intel_engine_pm_get(ce->engine);
371 retry:
372 	err = i915_gem_object_lock(src, &ww);
373 	if (!err)
374 		err = i915_gem_object_lock(dst, &ww);
375 	if (!err)
376 		err = intel_context_pin_ww(ce, &ww);
377 	if (err)
378 		goto out;
379 
380 	err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER);
381 	if (err)
382 		goto out_ctx;
383 
384 	err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER);
385 	if (unlikely(err))
386 		goto out_unpin_src;
387 
388 	batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]);
389 	if (IS_ERR(batch)) {
390 		err = PTR_ERR(batch);
391 		goto out_unpin_dst;
392 	}
393 
394 	rq = i915_request_create(ce);
395 	if (IS_ERR(rq)) {
396 		err = PTR_ERR(rq);
397 		goto out_batch;
398 	}
399 
400 	err = intel_emit_vma_mark_active(batch, rq);
401 	if (unlikely(err))
402 		goto out_request;
403 
404 	for (i = 0; i < ARRAY_SIZE(vma); i++) {
405 		err = move_obj_to_gpu(vma[i]->obj, rq, i);
406 		if (unlikely(err))
407 			goto out_request;
408 	}
409 
410 	for (i = 0; i < ARRAY_SIZE(vma); i++) {
411 		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;
412 
413 		err = i915_vma_move_to_active(vma[i], rq, flags);
414 		if (unlikely(err))
415 			goto out_request;
416 	}
417 
418 	if (rq->engine->emit_init_breadcrumb) {
419 		err = rq->engine->emit_init_breadcrumb(rq);
420 		if (unlikely(err))
421 			goto out_request;
422 	}
423 
424 	err = rq->engine->emit_bb_start(rq,
425 					batch->node.start, batch->node.size,
426 					0);
427 
428 out_request:
429 	if (unlikely(err))
430 		i915_request_set_error_once(rq, err);
431 
432 	i915_request_add(rq);
433 out_batch:
434 	intel_emit_vma_release(ce, batch);
435 out_unpin_dst:
436 	i915_vma_unpin(vma[1]);
437 out_unpin_src:
438 	i915_vma_unpin(vma[0]);
439 out_ctx:
440 	intel_context_unpin(ce);
441 out:
442 	if (err == -EDEADLK) {
443 		err = i915_gem_ww_ctx_backoff(&ww);
444 		if (!err)
445 			goto retry;
446 	}
447 	i915_gem_ww_ctx_fini(&ww);
448 	intel_engine_pm_put(ce->engine);
449 	return err;
450 }
451 
452 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
453 #include "selftests/i915_gem_object_blt.c"
454 #endif
455