/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "../i915_selftest.h"
#include "i915_random.h"

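/*
 * Write a single dword into the object through its shmem (CPU) mapping,
 * flushing the cacheline around the access when the object is not
 * coherent with the CPU cache.
 */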
static int cpu_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	u32 *map;
	int err;

	err = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);

	if (needs_clflush & CLFLUSH_BEFORE) {
		mb();
		clflush(map + offset_in_page(offset) / sizeof(*map));
		mb();
	}

	map[offset_in_page(offset) / sizeof(*map)] = v;

	if (needs_clflush & CLFLUSH_AFTER) {
		mb();
		clflush(map + offset_in_page(offset) / sizeof(*map));
		mb();
	}

	kunmap_atomic(map);

	i915_gem_obj_finish_shmem_access(obj);
	return 0;
}

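/*
 * Read a single dword back through the shmem (CPU) mapping, flushing the
 * cacheline first if the object is not coherent with the CPU cache.
 */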
static int cpu_get(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	u32 *map;
	int err;

	err = i915_gem_obj_prepare_shmem_read(obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);

	if (needs_clflush & CLFLUSH_BEFORE) {
		mb();
		clflush(map + offset_in_page(offset) / sizeof(*map));
		mb();
	}

	*v = map[offset_in_page(offset) / sizeof(*map)];
	kunmap_atomic(map);

	i915_gem_obj_finish_shmem_access(obj);
	return 0;
}

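/*
 * Write a dword through a GTT (aperture) mapping: move the object to the
 * GTT domain, pin it into the mappable aperture and poke the value via
 * the io mapping.
 */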
static int gtt_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err;

	err = i915_gem_object_set_to_gtt_domain(obj, true);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map))
		return PTR_ERR(map);

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

	return 0;
}

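/*
 * Read a dword back through a GTT (aperture) mapping of the object.
 */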
static int gtt_get(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 *v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err;

	err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

	return 0;
}

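/*
 * Write a dword through a CPU write-combining mapping of the object's
 * backing store, bypassing the CPU cache.
 */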
static int wc_set(struct drm_i915_gem_object *obj,
		  unsigned long offset,
		  u32 v)
{
	u32 *map;
	int err;

	err = i915_gem_object_set_to_wc_domain(obj, true);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;
	i915_gem_object_unpin_map(obj);

	return 0;
}

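/*
 * Read a dword back through a write-combining mapping of the object.
 */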
static int wc_get(struct drm_i915_gem_object *obj,
		  unsigned long offset,
		  u32 *v)
{
	u32 *map;
	int err;

	err = i915_gem_object_set_to_wc_domain(obj, false);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(obj);

	return 0;
}

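/*
 * Write a dword from the GPU by emitting MI_STORE_DWORD_IMM on the render
 * engine. The command layout varies with the hardware generation: gen8+
 * takes a 64-bit address, gen4-7 a 32-bit GGTT offset, and earlier gens
 * use the virtual-address variant of the command.
 */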
static int gpu_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	err = i915_gem_object_set_to_gtt_domain(obj, true);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rq = i915_request_alloc(i915->engine[RCS], i915->kernel_context);
	if (IS_ERR(rq)) {
		i915_vma_unpin(vma);
		return PTR_ERR(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		i915_vma_unpin(vma);
		return PTR_ERR(cs);
	}

	if (INTEL_GEN(i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (INTEL_GEN(i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unpin(vma);

	i915_request_add(rq);

	return err;
}

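/*
 * Validity checks: skip modes that cannot run on this device, e.g. when
 * the GPU is terminally wedged or the engine cannot emit
 * MI_STORE_DWORD_IMM.
 */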
static bool always_valid(struct drm_i915_private *i915)
{
	return true;
}

static bool needs_fence_registers(struct drm_i915_private *i915)
{
	return !i915_terminally_wedged(&i915->gpu_error);
}

static bool needs_mi_store_dword(struct drm_i915_private *i915)
{
	if (i915_terminally_wedged(&i915->gpu_error))
		return false;

	return intel_engine_can_store_dword(i915->engine[RCS]);
}

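/*
 * Each coherency mode supplies a setter, an optional getter and a validity
 * check; the unnamed entry terminates the table. The "gpu" mode has no
 * getter, so it only ever acts as a writer or overwriter.
 */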
static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v);
	int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v);
	bool (*valid)(struct drm_i915_private *i915);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};

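/*
 * For each (overwrite, write, read) combination of the modes above, plant
 * a stale value in a random selection of cachelines, overwrite it and read
 * it back, looking for unflushed writes. offsets[] and values[] share one
 * allocation: values aliases the second half (hence 2 * sizeof(u32)).
 */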
static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE / 64;
	I915_RND_STATE(prng);
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	struct drm_i915_gem_object *obj;
	unsigned long count, n;
	u32 *offsets, *values;
	int err = 0;

	/* We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try to detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2 * sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;
	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	mutex_lock(&i915->drm.struct_mutex);
	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(i915))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(i915))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(i915))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(obj)) {
						err = PTR_ERR(obj);
						goto unlock;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(obj, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%lu/%lu] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(obj, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%lu/%lu] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(obj, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%lu/%lu] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%lu/%lu] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					__i915_gem_object_release_unless_active(obj);
				}
			}
		}
	}
unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(offsets);
	return err;

put_object:
	__i915_gem_object_release_unless_active(obj);
	goto unlock;
}

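/*
 * Entry point for the live coherency selftests, run against real hardware
 * by the i915 selftest harness.
 */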
int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_subtests(tests, i915);
}