/*
 * Copyright © 2008-2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/oom.h>
#include <linux/sched/mm.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>
#include <linux/dma-buf.h>
#include <linux/vmalloc.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_trace.h"

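/*
 * Take the global struct_mutex if we can. If the mutex is already held by
 * this task (MUTEX_TRYLOCK_RECURSIVE) we proceed without taking it again and
 * report that no unlock is required; if it is contended we briefly spin,
 * with preemption disabled, hoping the holder releases it before we need to
 * reschedule. Returns false only if the lock is held elsewhere and could not
 * be acquired before a reschedule was due.
 */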
static bool shrinker_lock(struct drm_i915_private *i915, bool *unlock)
{
	switch (mutex_trylock_recursive(&i915->drm.struct_mutex)) {
	case MUTEX_TRYLOCK_RECURSIVE:
		*unlock = false;
		return true;

	case MUTEX_TRYLOCK_FAILED:
		*unlock = false;
		preempt_disable();
		do {
			cpu_relax();
			if (mutex_trylock(&i915->drm.struct_mutex)) {
				*unlock = true;
				break;
			}
		} while (!need_resched());
		preempt_enable();
		return *unlock;

	case MUTEX_TRYLOCK_SUCCESS:
		*unlock = true;
		return true;
	}

	BUG();
}

static void shrinker_unlock(struct drm_i915_private *i915, bool unlock)
{
	if (!unlock)
		return;

	mutex_unlock(&i915->drm.struct_mutex);
}

static bool swap_available(void)
{
	return get_nr_swap_pages() > 0;
}

static bool can_release_pages(struct drm_i915_gem_object *obj)
{
	/* Consider only shrinkable objects. */
	if (!i915_gem_object_is_shrinkable(obj))
		return false;

	/* Only report true if by unbinding the object and putting its pages
	 * we can actually make forward progress towards freeing physical
	 * pages.
	 *
	 * If the pages are pinned for any other reason than being bound
	 * to the GPU, simply unbinding from the GPU is not going to succeed
	 * in releasing our pin count on the pages themselves.
	 */
	if (atomic_read(&obj->mm.pages_pin_count) > obj->bind_count)
		return false;

	/* If any vma are "permanently" pinned, it will prevent us from
	 * reclaiming the obj->mm.pages. We only allow scanout objects to claim
	 * a permanent pin, along with a few others like the context objects.
	 * To simplify the scan, and to avoid walking the list of vma under the
	 * object, we just check whether the object is permanently pinned.
	 */
	if (READ_ONCE(obj->pin_global))
		return false;

	/* We can only return physical pages to the system if we can either
	 * discard the contents (because the user has marked them as being
	 * purgeable) or if we can move their contents out to swap.
	 */
	return swap_available() || obj->mm.madv == I915_MADV_DONTNEED;
}

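/*
 * Try to unbind the object and, if that succeeds, release its backing pages.
 * The "unsafe" in the name reflects that no extra reference is taken here;
 * callers rely on the struct_mutex serialization described in
 * i915_gem_shrink() below. Returns true if the object ends up with no pages.
 */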
static bool unsafe_drop_pages(struct drm_i915_gem_object *obj)
{
	if (i915_gem_object_unbind(obj) == 0)
		__i915_gem_object_put_pages(obj, I915_MM_SHRINKER);
	return !i915_gem_object_has_pages(obj);
}

/**
 * i915_gem_shrink - Shrink buffer object caches
 * @i915: i915 device
 * @target: amount of memory to make available, in pages
 * @nr_scanned: optional output for number of pages scanned (incremental)
 * @flags: control flags for selecting cache types
 *
 * This function is the main interface to the shrinker. It will try to release
 * up to @target pages of main memory backing storage from buffer objects.
 * Selection of the specific caches can be done with @flags. This is e.g. useful
 * when purgeable objects should be removed from caches preferentially.
 *
 * Note that it's not guaranteed that the released amount is actually available
 * as free system memory - the pages might still be in use due to other reasons
 * (like cpu mmaps) or the mm core has reused them before we could grab them.
 * Therefore code that needs to explicitly shrink buffer object caches (e.g. to
 * avoid deadlocks in memory reclaim) must fall back to i915_gem_shrink_all().
 *
 * Also note that any kind of pinning (both per-vma address space pins and
 * backing storage pins at the buffer object level) results in the shrinker
 * code having to skip the object.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long
i915_gem_shrink(struct drm_i915_private *i915,
		unsigned long target,
		unsigned long *nr_scanned,
		unsigned flags)
{
	const struct {
		struct list_head *list;
		unsigned int bit;
	} phases[] = {
		{ &i915->mm.unbound_list, I915_SHRINK_UNBOUND },
		{ &i915->mm.bound_list, I915_SHRINK_BOUND },
		{ NULL, 0 },
	}, *phase;
	unsigned long count = 0;
	unsigned long scanned = 0;
	bool unlock;

	if (!shrinker_lock(i915, &unlock))
		return 0;

	/*
	 * When shrinking the active list, also consider active contexts.
	 * Active contexts are pinned until they are retired, and so can
	 * not be simply unbound to retire and unpin their pages. To shrink
	 * the contexts, we must wait until the gpu is idle.
	 *
	 * We don't care about errors here; if we cannot wait upon the GPU,
	 * we will free as much as we can and hope to get a second chance.
	 */
	if (flags & I915_SHRINK_ACTIVE)
		i915_gem_wait_for_idle(i915,
				       I915_WAIT_LOCKED,
				       MAX_SCHEDULE_TIMEOUT);

	trace_i915_gem_shrink(i915, target, flags);
	i915_retire_requests(i915);

	/*
	 * Unbinding of objects will require HW access; Let us not wake the
	 * device just to recover a little memory. If absolutely necessary,
	 * we will force the wake during oom-notifier.
	 */
	if ((flags & I915_SHRINK_BOUND) &&
	    !intel_runtime_pm_get_if_in_use(i915))
		flags &= ~I915_SHRINK_BOUND;

	/*
	 * As we may completely rewrite the (un)bound list whilst unbinding
	 * (due to retiring requests) we have to strictly process only
	 * one element of the list at a time, and recheck the list
	 * on every iteration.
	 *
	 * In particular, we must hold a reference whilst removing the
	 * object as we may end up waiting for and/or retiring the objects.
	 * This might release the final reference (held by the active list)
	 * and result in the object being freed from under us. This is
	 * similar to the precautions the eviction code must take whilst
	 * removing objects.
	 *
	 * Also note that although these lists do not hold a reference to
	 * the object we can safely grab one here: The final object
	 * unreferencing and the bound_list are both protected by the
	 * dev->struct_mutex and so we won't ever be able to observe an
	 * object on the bound_list with a reference count of 0.
	 */
	for (phase = phases; phase->list; phase++) {
		struct list_head still_in_list;
		struct drm_i915_gem_object *obj;

		if ((flags & phase->bit) == 0)
			continue;

		INIT_LIST_HEAD(&still_in_list);

		/*
		 * We serialize our access to unreferenced objects through
		 * the use of the struct_mutex. While the objects are not
		 * yet freed (due to RCU then a workqueue) we still want
		 * to be able to shrink their pages, so they remain on
		 * the unbound/bound list until actually freed.
		 */
		spin_lock(&i915->mm.obj_lock);
		while (count < target &&
		       (obj = list_first_entry_or_null(phase->list,
						       typeof(*obj),
						       mm.link))) {
			list_move_tail(&obj->mm.link, &still_in_list);

			if (flags & I915_SHRINK_PURGEABLE &&
			    obj->mm.madv != I915_MADV_DONTNEED)
				continue;

			if (flags & I915_SHRINK_VMAPS &&
			    !is_vmalloc_addr(obj->mm.mapping))
				continue;

			if (!(flags & I915_SHRINK_ACTIVE) &&
			    (i915_gem_object_is_active(obj) ||
			     i915_gem_object_is_framebuffer(obj)))
				continue;

			if (!can_release_pages(obj))
				continue;

			spin_unlock(&i915->mm.obj_lock);

			if (unsafe_drop_pages(obj)) {
				/* May arrive from get_pages on another bo */
				mutex_lock_nested(&obj->mm.lock,
						  I915_MM_SHRINKER);
				if (!i915_gem_object_has_pages(obj)) {
					__i915_gem_object_invalidate(obj);
					count += obj->base.size >> PAGE_SHIFT;
				}
				mutex_unlock(&obj->mm.lock);
			}
			scanned += obj->base.size >> PAGE_SHIFT;

			spin_lock(&i915->mm.obj_lock);
		}
		list_splice_tail(&still_in_list, phase->list);
		spin_unlock(&i915->mm.obj_lock);
	}

	if (flags & I915_SHRINK_BOUND)
		intel_runtime_pm_put(i915);

	i915_retire_requests(i915);

	shrinker_unlock(i915, unlock);

	if (nr_scanned)
		*nr_scanned += scanned;
	return count;
}
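
/*
 * Illustrative example (not a call site in this file): a caller that only
 * wants to reclaim purgeable, unbound objects without waking the device or
 * touching active objects might use something like:
 *
 *	unsigned long nr_freed;
 *
 *	nr_freed = i915_gem_shrink(i915, 128, NULL,
 *				   I915_SHRINK_UNBOUND |
 *				   I915_SHRINK_PURGEABLE);
 *
 * The target and flag values above are purely hypothetical; see
 * i915_gem_shrinker_scan() below for how the real vmscan callback escalates
 * through these flags.
 */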

/**
 * i915_gem_shrink_all - Shrink buffer object caches completely
 * @i915: i915 device
 *
 * This is a simple wrapper around i915_gem_shrink() to aggressively shrink all
 * caches completely. It also first waits for and retires all outstanding
 * requests, so that backing storage for active objects can be released as
 * well.
 *
 * This should only be used in code that intentionally quiesces the GPU or as a
 * last-ditch effort when memory seems to have run out.
 *
 * Returns:
 * The number of pages of backing storage actually released.
 */
unsigned long i915_gem_shrink_all(struct drm_i915_private *i915)
{
	unsigned long freed;

	intel_runtime_pm_get(i915);
	freed = i915_gem_shrink(i915, -1UL, NULL,
				I915_SHRINK_BOUND |
				I915_SHRINK_UNBOUND |
				I915_SHRINK_ACTIVE);
	intel_runtime_pm_put(i915);

	return freed;
}

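/*
 * count_objects callback for the core shrinker: report how many pages we
 * could plausibly release, i.e. the backing pages of every shrinkable object
 * on the unbound list plus every idle, shrinkable object on the bound list.
 * As a side effect, also tune the preferred vmscan batch size (see below).
 */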
static unsigned long
i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	struct drm_i915_gem_object *obj;
	unsigned long num_objects = 0;
	unsigned long count = 0;

	spin_lock(&i915->mm.obj_lock);
	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link)
		if (can_release_pages(obj)) {
			count += obj->base.size >> PAGE_SHIFT;
			num_objects++;
		}

	list_for_each_entry(obj, &i915->mm.bound_list, mm.link)
		if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) {
			count += obj->base.size >> PAGE_SHIFT;
			num_objects++;
		}
	spin_unlock(&i915->mm.obj_lock);

	/* Update our preferred vmscan batch size for the next pass.
	 * Our rough guess for an effective batch size is about two
	 * available GEM objects' worth of pages. That is, we don't want
	 * the shrinker to fire until it is worth the cost of freeing an
	 * entire GEM object.
	 */
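	/*
	 * A worked example (numbers purely illustrative): with 64 shrinkable
	 * objects totalling 32768 pages, avg = 2 * 32768 / 64 = 1024, so the
	 * running average below steers the batch size towards ~1024 pages
	 * rather than the 128 page default.
	 */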
	if (num_objects) {
		unsigned long avg = 2 * count / num_objects;

		i915->mm.shrinker.batch =
			max((i915->mm.shrinker.batch + avg) >> 1,
			    128ul /* default SHRINK_BATCH */);
	}

	return count;
}

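/*
 * scan_objects callback for the core shrinker: reclaim in escalating stages.
 * First try purgeable bound/unbound objects, then any idle object, and only
 * when that still falls short and we are running as kswapd do we take a
 * runtime pm wakeref and shrink active objects as well.
 */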
static unsigned long
i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
{
	struct drm_i915_private *i915 =
		container_of(shrinker, struct drm_i915_private, mm.shrinker);
	unsigned long freed;
	bool unlock;

	sc->nr_scanned = 0;

	if (!shrinker_lock(i915, &unlock))
		return SHRINK_STOP;

	freed = i915_gem_shrink(i915,
				sc->nr_to_scan,
				&sc->nr_scanned,
				I915_SHRINK_BOUND |
				I915_SHRINK_UNBOUND |
				I915_SHRINK_PURGEABLE);
	if (sc->nr_scanned < sc->nr_to_scan)
		freed += i915_gem_shrink(i915,
					 sc->nr_to_scan - sc->nr_scanned,
					 &sc->nr_scanned,
					 I915_SHRINK_BOUND |
					 I915_SHRINK_UNBOUND);
	if (sc->nr_scanned < sc->nr_to_scan && current_is_kswapd()) {
		intel_runtime_pm_get(i915);
		freed += i915_gem_shrink(i915,
					 sc->nr_to_scan - sc->nr_scanned,
					 &sc->nr_scanned,
					 I915_SHRINK_ACTIVE |
					 I915_SHRINK_BOUND |
					 I915_SHRINK_UNBOUND);
		intel_runtime_pm_put(i915);
	}

	shrinker_unlock(i915, unlock);

	return sc->nr_scanned ? freed : SHRINK_STOP;
}

static bool
shrinker_lock_uninterruptible(struct drm_i915_private *i915, bool *unlock,
			      int timeout_ms)
{
	unsigned long timeout = jiffies + msecs_to_jiffies_timeout(timeout_ms);

	do {
		if (i915_gem_wait_for_idle(i915,
					   0, MAX_SCHEDULE_TIMEOUT) == 0 &&
		    shrinker_lock(i915, unlock))
			break;

		schedule_timeout_killable(1);
		if (fatal_signal_pending(current))
			return false;

		if (time_after(jiffies, timeout)) {
			pr_err("Unable to lock GPU to purge memory.\n");
			return false;
		}
	} while (1);

	return true;
}

static int
i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.oom_notifier);
	struct drm_i915_gem_object *obj;
	unsigned long unevictable, bound, unbound, freed_pages;

	freed_pages = i915_gem_shrink_all(i915);

	/* Because we may be allocating inside our own driver, we cannot
	 * assert that there are no objects with pinned pages that are not
	 * being pointed to by hardware.
	 */
	unbound = bound = unevictable = 0;
	spin_lock(&i915->mm.obj_lock);
	list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) {
		if (!can_release_pages(obj))
			unevictable += obj->base.size >> PAGE_SHIFT;
		else
			unbound += obj->base.size >> PAGE_SHIFT;
	}
	list_for_each_entry(obj, &i915->mm.bound_list, mm.link) {
		if (!can_release_pages(obj))
			unevictable += obj->base.size >> PAGE_SHIFT;
		else
			bound += obj->base.size >> PAGE_SHIFT;
	}
	spin_unlock(&i915->mm.obj_lock);

	if (freed_pages || unbound || bound)
		pr_info("Purging GPU memory, %lu pages freed, "
			"%lu pages still pinned.\n",
			freed_pages, unevictable);
	if (unbound || bound)
		pr_err("%lu and %lu pages still available in the "
		       "bound and unbound GPU page lists.\n",
		       bound, unbound);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

static int
i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct drm_i915_private *i915 =
		container_of(nb, struct drm_i915_private, mm.vmap_notifier);
	struct i915_vma *vma, *next;
	unsigned long freed_pages = 0;
	bool unlock;
	int ret;

	if (!shrinker_lock_uninterruptible(i915, &unlock, 5000))
		return NOTIFY_DONE;

	/* Force everything onto the inactive lists */
	ret = i915_gem_wait_for_idle(i915,
				     I915_WAIT_LOCKED,
				     MAX_SCHEDULE_TIMEOUT);
	if (ret)
		goto out;

	intel_runtime_pm_get(i915);
	freed_pages += i915_gem_shrink(i915, -1UL, NULL,
				       I915_SHRINK_BOUND |
				       I915_SHRINK_UNBOUND |
				       I915_SHRINK_ACTIVE |
				       I915_SHRINK_VMAPS);
	intel_runtime_pm_put(i915);

	/* We also want to clear any cached iomaps as they wrap vmap */
	list_for_each_entry_safe(vma, next,
				 &i915->ggtt.vm.inactive_list, vm_link) {
		unsigned long count = vma->node.size >> PAGE_SHIFT;
		if (vma->iomap && i915_vma_unbind(vma) == 0)
			freed_pages += count;
	}

out:
	shrinker_unlock(i915, unlock);

	*(unsigned long *)ptr += freed_pages;
	return NOTIFY_DONE;
}

/**
 * i915_gem_shrinker_register - Register the i915 shrinker
 * @i915: i915 device
 *
 * This function registers and sets up the i915 shrinker and OOM handler.
 */
void i915_gem_shrinker_register(struct drm_i915_private *i915)
{
	i915->mm.shrinker.scan_objects = i915_gem_shrinker_scan;
	i915->mm.shrinker.count_objects = i915_gem_shrinker_count;
	i915->mm.shrinker.seeks = DEFAULT_SEEKS;
	i915->mm.shrinker.batch = 4096;
	WARN_ON(register_shrinker(&i915->mm.shrinker));

	i915->mm.oom_notifier.notifier_call = i915_gem_shrinker_oom;
	WARN_ON(register_oom_notifier(&i915->mm.oom_notifier));

	i915->mm.vmap_notifier.notifier_call = i915_gem_shrinker_vmap;
	WARN_ON(register_vmap_purge_notifier(&i915->mm.vmap_notifier));
}

/**
 * i915_gem_shrinker_unregister - Unregister the i915 shrinker
 * @i915: i915 device
 *
 * This function unregisters the i915 shrinker and OOM handler.
 */
void i915_gem_shrinker_unregister(struct drm_i915_private *i915)
{
	WARN_ON(unregister_vmap_purge_notifier(&i915->mm.vmap_notifier));
	WARN_ON(unregister_oom_notifier(&i915->mm.oom_notifier));
	unregister_shrinker(&i915->mm.shrinker);
}

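/*
 * Teach lockdep that the given mutex may be taken in the memory reclaim path:
 * under CONFIG_LOCKDEP we acquire it once inside a dummy fs_reclaim section,
 * so any later attempt to allocate memory (and hence potentially enter
 * reclaim) while holding this mutex is flagged as a possible deadlock.
 */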
void i915_gem_shrinker_taints_mutex(struct mutex *mutex)
{
	if (!IS_ENABLED(CONFIG_LOCKDEP))
		return;

	fs_reclaim_acquire(GFP_KERNEL);
	mutex_lock(mutex);
	mutex_unlock(mutex);
	fs_reclaim_release(GFP_KERNEL);
}