/*
 * Support file for amdgcn in newlib.
 * Copyright (c) 2024 BayLibre.
 *
 * The authors hereby grant permission to use, copy, modify, distribute,
 * and license this software and its documentation for any purpose, provided
 * that existing copyright notices are retained in all copies and that this
 * notice is included verbatim in any distributions. No written agreement,
 * license, or royalty fee is required for any of the authorized uses.
 * Modifications to this software may be copyrighted by their authors
 * and need not follow the licensing terms described here, provided that
 * the new terms are clearly indicated on the first page of each file where
 * they apply.
 */

/* Lock routines for AMD GPU devices.

   The lock is a 32-bit int:
     - bits 0-3: wavefront id
     - bits 4-23: workgroup id (+1, so never zero)
     - bits 24-31: recursive lock count.

   The purpose of the "relaxed" loads and stores being "atomic" here is
   mostly just to ensure we punch through the caches consistently.

   Non-recursive locks may be unlocked by any thread.  It's an error to
   attempt to unlock a recursive lock from the wrong thread.

   The DEBUG statements here use sprintf and write to avoid taking locks
   themselves.  */
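
/* For example, a lock value of 0x02000033 decodes as recursion count 2
   (bits 24-31), workgroup id 2 (the field holds 2 + 1 = 3 in bits 4-23)
   and wavefront id 3 (bits 0-3).  */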

#include <sys/lock.h>
#include <assert.h>

#define DEBUG 0

#if DEBUG
extern void write (int, char *, int);
#endif

static unsigned
__gcn_thread_id ()
{
  /* Dim(0) is the workgroup ID; range 0 to maybe thousands.
     Dim(1) is the wavefront ID; range 0 to 15.  */
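  /* For example, workgroup 2 and wavefront 3 give
     ((2 + 1) << 4) + 3 == 0x33.  */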
  return (((__builtin_gcn_dim_pos (0) + 1) << 4)
	  + __builtin_gcn_dim_pos (1));
}

static int
__gcn_lock_acquire_int (_LOCK_T *lock_ptr, int _try)
{
  int id = __gcn_thread_id ();

#if DEBUG
  char buf[1000];
  __builtin_sprintf (buf, "acquire:%p(%d) lock_value:0x%x id:0x%x", lock_ptr,
		     _try, *lock_ptr, id);
  write (1, buf, __builtin_strlen (buf));
#endif

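  /* The lock word is 0 when the lock is free.  Claim it by atomically
     swapping in this thread's id.  On failure the compare-exchange
     writes the current lock value back into EXPECTED, so it must be
     reset to 0 before each retry.  */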
  int expected = 0;
  while (!__atomic_compare_exchange_n (lock_ptr, &expected, id, 0,
				       __ATOMIC_ACQUIRE, __ATOMIC_RELAXED))
    {
      /* Lock *not* acquired.  */
      if (_try)
	return 0;
      else
	{
	  /* Back off briefly before retrying.  */
	  asm ("s_sleep 64");
	  expected = 0;
	}
    }

#if DEBUG
  __builtin_sprintf (buf, "acquired:%p(%d) lock_value:0x%x id:0x%x", lock_ptr,
		     _try, *lock_ptr, id);
  write (1, buf, __builtin_strlen (buf));
#endif

  return 1;
}

int
__gcn_try_lock_acquire (_LOCK_T *lock_ptr)
{
  return __gcn_lock_acquire_int (lock_ptr, 1);
}

void
__gcn_lock_acquire (_LOCK_T *lock_ptr)
{
  __gcn_lock_acquire_int (lock_ptr, 0);
}

static int
__gcn_lock_acquire_recursive_int (_LOCK_T *lock_ptr, int _try)
{
  int id = __gcn_thread_id ();

#if DEBUG
  char buf[1000];
  __builtin_sprintf (buf, "acquire recursive:%p(%d) lock_value:0x%x id:0x%x",
		     lock_ptr, _try, *lock_ptr, id);
  write (1, buf, __builtin_strlen (buf));
#endif

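  /* The low 24 bits name the owner.  If they match this thread's id we
     already hold the lock, and (provided the lock is used correctly)
     no other thread will modify the word until we release it, so a
     relaxed load and store are enough to adjust the count.  */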
  unsigned int lock_value = __atomic_load_n (lock_ptr, __ATOMIC_RELAXED);
  if ((lock_value & 0xffffff) == id)
    {
      /* This thread already holds the lock.
	 Increment the recursion counter and update the lock.  */
      int count = lock_value >> 24;
      lock_value = ((count + 1) << 24) | id;
      __atomic_store_n (lock_ptr, lock_value, __ATOMIC_RELAXED);

#if DEBUG
      __builtin_sprintf (buf,
			 "increment recursive:%p(%d) lock_value:0x%x id:0x%x",
			 lock_ptr, _try, *lock_ptr, id);
      write (1, buf, __builtin_strlen (buf));
#endif

      return 1;
    }
  else
    return __gcn_lock_acquire_int (lock_ptr, _try);
}

int
__gcn_lock_try_acquire_recursive (_LOCK_T *lock_ptr)
{
  return __gcn_lock_acquire_recursive_int (lock_ptr, 1);
}

void
__gcn_lock_acquire_recursive (_LOCK_T *lock_ptr)
{
  __gcn_lock_acquire_recursive_int (lock_ptr, 0);
}

void
__gcn_lock_release (_LOCK_T *lock_ptr)
{
#if DEBUG
  char buf[1000];
  __builtin_sprintf (buf, "release:%p lock_value:0x%x id:0x%x", lock_ptr,
		     *lock_ptr, __gcn_thread_id ());
  write (1, buf, __builtin_strlen (buf));
#endif

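  /* No ownership check here: non-recursive locks may be released by
     any thread.  The release ordering makes this thread's prior writes
     visible to whoever acquires the lock next.  */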
  __atomic_store_n (lock_ptr, 0, __ATOMIC_RELEASE);
}

void
__gcn_lock_release_recursive (_LOCK_T *lock_ptr)
{
  int id = __gcn_thread_id ();
  unsigned int lock_value = __atomic_load_n (lock_ptr, __ATOMIC_RELAXED);

#if DEBUG
  char buf[1000];
  __builtin_sprintf (buf, "release recursive:%p lock_value:0x%x id:0x%x",
		     lock_ptr, lock_value, id);
  write (1, buf, __builtin_strlen (buf));
#endif

  /* It is an error to call this function from the wrong thread.  */
  assert ((lock_value & 0xffffff) == id);

  /* Decrement or release the lock.  */
  int count = lock_value >> 24;
  if (count > 0)
    {
      lock_value = ((count - 1) << 24) | id;
      __atomic_store_n (lock_ptr, lock_value, __ATOMIC_RELAXED);

#if DEBUG
      __builtin_sprintf (buf, "decrement recursive:%p lock_value:0x%x id:0x%x",
			 lock_ptr, *lock_ptr, id);
      write (1, buf, __builtin_strlen (buf));
#endif
    }
  else
    __gcn_lock_release (lock_ptr);
}