1 /*
2 (C) Copyright IBM Corp. 2008
3
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 * Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of IBM nor the names of its contributors may be
15 used to endorse or promote products derived from this software without
16 specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 POSSIBILITY OF SUCH DAMAGE.
29
30 Author: Ken Werner <ken.werner@de.ibm.com>
31 */
32
33 #include <picolibc.h>
34
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <sys/uio.h>
42 #include <fcntl.h>
43 #include <ea.h>
44 #include <spu_intrinsics.h>
45 #include <spu_mfcio.h>
46 #include <spu_timer.h>
47 #include <limits.h>
48 #include <sys/linux_syscalls.h>
49
/* Magic cookie.  __mcleanup copies it into gmon_hdr.cookie at the very start
   of the output file. */
#define GMON_MAGIC_COOKIE "gmon"

/* Version number stored in gmon_hdr.version. */
#define GMON_VERSION 1

/* Fraction of text space to allocate for histogram counters: there is one
   HISTCOUNTER bin for every HISTFRACTION bytes of text. */
#define HISTFRACTION 4

/* Histogram counter type. */
#define HISTCOUNTER unsigned short

/* Fraction of text space to allocate for "from" hash buckets. HASHFRACTION is
   based on the minimum number of bytes of separation between two subroutine
   call points in the object code. */
#define HASHFRACTION 4

/* Percentage of the text size used as the number of tostruct entries to
   allocate, subject to the MINARCS minimum. */
#define ARCDENSITY 3

/* Minimum number of arcs (tostruct entries) to allocate. */
#define MINARCS 50

/* Rounding macros: round x down / up to a multiple of y.  Both expand y more
   than once, so arguments must be free of side effects. */
#define ROUNDDOWN(x,y) (((x)/(y))*(y))
#define ROUNDUP(x,y) ((((x)+(y)-1)/(y))*(y))

/* Sampling rate in Hertz.  __monstartup divides spu_timebase () by this value
   to obtain the interval passed to spu_timer_alloc. */
#define SAMPLE_INTERVAL 100

/* Tag definitions for the gmon.out sub headers.  A single tag byte is written
   in front of the histogram header and in front of every call-graph arc. */
#define GMON_TAG_TIME_HIST 0
#define GMON_TAG_CG_ARC 1
83
/* One recorded call-graph arc destination.  Entries live in the tos array and
   are chained through link; the chain belonging to a call site starts at the
   index stored in the froms array.  Index 0 never describes an arc: a link
   of 0 terminates a chain, and tos[0].link holds the index of the entry that
   was allocated most recently. */
struct tostruct
{
  uintptr_t selfpc;		/* Address of the function that was entered. */
  long count;			/* Number of times this arc was traversed. */
  unsigned short link;		/* Index of the next entry in the chain, 0 = end. */
};
90
/* File header that __mcleanup writes at offset 0 of the output file.  The
   structure is written to the file as-is, so its layout must not change. */
struct gmon_hdr
{
  char cookie[4];		/* GMON_MAGIC_COOKIE, without a terminating NUL. */
  int32_t version;		/* GMON_VERSION. */
  char spare[3 * 4];		/* Unused; zero-filled by __mcleanup. */
};
97
/* Histogram sub header.  write_histogram emits it right after a
   GMON_TAG_TIME_HIST tag byte, and the histogram bins follow it in the file.
   Packed because the structure is written to the file as-is. */
struct gmon_hist_hdr
{
  uintptr_t low_pc;		/* Lowest profiled address (s_lowpc). */
  uintptr_t high_pc;		/* End of the profiled range (s_highpc). */
  int32_t hist_size;		/* Number of histogram bins. */
  int32_t prof_rate;		/* Sampling rate in Hertz. */
  char dimen[15];		/* Name of the measured dimension ("seconds"). */
  char dimen_abbrev;		/* One-character abbreviation of dimen ('s'). */
} __attribute__ ((packed));
107
/* One call-graph arc as stored in the output file.  write_callgraph emits
   each record right after a GMON_TAG_CG_ARC tag byte.  Packed because the
   structure is written to the file as-is. */
struct rawarc
{
  uintptr_t raw_frompc;		/* Address derived from the froms bucket of the call site. */
  uintptr_t raw_selfpc;		/* Address of the called function. */
  long raw_count;		/* Number of times the arc was traversed. */
} __attribute__ ((packed));
114
/* Start and end of the text section.  Only the addresses of these symbols
   are used, never their contents. */
extern char _start;
extern char _etext;

/* EAR entry for the starting address of SPE executable image.  The assembly
   below reserves 16 bytes for _EAR_ in the .toe section; __monstartup uses
   its value as part of the output file name. */
extern const unsigned long long _EAR_;
__asm__ (".section .toe,\"a\",@nobits\n\r"
	 ".align 4\n\r"
	 ".type _EAR_, @object\n\r"
	 ".size _EAR_, 16\n" "_EAR_: .space 16\n" ".previous");

/* froms are indexing tos: froms[i] is the index of the first tos entry for
   the call sites that hash to bucket i, or 0 if no arc has been recorded for
   that bucket.  Both arrays are allocated with malloc_ea in __monstartup. */
static __ea unsigned short *froms;
static __ea struct tostruct *tos = 0;
/* Number of entries allocated for tos. */
static long tolimit = 0;
/* Profiled address range, rounded to a multiple of
   HISTFRACTION * sizeof (HISTCOUNTER), and its size in bytes. */
static uintptr_t s_lowpc = 0;
static uintptr_t s_highpc = 0;
static unsigned long s_textsize = 0;

/* File descriptor of the gmon output file opened by __monstartup. */
static int fd;
/* Size of the histogram data in bytes. */
static int hist_size;
/* Identifier of the sampling timer returned by spu_timer_alloc. */
static int timer_id;
137
138 void
__sample(int id)139 __sample (int id)
140 {
141 unsigned int pc;
142 unsigned int pc_backup;
143 off_t offset;
144 unsigned short val;
145
146 if (id != timer_id)
147 return;
148
149 /* Fetch program counter. */
150 pc = spu_read_srr0 () & ~3;
151 pc_backup = pc;
152 if (pc < s_lowpc || pc > s_highpc)
153 return;
154 pc -= (uintptr_t) & _start;
155 offset = pc / HISTFRACTION * sizeof (HISTCOUNTER) + sizeof (struct gmon_hdr)
156 + 1 + sizeof (struct gmon_hist_hdr);
157
158 /* Read, increment and write the counter. */
159 if (pread (fd, &val, 2, offset) != 2)
160 {
161 perror ("can't read the histogram");
162 return;
163 }
164 if (val < USHRT_MAX)
165 ++val;
166 if (pwrite (fd, &val, 2, offset) != 2)
167 {
168 perror ("can't write the histogram");
169 }
170 }
171
172 static void
write_histogram(int fd)173 write_histogram (int fd)
174 {
175 struct gmon_hist_hdr hist_hdr;
176 u_char tag = GMON_TAG_TIME_HIST;
177 hist_hdr.low_pc = s_lowpc;
178 hist_hdr.high_pc = s_highpc;
179 hist_hdr.hist_size = hist_size / sizeof (HISTCOUNTER); /* Amount of bins. */
180 hist_hdr.prof_rate = 100; /* Hertz. */
181 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
182 hist_hdr.dimen_abbrev = 's';
183 struct iovec iov[2] = {
184 {&tag, sizeof (tag)},
185 {&hist_hdr, sizeof (struct gmon_hist_hdr)}
186 };
187 if (writev (fd, iov, 2) != sizeof (struct gmon_hist_hdr) + sizeof (tag))
188 perror ("can't write the histogram header");
189
190 /* Skip the already written histogram data. */
191 lseek (fd, hist_size, SEEK_CUR);
192 }
193
194 static void
write_callgraph(int fd)195 write_callgraph (int fd)
196 {
197 int fromindex, endfrom;
198 uintptr_t frompc;
199 int toindex;
200 struct rawarc rawarc;
201 u_char tag = GMON_TAG_CG_ARC;
202 endfrom = s_textsize / (HASHFRACTION * sizeof (*froms));
203 for (fromindex = 0; fromindex < endfrom; ++fromindex)
204 {
205 if (froms[fromindex])
206 {
207 frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof (*froms));
208 for (toindex = froms[fromindex]; toindex != 0;
209 toindex = tos[toindex].link)
210 {
211 rawarc.raw_frompc = frompc;
212 rawarc.raw_selfpc = tos[toindex].selfpc;
213 rawarc.raw_count = tos[toindex].count;
214 struct iovec iov[2] = {
215 {&tag, sizeof (tag)},
216 {&rawarc, sizeof (struct rawarc)}
217 };
218 if (writev (fd, iov, 2) != sizeof (tag) + sizeof (struct rawarc))
219 perror ("can't write the callgraph");
220 }
221 }
222 }
223 }
224
225 void
__mcleanup(void)226 __mcleanup (void)
227 {
228 struct gmon_hdr ghdr;
229
230 /* Disable sampling. */
231 spu_timer_stop (timer_id);
232 spu_timer_free (timer_id);
233 spu_clock_stop ();
234
235 /* Jump to the beginning of the gmon.out file. */
236 if (lseek (fd, 0, SEEK_SET) == -1)
237 {
238 perror ("Cannot seek to the beginning of the gmon.out file.");
239 close (fd);
240 return;
241 }
242
243 /* Write the gmon.out header. */
244 memset (&ghdr, '\0', sizeof (struct gmon_hdr));
245 memcpy (&ghdr.cookie[0], GMON_MAGIC_COOKIE, sizeof (ghdr.cookie));
246 ghdr.version = GMON_VERSION;
247 if (write (fd, &ghdr, sizeof (struct gmon_hdr)) == -1)
248 {
249 perror ("Cannot write the gmon header to the gmon.out file.");
250 close (fd);
251 return;
252 }
253
254 /* Write the sampling buffer (histogram). */
255 write_histogram (fd);
256
257 /* Write the call graph. */
258 write_callgraph (fd);
259
260 close (fd);
261 }
262
263 void
__monstartup(unsigned long long spu_id)264 __monstartup (unsigned long long spu_id)
265 {
266 char filename[64];
267 s_lowpc =
268 ROUNDDOWN ((uintptr_t) & _start, HISTFRACTION * sizeof (HISTCOUNTER));
269 s_highpc =
270 ROUNDUP ((uintptr_t) & _etext, HISTFRACTION * sizeof (HISTCOUNTER));
271 s_textsize = s_highpc - s_lowpc;
272
273 hist_size = s_textsize / HISTFRACTION * sizeof (HISTCOUNTER);
274
275 /* Allocate froms. */
276 froms = malloc_ea (s_textsize / HASHFRACTION);
277 if (froms == NULL)
278 {
279 fprintf (stderr, "Cannot allocate ea memory for the froms array.\n");
280 return;
281 }
282 memset_ea (froms, 0, s_textsize / HASHFRACTION);
283
284 /* Determine tolimit. */
285 tolimit = s_textsize * ARCDENSITY / 100;
286 if (tolimit < MINARCS)
287 tolimit = MINARCS;
288
289 /* Allocate tos. */
290 tos = malloc_ea (tolimit * sizeof (struct tostruct));
291 if (tos == NULL)
292 {
293 fprintf (stderr, "Cannot allocate ea memory for the tos array.\n");
294 return;
295 }
296 memset_ea (tos, 0, tolimit * sizeof (struct tostruct));
297
298 /* Determine the gmon.out file name. */
299 if (spu_id)
300 snprintf (filename, sizeof (filename), "gmon-%d-%llu-%llu.out",
301 linux_getpid (), spu_id, _EAR_);
302 else
303 strncpy (filename, "gmon.out", sizeof (filename));
304 /* Open the gmon.out file. */
305 fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
306 if (fd == -1)
307 {
308 char errstr[128];
309 snprintf (errstr, sizeof (errstr), "Cannot open file: %s", filename);
310 perror (errstr);
311 return;
312 }
313 /* Truncate the file up to the size where the histogram fits in. */
314 if (ftruncate (fd,
315 sizeof (struct gmon_hdr) + 1 +
316 sizeof (struct gmon_hist_hdr) + hist_size) == -1)
317 {
318 char errstr[128];
319 snprintf (errstr, sizeof (errstr), "Cannot truncate file: %s", filename);
320 perror (errstr);
321 return;
322 }
323
324 /* Start the histogram sampler. */
325 spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih);
326 timer_id = spu_timer_alloc (spu_timebase () / SAMPLE_INTERVAL, __sample);
327 spu_clock_start ();
328 spu_timer_start (timer_id);
329
330 atexit (__mcleanup);
331 }
332
333 void
__mcount_internal(uintptr_t frompc,uintptr_t selfpc)334 __mcount_internal (uintptr_t frompc, uintptr_t selfpc)
335 {
336 /* sefpc: the address of the function just entered. */
337 /* frompc: the caller of the function just entered. */
338 unsigned int mach_stat;
339 __ea unsigned short *frompcindex;
340 unsigned short toindex;
341 __ea struct tostruct *top;
342 __ea struct tostruct *prevtop;
343
344 /* Save current state and disable interrupts. */
345 mach_stat = spu_readch(SPU_RdMachStat);
346 spu_idisable ();
347
348 /* Sanity checks. */
349 if (frompc < s_lowpc || frompc > s_highpc)
350 goto done;
351 frompc -= s_lowpc;
352 if (frompc > s_textsize)
353 goto done;
354
355 /* frompc indexes into the froms array the value at that position indexes
356 into the tos array. */
357 frompcindex = &froms[(frompc) / (HASHFRACTION * sizeof (*froms))];
358 toindex = *frompcindex;
359 if (toindex == 0)
360 {
361 /* First time traversing this arc link of tos[0] incremented. */
362 toindex = ++tos[0].link;
363 /* Sanity check. */
364 if (toindex >= tolimit)
365 {
366 --tos[0].link;
367 goto done;
368 }
369 /* Save the index into the froms array for the next time we traverse this arc. */
370 *frompcindex = toindex;
371 top = &tos[toindex];
372 /* Sets the address of the function just entered. */
373 top->selfpc = selfpc;
374 top->count = 1;
375 top->link = 0;
376 goto done;
377 }
378
379 /* toindex points to a tostruct */
380 top = &tos[toindex];
381 if (top->selfpc == selfpc)
382 {
383 /* The arc is at front of the chain. This is the most common case. */
384 top->count++;
385 goto done;
386 }
387
388 /* top->selfpc != selfpc
389 The pc we have got is not the pc we already stored (i.e. multiple function
390 calls to the same fuction within a function. The arc is not at front of
391 the chain. */
392 for (;;)
393 {
394 if (top->link == 0)
395 {
396 /* We are at the end of the chain and selfpc was not found. Thus we create
397 a new tostruct and link it to the head of the chain. */
398 toindex = ++tos[0].link;
399 /* Sanity check. */
400 if (toindex >= tolimit)
401 {
402 --tos[0].link;
403 goto done;
404 }
405 top = &tos[toindex];
406 top->selfpc = selfpc;
407 top->count = 1;
408 /* Link back to the old tos entry. */
409 top->link = *frompcindex;
410 /* Store a link to the new top in the froms array which makes the
411 current tos head of the chain. */
412 *frompcindex = toindex;
413 goto done;
414 }
415 else
416 {
417 /* Otherwise check the next arc on the chain. */
418 prevtop = top;
419 top = &tos[top->link];
420 if (top->selfpc == selfpc)
421 {
422 /* selfpc matches; increment its count. */
423 top->count++;
424 /* Move it to the head of the chain. */
425 /* Save previous tos index. */
426 toindex = prevtop->link;
427 /* Link the former to to the current tos. */
428 prevtop->link = top->link;
429 /* Link back to the old tos entry. */
430 top->link = *frompcindex;
431 /* Store a link to the new top in the froms array which makes the
432 current tos head of the chain. */
433 *frompcindex = toindex;
434 goto done;
435 }
436 }
437 }
438 done:
439 /* Enable interrupts if necessary. */
440 if (__builtin_expect (mach_stat & 1, 0))
441 spu_ienable ();
442 }
443