1 /*
2 (C) Copyright IBM Corp. 2008
3
4 All rights reserved.
5
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8
9 * Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of IBM nor the names of its contributors may be
15 used to endorse or promote products derived from this software without
16 specific prior written permission.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 POSSIBILITY OF SUCH DAMAGE.
29
30 Author: Ken Werner <ken.werner@de.ibm.com>
31 */
32
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <sys/uio.h>
40 #include <fcntl.h>
41 #include <ea.h>
42 #include <spu_intrinsics.h>
43 #include <spu_mfcio.h>
44 #include <spu_timer.h>
45 #include <limits.h>
46 #include <sys/linux_syscalls.h>
47
/* Magic cookie stored at the very start of the gmon.out header.  */
#define GMON_MAGIC_COOKIE "gmon"

/* Version number stored in the gmon.out header.  */
#define GMON_VERSION 1

/* One histogram counter is kept for every HISTFRACTION bytes of text.  */
#define HISTFRACTION 4

/* Type of a single histogram counter.  */
#define HISTCOUNTER unsigned short

/* The "from" hash table is given 1/HASHFRACTION of the text size, in
   bytes.  HASHFRACTION is based on the minimum number of bytes of
   separation between two subroutine call points in the object code.  */
#define HASHFRACTION 4

/* Number of tostructs to allocate, as a percentage of the text size
   (never fewer than MINARCS).  */
#define ARCDENSITY 3

/* Lower bound on the number of tostructs (arcs) that get allocated.  */
#define MINARCS 50

/* Round X down / up to a multiple of Y (non-negative integers).  Both
   macros evaluate their arguments more than once.  */
#define ROUNDDOWN(x,y) (((x) / (y)) * (y))
#define ROUNDUP(x,y) ROUNDDOWN ((x) + (y) - 1, (y))

/* Number of profiling samples taken per second (Hertz).  */
#define SAMPLE_INTERVAL 100

/* One-byte record tags that precede each gmon.out sub header.  */
#define GMON_TAG_TIME_HIST 0
#define GMON_TAG_CG_ARC 1
81
/* One call-graph arc as kept in the tos array.  Arcs that hash to the same
   froms slot are chained through LINK; entry 0 of the array is never used
   as an arc, its LINK field counts the entries handed out so far.  */
struct tostruct
{
  /* Address of the called function.  */
  uintptr_t selfpc;
  /* Number of times this arc has been traversed.  */
  long count;
  /* Index of the next arc on the same chain; 0 ends the chain.  */
  unsigned short link;
};
88
/* File header written at offset 0 of gmon.out.  */
struct gmon_hdr
{
  /* Holds GMON_MAGIC_COOKIE (no terminating NUL).  */
  char cookie[4];
  /* Holds GMON_VERSION.  */
  int32_t version;
  /* Unused; written as zeros.  */
  char spare[3 * 4];
};
95
/* Sub header that follows the GMON_TAG_TIME_HIST tag byte and precedes the
   histogram counters.  Packed because it is written to the file verbatim.  */
struct gmon_hist_hdr
{
  /* Lowest text address covered by the histogram.  */
  uintptr_t low_pc;
  /* Upper bound of the text range covered by the histogram.  */
  uintptr_t high_pc;
  /* Number of histogram counters (bins).  */
  int32_t hist_size;
  /* Sampling rate in Hertz.  */
  int32_t prof_rate;
  /* Name of the unit a sample stands for, zero padded.  */
  char dimen[15];
  /* One-character abbreviation of that unit.  */
  char dimen_abbrev;
} __attribute__ ((packed));
105
/* Call-graph record that follows each GMON_TAG_CG_ARC tag byte.  Packed
   because it is written to the file verbatim.  */
struct rawarc
{
  /* Address the call was made from.  */
  uintptr_t raw_frompc;
  /* Address of the called function.  */
  uintptr_t raw_selfpc;
  /* Number of times the arc was traversed.  */
  long raw_count;
} __attribute__ ((packed));
112
113 /* start and end of the text section */
114 extern char _start;
115 extern char _etext;
116
117 /* EAR entry for the starting address of SPE executable image. */
118 extern const unsigned long long _EAR_;
119 __asm__ (".section .toe,\"a\",@nobits\n\r"
120 ".align 4\n\r"
121 ".type _EAR_, @object\n\r"
122 ".size _EAR_, 16\n" "_EAR_: .space 16\n" ".previous");
123
124 /* froms are indexing tos */
125 static __ea unsigned short *froms;
126 static __ea struct tostruct *tos = 0;
127 static long tolimit = 0;
128 static uintptr_t s_lowpc = 0;
129 static uintptr_t s_highpc = 0;
130 static unsigned long s_textsize = 0;
131
132 static int fd;
133 static int hist_size;
134 static int timer_id;
135
136 void
__sample(int id)137 __sample (int id)
138 {
139 unsigned int pc;
140 unsigned int pc_backup;
141 off_t offset;
142 unsigned short val;
143
144 if (id != timer_id)
145 return;
146
147 /* Fetch program counter. */
148 pc = spu_read_srr0 () & ~3;
149 pc_backup = pc;
150 if (pc < s_lowpc || pc > s_highpc)
151 return;
152 pc -= (uintptr_t) & _start;
153 offset = pc / HISTFRACTION * sizeof (HISTCOUNTER) + sizeof (struct gmon_hdr)
154 + 1 + sizeof (struct gmon_hist_hdr);
155
156 /* Read, increment and write the counter. */
157 if (pread (fd, &val, 2, offset) != 2)
158 {
159 perror ("can't read the histogram");
160 return;
161 }
162 if (val < USHRT_MAX)
163 ++val;
164 if (pwrite (fd, &val, 2, offset) != 2)
165 {
166 perror ("can't write the histogram");
167 }
168 }
169
170 static void
write_histogram(int fd)171 write_histogram (int fd)
172 {
173 struct gmon_hist_hdr hist_hdr;
174 u_char tag = GMON_TAG_TIME_HIST;
175 hist_hdr.low_pc = s_lowpc;
176 hist_hdr.high_pc = s_highpc;
177 hist_hdr.hist_size = hist_size / sizeof (HISTCOUNTER); /* Amount of bins. */
178 hist_hdr.prof_rate = 100; /* Hertz. */
179 strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
180 hist_hdr.dimen_abbrev = 's';
181 struct iovec iov[2] = {
182 {&tag, sizeof (tag)},
183 {&hist_hdr, sizeof (struct gmon_hist_hdr)}
184 };
185 if (writev (fd, iov, 2) != sizeof (struct gmon_hist_hdr) + sizeof (tag))
186 perror ("can't write the histogram header");
187
188 /* Skip the already written histogram data. */
189 lseek (fd, hist_size, SEEK_CUR);
190 }
191
192 static void
write_callgraph(int fd)193 write_callgraph (int fd)
194 {
195 int fromindex, endfrom;
196 uintptr_t frompc;
197 int toindex;
198 struct rawarc rawarc;
199 u_char tag = GMON_TAG_CG_ARC;
200 endfrom = s_textsize / (HASHFRACTION * sizeof (*froms));
201 for (fromindex = 0; fromindex < endfrom; ++fromindex)
202 {
203 if (froms[fromindex])
204 {
205 frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof (*froms));
206 for (toindex = froms[fromindex]; toindex != 0;
207 toindex = tos[toindex].link)
208 {
209 rawarc.raw_frompc = frompc;
210 rawarc.raw_selfpc = tos[toindex].selfpc;
211 rawarc.raw_count = tos[toindex].count;
212 struct iovec iov[2] = {
213 {&tag, sizeof (tag)},
214 {&rawarc, sizeof (struct rawarc)}
215 };
216 if (writev (fd, iov, 2) != sizeof (tag) + sizeof (struct rawarc))
217 perror ("can't write the callgraph");
218 }
219 }
220 }
221 }
222
223 void
__mcleanup(void)224 __mcleanup (void)
225 {
226 struct gmon_hdr ghdr;
227
228 /* Disable sampling. */
229 spu_timer_stop (timer_id);
230 spu_timer_free (timer_id);
231 spu_clock_stop ();
232
233 /* Jump to the beginning of the gmon.out file. */
234 if (lseek (fd, 0, SEEK_SET) == -1)
235 {
236 perror ("Cannot seek to the beginning of the gmon.out file.");
237 close (fd);
238 return;
239 }
240
241 /* Write the gmon.out header. */
242 memset (&ghdr, '\0', sizeof (struct gmon_hdr));
243 memcpy (&ghdr.cookie[0], GMON_MAGIC_COOKIE, sizeof (ghdr.cookie));
244 ghdr.version = GMON_VERSION;
245 if (write (fd, &ghdr, sizeof (struct gmon_hdr)) == -1)
246 {
247 perror ("Cannot write the gmon header to the gmon.out file.");
248 close (fd);
249 return;
250 }
251
252 /* Write the sampling buffer (histogram). */
253 write_histogram (fd);
254
255 /* Write the call graph. */
256 write_callgraph (fd);
257
258 close (fd);
259 }
260
261 void
__monstartup(unsigned long long spu_id)262 __monstartup (unsigned long long spu_id)
263 {
264 char filename[64];
265 s_lowpc =
266 ROUNDDOWN ((uintptr_t) & _start, HISTFRACTION * sizeof (HISTCOUNTER));
267 s_highpc =
268 ROUNDUP ((uintptr_t) & _etext, HISTFRACTION * sizeof (HISTCOUNTER));
269 s_textsize = s_highpc - s_lowpc;
270
271 hist_size = s_textsize / HISTFRACTION * sizeof (HISTCOUNTER);
272
273 /* Allocate froms. */
274 froms = malloc_ea (s_textsize / HASHFRACTION);
275 if (froms == NULL)
276 {
277 fprintf (stderr, "Cannot allocate ea memory for the froms array.\n");
278 return;
279 }
280 memset_ea (froms, 0, s_textsize / HASHFRACTION);
281
282 /* Determine tolimit. */
283 tolimit = s_textsize * ARCDENSITY / 100;
284 if (tolimit < MINARCS)
285 tolimit = MINARCS;
286
287 /* Allocate tos. */
288 tos = malloc_ea (tolimit * sizeof (struct tostruct));
289 if (tos == NULL)
290 {
291 fprintf (stderr, "Cannot allocate ea memory for the tos array.\n");
292 return;
293 }
294 memset_ea (tos, 0, tolimit * sizeof (struct tostruct));
295
296 /* Determine the gmon.out file name. */
297 if (spu_id)
298 snprintf (filename, sizeof (filename), "gmon-%d-%llu-%llu.out",
299 linux_getpid (), spu_id, _EAR_);
300 else
301 strncpy (filename, "gmon.out", sizeof (filename));
302 /* Open the gmon.out file. */
303 fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
304 if (fd == -1)
305 {
306 char errstr[128];
307 snprintf (errstr, sizeof (errstr), "Cannot open file: %s", filename);
308 perror (errstr);
309 return;
310 }
311 /* Truncate the file up to the size where the histogram fits in. */
312 if (ftruncate (fd,
313 sizeof (struct gmon_hdr) + 1 +
314 sizeof (struct gmon_hist_hdr) + hist_size) == -1)
315 {
316 char errstr[128];
317 snprintf (errstr, sizeof (errstr), "Cannot truncate file: %s", filename);
318 perror (errstr);
319 return;
320 }
321
322 /* Start the histogram sampler. */
323 spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih);
324 timer_id = spu_timer_alloc (spu_timebase () / SAMPLE_INTERVAL, __sample);
325 spu_clock_start ();
326 spu_timer_start (timer_id);
327
328 atexit (__mcleanup);
329 }
330
331 void
__mcount_internal(uintptr_t frompc,uintptr_t selfpc)332 __mcount_internal (uintptr_t frompc, uintptr_t selfpc)
333 {
334 /* sefpc: the address of the function just entered. */
335 /* frompc: the caller of the function just entered. */
336 unsigned int mach_stat;
337 __ea unsigned short *frompcindex;
338 unsigned short toindex;
339 __ea struct tostruct *top;
340 __ea struct tostruct *prevtop;
341
342 /* Save current state and disable interrupts. */
343 mach_stat = spu_readch(SPU_RdMachStat);
344 spu_idisable ();
345
346 /* Sanity checks. */
347 if (frompc < s_lowpc || frompc > s_highpc)
348 goto done;
349 frompc -= s_lowpc;
350 if (frompc > s_textsize)
351 goto done;
352
353 /* frompc indexes into the froms array the value at that position indexes
354 into the tos array. */
355 frompcindex = &froms[(frompc) / (HASHFRACTION * sizeof (*froms))];
356 toindex = *frompcindex;
357 if (toindex == 0)
358 {
359 /* First time traversing this arc link of tos[0] incremented. */
360 toindex = ++tos[0].link;
361 /* Sanity check. */
362 if (toindex >= tolimit)
363 {
364 --tos[0].link;
365 goto done;
366 }
367 /* Save the index into the froms array for the next time we traverse this arc. */
368 *frompcindex = toindex;
369 top = &tos[toindex];
370 /* Sets the address of the function just entered. */
371 top->selfpc = selfpc;
372 top->count = 1;
373 top->link = 0;
374 goto done;
375 }
376
377 /* toindex points to a tostruct */
378 top = &tos[toindex];
379 if (top->selfpc == selfpc)
380 {
381 /* The arc is at front of the chain. This is the most common case. */
382 top->count++;
383 goto done;
384 }
385
386 /* top->selfpc != selfpc
387 The pc we have got is not the pc we already stored (i.e. multiple function
388 calls to the same fuction within a function. The arc is not at front of
389 the chain. */
390 for (;;)
391 {
392 if (top->link == 0)
393 {
394 /* We are at the end of the chain and selfpc was not found. Thus we create
395 a new tostruct and link it to the head of the chain. */
396 toindex = ++tos[0].link;
397 /* Sanity check. */
398 if (toindex >= tolimit)
399 {
400 --tos[0].link;
401 goto done;
402 }
403 top = &tos[toindex];
404 top->selfpc = selfpc;
405 top->count = 1;
406 /* Link back to the old tos entry. */
407 top->link = *frompcindex;
408 /* Store a link to the new top in the froms array which makes the
409 current tos head of the chain. */
410 *frompcindex = toindex;
411 goto done;
412 }
413 else
414 {
415 /* Otherwise check the next arc on the chain. */
416 prevtop = top;
417 top = &tos[top->link];
418 if (top->selfpc == selfpc)
419 {
420 /* selfpc matches; increment its count. */
421 top->count++;
422 /* Move it to the head of the chain. */
423 /* Save previous tos index. */
424 toindex = prevtop->link;
425 /* Link the former to to the current tos. */
426 prevtop->link = top->link;
427 /* Link back to the old tos entry. */
428 top->link = *frompcindex;
429 /* Store a link to the new top in the froms array which makes the
430 current tos head of the chain. */
431 *frompcindex = toindex;
432 goto done;
433 }
434 }
435 }
436 done:
437 /* Enable interrupts if necessary. */
438 if (__builtin_expect (mach_stat & 1, 0))
439 spu_ienable ();
440 }
441