1 /*
2 (C) Copyright IBM Corp. 2008
3 
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9 * Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of IBM nor the names of its contributors may be
15 used to endorse or promote products derived from this software without
16 specific prior written permission.
17 
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 POSSIBILITY OF SUCH DAMAGE.
29 
30 Author: Ken Werner <ken.werner@de.ibm.com>
31 */
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <unistd.h>
37 #include <sys/types.h>
38 #include <sys/stat.h>
39 #include <sys/uio.h>
40 #include <fcntl.h>
41 #include <ea.h>
42 #include <spu_intrinsics.h>
43 #include <spu_mfcio.h>
44 #include <spu_timer.h>
45 #include <limits.h>
46 #include <sys/linux_syscalls.h>
47 
/* Magic cookie copied into the cookie field of the gmon.out file header.  */
#define GMON_MAGIC_COOKIE "gmon"

/* Version number stored in the gmon.out file header.  */
#define GMON_VERSION 1

/* Fraction of text space to allocate for histogram counters: the histogram
   holds one HISTCOUNTER per HISTFRACTION bytes of text.  */
#define HISTFRACTION 4

/* Histogram counter type.  */
#define HISTCOUNTER unsigned short

/* Fraction of text space to allocate for "from" hash buckets. HASHFRACTION is
   based on the minimum number of bytes of separation between two subroutine
   call points in the object code.  The froms array is allocated with
   (text size / HASHFRACTION) bytes.  */
#define HASHFRACTION 4

/* Percent of text space to allocate for tostructs with a minimum: the number
   of tostructs is (text size * ARCDENSITY / 100), but never less than
   MINARCS.  */
#define ARCDENSITY 3

/* Minimal amount of arcs (lower bound for the number of tostructs).  */
#define MINARCS 50

/* Rounding macros: round x down/up to a multiple of y using integer
   arithmetic.  Note that y is evaluated more than once.  */
#define ROUNDDOWN(x,y) (((x)/(y))*(y))
#define ROUNDUP(x,y)   ((((x)+(y)-1)/(y))*(y))

/* Sampling rate in Hertz.  Despite the name this is a frequency: the sampling
   timer period is the SPU timebase divided by this value.  The same rate is
   reported as prof_rate in the histogram header.  */
#define SAMPLE_INTERVAL 100

/* Tag definitions for the gmon.out sub headers.  Each record in the file is
   preceded by a single byte holding one of these tags.  */
#define GMON_TAG_TIME_HIST 0
#define GMON_TAG_CG_ARC 1
81 
/* One recorded call-graph arc destination.  Entries are stored in the tos
   array and entries belonging to the same froms bucket are chained together
   through their link field.  Entry 0 is not an arc: its link field is used
   as the index of the most recently allocated entry.  */
struct tostruct
{
  uintptr_t selfpc;		/* Address of the function that was entered.  */
  long count;			/* Number of times this arc was traversed.  */
  unsigned short link;		/* Index of the next entry on the chain,
				   0 terminates the chain.  */
};
88 
/* File header written at the very beginning of the gmon.out file.  */
struct gmon_hdr
{
  char cookie[4];		/* GMON_MAGIC_COOKIE, without a terminating NUL.  */
  int32_t version;		/* GMON_VERSION.  */
  char spare[3 * 4];		/* Unused; written as zero bytes.  */
};
95 
/* Header of the histogram record.  It is written right after the
   GMON_TAG_TIME_HIST tag byte and is followed by the histogram bins.  The
   structure is packed because it is written to the file as is.  */
struct gmon_hist_hdr
{
  uintptr_t low_pc;		/* Lowest address covered by the histogram.  */
  uintptr_t high_pc;		/* Upper end of the covered address range.  */
  int32_t hist_size;		/* Number of histogram bins.  */
  int32_t prof_rate;		/* Sampling rate in Hertz.  */
  char dimen[15];		/* Name of the sampled unit ("seconds").  */
  char dimen_abbrev;		/* Abbreviation of the unit ('s').  */
} __attribute__ ((packed));
105 
/* Call-graph arc record.  One of these is written after each
   GMON_TAG_CG_ARC tag byte.  The structure is packed because it is written
   to the file as is.  */
struct rawarc
{
  uintptr_t raw_frompc;		/* Address derived from the call site's bucket.  */
  uintptr_t raw_selfpc;		/* Address of the called function.  */
  long raw_count;		/* Number of times the arc was traversed.  */
} __attribute__ ((packed));
112 
/* Start and end of the text section.  Only the addresses of these symbols
   are used; they delimit the range that is profiled.  */
extern char _start;
extern char _etext;

/* EAR entry for the starting address of SPE executable image.  The assembly
   below reserves 16 bytes for the _EAR_ object in the .toe section; its value
   becomes part of the output file name when a non-zero spu_id is passed to
   __monstartup.  */
extern const unsigned long long _EAR_;
__asm__ (".section .toe,\"a\",@nobits\n\r"
     ".align 4\n\r"
     ".type _EAR_, @object\n\r"
     ".size _EAR_, 16\n" "_EAR_: .space 16\n" ".previous");
123 
/* froms are indexing tos: each froms entry is selected by a call site address
   and holds the index of the first tos entry of that call site's chain
   (0 means that no arc has been recorded yet).  Both arrays are allocated in
   __monstartup.  */
static __ea unsigned short *froms;
static __ea struct tostruct *tos = 0;
/* Number of entries in the tos array.  */
static long tolimit = 0;
/* Profiled address range: start, end and size in bytes.  */
static uintptr_t s_lowpc = 0;
static uintptr_t s_highpc = 0;
static unsigned long s_textsize = 0;

/* File descriptor of the gmon.out file.  */
static int fd;
/* Size of the histogram data in bytes.  */
static int hist_size;
/* Identifier of the sampling timer allocated in __monstartup.  */
static int timer_id;
135 
136 void
__sample(int id)137 __sample (int id)
138 {
139   unsigned int pc;
140   unsigned int pc_backup;
141   off_t offset;
142   unsigned short val;
143 
144   if (id != timer_id)
145     return;
146 
147   /* Fetch program counter.  */
148   pc = spu_read_srr0 () & ~3;
149   pc_backup = pc;
150   if (pc < s_lowpc || pc > s_highpc)
151     return;
152   pc -= (uintptr_t) & _start;
153   offset = pc / HISTFRACTION * sizeof (HISTCOUNTER) + sizeof (struct gmon_hdr)
154              + 1 + sizeof (struct gmon_hist_hdr);
155 
156   /* Read, increment and write the counter.  */
157   if (pread (fd, &val, 2, offset) != 2)
158     {
159       perror ("can't read the histogram");
160       return;
161     }
162   if (val < USHRT_MAX)
163     ++val;
164   if (pwrite (fd, &val, 2, offset) != 2)
165     {
166       perror ("can't write the histogram");
167     }
168 }
169 
170 static void
write_histogram(int fd)171 write_histogram (int fd)
172 {
173   struct gmon_hist_hdr hist_hdr;
174   u_char tag = GMON_TAG_TIME_HIST;
175   hist_hdr.low_pc = s_lowpc;
176   hist_hdr.high_pc = s_highpc;
177   hist_hdr.hist_size = hist_size / sizeof (HISTCOUNTER); /* Amount of bins.  */
178   hist_hdr.prof_rate = 100; /* Hertz.  */
179   strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
180   hist_hdr.dimen_abbrev = 's';
181   struct iovec iov[2] = {
182     {&tag, sizeof (tag)},
183     {&hist_hdr, sizeof (struct gmon_hist_hdr)}
184   };
185   if (writev (fd, iov, 2) != sizeof (struct gmon_hist_hdr) + sizeof (tag))
186     perror ("can't write the histogram header");
187 
188   /* Skip the already written histogram data.  */
189   lseek (fd, hist_size, SEEK_CUR);
190 }
191 
192 static void
write_callgraph(int fd)193 write_callgraph (int fd)
194 {
195   int fromindex, endfrom;
196   uintptr_t frompc;
197   int toindex;
198   struct rawarc rawarc;
199   u_char tag = GMON_TAG_CG_ARC;
200   endfrom = s_textsize / (HASHFRACTION * sizeof (*froms));
201   for (fromindex = 0; fromindex < endfrom; ++fromindex)
202     {
203       if (froms[fromindex])
204 	{
205 	  frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof (*froms));
206 	  for (toindex = froms[fromindex]; toindex != 0;
207 	       toindex = tos[toindex].link)
208 	    {
209 	      rawarc.raw_frompc = frompc;
210 	      rawarc.raw_selfpc = tos[toindex].selfpc;
211 	      rawarc.raw_count = tos[toindex].count;
212 	      struct iovec iov[2] = {
213 		{&tag, sizeof (tag)},
214 		{&rawarc, sizeof (struct rawarc)}
215 	      };
216 	      if (writev (fd, iov, 2) != sizeof (tag) + sizeof (struct rawarc))
217                 perror ("can't write the callgraph");
218 	    }
219 	}
220     }
221 }
222 
223 void
__mcleanup(void)224 __mcleanup (void)
225 {
226   struct gmon_hdr ghdr;
227 
228   /* Disable sampling.  */
229   spu_timer_stop (timer_id);
230   spu_timer_free (timer_id);
231   spu_clock_stop ();
232 
233   /* Jump to the beginning of the gmon.out file.  */
234   if (lseek (fd, 0, SEEK_SET) == -1)
235     {
236       perror ("Cannot seek to the beginning of the gmon.out file.");
237       close (fd);
238       return;
239     }
240 
241   /* Write the gmon.out header.  */
242   memset (&ghdr, '\0', sizeof (struct gmon_hdr));
243   memcpy (&ghdr.cookie[0], GMON_MAGIC_COOKIE, sizeof (ghdr.cookie));
244   ghdr.version = GMON_VERSION;
245   if (write (fd, &ghdr, sizeof (struct gmon_hdr)) == -1)
246     {
247       perror ("Cannot write the gmon header to the gmon.out file.");
248       close (fd);
249       return;
250     }
251 
252   /* Write the sampling buffer (histogram).  */
253   write_histogram (fd);
254 
255   /* Write the call graph.  */
256   write_callgraph (fd);
257 
258   close (fd);
259 }
260 
261 void
__monstartup(unsigned long long spu_id)262 __monstartup (unsigned long long spu_id)
263 {
264   char filename[64];
265   s_lowpc =
266     ROUNDDOWN ((uintptr_t) & _start, HISTFRACTION * sizeof (HISTCOUNTER));
267   s_highpc =
268     ROUNDUP ((uintptr_t) & _etext, HISTFRACTION * sizeof (HISTCOUNTER));
269   s_textsize = s_highpc - s_lowpc;
270 
271   hist_size = s_textsize / HISTFRACTION * sizeof (HISTCOUNTER);
272 
273   /* Allocate froms.  */
274   froms = malloc_ea (s_textsize / HASHFRACTION);
275   if (froms == NULL)
276     {
277       fprintf (stderr, "Cannot allocate ea memory for the froms array.\n");
278       return;
279     }
280   memset_ea (froms, 0, s_textsize / HASHFRACTION);
281 
282   /* Determine tolimit.  */
283   tolimit = s_textsize * ARCDENSITY / 100;
284   if (tolimit < MINARCS)
285     tolimit = MINARCS;
286 
287   /* Allocate tos. */
288   tos = malloc_ea (tolimit * sizeof (struct tostruct));
289   if (tos == NULL)
290     {
291       fprintf (stderr, "Cannot allocate ea memory for the tos array.\n");
292       return;
293     }
294   memset_ea (tos, 0, tolimit * sizeof (struct tostruct));
295 
296   /* Determine the gmon.out file name.  */
297   if (spu_id)
298     snprintf (filename, sizeof (filename), "gmon-%d-%llu-%llu.out",
299 	      linux_getpid (), spu_id, _EAR_);
300   else
301     strncpy (filename, "gmon.out", sizeof (filename));
302   /* Open the gmon.out file.  */
303   fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
304   if (fd == -1)
305     {
306       char errstr[128];
307       snprintf (errstr, sizeof (errstr), "Cannot open file: %s", filename);
308       perror (errstr);
309       return;
310     }
311   /* Truncate the file up to the size where the histogram fits in.  */
312   if (ftruncate (fd,
313 		 sizeof (struct gmon_hdr) + 1 +
314 		 sizeof (struct gmon_hist_hdr) + hist_size) == -1)
315     {
316       char errstr[128];
317       snprintf (errstr, sizeof (errstr), "Cannot truncate file: %s", filename);
318       perror (errstr);
319       return;
320     }
321 
322   /* Start the histogram sampler.  */
323   spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih);
324   timer_id = spu_timer_alloc (spu_timebase () / SAMPLE_INTERVAL, __sample);
325   spu_clock_start ();
326   spu_timer_start (timer_id);
327 
328   atexit (__mcleanup);
329 }
330 
331 void
__mcount_internal(uintptr_t frompc,uintptr_t selfpc)332 __mcount_internal (uintptr_t frompc, uintptr_t selfpc)
333 {
334   /* sefpc: the address of the function just entered.  */
335   /* frompc: the caller of the function just entered.  */
336   unsigned int mach_stat;
337   __ea unsigned short *frompcindex;
338   unsigned short toindex;
339   __ea struct tostruct *top;
340   __ea struct tostruct *prevtop;
341 
342   /* Save current state and disable interrupts.  */
343   mach_stat = spu_readch(SPU_RdMachStat);
344   spu_idisable ();
345 
346   /* Sanity checks.  */
347   if (frompc < s_lowpc || frompc > s_highpc)
348     goto done;
349   frompc -= s_lowpc;
350   if (frompc > s_textsize)
351     goto done;
352 
353   /* frompc indexes into the froms array the value at that position indexes
354      into the tos array.  */
355   frompcindex = &froms[(frompc) / (HASHFRACTION * sizeof (*froms))];
356   toindex = *frompcindex;
357   if (toindex == 0)
358     {
359       /* First time traversing this arc link of tos[0] incremented.  */
360       toindex = ++tos[0].link;
361       /* Sanity check.  */
362       if (toindex >= tolimit)
363 	{
364 	  --tos[0].link;
365 	  goto done;
366 	}
367       /* Save the index into the froms array for the next time we traverse this arc.  */
368       *frompcindex = toindex;
369       top = &tos[toindex];
370       /* Sets the address of the function just entered.  */
371       top->selfpc = selfpc;
372       top->count = 1;
373       top->link = 0;
374       goto done;
375     }
376 
377   /* toindex points to a tostruct */
378   top = &tos[toindex];
379   if (top->selfpc == selfpc)
380     {
381       /* The arc is at front of the chain. This is the most common case.  */
382       top->count++;
383       goto done;
384     }
385 
386   /* top->selfpc != selfpc
387      The pc we have got is not the pc we already stored (i.e. multiple function
388      calls to the same fuction within a function. The arc is not at front of
389      the chain.  */
390   for (;;)
391     {
392       if (top->link == 0)
393 	{
394 	  /* We are at the end of the chain and selfpc was not found. Thus we create
395 	     a new tostruct and link it to the head of the chain.  */
396 	  toindex = ++tos[0].link;
397 	  /* Sanity check.  */
398 	  if (toindex >= tolimit)
399 	    {
400 	      --tos[0].link;
401 	      goto done;
402 	    }
403 	  top = &tos[toindex];
404 	  top->selfpc = selfpc;
405 	  top->count = 1;
406 	  /* Link back to the old tos entry.  */
407 	  top->link = *frompcindex;
408 	  /* Store a link to the new top in the froms array which makes the
409 	     current tos head of the chain.  */
410 	  *frompcindex = toindex;
411 	  goto done;
412 	}
413       else
414 	{
415 	  /* Otherwise check the next arc on the chain.  */
416 	  prevtop = top;
417 	  top = &tos[top->link];
418 	  if (top->selfpc == selfpc)
419 	    {
420 	      /* selfpc matches; increment its count.  */
421 	      top->count++;
422 	      /* Move it to the head of the chain.  */
423 	      /* Save previous tos index.  */
424 	      toindex = prevtop->link;
425 	      /* Link the former to to the current tos.  */
426 	      prevtop->link = top->link;
427 	      /* Link back to the old tos entry.  */
428 	      top->link = *frompcindex;
429 	      /* Store a link to the new top in the froms array which makes the
430 	         current tos head of the chain.  */
431 	      *frompcindex = toindex;
432 	      goto done;
433 	    }
434 	}
435     }
436 done:
437   /* Enable interrupts if necessary.  */
438   if (__builtin_expect (mach_stat & 1, 0))
439     spu_ienable ();
440 }
441