1 /*
2 (C) Copyright IBM Corp. 2008
3 
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9 * Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
11 * Redistributions in binary form must reproduce the above copyright
12 notice, this list of conditions and the following disclaimer in the
13 documentation and/or other materials provided with the distribution.
14 * Neither the name of IBM nor the names of its contributors may be
15 used to endorse or promote products derived from this software without
16 specific prior written permission.
17 
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 POSSIBILITY OF SUCH DAMAGE.
29 
30 Author: Ken Werner <ken.werner@de.ibm.com>
31 */
32 
33 #include <picolibc.h>
34 
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <unistd.h>
39 #include <sys/types.h>
40 #include <sys/stat.h>
41 #include <sys/uio.h>
42 #include <fcntl.h>
43 #include <ea.h>
44 #include <spu_intrinsics.h>
45 #include <spu_mfcio.h>
46 #include <spu_timer.h>
47 #include <limits.h>
48 #include <sys/linux_syscalls.h>
49 
/* Magic cookie copied into the gmon.out file header.  */
#define GMON_MAGIC_COOKIE "gmon"

/* Format version stored in the gmon.out file header.  */
#define GMON_VERSION 1

/* One histogram counter is kept per HISTFRACTION bytes of text.  */
#define HISTFRACTION 4

/* Type of a single histogram counter.  */
#define HISTCOUNTER unsigned short

/* The "from" hash table takes 1/HASHFRACTION of the text size.  The value is
   based on the minimum number of bytes of separation between two subroutine
   call points in the object code.  */
#define HASHFRACTION 4

/* Percentage of the text size used to size the tostruct table.  */
#define ARCDENSITY 3

/* Lower bound for the number of tostruct entries.  */
#define MINARCS 50

/* Round X down, respectively up, to a multiple of Y.  Y is expanded more
   than once, so it must not have side effects.  */
#define ROUNDDOWN(x,y) (((x)/(y))*(y))
#define ROUNDUP(x,y)   ROUNDDOWN ((x)+(y)-1, y)

/* Sampling rate in Hertz.  */
#define SAMPLE_INTERVAL 100

/* Tags that introduce the records following the gmon.out file header.  */
#define GMON_TAG_TIME_HIST 0
#define GMON_TAG_CG_ARC 1
83 
/* One call-graph arc as kept in the tos table while the program runs.  */
struct tostruct
{
  uintptr_t selfpc;     /* Address of the function that was entered.  */
  long count;           /* Number of times this arc was traversed.  */
  unsigned short link;  /* Index of the next arc on the chain; 0 ends it.  */
};
90 
/* File header written at offset 0 of the gmon.out file.  */
struct gmon_hdr
{
  char cookie[4];      /* Receives GMON_MAGIC_COOKIE, without the NUL.  */
  int32_t version;     /* Receives GMON_VERSION.  */
  char spare[3 * 4];   /* Unused; written as zeros.  */
};
97 
/* Header of the histogram record; it follows a GMON_TAG_TIME_HIST tag byte
   and precedes the histogram counters.  Packed so that it is written to the
   file without padding.  */
struct gmon_hist_hdr
{
  uintptr_t low_pc;     /* Lowest address covered by the histogram.  */
  uintptr_t high_pc;    /* Upper end of the range covered by the histogram.  */
  int32_t hist_size;    /* Number of histogram bins.  */
  int32_t prof_rate;    /* Sampling rate in Hertz.  */
  char dimen[15];       /* Name of the unit the histogram is measured in.  */
  char dimen_abbrev;    /* One-character abbreviation of that unit.  */
} __attribute__ ((packed));
107 
/* Call-graph arc as written to the gmon.out file, following a
   GMON_TAG_CG_ARC tag byte.  Packed so that it is written without padding.  */
struct rawarc
{
  uintptr_t raw_frompc;  /* Address the call was made from.  */
  uintptr_t raw_selfpc;  /* Address of the function that was called.  */
  long raw_count;        /* Number of times the arc was traversed.  */
} __attribute__ ((packed));
114 
115 /* start and end of the text section */
116 extern char _start;
117 extern char _etext;
118 
119 /* EAR entry for the starting address of SPE executable image.  */
120 extern const unsigned long long _EAR_;
121 __asm__ (".section .toe,\"a\",@nobits\n\r"
122      ".align 4\n\r"
123      ".type _EAR_, @object\n\r"
124      ".size _EAR_, 16\n" "_EAR_: .space 16\n" ".previous");
125 
126 /* froms are indexing tos */
127 static __ea unsigned short *froms;
128 static __ea struct tostruct *tos = 0;
129 static long tolimit = 0;
130 static uintptr_t s_lowpc = 0;
131 static uintptr_t s_highpc = 0;
132 static unsigned long s_textsize = 0;
133 
134 static int fd;
135 static int hist_size;
136 static int timer_id;
137 
138 void
__sample(int id)139 __sample (int id)
140 {
141   unsigned int pc;
142   unsigned int pc_backup;
143   off_t offset;
144   unsigned short val;
145 
146   if (id != timer_id)
147     return;
148 
149   /* Fetch program counter.  */
150   pc = spu_read_srr0 () & ~3;
151   pc_backup = pc;
152   if (pc < s_lowpc || pc > s_highpc)
153     return;
154   pc -= (uintptr_t) & _start;
155   offset = pc / HISTFRACTION * sizeof (HISTCOUNTER) + sizeof (struct gmon_hdr)
156              + 1 + sizeof (struct gmon_hist_hdr);
157 
158   /* Read, increment and write the counter.  */
159   if (pread (fd, &val, 2, offset) != 2)
160     {
161       perror ("can't read the histogram");
162       return;
163     }
164   if (val < USHRT_MAX)
165     ++val;
166   if (pwrite (fd, &val, 2, offset) != 2)
167     {
168       perror ("can't write the histogram");
169     }
170 }
171 
172 static void
write_histogram(int fd)173 write_histogram (int fd)
174 {
175   struct gmon_hist_hdr hist_hdr;
176   u_char tag = GMON_TAG_TIME_HIST;
177   hist_hdr.low_pc = s_lowpc;
178   hist_hdr.high_pc = s_highpc;
179   hist_hdr.hist_size = hist_size / sizeof (HISTCOUNTER); /* Amount of bins.  */
180   hist_hdr.prof_rate = 100; /* Hertz.  */
181   strncpy (hist_hdr.dimen, "seconds", sizeof (hist_hdr.dimen));
182   hist_hdr.dimen_abbrev = 's';
183   struct iovec iov[2] = {
184     {&tag, sizeof (tag)},
185     {&hist_hdr, sizeof (struct gmon_hist_hdr)}
186   };
187   if (writev (fd, iov, 2) != sizeof (struct gmon_hist_hdr) + sizeof (tag))
188     perror ("can't write the histogram header");
189 
190   /* Skip the already written histogram data.  */
191   lseek (fd, hist_size, SEEK_CUR);
192 }
193 
194 static void
write_callgraph(int fd)195 write_callgraph (int fd)
196 {
197   int fromindex, endfrom;
198   uintptr_t frompc;
199   int toindex;
200   struct rawarc rawarc;
201   u_char tag = GMON_TAG_CG_ARC;
202   endfrom = s_textsize / (HASHFRACTION * sizeof (*froms));
203   for (fromindex = 0; fromindex < endfrom; ++fromindex)
204     {
205       if (froms[fromindex])
206 	{
207 	  frompc = s_lowpc + (fromindex * HASHFRACTION * sizeof (*froms));
208 	  for (toindex = froms[fromindex]; toindex != 0;
209 	       toindex = tos[toindex].link)
210 	    {
211 	      rawarc.raw_frompc = frompc;
212 	      rawarc.raw_selfpc = tos[toindex].selfpc;
213 	      rawarc.raw_count = tos[toindex].count;
214 	      struct iovec iov[2] = {
215 		{&tag, sizeof (tag)},
216 		{&rawarc, sizeof (struct rawarc)}
217 	      };
218 	      if (writev (fd, iov, 2) != sizeof (tag) + sizeof (struct rawarc))
219                 perror ("can't write the callgraph");
220 	    }
221 	}
222     }
223 }
224 
225 void
__mcleanup(void)226 __mcleanup (void)
227 {
228   struct gmon_hdr ghdr;
229 
230   /* Disable sampling.  */
231   spu_timer_stop (timer_id);
232   spu_timer_free (timer_id);
233   spu_clock_stop ();
234 
235   /* Jump to the beginning of the gmon.out file.  */
236   if (lseek (fd, 0, SEEK_SET) == -1)
237     {
238       perror ("Cannot seek to the beginning of the gmon.out file.");
239       close (fd);
240       return;
241     }
242 
243   /* Write the gmon.out header.  */
244   memset (&ghdr, '\0', sizeof (struct gmon_hdr));
245   memcpy (&ghdr.cookie[0], GMON_MAGIC_COOKIE, sizeof (ghdr.cookie));
246   ghdr.version = GMON_VERSION;
247   if (write (fd, &ghdr, sizeof (struct gmon_hdr)) == -1)
248     {
249       perror ("Cannot write the gmon header to the gmon.out file.");
250       close (fd);
251       return;
252     }
253 
254   /* Write the sampling buffer (histogram).  */
255   write_histogram (fd);
256 
257   /* Write the call graph.  */
258   write_callgraph (fd);
259 
260   close (fd);
261 }
262 
263 void
__monstartup(unsigned long long spu_id)264 __monstartup (unsigned long long spu_id)
265 {
266   char filename[64];
267   s_lowpc =
268     ROUNDDOWN ((uintptr_t) & _start, HISTFRACTION * sizeof (HISTCOUNTER));
269   s_highpc =
270     ROUNDUP ((uintptr_t) & _etext, HISTFRACTION * sizeof (HISTCOUNTER));
271   s_textsize = s_highpc - s_lowpc;
272 
273   hist_size = s_textsize / HISTFRACTION * sizeof (HISTCOUNTER);
274 
275   /* Allocate froms.  */
276   froms = malloc_ea (s_textsize / HASHFRACTION);
277   if (froms == NULL)
278     {
279       fprintf (stderr, "Cannot allocate ea memory for the froms array.\n");
280       return;
281     }
282   memset_ea (froms, 0, s_textsize / HASHFRACTION);
283 
284   /* Determine tolimit.  */
285   tolimit = s_textsize * ARCDENSITY / 100;
286   if (tolimit < MINARCS)
287     tolimit = MINARCS;
288 
289   /* Allocate tos. */
290   tos = malloc_ea (tolimit * sizeof (struct tostruct));
291   if (tos == NULL)
292     {
293       fprintf (stderr, "Cannot allocate ea memory for the tos array.\n");
294       return;
295     }
296   memset_ea (tos, 0, tolimit * sizeof (struct tostruct));
297 
298   /* Determine the gmon.out file name.  */
299   if (spu_id)
300     snprintf (filename, sizeof (filename), "gmon-%d-%llu-%llu.out",
301 	      linux_getpid (), spu_id, _EAR_);
302   else
303     strncpy (filename, "gmon.out", sizeof (filename));
304   /* Open the gmon.out file.  */
305   fd = open (filename, O_RDWR | O_CREAT | O_TRUNC, 0644);
306   if (fd == -1)
307     {
308       char errstr[128];
309       snprintf (errstr, sizeof (errstr), "Cannot open file: %s", filename);
310       perror (errstr);
311       return;
312     }
313   /* Truncate the file up to the size where the histogram fits in.  */
314   if (ftruncate (fd,
315 		 sizeof (struct gmon_hdr) + 1 +
316 		 sizeof (struct gmon_hist_hdr) + hist_size) == -1)
317     {
318       char errstr[128];
319       snprintf (errstr, sizeof (errstr), "Cannot truncate file: %s", filename);
320       perror (errstr);
321       return;
322     }
323 
324   /* Start the histogram sampler.  */
325   spu_slih_register (MFC_DECREMENTER_EVENT, spu_clock_slih);
326   timer_id = spu_timer_alloc (spu_timebase () / SAMPLE_INTERVAL, __sample);
327   spu_clock_start ();
328   spu_timer_start (timer_id);
329 
330   atexit (__mcleanup);
331 }
332 
333 void
__mcount_internal(uintptr_t frompc,uintptr_t selfpc)334 __mcount_internal (uintptr_t frompc, uintptr_t selfpc)
335 {
336   /* sefpc: the address of the function just entered.  */
337   /* frompc: the caller of the function just entered.  */
338   unsigned int mach_stat;
339   __ea unsigned short *frompcindex;
340   unsigned short toindex;
341   __ea struct tostruct *top;
342   __ea struct tostruct *prevtop;
343 
344   /* Save current state and disable interrupts.  */
345   mach_stat = spu_readch(SPU_RdMachStat);
346   spu_idisable ();
347 
348   /* Sanity checks.  */
349   if (frompc < s_lowpc || frompc > s_highpc)
350     goto done;
351   frompc -= s_lowpc;
352   if (frompc > s_textsize)
353     goto done;
354 
355   /* frompc indexes into the froms array the value at that position indexes
356      into the tos array.  */
357   frompcindex = &froms[(frompc) / (HASHFRACTION * sizeof (*froms))];
358   toindex = *frompcindex;
359   if (toindex == 0)
360     {
361       /* First time traversing this arc link of tos[0] incremented.  */
362       toindex = ++tos[0].link;
363       /* Sanity check.  */
364       if (toindex >= tolimit)
365 	{
366 	  --tos[0].link;
367 	  goto done;
368 	}
369       /* Save the index into the froms array for the next time we traverse this arc.  */
370       *frompcindex = toindex;
371       top = &tos[toindex];
372       /* Sets the address of the function just entered.  */
373       top->selfpc = selfpc;
374       top->count = 1;
375       top->link = 0;
376       goto done;
377     }
378 
379   /* toindex points to a tostruct */
380   top = &tos[toindex];
381   if (top->selfpc == selfpc)
382     {
383       /* The arc is at front of the chain. This is the most common case.  */
384       top->count++;
385       goto done;
386     }
387 
388   /* top->selfpc != selfpc
389      The pc we have got is not the pc we already stored (i.e. multiple function
390      calls to the same fuction within a function. The arc is not at front of
391      the chain.  */
392   for (;;)
393     {
394       if (top->link == 0)
395 	{
396 	  /* We are at the end of the chain and selfpc was not found. Thus we create
397 	     a new tostruct and link it to the head of the chain.  */
398 	  toindex = ++tos[0].link;
399 	  /* Sanity check.  */
400 	  if (toindex >= tolimit)
401 	    {
402 	      --tos[0].link;
403 	      goto done;
404 	    }
405 	  top = &tos[toindex];
406 	  top->selfpc = selfpc;
407 	  top->count = 1;
408 	  /* Link back to the old tos entry.  */
409 	  top->link = *frompcindex;
410 	  /* Store a link to the new top in the froms array which makes the
411 	     current tos head of the chain.  */
412 	  *frompcindex = toindex;
413 	  goto done;
414 	}
415       else
416 	{
417 	  /* Otherwise check the next arc on the chain.  */
418 	  prevtop = top;
419 	  top = &tos[top->link];
420 	  if (top->selfpc == selfpc)
421 	    {
422 	      /* selfpc matches; increment its count.  */
423 	      top->count++;
424 	      /* Move it to the head of the chain.  */
425 	      /* Save previous tos index.  */
426 	      toindex = prevtop->link;
427 	      /* Link the former to to the current tos.  */
428 	      prevtop->link = top->link;
429 	      /* Link back to the old tos entry.  */
430 	      top->link = *frompcindex;
431 	      /* Store a link to the new top in the froms array which makes the
432 	         current tos head of the chain.  */
433 	      *frompcindex = toindex;
434 	      goto done;
435 	    }
436 	}
437     }
438 done:
439   /* Enable interrupts if necessary.  */
440   if (__builtin_expect (mach_stat & 1, 0))
441     spu_ienable ();
442 }
443