gprofng: Don't hardcode -Wno-nonnull-compare
[binutils-gdb.git] / gprofng / libcollector / collector.c
1 /* Copyright (C) 2021 Free Software Foundation, Inc.
2 Contributed by Oracle.
3
4 This file is part of GNU Binutils.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
20
21 #include "config.h"
22 #include <alloca.h>
23 #include <errno.h>
24 #include <signal.h>
25 #include <ucontext.h>
26 #include <stdlib.h> /* exit() */
27 #include <sys/param.h>
28 #include <sys/utsname.h> /* struct utsname */
29 #include <sys/resource.h>
30 #include <sys/syscall.h> /* system call fork() */
31
32 #include "gp-defs.h"
33 #include "collector.h"
34 #include "descendants.h"
35 #include "gp-experiment.h"
36 #include "memmgr.h"
37 #include "cc_libcollector.h"
38 #include "tsd.h"
39
/* TprintfT(<level>,...) definitions.  Adjust per module as needed.
   These are the level arguments passed to TprintfT throughout this file.  */
#define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings
#define DBG_LT1 1 // for configuration details, warnings
#define DBG_LT2 2
#define DBG_LT3 3

typedef unsigned long ulong_t;

/* Symbols defined outside this file.  environ is handed to
   __collector_env_printall when an experiment is opened;
   __collector_close_experiment is called from collector_init when the
   open fails; __collector_set_size_limit handles the L: parameter.  */
extern char **environ;
extern void __collector_close_experiment ();
extern int __collector_set_size_limit (char *par);
51
/* ------- internal function prototypes ---------- */

/* Entry points that are published to modules through the
   collector_interface table defined below.  */
CollectorModule __collector_register_module (ModuleInterface *modint);
static void write_sample (char *name);
static const char *__collector_get_params ();
static const char *__collector_get_expdir ();
static FrameInfo __collector_getUserCtx (CollectorModule modl, HiResTime ts, int mode, void *arg);
static FrameInfo __collector_getUID1 (CM_Array *arg);
static int __collector_writeMetaData (CollectorModule modl, char *format, ...);
static int __collector_writeDataRecord (CollectorModule modl, struct Common_packet *pckt);
static int __collector_writeDataPacket (CollectorModule modl, struct CM_Packet *pckt);
static void *allocCSize (struct Heap*, unsigned, int);
static void freeCSize (struct Heap*, void*, unsigned);
static void *allocVSize (struct Heap*, unsigned);
static void *reallocVSize (struct Heap*, void*, unsigned);

/* File-local helpers: experiment directory handling, the log file,
   sampling, pause/resume signals and the overview file.  get_progspec is
   additionally exported through collector_interface.  */
static int collector_create_expr_dir (const char *new_exp_name);
static int collector_create_expr_dir_lineage (const char *parent_exp_name);
static int collector_exp_dir_append_x (int linenum, const char *parent_exp_name);
static int collector_tail_init (const char *parent_exp_name);
static int log_open ();
static void log_header_write (sp_origin_t origin);
static void log_pause ();
static void log_resume ();
static void fs_warn ();
static void log_close ();
static void get_progspec (char *cmdline, int tmp_sz, char *progname, int sz);
static void sample_handler (int, siginfo_t*, void*);
static int sample_set_interval (char *);
static int set_duration (char *);
static int sample_set_user_sig (char *);
static void pause_handler (int, siginfo_t*, void*);
static int pause_set_user_sig (char *);
static int set_user_sig_action (char*);
static void ovw_open ();
static hrtime_t ovw_write ();
87
/* ------- global data controlling the collector's behavior -------- */

/* Table of callbacks handed to every module (see get_collector_interface
   and __collector_register_module).  The initializer is positional, so
   the entries must stay in the order of the CollectorInterface members;
   the trailing comment on each line names the slot being filled.  The
   getHiResTime slot starts out NULL and is filled in by
   get_collector_interface.  */
static CollectorInterface collector_interface ={
  __collector_register_module, /* registerModule */
  __collector_get_params, /* getParams */
  __collector_get_expdir, /* getExpDir */
  __collector_log_write, /* writeLog */
  __collector_getUserCtx, /* getFrameInfo */
  __collector_getUID1, /* getUID */
  __collector_getUID, /* getUID2 */
  __collector_getStackTrace, /* getStackTrace */
  __collector_writeMetaData, /* writeMetaData */
  __collector_writeDataRecord, /* writeDataRecord */
  __collector_writeDataPacket, /* writeDataPacket */
  write_sample, /* write_sample */
  get_progspec, /* get_progspec */
  __collector_open_experiment, /* open_experiment */
  NULL, /* getHiResTime */
  __collector_newHeap, /* newHeap */
  __collector_deleteHeap, /* deleteHeap */
  allocCSize, /* allocCSize */
  freeCSize, /* freeCSize */
  allocVSize, /* allocVSize */
  reallocVSize, /* reallocVSize */
  __collector_tsd_create_key, /* createKey */
  __collector_tsd_get_by_key, /* getKey */
  __collector_dlog /* writeDebugInfo */
};
116
/* Module registry.  Slots are filled in by __collector_register_module;
   the three arrays are indexed by the module index it returns.  */
#define MAX_MODULES 32
static ModuleInterface *modules[MAX_MODULES];   /* registered module interfaces */
static int modules_st[MAX_MODULES];             /* 0 at registration; later the openExperiment
                                                   result, or -1 if no handle could be created */
static void *modules_hndl[MAX_MODULES];         /* data handle from __collector_create_handle */
static volatile int nmodules = 0;               /* number of slots in use */

/* flag set non-zero, if data collected implies a filesystem warning is appropriate */
static int fs_matters = 0;
static const char *collector_params = NULL;     /* params string given to __collector_open_experiment */
static const char *project_home = NULL;         /* copy of the P: parameter value, if one was given */
Heap *__collector_heap = NULL;                  /* global heap, created when an experiment is opened */
int __collector_no_threads;
int __collector_libthread_T1 = -1;

/* reset when an experiment is opened: a fork child inherits
   paused_when_suspended, every other origin starts at 0 */
static volatile int collector_paused = 0;

int __collector_tracelevel = -1;                /* SP_COLLECTOR_TRACELEVEL (DEBUG builds only) */
static int collector_debug_opt = 0;             /* SP_COLLECTOR_DEBUG (DEBUG builds only) */

hrtime_t __collector_next_sample = 0;
int __collector_sample_period = 0; /* if non-zero, periodic sampling is enabled */

hrtime_t __collector_delay_start = 0; /* if non-zero, delay before starting data */
hrtime_t __collector_terminate_time = 0; /* if non-zero, fixed duration run */

static collector_mutex_t __collector_glob_lock = COLLECTOR_MUTEX_INITIALIZER;
static collector_mutex_t __collector_open_guard = COLLECTOR_MUTEX_INITIALIZER;
static collector_mutex_t __collector_close_guard = COLLECTOR_MUTEX_INITIALIZER;
static collector_mutex_t __collector_sample_guard = COLLECTOR_MUTEX_INITIALIZER;
static collector_mutex_t __collector_suspend_guard = COLLECTOR_MUTEX_INITIALIZER;
static collector_mutex_t __collector_resume_guard = COLLECTOR_MUTEX_INITIALIZER;
char __collector_exp_dir_name[MAXPATHLEN + 1] = ""; /* experiment directory */
int __collector_size_limit = 0;

static char *archive_mode = NULL;               /* copy of the a: parameter value, if one was given */

volatile sp_state_t __collector_expstate = EXP_INIT;
static int exp_origin = SP_ORIGIN_LIBCOL_INIT;  /* origin recorded by __collector_open_experiment */
static int exp_open = 0;                        /* non-zero makes a second open fail with COL_ERROR_EXPOPEN */
int __collector_exp_active = 0;
static int paused_when_suspended = 0;
static int exp_initted = 0;                     /* set to 1 once pause/resume/close are allowed */
static char exp_progspec[_POSIX_ARG_MAX + 1]; /* program cmdline. includes args */
static char exp_progname[_POSIX_ARG_MAX + 1]; /* program name == argv[0] */

hrtime_t __collector_start_time = 0;
static time_t start_sec_time = 0;               /* time(NULL) taken when the experiment is opened */

/* Sample related data */
static int sample_installed = 0; /* 1 if the sample signal handler installed */
static int sample_mode = 0; /* dynamically turns sample record writing on/off */
static int sample_number = 0; /* index of the current sample record */
static struct sigaction old_sample_handler;
int __collector_sample_sig = -1; /* user-specified sample signal */
int __collector_sample_sig_warn = 0; /* non-zero if warning already given */

/* Pause/resume related data */
static struct sigaction old_pause_handler;
int __collector_pause_sig = -1; /* user-specified pause signal */
int __collector_pause_sig_warn = 0; /* non-zero if warning already given */

static struct sigaction old_close_handler;
static struct sigaction old_exit_handler;

/* Experiment files */
static char ovw_name[MAXPATHLEN]; /* Overview data file name */

/* macro to convert a timestruc to hrtime_t */
#define ts2hrt(x) ((hrtime_t)(x).tv_sec*NANOSEC + (hrtime_t)(x).tv_nsec)
186
/* Pick up the debugging knobs from the environment.  Only DEBUG builds
   do anything here: SP_COLLECTOR_TRACELEVEL, when set, replaces
   __collector_tracelevel, and SP_COLLECTOR_DEBUG, when set, replaces
   collector_debug_opt with the SP_DUMP_TIME and SP_DUMP_FLAG bits
   cleared.  */
static void
init_tracelevel ()
{
#if DEBUG
  char *level_str = CALL_UTIL (getenv)("SP_COLLECTOR_TRACELEVEL");
  if (level_str != NULL)
    __collector_tracelevel = CALL_UTIL (atoi)(level_str);
  TprintfT (DBG_LT0, "collector: SP_COLLECTOR_TRACELEVEL=%d\n", __collector_tracelevel);

  char *debug_str = CALL_UTIL (getenv)("SP_COLLECTOR_DEBUG");
  if (debug_str != NULL)
    {
      int requested = CALL_UTIL (atoi)(debug_str);
      collector_debug_opt = requested & ~(SP_DUMP_TIME | SP_DUMP_FLAG);
    }
#endif
}
200
201 static CollectorInterface *
202 get_collector_interface ()
203 {
204 if (collector_interface.getHiResTime == NULL)
205 collector_interface.getHiResTime = __collector_gethrtime;
206 return &collector_interface;
207 }
208
209 /*
210 * __collector_module_init is an alternate method to initialize
211 * dynamic collector modules (er_heap, er_sync, er_iotrace, er_mpi, tha).
212 * Every module that needs to register itself with libcollector
213 * before the experiment is open implements its own global
214 * __collector_module_init and makes sure the next one is called.
215 */
216 static void
217 collector_module_init (CollectorInterface *col_intf)
218 {
219 int nmodules = 0;
220
221 ModuleInitFunc next_init = (ModuleInitFunc) dlsym (RTLD_DEFAULT, "__collector_module_init");
222 if (next_init != NULL)
223 {
224 nmodules++;
225 next_init (col_intf);
226 }
227 TprintfT (DBG_LT1, "collector_module_init: %d modules\n", nmodules);
228 }
229
230 /* Routines concerned with general experiment start and stop */
231
232 /* initialization -- init section routine -- called when libcollector loaded */
233 static void collector_init () __attribute__ ((constructor));
234
235 static void
236 collector_init ()
237 {
238 if (__collector_util_init () != 0)
239 /* we can't do anything without various utility functions */
240 abort ();
241 init_tracelevel ();
242
243 /*
244 * Unconditionally install the SIGPROF handler
245 * to process signals originated in dtracelets.
246 */
247 __collector_sigprof_install ();
248
249 /* Initialize all preloaded modules */
250 collector_module_init (get_collector_interface ());
251
252 /* determine experiment name */
253 char *exp = CALL_UTIL (getenv)("SP_COLLECTOR_EXPNAME");
254 if ((exp == NULL) || (CALL_UTIL (strlen)(exp) == 0))
255 {
256 TprintfT (DBG_LT0, "collector_init: SP_COLLECTOR_EXPNAME undefined - no experiment to start\n");
257 /* not set -- no experiment to run */
258 return;
259 }
260 else
261 TprintfT (DBG_LT1, "collector_init: found SP_COLLECTOR_EXPNAME = %s\n", exp);
262
263 /* determine the data descriptor for the experiment */
264 char *params = CALL_UTIL (getenv)("SP_COLLECTOR_PARAMS");
265 if (params == NULL)
266 {
267 TprintfT (0, "collector_init: SP_COLLECTOR_PARAMS undefined - no experiment to start\n");
268 return;
269 }
270
271 /* now do the real open of the experiment */
272 if (__collector_open_experiment (exp, params, SP_ORIGIN_LIBCOL_INIT))
273 {
274 TprintfT (0, "collector_init: __collector_open_experiment failed\n");
275 /* experiment open failed, close it */
276 __collector_close_experiment ();
277 return;
278 }
279 return;
280 }
281
282 CollectorModule
283 __collector_register_module (ModuleInterface *modint)
284 {
285 TprintfT (DBG_LT1, "collector: module %s calls for registration.\n",
286 modint->description == NULL ? "(null)" : modint->description);
287 if (modint == NULL)
288 return COLLECTOR_MODULE_ERR;
289 if (nmodules >= MAX_MODULES)
290 return COLLECTOR_MODULE_ERR;
291 if (modint->initInterface &&
292 modint->initInterface (get_collector_interface ()))
293 return COLLECTOR_MODULE_ERR;
294 int idx = nmodules++;
295 modules[idx] = modint;
296 modules_st[idx] = 0;
297
298 if (exp_open && modint->openExperiment)
299 {
300 modules_st[idx] = modint->openExperiment (__collector_exp_dir_name);
301 if (modules_st[idx] == COL_ERROR_NONE && modules[idx]->description != NULL)
302 {
303 modules_hndl[idx] = __collector_create_handle (modules[idx]->description);
304 if (modules_hndl[idx] == NULL)
305 modules_st[idx] = -1;
306 }
307 }
308 if (__collector_exp_active && collector_paused == 0 &&
309 modint->startDataCollection && modules_st[idx] == 0)
310 modint->startDataCollection ();
311 TprintfT (DBG_LT1, "collector: module %s (%d) registered.\n",
312 modint->description == NULL ? "(null)" : modint->description, idx);
313 return (CollectorModule) idx;
314 }
315
316 static const char *
317 __collector_get_params ()
318 {
319 return collector_params;
320 }
321
322 static const char *
323 __collector_get_expdir ()
324 {
325 return __collector_exp_dir_name;
326 }
327
328 static FrameInfo
329 __collector_getUserCtx (CollectorModule modl, HiResTime ts, int mode, void *arg)
330 {
331 return __collector_get_frame_info (ts, mode, arg);
332 }
333
334 static FrameInfo
335 __collector_getUID1 (CM_Array *arg)
336 {
337 return __collector_getUID (arg, (FrameInfo) 0);
338 }
339
340 static int
341 __collector_writeMetaData (CollectorModule modl, char *format, ...)
342 {
343 if (modl < 0 || modl >= nmodules || modules[modl]->description == NULL)
344 {
345 TprintfT (DBG_LT0, "__collector_writeMetaData(): bad module: %d\n", modl);
346 return 1;
347 }
348 char fname[MAXPATHLEN + 1];
349 CALL_UTIL (strlcpy)(fname, __collector_exp_dir_name, sizeof (fname));
350 CALL_UTIL (strlcat)(fname, "/metadata.", sizeof (fname));
351 CALL_UTIL (strlcat)(fname, modules[modl]->description, sizeof (fname));
352 CALL_UTIL (strlcat)(fname, ".xml", sizeof (fname));
353 int fd = CALL_UTIL (open)(fname, O_CREAT | O_WRONLY | O_APPEND,
354 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
355 if (fd < 0)
356 {
357 TprintfT (DBG_LT0, "__collector_writeMetaData(): can't open file: %s\n", fname);
358 return 1;
359 }
360 char buf[1024];
361 char *bufptr = buf;
362 va_list va;
363 va_start (va, format);
364 int sz = __collector_xml_vsnprintf (bufptr, sizeof (buf), format, va);
365 va_end (va);
366
367 if (sz >= sizeof (buf))
368 {
369 /* Allocate a new buffer */
370 sz += 1; /* add the terminating null byte */
371 bufptr = (char*) alloca (sz);
372
373 va_start (va, format);
374 sz = __collector_xml_vsnprintf (bufptr, sz, format, va);
375 va_end (va);
376 }
377 CALL_UTIL (write)(fd, bufptr, sz);
378 CALL_UTIL (close)(fd);
379 return COL_ERROR_NONE;
380 }
381
382 /* check that the header fields are filled-in, and then call __collector_writeDataPacket */
383 static int
384 __collector_writeDataRecord (CollectorModule modl, struct Common_packet *pckt)
385 {
386 return __collector_write_record (modules_hndl[modl], pckt);
387 }
388
389 static int
390 __collector_writeDataPacket (CollectorModule modl, struct CM_Packet *pckt)
391 {
392 return __collector_write_packet (modules_hndl[modl], pckt);
393 }
394
395 static void *
396 allocCSize (struct Heap *heap, unsigned sz, int log)
397 {
398 return __collector_allocCSize (heap ? heap : __collector_heap, sz, log);
399 }
400
401 static void
402 freeCSize (struct Heap *heap, void *ptr, unsigned sz)
403 {
404 __collector_freeCSize (heap ? heap : __collector_heap, ptr, sz);
405 }
406
407 static void *
408 allocVSize (struct Heap *heap, unsigned sz)
409 {
410 return __collector_allocVSize (heap ? heap : __collector_heap, sz);
411 }
412
413 static void *
414 reallocVSize (struct Heap *heap, void *ptr, unsigned sz)
415 {
416 return __collector_reallocVSize (heap ? heap : __collector_heap, ptr, sz);
417 }
418
419 static time_t
420 get_gm_time (struct tm *tp)
421 {
422 /*
423 Note that glibc contains a function of the same purpose named `timegm'.
424 But obviously, it is not universally available.
425
426 Some implementations of mktime return -1 for the nonexistent localtime hour
427 at the beginning of DST. In this event, use 'mktime(tm - 1hr) + 3600'.
428 nonexistent
429 tm_isdst is set to 0 to force mktime to introduce a consistent offset
430 (the non DST offset) since tm and tm+o might be on opposite sides of a DST change.
431
432 Schematically:
433 mktime(tm) --> t+o
434 gmtime_r(t+o) --> tm+o
435 mktime(tm+o) --> t+2o
436 t = t+o - (t+2o - t+o)
437 */
438 struct tm stm;
439 time_t tl = CALL_UTIL (mktime)(tp);
440 if (tl == -1)
441 {
442 stm = *tp;
443 stm.tm_hour--;
444 tl = CALL_UTIL (mktime)(&stm);
445 if (tl == -1)
446 return -1;
447 tl += 3600;
448 }
449
450 (void) (CALL_UTIL (gmtime_r)(&tl, &stm));
451 stm.tm_isdst = 0;
452 time_t tb = CALL_UTIL (mktime)(&stm);
453 if (tb == -1)
454 {
455 stm.tm_hour--;
456 tb = CALL_UTIL (mktime)(&stm);
457 if (tb == -1)
458 return -1;
459 tb += 3600;
460 }
461 return (tl - (tb - tl));
462 }
463
464 static void
465 log_write_event_run ()
466 {
467 /* get the gm and local time */
468 struct tm start_stm;
469 CALL_UTIL (gmtime_r)(&start_sec_time, &start_stm);
470 time_t start_gm_time = get_gm_time (&start_stm);
471 time_t lcl_time = CALL_UTIL (mktime)(&start_stm);
472 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" time=\"%lld\" tm_zone=\"%lld\"/>\n",
473 SP_JCMD_RUN,
474 (unsigned) (__collector_start_time / NANOSEC),
475 (unsigned) (__collector_start_time % NANOSEC),
476 (long long) start_gm_time,
477 (long long) (lcl_time - start_gm_time));
478 }
479
480 static void *
481 m_dlopen (const char *filename, int flag)
482 {
483 void *p = dlopen (filename, flag);
484 TprintfT (DBG_LT1, "collector.c: dlopen(%s, %d) returns %p\n", filename, flag, p);
485 return p;
486 }
487 /* real routine to open an experiment
488 * called by collector_init from libcollector init section
489 * called by __collector_start_experiment when a child is forked */
490 int
491 __collector_open_experiment (const char *exp, const char *params, sp_origin_t origin)
492 {
493 char *s;
494 char *buf = NULL;
495 char *duration_string = NULL;
496 int err;
497 int is_founder = 1;
498 int record_this_experiment = 1;
499 int seen_F_flag = 0;
500 static char buffer[32];
501 if (exp_open)
502 {
503 /* experiment already opened */
504 TprintfT (0, "collector: ERROR: Attempt to open opened experiment\n");
505 return COL_ERROR_EXPOPEN;
506 }
507 __collector_start_time = collector_interface.getHiResTime ();
508 TprintfT (DBG_LT1, "\n\t\t__collector_open_experiment(SP_COLLECTOR_EXPNAME=%s, params=%s, origin=%d); setting start_time\n",
509 exp, params, origin);
510 if (environ)
511 __collector_env_printall ("__collector_open_experiment", environ);
512 else
513 TprintfT (DBG_LT1, "collector_open_experiment found environ == NULL)\n");
514
515 /*
516 * Recheck sigprof handler
517 * XXXX Bug 18177509 - additional sigprof signal kills target program
518 */
519 __collector_sigprof_install ();
520 exp_origin = origin;
521 collector_params = params;
522
523 /* Determine which of the three possible threading models:
524 * singlethreaded
525 * multi-LWP (no threads)
526 * multithreaded
527 * is the one the target is actually using.
528 *
529 * we really only need to distinguish between first two
530 * and the third. The thr_main() trick does exactly that.
531 * is the one the target is actually using.
532 *
533 * __collector_no_threads applies to all signal handlers,
534 * and must be set before signal handlers are installed.
535 */
536 __collector_no_threads = 0;
537 __collector_exp_dir_name[0] = 0;
538 sample_mode = 0;
539 sample_number = 0;
540
541 /* create global heap */
542 if (__collector_heap == NULL)
543 {
544 __collector_heap = __collector_newHeap ();
545 if (__collector_heap == NULL)
546 {
547 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment COLERROR_NOZMEM 1\n");
548 return COL_ERROR_NOZMEM;
549 }
550 }
551 //check whether is origin is collect
552 char * envar = CALL_UTIL (getenv)("SP_COLLECTOR_ORIGIN_COLLECT");
553 TprintfT (DBG_LT1, "__collector_open_experiment SP_COLLECTOR_ORIGIN_COLLECT = '%s'\n",
554 (envar == NULL) ? "NULL" : envar);
555 if (envar)
556 exp_origin = SP_ORIGIN_COLLECT;
557
558 //check if this is the founder process
559 is_founder = getpid ();
560 if (origin != SP_ORIGIN_DBX_ATTACH)
561 {
562 envar = CALL_UTIL (getenv)("SP_COLLECTOR_FOUNDER");
563 if (envar)
564 is_founder = CALL_UTIL (atoi)(envar);
565 if (is_founder != 0)
566 {
567 if (is_founder != getpid ())
568 {
569 TprintfT (0, "__collector_open_experiment SP_COLLECTOR_FOUNDER=%d != pid(%d)\n",
570 is_founder, getpid ());
571 //CALL_UTIL(fprintf)(stderr, "__collector_open_experiment SP_COLLECTOR_FOUNDER=%d != pid(%d); not recording experiment\n",
572 //is_founder, getpid() );
573 //return COL_ERROR_UNEXP_FOUNDER;
574 is_founder = 0; // Special case (CR 22917352)
575 }
576 /* clear FOUNDER for descendant experiments */
577 TprintfT (0, "__collector_open_experiment setting SP_COLLECTOR_FOUNDER=0\n");
578 CALL_UTIL (strlcpy)(buffer, "SP_COLLECTOR_FOUNDER=0", sizeof (buffer));
579 CALL_UTIL (putenv)(buffer);
580 }
581 }
582
583 /* Set up fork/exec interposition (requires __collector_heap). */
584 /* Determine if "collect -F" specification enables this subexperiment */
585 get_progspec (exp_progspec, sizeof (exp_progspec), exp_progname, sizeof (exp_progname));
586
587 /* convert the returned exp_progname to a basename */
588 const char * base_name = __collector_strrchr (exp_progname, '/');
589 if (base_name == NULL)
590 base_name = exp_progname;
591 else
592 base_name = base_name + 1;
593 err = __collector_ext_line_init (&record_this_experiment, exp_progspec, base_name);
594 if (err != COL_ERROR_NONE)
595 {
596 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment COLERROR: %d\n", err);
597 return err;
598 }
599
600 /* Due to the fix of bug 15691122, we need to initialize unwind to make
601 * the function __collector_ext_return_address() work for dlopen interposition.
602 * */
603 if (!record_this_experiment && !is_founder)
604 {
605 TprintfT (DBG_LT0, "__collector_open_experiment: NOT creating experiment. (is_founder=%d, record=%d)\n",
606 is_founder, record_this_experiment);
607 return collector_tail_init (exp);
608 }
609 TprintfT (DBG_LT0, "__collector_open_experiment: is_founder=%d, record=%d\n",
610 is_founder, record_this_experiment);
611 if (is_founder || origin == SP_ORIGIN_FORK)
612 {
613 CALL_UTIL (strlcpy)(__collector_exp_dir_name, exp, sizeof (__collector_exp_dir_name));
614 if (origin == SP_ORIGIN_FORK)
615 { /*create exp dir for fork-child*/
616 if (collector_create_expr_dir (__collector_exp_dir_name))
617 {
618 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 1: `%s'\n", exp);
619 return COL_ERROR_BADDIR;
620 }
621 }
622 }
623 else
624 {/* founder/fork-child will already have created experiment dir, but exec/combo descendants must do so now */
625 if (collector_create_expr_dir_lineage (exp))
626 {
627 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 2: `%s'\n", exp);
628 return COL_ERROR_BADDIR;
629 }
630 static char exp_name_env[MAXPATHLEN + 1];
631 TprintfT (DBG_LT1, "collector_open_experiment: setting SP_COLLECTOR_EXPNAME to %s\n", __collector_exp_dir_name);
632 CALL_UTIL (snprintf)(exp_name_env, sizeof (exp_name_env), "SP_COLLECTOR_EXPNAME=%s", __collector_exp_dir_name);
633 CALL_UTIL (putenv)(exp_name_env);
634 }
635 /* Check that the name is that of a directory (new structure) */
636 DIR *expDir = CALL_UTIL (opendir)(__collector_exp_dir_name);
637 if (expDir == NULL)
638 {
639 /* can't open it */
640 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 3: `%s'\n", exp);
641 return COL_ERROR_BADDIR;
642 }
643 CALL_UTIL (closedir)(expDir);
644
645 if (CALL_UTIL (access)(__collector_exp_dir_name, W_OK))
646 {
647 TprintfT (0, "collector: ERROR: access error: errno=%d\n", errno);
648 if ((errno == EACCES) || (errno == EROFS))
649 {
650 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_DIRPERM: `%s'\n", exp);
651 TprintfT (DBG_LT0, "collector: ERROR: experiment directory `%s' is not writeable\n",
652 __collector_exp_dir_name);
653 return COL_ERROR_DIRPERM;
654 }
655 else
656 {
657 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 4: `%s'\n", exp);
658 return COL_ERROR_BADDIR;
659 }
660 }
661
662 /* reset the paused flag */
663 collector_paused = (origin == SP_ORIGIN_FORK ? paused_when_suspended : 0);
664
665 /* mark the experiment as opened */
666 __collector_expstate = EXP_OPEN;
667 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_OPEN\n");
668
669 /* open the log file */
670 err = log_open ();
671 if (err != COL_ERROR_NONE)
672 {
673 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_LOG_OPEN\n");
674 return COL_ERROR_LOG_OPEN;
675 }
676 if (origin != SP_ORIGIN_GENEXP && origin != SP_ORIGIN_KERNEL)
677 log_header_write (origin);
678
679 /* Make a copy of params so that we can modify the string */
680 int paramsz = CALL_UTIL (strlen)(params) + 1;
681 buf = (char*) alloca (paramsz);
682 if (buf == NULL)
683 {
684 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_ARGS2BIG: %s\n", params);
685 TprintfT (DBG_LT0, "collector: ERROR: experiment parameter `%s' is too long\n", params);
686 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n",
687 SP_JCMD_CERROR, COL_ERROR_ARGS2BIG);
688 return COL_ERROR_ARGS2BIG;
689 }
690 CALL_UTIL (strlcpy)(buf, params, paramsz);
691
692 /* create directory for archives (if founder) */
693 char archives[MAXPATHLEN];
694 CALL_UTIL (snprintf)(archives, MAXPATHLEN, "%s/%s", __collector_exp_dir_name,
695 SP_ARCHIVES_DIR);
696 if (is_founder)
697 {
698 mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
699 if ((CALL_UTIL (mkdir)(archives, dmode) != 0) && (errno != EEXIST))
700 {
701 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_MKDIR: %s: errno = %d\n", archives, errno);
702 TprintfT (0, "collector: ERROR: mkdir(%s) failed: errno = %d\n", archives, errno);
703 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">mkdir(%s): errno=%d</event>\n",
704 SP_JCMD_COMMENT, COL_COMMENT_NONE, archives, errno);
705 /* this is not a fatal error currently */
706 }
707 else
708 TprintfT (DBG_LT1, "collector: archive mkdir(%s) succeeded\n", archives);
709 }
710
711 /* initialize the segments map and mmap interposition */
712 if (origin != SP_ORIGIN_GENEXP && origin != SP_ORIGIN_KERNEL)
713 {
714 if ((err = __collector_ext_mmap_install (1)) != COL_ERROR_NONE)
715 {
716 __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", SP_JCMD_CERROR, err);
717 return err;
718 }
719 }
720
721 /* open the overview file for sample data */
722 if (origin != SP_ORIGIN_GENEXP)
723 ovw_open ();
724
725 /* initialize TSD module (note: relies on __collector_heap) */
726 if (__collector_tsd_init () != 0)
727 {
728 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_TSD_INIT\n");
729 __collector_log_write ("<event kind=\"%s\" id=\"%d\">TSD could not be initialized</event>\n", SP_JCMD_CERROR, COL_ERROR_TSD_INIT);
730 return COL_ERROR_TSD_INIT;
731 }
732
733 /* experiment is initialized; allow pause/resume/close */
734 exp_initted = 1;
735
736 // 24935305 should not use SIGPROF if collect -p -t and -S are all off
737 /* (check here if -t or -S is on; -p is checked later) */
738 if (((params[0] == 't' || params[0] == 'S') && params[1] == ':')
739 || CALL_UTIL (strstr)(params, ";t:")
740 || CALL_UTIL (strstr)(params, ";S:"))
741 {
742 /* set a default time to 100 ms.; use negative value to force setting */
743 TprintfT (DBG_LT1, "collector: open_experiment setting timer to 100000\n");
744 __collector_ext_itimer_set (-100000);
745 }
746
747 /* call open for all dynamic modules */
748 int i;
749 for (i = 0; i < nmodules; i++)
750 {
751 if (modules[i]->openExperiment != NULL)
752 {
753 modules_st[i] = modules[i]->openExperiment (__collector_exp_dir_name);
754 if (modules_st[i] == COL_ERROR_NONE && modules[i]->description != NULL)
755 {
756 modules_hndl[i] = __collector_create_handle (modules[i]->description);
757 if (modules_hndl[i] == NULL)
758 modules_st[i] = -1;
759 }
760 }
761 /* check to see if anyone closed the experiment */
762 if (!exp_initted)
763 {
764 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_EXP_OPEN\n");
765 __collector_log_write ("<event kind=\"%s\" id=\"%d\">Experiment closed prematurely</event>\n", SP_JCMD_CERROR, COL_ERROR_EXPOPEN);
766 return COL_ERROR_EXPOPEN;
767 }
768 }
769
770 /* initialize for subsequent stack unwinds */
771 __collector_ext_unwind_init (1);
772 TprintfT (DBG_LT0, "__collector_open_experiment(); module init done, params=%s\n",
773 buf);
774
775 /* now parse the data descriptor */
776 /* The parameter string is a series of specifiers,
777 * each of which is of the form:
778 * <key>:<param>;
779 * key is a single letter, the : and ; are mandatory,
780 * and param is a string which may be zero-length, and
781 * which contains any character except a null-byte or ;
782 * param is interpreted by the handler for the particular key
783 */
784
785 s = buf;
786
787 while (*s)
788 {
789 char *par;
790 char key = *s++;
791 /* ensure that it's followed by a colon */
792 if (*s++ != ':')
793 {
794 TprintfT (0, "collector: ERROR: parameter %c is not followed by a colon\n", key);
795 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, COL_ERROR_ARGS, params);
796 return COL_ERROR_ARGS;
797 }
798 /* find the semicolon terminator */
799 par = s;
800 while (*s && (*s != ';'))
801 s++;
802 if (*s != ';')
803 {
804 /* not followed by semicolon */
805 TprintfT (0, "collector: ERROR: parameter %c:%s is not terminated by a semicolon\n", key, par);
806 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, COL_ERROR_ARGS, params);
807 return COL_ERROR_ARGS;
808 }
809 /* terminate par, and position for next descriptor */
810 *s++ = 0;
811
812 /* now process that element of the data descriptor */
813 switch (key)
814 {
815 case 'g': /* g<sig>; */
816 if ((err = sample_set_user_sig (par)) != COL_ERROR_NONE)
817 {
818 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
819 return err;
820 }
821 break;
822 case 'd': /* d<sig>; -or- d<sig>p; */
823 if ((err = pause_set_user_sig (par)) != COL_ERROR_NONE)
824 {
825 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
826 return err;
827 }
828 break;
829 case 'H':
830 m_dlopen ("libgp-heap.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */
831 break;
832 case 's':
833 m_dlopen ("libgp-sync.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */
834 break;
835 case 'i':
836 m_dlopen ("libgp-iotrace.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */
837 break;
838 case 'F': /* F; */
839 seen_F_flag = 1;
840 TprintfT (DBG_LT0, "__collector_open_experiment: calling __collector_ext_line_install (%s, %s)\n",
841 par, __collector_exp_dir_name);
842 if ((err = __collector_ext_line_install (par, __collector_exp_dir_name)) != COL_ERROR_NONE)
843 {
844 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
845 return err;
846 }
847 break;
848 case 'a': /* a; */
849 archive_mode = __collector_strdup (par);
850 break;
851 case 't': /* t:<expt-duration>; */
852 duration_string = par;
853 break;
854 case 'S': /* S:<sample-interval>; */
855 if ((err = sample_set_interval (par)) != COL_ERROR_NONE)
856 {
857 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
858 return err;
859 }
860 break;
861 case 'L': /* L:<experiment-size-limit>; */
862 if ((err = __collector_set_size_limit (par)) != COL_ERROR_NONE)
863 {
864 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
865 return err;
866 }
867 break;
868 case 'P': /* P:PROJECT_HOME; */
869 project_home = __collector_strdup (par);
870 break;
871 case 'h':
872 case 'p':
873 fs_matters = 1;
874 break;
875 case 'Y':
876 err = set_user_sig_action (par);
877 if (err != COL_ERROR_NONE)
878 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
879 break;
880 default:
881 /* Ignore unknown parameters; allow them to be handled by modules */
882 break;
883 }
884 }
885 /* end of data descriptor parsing */
886
887 if (!seen_F_flag)
888 {
889 char * par = "0"; // This will not happen when collect has no -F option
890 if ((err = __collector_ext_line_install (par, __collector_exp_dir_name)) != COL_ERROR_NONE)
891 {
892 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
893 return err;
894 }
895 }
896
897 /* now that we know what data is being collected, we can set the filesystem warning */
898 fs_warn ();
899
900 // We have to create all tsd keys before __collector_tsd_allocate().
901 // With the pthreads-based implementation, this might no longer be necessary.
902 // In any case, we still have to create the key before a thread can use it.
903 __collector_ext_gettid_tsd_create_key ();
904 __collector_ext_dispatcher_tsd_create_key ();
905
906 /* allocate tsd for the current thread */
907 if (__collector_tsd_allocate () != 0)
908 {
909 __collector_log_write ("<event kind=\"%s\" id=\"%d\">TSD allocate failed</event>\n", SP_JCMD_CERROR, COL_ERROR_EXPOPEN);
910 return COL_ERROR_EXPOPEN;
911 }
912 /* init tsd for unwind, called right after __collector_tsd_allocate()*/
913 __collector_ext_unwind_key_init (1, NULL);
914
915 /* start java attach if suitable */
916 if (exp_origin == SP_ORIGIN_DBX_ATTACH)
917 __collector_jprofile_start_attach ();
918 start_sec_time = CALL_UTIL (time)(NULL);
919 __collector_start_time = collector_interface.getHiResTime ();
920 TprintfT (DBG_LT0, "\t__collector_open_experiment; resetting start_time\n");
921 if (duration_string != NULL && (err = set_duration (duration_string)) != COL_ERROR_NONE)
922 {
923 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, duration_string);
924 return err;
925 }
926
927 /* install the common SIGPROF dispatcher (requires TSD) */
928 if ((err = __collector_ext_dispatcher_install ()) != COL_ERROR_NONE)
929 {
930 __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", SP_JCMD_CERROR, err);
931 return err;
932 }
933
934 /* mark the experiment open complete */
935 exp_open = 1;
936 if (exp_origin == SP_ORIGIN_DBX_ATTACH)
937 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" time=\"%lld\" tm_zone=\"%lld\"/>\n",
938 SP_JCMD_RUN,
939 (unsigned) (__collector_start_time / NANOSEC), (unsigned) (__collector_start_time % NANOSEC),
940 (long long) start_sec_time, (long long) 0);
941 else
942 log_write_event_run ();
943
944 /* schedule the first sample */
945 __collector_next_sample = __collector_start_time + ((hrtime_t) NANOSEC) * __collector_sample_period;
946 __collector_ext_usage_sample (MASTER_SMPL, "collector_open_experiment");
947
948 /* start data collection in dynamic modules */
949 if (collector_paused == 0)
950 {
951 for (i = 0; i < nmodules; i++)
952 if (modules[i]->startDataCollection != NULL && modules_st[i] == 0)
953 modules[i]->startDataCollection ();
954 }
955 else
956 {
957 hrtime_t ts = GETRELTIME ();
958 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n",
959 SP_JCMD_PAUSE, (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC));
960 }
961
962 /* mark the experiment active */
963 __collector_exp_active = 1;
964 return COL_ERROR_NONE;
965 }
966
967 /* prepare directory for new experiment of fork-child */
968
969 /* return 0 if successful */
970 static int
971 collector_create_expr_dir (const char *new_exp_name)
972 {
973 int ret = -1;
974 mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
975 TprintfT (DBG_LT1, "collector: __collector_create_expr_dir(%s)\n", new_exp_name);
976 if (CALL_UTIL (mkdir)(new_exp_name, dmode) < 0)
977 TprintfT (0, "__collector_create_expr_dir(%s) ERROR: errno=%d\n", new_exp_name, errno);
978 else
979 ret = 0;
980 return (ret);
981 }
982
983 /* append _xN to __collector_exp_dir_name*/
984 /* return 0 if successful */
985 static int
986 collector_exp_dir_append_x (int linenum, const char *parent_exp_name)
987 {
988 char buffer[MAXPATHLEN + 1];
989 char * p = __collector_strrchr (parent_exp_name, '/');
990 if (p == NULL || (*(p + 1) != '_'))
991 {
992 size_t sz = CALL_UTIL (strlen)(parent_exp_name);
993 const char * q = parent_exp_name + sz - 3;
994 if (sz < 3 || __collector_strncmp (q, ".er", CALL_UTIL (strlen)(q)) != 0
995 || CALL_UTIL (access)(parent_exp_name, F_OK) != 0)
996 {
997 TprintfT (0, "collector_exp_dir_append_x() ERROR: invalid parent_exp_name %s\n", parent_exp_name);
998 return -1;
999 }
1000 CALL_UTIL (strlcpy)(buffer, parent_exp_name, sizeof (buffer));
1001 CALL_UTIL (snprintf)(__collector_exp_dir_name, sizeof (__collector_exp_dir_name),
1002 "%s/_x%d.er", buffer, linenum);
1003 }
1004 else
1005 {
1006 p = __collector_strrchr (parent_exp_name, '.');
1007 if (p == NULL || *(p + 1) != 'e' || *(p + 2) != 'r')
1008 {
1009 TprintfT (0, "collector_exp_dir_append_x() ERROR: invalid parent_exp_name %s\n", parent_exp_name);
1010 return -1;
1011 }
1012 CALL_UTIL (strlcpy)(buffer, parent_exp_name,
1013 ((p - parent_exp_name + 1)<sizeof (buffer)) ? (p - parent_exp_name + 1) : sizeof (buffer));
1014 CALL_UTIL (snprintf)(__collector_exp_dir_name, sizeof (__collector_exp_dir_name),
1015 "%s_x%d.er", buffer, linenum);
1016 }
1017 return 0;
1018 }
1019
1020 /* prepare directory for new experiment of exec/combo child*/
1021
1022 /* return 0 if successful */
1023 static int
1024 collector_create_expr_dir_lineage (const char *parent_exp_name)
1025 {
1026 int ret = -1;
1027 mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
1028 int linenum = 1;
1029 while (linenum < INT_MAX)
1030 {
1031 if (collector_exp_dir_append_x (linenum, parent_exp_name) != 0)
1032 return -1;
1033 if (CALL_UTIL (access)(__collector_exp_dir_name, F_OK) != 0)
1034 {
1035 if (CALL_UTIL (mkdir)(__collector_exp_dir_name, dmode) == 0)
1036 return 0;
1037 }
1038 linenum++;
1039 TprintfT (DBG_LT0, "collector: collector_create_expr_dir_lineage(%s -> %s)\n", parent_exp_name, __collector_exp_dir_name);
1040 }
1041 return (ret);
1042 }
1043
1044 /* Finish the initializing work if we don't collect data while libcollector.so is preloaded. */
1045 /* return COL_ERROR_NONE if successful */
1046 static int
1047 collector_tail_init (const char *parent_exp_name)
1048 {
1049 int err = COL_ERROR_NONE;
1050 if (exp_origin != SP_ORIGIN_FORK)
1051 {
1052 /* For exec/combo descendants. Don't create dir for this subexp, but update lineage by appending "_x0". */
1053 /* Different children can have the same _x0 if their name don't match -F exp.
1054 * Assume their fork children inherit the program name, there will be no _x0_fN.er to create.
1055 * So we don't need to worry about the lineage messed up by _x0.
1056 */
1057 int linenum = 0;
1058 if (collector_exp_dir_append_x (linenum, parent_exp_name) != 0)
1059 return COL_ERROR_BADDIR;
1060 static char exp_name_env[MAXPATHLEN + 1];
1061 CALL_UTIL (snprintf)(exp_name_env, sizeof (exp_name_env), "SP_COLLECTOR_EXPNAME=%s", __collector_exp_dir_name);
1062 TprintfT (DBG_LT1, "collector_tail_init: setting SP_COLLECTOR_EXPNAME to %s\n", __collector_exp_dir_name);
1063 CALL_UTIL (putenv)(exp_name_env);
1064 }
1065 /* initialize the segments map and mmap interposition */
1066 if (exp_origin != SP_ORIGIN_GENEXP && exp_origin != SP_ORIGIN_KERNEL)
1067 if ((err = __collector_ext_mmap_install (0)) != COL_ERROR_NONE)
1068 return err;
1069
1070 /* initialize TSD module (note: relies on __collector_heap) */
1071 if (__collector_tsd_init () != 0)
1072 return COL_ERROR_EXPOPEN;
1073
1074 /* initialize for subsequent stack unwinds */
1075 __collector_ext_unwind_init (0);
1076
1077 char * buf = NULL;
1078 /* Make a copy of params so that we can modify the string */
1079 int paramsz = CALL_UTIL (strlen)(collector_params) + 1;
1080 buf = (char*) alloca (paramsz);
1081 CALL_UTIL (strlcpy)(buf, collector_params, paramsz);
1082
1083 char *par_F = "0";
1084 char *s;
1085 for (s = buf; *s;)
1086 {
1087 char key = *s++;
1088 /* ensure that it's followed by a colon */
1089 if (*s++ != ':')
1090 {
1091 TprintfT (DBG_LT0, "collector_tail_init: ERROR: parameter %c is not followed by a colon\n", key);
1092 return COL_ERROR_ARGS;
1093 }
1094
1095 /* find the semicolon terminator */
1096 char *par = s;
1097 while (*s && (*s != ';'))
1098 s++;
1099 if (*s != ';')
1100 {
1101 /* not followed by semicolon */
1102 TprintfT (0, "collector_tail_init: ERROR: parameter %c:%s is not terminated by a semicolon\n", key, par);
1103 return COL_ERROR_ARGS;
1104 }
1105 /* terminate par, and position for next descriptor */
1106 *s++ = 0;
1107 /* now process that element of the data descriptor */
1108 if (key == 'F')
1109 {
1110 par_F = par;
1111 break;
1112 }
1113 }
1114 if ((err = __collector_ext_line_install (par_F, __collector_exp_dir_name)) != COL_ERROR_NONE)
1115 return err;
1116
1117 /* allocate tsd for the current thread */
1118 if (__collector_tsd_allocate () != 0)
1119 return COL_ERROR_EXPOPEN;
1120 return COL_ERROR_NONE;
1121 }
1122
1123 /* routines concerning closing the experiment */
1124 /* close down -- fini section routine */
1125 static void collector_fini () __attribute__ ((destructor));
1126 static void
1127 collector_fini ()
1128 {
1129 TprintfT (DBG_LT0, "collector_fini: closing experiment\n");
1130 __collector_close_experiment ();
1131
1132 }
1133
1134 void collector_terminate_expt () __attribute__ ((weak, alias ("__collector_terminate_expt")));
1135
1136 /* __collector_terminate_expt called by user, or from dbx */
1137 void
1138 __collector_terminate_expt ()
1139 {
1140 TprintfT (DBG_LT0, "__collector_terminate_expt: %s; calling close\n", __collector_exp_dir_name);
1141 __collector_close_experiment ();
1142 TprintfT (DBG_LT0, "__collector_terminate_expt done\n\n");
1143 }
1144
1145 /*
1146 * We manage the SIGCHLD handler with sigaction and don't worry about signal or sigset().
1147 * This is in line with the comments in dispatcher.c
1148 * immediately preceding the wrapper function for (Linux) signal().
1149 */
1150 static struct sigaction original_sigchld_sigaction;
1151 static pid_t mychild_pid = -1;
1152
1153 /* __collector_SIGCHLD_signal_handler called when er_archive exits */
1154 static void
1155 __collector_SIGCHLD_signal_handler (int sig, siginfo_t *si, void *context)
1156 {
1157 pid_t calling_pid = si->si_pid;
1158 /* Potential race.
1159 * We get mychild_pid from the vfork() return value.
1160 * So there is an outside chance that the child completes and sends SIGCHLD
1161 * before the handler knows the value of mychild_pid.
1162 */
1163 if (calling_pid == mychild_pid)
1164 // er_archive has exited; so restore the user handler
1165 __collector_sigaction (SIGCHLD, &original_sigchld_sigaction, NULL);
1166 else
1167 {
1168 // if we can't identify the pid, the signal must be for the user's handler
1169 if (original_sigchld_sigaction.sa_handler != SIG_DFL
1170 && original_sigchld_sigaction.sa_handler != SIG_IGN)
1171 original_sigchld_sigaction.sa_sigaction (sig, si, context);
1172 }
1173 TprintfT (DBG_LT1, "__collector_SIGCHLD_signal_handler done\n\n");
1174 }
1175
1176 int
1177 collector_sigchld_sigaction (const struct sigaction *nact,
1178 struct sigaction *oact)
1179 {
1180 // get the current SIGCHLD handler
1181 struct sigaction cur_handler;
1182 __collector_sigaction (SIGCHLD, NULL, &cur_handler);
1183
1184 // if we have NOT installed our own handler, return an error
1185 // (force the caller to deal with this case)
1186 if (cur_handler.sa_sigaction != __collector_SIGCHLD_signal_handler)
1187 return -1;
1188
1189 // if we HAVE installed our own handler, act on the user's handler
1190 if (oact)
1191 __collector_memcpy (oact, &original_sigchld_sigaction, sizeof (struct sigaction));
1192 if (nact)
1193 __collector_memcpy (&original_sigchld_sigaction, nact, sizeof (struct sigaction));
1194 return 0;
1195 }
1196
1197 /*
1198 * __collector_close_experiment may be called either from
1199 * __collector_terminate_expt() or the .fini section
1200 */
1201 void
1202 __collector_close_experiment ()
1203 {
1204 hrtime_t ts;
1205 char *argv[10];
1206 int status;
1207 TprintfT (DBG_LT1, "collector: __collector_close_experiment(): %s\n", __collector_exp_dir_name);
1208 if (!exp_initted)
1209 return;
1210 /* The experiment may have been previously closed */
1211 if (!exp_open)
1212 return;
1213
1214 if (__collector_mutex_trylock (&__collector_close_guard))
1215 /* someone else is in the middle of closing the experiment */
1216 return;
1217
1218 /* record the termination of the experiment */
1219 ts = GETRELTIME ();
1220 collector_params = NULL;
1221
1222 /* tell all dynamic modules to stop data collection */
1223 int i;
1224 for (i = 0; i < nmodules; i++)
1225 if (modules[i]->stopDataCollection != NULL)
1226 modules[i]->stopDataCollection ();
1227
1228 /* notify all dynamic modules the experiment is being closed */
1229 for (i = 0; i < nmodules; i++)
1230 {
1231 if (modules[i]->closeExperiment != NULL)
1232 modules[i]->closeExperiment ();
1233 __collector_delete_handle (modules_hndl[i]);
1234 modules_hndl[i] = NULL;
1235 }
1236
1237 /* acquire the global lock -- only one close at a time */
1238 __collector_mutex_lock (&__collector_glob_lock);
1239 /* deinstall mmap tracing (with final update) */
1240 __collector_ext_mmap_deinstall (1);
1241
1242 /* deinstall common SIGPROF dispatcher */
1243 __collector_ext_dispatcher_deinstall ();
1244
1245 /* disable line interposition */
1246 __collector_ext_line_close ();
1247
1248 /* Other threads may be reading tsd now. */
1249 //__collector_tsd_fini();
1250
1251 /* delete global heap */
1252 /* omazur: do not delete the global heap
1253 * to avoid crashes in TSD. Need a better solution.
1254 __collector_deleteHeap( __collector_heap );
1255 __collector_heap = NULL;
1256 */
1257 __collector_mutex_unlock (&__collector_glob_lock);
1258
1259 /* take a final sample */
1260 __collector_ext_usage_sample (MASTER_SMPL, "collector_close_experiment");
1261 sample_mode = 0;
1262
1263 /* close the frameinfo file */
1264 __collector_ext_unwind_close ();
1265 if (exp_origin != SP_ORIGIN_DBX_ATTACH)
1266 log_write_event_run ();
1267
1268 /* mark the experiment as closed */
1269 __collector_expstate = EXP_CLOSED;
1270 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_CLOSED: project_home=%s\n",
1271 STR (project_home));
1272 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n",
1273 SP_JCMD_EXIT,
1274 (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC));
1275
1276 /* derive er_archive's absolute path from that of libcollector */
1277 argv[0] = NULL;
1278 if (project_home && archive_mode && __collector_strcmp (archive_mode, "off"))
1279 {
1280 /* construct a command to launch it */
1281 char *er_archive_name = "/bin/gp-archive";
1282 size_t cmdlen = CALL_UTIL (strlen)(project_home) + CALL_UTIL (strlen)(er_archive_name) + 1;
1283 char *command = (char*) alloca (cmdlen);
1284 CALL_UTIL (snprintf)(command, cmdlen, "%s%s", project_home, er_archive_name);
1285 if (CALL_UTIL (access)(command, F_OK) == 0)
1286 {
1287 // build the argument list
1288 int nargs = 0;
1289 argv[nargs++] = command;
1290 argv[nargs++] = "-n";
1291 argv[nargs++] = "-a";
1292 argv[nargs++] = archive_mode;
1293 size_t len = CALL_UTIL (strlen)(__collector_exp_dir_name) + 1;
1294 size_t len1 = CALL_UTIL (strlen)(SP_ARCHIVE_LOG_FILE) + 1;
1295 char *str = (char*) alloca (len + len1);
1296 CALL_UTIL (snprintf)(str, len + 15, "%s/%s", __collector_exp_dir_name, SP_ARCHIVE_LOG_FILE);
1297 argv[nargs++] = "--outfile";
1298 argv[nargs++] = str;
1299 str = (char*) alloca (len);
1300 CALL_UTIL (snprintf)(str, len, "%s", __collector_exp_dir_name);
1301 argv[nargs++] = str;
1302 argv[nargs] = NULL;
1303 }
1304 }
1305
1306 /* log the archive command to be run */
1307 if (argv[0] == NULL)
1308 {
1309 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n",
1310 SP_JCMD_COMMENT, COL_COMMENT_NONE, "No archive command run");
1311 TprintfT (DBG_LT1, "collector: No archive command run\n");
1312 }
1313 else
1314 {
1315 char cmdbuf[4096];
1316 int bufoffset = 0;
1317 int i;
1318 for (i = 0; argv[i] != NULL; i++)
1319 {
1320 bufoffset += CALL_UTIL (snprintf)(&cmdbuf[bufoffset], (sizeof (cmdbuf) - bufoffset),
1321 " %s", argv[i]);
1322 }
1323 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">Archive command `%s'</event>\n",
1324 SP_JCMD_COMMENT, COL_COMMENT_NONE, cmdbuf);
1325 TprintfT (DBG_LT1, "collector: running `%s'\n", cmdbuf);
1326 }
1327 log_close ();
1328 TprintfT (DBG_LT1, "__collector_close_experiment(%s) done\n", __collector_exp_dir_name);
1329 exp_open = 0; /* mark the experiment as closed */
1330 __collector_exp_active = 0; /* mark the experiment as inactive */
1331
1332 /* reset all experiment parameters */
1333 sample_mode = 0;
1334 collector_paused = 0;
1335 __collector_pause_sig = -1;
1336 __collector_pause_sig_warn = 0;
1337 __collector_sample_sig = -1;
1338 __collector_sample_sig_warn = 0;
1339 __collector_sample_period = 0;
1340 __collector_exp_dir_name[0] = 0;
1341
1342 /* uninstall the pause and sample signal handlers */
1343 /* XXXX -- not yet, because of potential race conditions in libthread */
1344 if (argv[0] == NULL)
1345 {
1346 /* er_archive command will not be run */
1347 __collector_mutex_unlock (&__collector_close_guard);
1348 return;
1349 }
1350
1351 struct sigaction sa;
1352 CALL_UTIL (memset)(&sa, 0, sizeof (struct sigaction));
1353 sa.sa_sigaction = __collector_SIGCHLD_signal_handler;
1354 sa.sa_flags = SA_SIGINFO;
1355 __collector_sigaction (SIGCHLD, &sa, &original_sigchld_sigaction);
1356
1357 /* linetrace interposition takes care of unsetting Environment variables */
1358 /* create a child process to invoke er_archive */
1359 pid_t pid = CALL_UTIL (vfork)();
1360 if (pid == 0)
1361 {
1362 /* pid is zero == child process -- invoke er_archive */
1363 /* Unset LD_PRELOAD environment variables */
1364 CALL_UTIL (unsetenv)("LD_PRELOAD_32");
1365 CALL_UTIL (unsetenv)("LD_PRELOAD_64");
1366 CALL_UTIL (unsetenv)("LD_PRELOAD");
1367 /* Invoke er_archive */
1368 CALL_UTIL (execv)(argv[0], argv);
1369 CALL_UTIL (exit)(1); /* exec failed -- child exits with an error */
1370 }
1371 else if (pid != -1)
1372 {
1373 mychild_pid = pid; // notify our signal handler who the child is
1374 pid_t w;
1375 /* copied from system.c */
1376 do
1377 {
1378 w = CALL_UTIL (waitpid)(pid, &status, 0);
1379 }
1380 while (w == -1 && errno == EINTR);
1381 TprintfT (DBG_LT1, "collector: creating archive done\n");
1382 // __collector_SIGCHLD_signal_handler should now be de-installed, but it does so itself
1383 }
1384 else
1385 /* child-process creation failed */
1386 TprintfT (DBG_LT0, "collector: creating archive process failed\n");
1387
1388 __collector_mutex_unlock (&__collector_close_guard);
1389 TprintfT (DBG_LT1, "collector: __collector_close_experiment done\n");
1390 return;
1391 }
1392
1393 /*
1394 * void __collector_clean_state()
1395 * Perform all necessary cleanup steps in child process after fork().
1396 */
1397 void
1398 __collector_clean_state ()
1399 {
1400 TprintfT (DBG_LT1, "collector: collector_clean_state()\n");
1401 int i;
1402 /*
1403 * We are in child process after fork().
1404 * First of all we have to reset all mutex locks in collector's subsystems.
1405 * After that we can reinitialize modules.
1406 */
1407 __collector_mmgr_init_mutex_locks (__collector_heap);
1408 __collector_mutex_init (&__collector_glob_lock);
1409 __collector_mutex_init (&__collector_open_guard);
1410 __collector_mutex_init (&__collector_close_guard);
1411 __collector_mutex_init (&__collector_sample_guard);
1412 __collector_mutex_init (&__collector_suspend_guard);
1413 __collector_mutex_init (&__collector_resume_guard);
1414
1415 if (__collector_mutex_trylock (&__collector_close_guard))
1416 /* someone else is in the middle of closing the experiment */
1417 return;
1418
1419 /* Stop data collection in all dynamic modules */
1420 for (i = 0; i < nmodules; i++)
1421 if (modules[i]->stopDataCollection != NULL)
1422 modules[i]->stopDataCollection ();
1423
1424 // Now we can reset modules
1425 for (i = 0; i < nmodules; i++)
1426 {
1427 if (modules[i]->detachExperiment != NULL && modules_st[i] == 0)
1428 modules[i]->detachExperiment ();
1429 __collector_delete_handle (modules_hndl[i]);
1430 modules_hndl[i] = NULL;
1431 }
1432
1433 /* acquire the global lock -- only one suspend at a time */
1434 __collector_mutex_lock (&__collector_glob_lock);
1435 {
1436
1437 /* stop any profile data writing */
1438 paused_when_suspended = collector_paused;
1439 collector_paused = 1;
1440
1441 /* deinstall common SIGPROF dispatcher */
1442 __collector_ext_dispatcher_suspend ();
1443
1444 /* mark the experiment as suspended */
1445 __collector_exp_active = 0;
1446
1447 /* XXXX mark the experiment as closed! */
1448 exp_open = 0; /* This is a hack to allow fork child to call__collector_open_experiment() */
1449
1450 /* mark the experiment log closed! */
1451 log_close ();
1452 }
1453 __collector_mutex_unlock (&__collector_glob_lock);
1454
1455 // Now we can reset subsystems.
1456 __collector_ext_dispatcher_fork_child_cleanup ();
1457 __collector_mmap_fork_child_cleanup ();
1458 __collector_tsd_fork_child_cleanup ();
1459 paused_when_suspended = 0;
1460 collector_paused = 0;
1461 __collector_expstate = EXP_INIT;
1462 TprintfT (DBG_LT1, "__collector_clean_slate: __collector_expstate->EXP_INIT\n");
1463 exp_origin = SP_ORIGIN_LIBCOL_INIT;
1464 exp_initted = 0;
1465 __collector_start_time = collector_interface.getHiResTime ();
1466 TprintfT (DBG_LT1, " -->__collector_clean_slate; resetting start_time\n");
1467 start_sec_time = 0;
1468
1469 /* Sample related data */
1470 sample_installed = 0; // 1 if the sample signal handler installed
1471 sample_mode = 0; // dynamically turns sample record writing on/off
1472 sample_number = 0; // index of the current sample record
1473 __collector_sample_sig = -1; // user-specified sample signal
1474 __collector_sample_sig_warn = 0; // non-zero if warning already given
1475
1476 /* Pause/resume related data */
1477 __collector_pause_sig = -1; // user-specified pause signal
1478 __collector_pause_sig_warn = 0; // non-zero if warning already given
1479 __collector_mutex_unlock (&__collector_close_guard);
1480 return;
1481 }
1482
1483 /* modelled on __collector_close_experiment */
1484 void
1485 __collector_suspend_experiment (char *why)
1486 {
1487 if (!exp_initted)
1488 return;
1489 /* The experiment may have been previously closed */
1490 if (!exp_open)
1491 return;
1492 /* The experiment may have been previously suspended */
1493 if (!__collector_exp_active)
1494 return;
1495 if (__collector_mutex_trylock (&__collector_suspend_guard))
1496 /* someone else is in the middle of suspending the experiment */
1497 return;
1498
1499 /* Stop data collection in all dynamic modules */
1500 int i;
1501 for (i = 0; i < nmodules; i++)
1502 if (modules[i]->stopDataCollection != NULL)
1503 modules[i]->stopDataCollection ();
1504
1505 /* take a pre-suspension sample */
1506 __collector_ext_usage_sample (MASTER_SMPL, why);
1507
1508 /* acquire the global lock -- only one suspend at a time */
1509 __collector_mutex_lock (&__collector_glob_lock);
1510 /* stop any profile data writing */
1511 paused_when_suspended = collector_paused;
1512 collector_paused = 1;
1513
1514 /* deinstall common SIGPROF dispatcher */
1515 __collector_ext_dispatcher_suspend ();
1516
1517 /* mark the experiment as suspended */
1518 __collector_exp_active = 0;
1519
1520 /* XXXX mark the experiment as closed! */
1521 exp_open = 0; // This is a hack to allow fork child to call __collector_open_experiment()
1522 log_pause (); // mark the experiment log closed!
1523 TprintfT (DBG_LT0, "collector: collector_suspend_experiment(%s, %d)\n\n", why, collector_paused);
1524 __collector_mutex_unlock (&__collector_glob_lock);
1525 __collector_mutex_unlock (&__collector_suspend_guard);
1526 return;
1527 }
1528
1529 void
1530 __collector_resume_experiment ()
1531 {
1532 if (!exp_initted)
1533 return;
1534
1535 /* The experiment may have been previously resumed */
1536 if (__collector_exp_active)
1537 return;
1538 if (__collector_mutex_trylock (&__collector_resume_guard))
1539 /* someone else is in the middle of resuming the experiment */
1540 return;
1541
1542 /* acquire the global lock -- only one resume at a time */
1543 __collector_mutex_lock (&__collector_glob_lock);
1544 /* mark the experiment as re-activated */
1545 __collector_exp_active = 1;
1546 /* XXXX mark the experiment as open! */
1547 exp_open = 1; // This is a hack to allow fork child to call__collector_open_experiment()
1548 log_resume (); // mark the experiment log re-opened!
1549 TprintfT (DBG_LT0, "collector: collector_resume_experiment(%d)\n", paused_when_suspended);
1550 /* resume any profile data writing */
1551 collector_paused = paused_when_suspended;
1552 /* restart common SIGPROF dispatcher */
1553 __collector_ext_dispatcher_restart ();
1554 __collector_mutex_unlock (&__collector_glob_lock);
1555
1556 /* take a post-suspension sample */
1557 __collector_ext_usage_sample (MASTER_SMPL, "collector_resume_experiment");
1558
1559 /* Resume data collection in all dynamic modules */
1560 if (collector_paused == 0)
1561 {
1562 int i;
1563 for (i = 0; i < nmodules; i++)
1564 if (modules[i]->startDataCollection != NULL && modules_st[i] == 0)
1565 modules[i]->startDataCollection ();
1566 }
1567
1568 if (__collector_sample_period != 0)
1569 {
1570 hrtime_t now = collector_interface.getHiResTime ();
1571 while (__collector_next_sample < now)
1572 __collector_next_sample += ((hrtime_t) NANOSEC) * __collector_sample_period;
1573 }
1574
1575 /* check for experiment past termination time */
1576 if (__collector_terminate_time != 0)
1577 {
1578 hrtime_t now = collector_interface.getHiResTime ();
1579 if (__collector_terminate_time < now)
1580 {
1581 TprintfT (DBG_LT0, "__collector_resume_experiment: now (%lld) > terminate_time (%lld); closing experiment\n",
1582 (now - __collector_start_time), (__collector_terminate_time - __collector_start_time));
1583 __collector_close_experiment ();
1584 }
1585 }
1586 __collector_mutex_unlock (&__collector_resume_guard);
1587 return;
1588 }
1589
1590 /* Code to support Samples and Pause/Resume */
1591 void collector_sample () __attribute__ ((weak, alias ("__collector_sample")));
1592 void
1593 __collector_sample (char *name)
1594 {
1595 __collector_ext_usage_sample (PROGRAM_SMPL, name);
1596 }
1597
1598 static void
1599 write_sample (char *name)
1600 {
1601 if (sample_mode == 0)
1602 return;
1603 /* make the sample timestamp relative to the start */
1604 hrtime_t ts, now = collector_interface.getHiResTime ();
1605
1606 /* update time for next periodic sample */
1607 /* since this is common to all LWPs, and only one (the first!) will
1608 update it to the next period, doing the update early will avoid
1609 the overhead/frustration of the other LWPs
1610 */
1611 if (__collector_sample_period != 0)
1612 {
1613 /* this update should only be done for periodic samples */
1614 while (__collector_next_sample < now)
1615 __collector_next_sample += ((hrtime_t) NANOSEC) * __collector_sample_period;
1616 }
1617
1618 /* take the sample and record it; use (return - __collector_start_time) for timestamp */
1619 now = ovw_write ();
1620 ts = now - __collector_start_time;
1621
1622 /* write sample records to log file */
1623 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" id=\"%d\" label=\"%s\"/>\n",
1624 SP_JCMD_SAMPLE,
1625 (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC),
1626 sample_number,
1627 name);
1628 /* increment the sample number */
1629 sample_number++;
1630 }
1631
1632 /*
1633 * __collector_ext_usage_sample
1634 *
1635 * Handle taking a process usage sample and recording it.
1636 * Common to all different types of sample:
1637 * libcollector master samples at initiation and close,
1638 * programmatic samples via libcollector API calls,
1639 * periodic samples originating in the dispatcher,
1640 * manual samples originating in the signal sample handler,
1641 * manual samples originating from the debugger
1642 * Differentiating type and name information is currently not recorded.
1643 */
1644 void
1645 __collector_ext_usage_sample (Smpl_type type, char *name)
1646 {
1647 /* name is optional */
1648 if (name == NULL)
1649 name = "";
1650 TprintfT (DBG_LT3, "collector: __collector_ext_usage_sample(%d,%s)\n", type, name);
1651 if (!exp_initted)
1652 return;
1653
1654 /* if paused, don't record periodic samples */
1655 if ((type == PERIOD_SMPL) && (collector_paused == 1))
1656 return;
1657
1658 /* There is a possibility of entering this function
1659 * from sample_handler, dbx direct call to __collector_sample,
1660 * and user called collector_sample. Since we are making a
1661 * new sample anyway just return.
1662 */
1663 if (__collector_mutex_trylock (&__collector_sample_guard))
1664 return;
1665 if (type != PERIOD_SMPL || __collector_sample_period != 0)
1666 write_sample (name);
1667 __collector_mutex_unlock (&__collector_sample_guard);
1668 }
1669
1670 /* set the sample period from the parameter */
1671 static int
1672 sample_set_interval (char *param)
1673 {
1674 if (!exp_initted)
1675 return COL_ERROR_SMPLINIT;
1676 __collector_sample_period = CALL_UTIL (strtol)(param, NULL, 0); /* seconds */
1677 TprintfT (DBG_LT1, "collector: collector_sample period set to %d seconds.\n",
1678 __collector_sample_period);
1679 if (__collector_sample_period > 0)
1680 (void) __collector_log_write ("<setting %s=\"%d\"/>\n",
1681 SP_JCMD_SAMPLE_PERIOD, __collector_sample_period);
1682 return COL_ERROR_NONE;
1683 }
1684
1685 /* set the experiment duration from the parameter */
1686
1687 /* parameter is of the form nnn:mmm, where nnn is the start delay in seconds,
1688 * and mmm is the terminate time in seconds; if nnn is zero,
1689 * data collection starts when the run starts. If mmm is zero,
1690 * data collection terminates when the run terminates. Otherwise,
1691 * nnn must be less than mmm
1692 */
1693 static int
1694 set_duration (char *param)
1695 {
1696 if (!exp_initted)
1697 return COL_ERROR_DURATION_INIT;
1698 int delay_start = CALL_UTIL (strtol)(param, &param, 0); /* seconds */
1699 int terminate_duration = 0;
1700 if (*param == 0)
1701 {
1702 /* we only have one parameter, the terminate time */
1703 terminate_duration = delay_start;
1704 delay_start = 0;
1705 }
1706 else if (*param == ':')
1707 {
1708 param++;
1709 terminate_duration = CALL_UTIL (strtol)(param, &param, 0); /* seconds */
1710 }
1711 else
1712 return COL_ERROR_DURATION_INIT;
1713 TprintfT (DBG_LT1, "collector: collector_delay_start duration set to %d seconds.\n",
1714 delay_start);
1715 TprintfT (DBG_LT1, "collector: collector_terminate duration set to %d seconds.\n",
1716 terminate_duration);
1717 if (terminate_duration > 0)
1718 __collector_log_write ("<setting %s=\"%d\"/>\n<setting %s=\"%d\"/>\n",
1719 SP_JCMD_DELAYSTART, delay_start,
1720 SP_JCMD_TERMINATE, terminate_duration);
1721 __collector_delay_start = (hrtime_t) 0;
1722 if (delay_start != 0)
1723 {
1724 __collector_delay_start = __collector_start_time + ((hrtime_t) NANOSEC) * delay_start;
1725 collector_paused = 1;
1726 }
1727 __collector_terminate_time = terminate_duration == 0 ? (hrtime_t) 0 :
1728 __collector_start_time + ((hrtime_t) NANOSEC) * terminate_duration;
1729 return COL_ERROR_NONE;
1730 }
1731
1732 static int
1733 sample_set_user_sig (char *par)
1734 {
1735 int sig = CALL_UTIL (strtol)(par, &par, 0);
1736 TprintfT (DBG_LT1, "collector: sample_set_user_sig(sig=%d,installed=%d)\n",
1737 sig, sample_installed);
1738 /* Installing the sampling signal handler more
1739 * than once is not good.
1740 */
1741 if (!sample_installed)
1742 {
1743 struct sigaction act;
1744 sigemptyset (&act.sa_mask);
1745 /* XXXX should any signals be blocked? */
1746 act.sa_sigaction = sample_handler;
1747 act.sa_flags = SA_RESTART | SA_SIGINFO;
1748 if (sigaction (sig, &act, &old_sample_handler) == -1)
1749 {
1750 TprintfT (DBG_LT0, "collector: ERROR: collector_sample_handler install failed (sig=%d).\n",
1751 __collector_sample_sig);
1752 return COL_ERROR_ARGS;
1753 }
1754 if (old_sample_handler.sa_handler == SIG_DFL ||
1755 old_sample_handler.sa_sigaction == sample_handler)
1756 old_sample_handler.sa_handler = SIG_IGN;
1757 TprintfT (DBG_LT1, "collector: collector_sample_handler installed (sig=%d,hndlr=0x%p).\n",
1758 sig, sample_handler);
1759 __collector_sample_sig = sig;
1760 sample_installed = 1;
1761 }
1762 (void) __collector_log_write ("<setting %s=\"%u\"/>\n", SP_JCMD_SAMPLE_SIG, __collector_sample_sig);
1763 return COL_ERROR_NONE;
1764 }
1765
1766 /* signal handler for sample signal */
1767 static void
1768 sample_handler (int sig, siginfo_t *sip, void *uap)
1769 {
1770 if (sip && sip->si_code == SI_USER)
1771 {
1772 TprintfT (DBG_LT1, "collector: collector_sample_handler sampling!\n");
1773 __collector_ext_usage_sample (MANUAL_SMPL, "signal");
1774 }
1775 else if (old_sample_handler.sa_handler != SIG_IGN)
1776 {
1777 TprintfT (DBG_LT1, "collector: collector_sample_handler forwarding signal.\n");
1778 (old_sample_handler.sa_sigaction)(sig, sip, uap);
1779 }
1780 }
1781
/* collector_pause is exported as a weak alias of __collector_pause.  */
void collector_pause () __attribute__ ((weak, alias ("__collector_pause")));

/* Pause data collection, recording "API" as the reason.  */
void
__collector_pause ()
{
  char *why = "API";
  __collector_pause_m (why);
}
1789
1790 void
1791 __collector_pause_m (char *reason)
1792 {
1793 hrtime_t now;
1794 char xreason[MAXPATHLEN];
1795 TprintfT (DBG_LT0, "collector: __collector_pause_m(%s)\n", reason);
1796
1797 /* Stop data collection in all dynamic modules */
1798 for (int i = 0; i < nmodules; i++)
1799 if (modules[i]->stopDataCollection != NULL)
1800 modules[i]->stopDataCollection ();
1801
1802 /* Take a pause sample */
1803 CALL_UTIL (snprintf)(xreason, sizeof (xreason), "collector_pause(%s)", reason);
1804 __collector_ext_usage_sample (MASTER_SMPL, xreason);
1805
1806 /* Record the event in the log file */
1807 now = GETRELTIME ();
1808 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" name=\"%s\"/>\n", SP_JCMD_PAUSE,
1809 (unsigned) (now / NANOSEC), (unsigned) (now % NANOSEC), reason);
1810 __collector_expstate = EXP_PAUSED;
1811 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_PAUSED\n");
1812 collector_paused = 1;
1813 }
1814
1815 void collector_resume () __attribute__ ((weak, alias ("__collector_resume")));
1816
1817 void
1818 __collector_resume ()
1819 {
1820 TprintfT (DBG_LT0, "collector: __collector_resume()\n");
1821 __collector_expstate = EXP_OPEN;
1822 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_OPEN\n");
1823
1824 /* Record the event in the log file */
1825 hrtime_t now = GETRELTIME ();
1826 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n", SP_JCMD_RESUME,
1827 (unsigned) (now / NANOSEC), (unsigned) (now % NANOSEC));
1828 /* Take a resume sample */
1829 __collector_ext_usage_sample (MASTER_SMPL, "collector_resume");
1830
1831 /* Resume data collection in all dynamic modules */
1832 for (int i = 0; i < nmodules; i++)
1833 if (modules[i]->startDataCollection != NULL && modules_st[i] == 0)
1834 modules[i]->startDataCollection ();
1835 collector_paused = 0;
1836 }
1837
1838 static int
1839 pause_set_user_sig (char *par)
1840 {
1841 struct sigaction act;
1842 int sig = CALL_UTIL (strtol)(par, &par, 0);
1843 if (*par)
1844 {
1845 /* not end of the token */
1846 if (*par != 'p')
1847 {
1848 /* it should be a p */
1849 TprintfT (DBG_LT0, "collector: ERROR: collector_user_handler bad terminator (par=%p[0]=%d).\n",
1850 par, (int) *par);
1851 return COL_ERROR_ARGS;
1852
1853 }
1854 else
1855 {
1856 /*, it's a p, make sure next is end of token */
1857 par++;
1858 if (*par)
1859 {
1860 TprintfT (DBG_LT0, "collector: ERROR: collector_user_handler bad terminator (par=%p[0]=%d).\n",
1861 par, (int) *par);
1862 return COL_ERROR_ARGS;
1863 }
1864 else
1865 /* start off paused */
1866 collector_paused = 1;
1867 }
1868 }
1869 sigemptyset (&act.sa_mask);
1870 /* XXXX should any signals be blocked? */
1871 act.sa_sigaction = pause_handler;
1872 act.sa_flags = SA_RESTART | SA_SIGINFO;
1873 if (sigaction (sig, &act, &old_pause_handler) == -1)
1874 {
1875 TprintfT (DBG_LT0, "collector: ERROR: collector_pause_handler install failed (sig=%d).\n", sig);
1876 return COL_ERROR_ARGS;
1877 }
1878 if (old_pause_handler.sa_handler == SIG_DFL ||
1879 old_pause_handler.sa_sigaction == pause_handler)
1880 old_pause_handler.sa_handler = SIG_IGN;
1881 TprintfT (DBG_LT1, "collector: collector_pause_handler installed (sig=%d,hndlr=0x%p).\n",
1882 sig, pause_handler);
1883 __collector_pause_sig = sig;
1884 (void) __collector_log_write ("<setting %s=\"%u\"/>\n", SP_JCMD_PAUSE_SIG,
1885 __collector_pause_sig);
1886 return COL_ERROR_NONE;
1887 }
1888
1889 /* signal handler for pause/resume signal */
1890 static void
1891 pause_handler (int sig, siginfo_t *sip, void *uap)
1892 {
1893 if (sip && sip->si_code == SI_USER)
1894 {
1895 if (collector_paused == 1)
1896 {
1897 __collector_resume ();
1898 TprintfT (DBG_LT0, "collector: collector_pause_handler resumed!\n");
1899 }
1900 else
1901 {
1902 __collector_pause_m ("signal");
1903 TprintfT (DBG_LT0, "collector: collector_pause_handler paused!\n");
1904 }
1905 }
1906 else if (old_pause_handler.sa_handler != SIG_IGN)
1907 {
1908 TprintfT (DBG_LT0, "collector: collector_pause_handler forwarding signal.\n");
1909 (old_pause_handler.sa_sigaction)(sig, sip, uap);
1910 }
1911 }
1912
1913 static void
1914 get_progspec (char *retstr, int tmp_sz, char *name, int name_sz)
1915 {
1916 int procfd, count, i;
1917 *retstr = 0;
1918 tmp_sz--;
1919 *name = 0;
1920 name_sz--;
1921 procfd = CALL_UTIL (open)("/proc/self/cmdline", O_RDONLY);
1922 int getting_name = 0;
1923 if (procfd != -1)
1924 {
1925 count = CALL_UTIL (read)(procfd, retstr, tmp_sz);
1926 retstr[count] = '\0';
1927 for (i = 0; i < count; i++)
1928 {
1929 if (getting_name == 0)
1930 name[i] = retstr[i];
1931 if (retstr[i] == '\0')
1932 {
1933 getting_name = 1;
1934 if ((i + 1) < count)
1935 retstr[i] = ' ';
1936 }
1937 }
1938 CALL_UTIL (close)(procfd);
1939 }
1940 }
1941
1942 static void
1943 fs_warn ()
1944 {
1945 /* if data implies we don't care, just return */
1946 if (fs_matters == 0)
1947 return;
1948 }
1949
1950 static void
1951 close_handler (int sig, siginfo_t *sip, void *uap)
1952 {
1953 if (sip && sip->si_code == SI_USER)
1954 {
1955 TprintfT (DBG_LT0, "collector: close_handler: processing signal.\n");
1956 __collector_close_experiment ();
1957 }
1958 else if (old_close_handler.sa_handler != SIG_IGN)
1959 {
1960 TprintfT (DBG_LT0, "collector: close_handler forwarding signal.\n");
1961 (old_close_handler.sa_sigaction)(sig, sip, uap);
1962 }
1963 }
1964
1965 static void
1966 exit_handler (int sig, siginfo_t *sip, void *uap)
1967 {
1968 if (sip && sip->si_code == SI_USER)
1969 {
1970 TprintfT (DBG_LT0, "collector: exit_handler: processing signal.\n");
1971 CALL_UTIL (exit)(1);
1972 }
1973 else if (old_exit_handler.sa_handler != SIG_IGN)
1974 {
1975 TprintfT (DBG_LT0, "collector: exit_handler forwarding signal.\n");
1976 (old_exit_handler.sa_sigaction)(sig, sip, uap);
1977 }
1978 }
1979
1980 static int
1981 set_user_sig_action (char *par)
1982 {
1983 int sig = CALL_UTIL (strtol)(par, &par, 0);
1984 if (*par != '=')
1985 {
1986 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action bad separator: %s.\n", par);
1987 return COL_ERROR_ARGS;
1988 }
1989 par++;
1990 struct sigaction act;
1991 sigemptyset (&act.sa_mask);
1992 act.sa_flags = SA_RESTART | SA_SIGINFO;
1993 if (__collector_strcmp (par, "exit") == 0)
1994 {
1995 act.sa_sigaction = exit_handler;
1996 if (sigaction (sig, &act, &old_exit_handler) == -1)
1997 {
1998 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action failed: %d=%s.\n", sig, par);
1999 return COL_ERROR_ARGS;
2000 }
2001 }
2002 else if (__collector_strcmp (par, "close") == 0)
2003 {
2004 act.sa_sigaction = close_handler;
2005 if (sigaction (sig, &act, &old_close_handler) == -1)
2006 {
2007 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action failed: %d=%s.\n", sig, par);
2008 return COL_ERROR_ARGS;
2009 }
2010 }
2011 else
2012 {
2013 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action unknown action: %d=%s.\n", sig, par);
2014 return COL_ERROR_ARGS;
2015 }
2016 __collector_log_write ("<setting signal=\"%u\" action=\"%s\"/>\n", sig, par);
2017 return COL_ERROR_NONE;
2018 }
2019
2020 /*============================================================*/
2021 /*
2022 * Routines for handling the log file
2023 */
/* Handle of the log file: set by log_open, reset to NULL by log_close */
static struct DataHandle *log_hndl = NULL;
/* Set to 1 by log_open and back to 0 by log_close; log_pause and log_resume
   only act while it is nonzero */
static int log_initted = 0;
/* __collector_log_write writes to log_hndl only while this is > 0 */
static int log_enabled = 0;
2027
2028 static int
2029 log_open ()
2030 {
2031 log_hndl = __collector_create_handle (SP_LOG_FILE);
2032 if (log_hndl == NULL)
2033 return COL_ERROR_LOG_OPEN;
2034 log_initted = 1;
2035 log_enabled = 1;
2036 TprintfT (DBG_LT1, "log_open()\n");
2037 return COL_ERROR_NONE;
2038 }
2039
2040 static void
2041 log_header_write (sp_origin_t origin)
2042 {
2043 __collector_log_write ("<experiment %s=\"%d.%d\">\n",
2044 SP_JCMD_VERSION, SUNPERF_VERNUM, SUNPERF_VERNUM_MINOR);
2045 __collector_log_write ("<collector>%s</collector>\n", VERSION);
2046 __collector_log_write ("</experiment>\n");
2047
2048 struct utsname sysinfo;
2049 if (uname (&sysinfo) < 0)
2050 {
2051 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\"/></event>\n", SP_JCMD_CERROR, COL_ERROR_SYSINFO, errno);
2052 __collector_log_write ("<system>\n");
2053 }
2054 else
2055 {
2056 long page_size = CALL_UTIL (sysconf)(_SC_PAGESIZE);
2057 long npages = CALL_UTIL (sysconf)(_SC_PHYS_PAGES);
2058 __collector_log_write ("<system hostname=\"%s\" arch=\"%s\" os=\"%s %s\" pagesz=\"%ld\" npages=\"%ld\">\n",
2059 sysinfo.nodename, sysinfo.machine, sysinfo.sysname, sysinfo.release, page_size, npages);
2060 }
2061
2062 //YXXX Updating this section? Check similar cut/paste code in:
2063 // collctrl.cc::Coll_Ctrl()
2064 // collector.c::log_header_write()
2065 // cpu_frequency.h::get_cpu_frequency()
2066
2067 FILE *procf = CALL_UTIL (fopen)("/proc/cpuinfo", "r");
2068 if (procf != NULL)
2069 {
2070 char temp[1024];
2071 int cpu = -1;
2072 while (CALL_UTIL (fgets)(temp, sizeof (temp), procf) != NULL)
2073 {
2074 #if ARCH(Intel)
2075 if (__collector_strStartWith (temp, "processor") == 0)
2076 {
2077 char *val = CALL_UTIL (strchr)(temp, ':');
2078 cpu = val ? CALL_UTIL (atoi)(val + 1) : -1;
2079 }
2080 // else if ( __collector_strStartWith(temp, "model") == 0
2081 // && CALL_UTIL(strstr)(temp, "name") == 0) {
2082 // char *val = CALL_UTIL(strchr)( temp, ':' );
2083 // int model = val ? CALL_UTIL(atoi)( val + 1 ) : -1;
2084 // }
2085 // else if ( __collector_strStartWith(temp, "cpu family") == 0 ) {
2086 // char *val = CALL_UTIL(strchr)( temp, ':' );
2087 // int family = val ? CALL_UTIL(atoi)( val + 1 ) : -1;
2088 // }
2089 else if (__collector_strStartWith (temp, "cpu MHz") == 0)
2090 {
2091 char *val = CALL_UTIL (strchr)(temp, ':');
2092 int mhz = val ? CALL_UTIL (atoi)(val + 1) : 0; /* reading it as int is fine */
2093 (void) __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu, mhz);
2094 }
2095 #elif ARCH(SPARC)
2096 if (__collector_strStartWith (temp, "Cpu") == 0 &&
2097 temp[3] != '\0' &&
2098 __collector_strStartWith ((CALL_UTIL (strchr)(temp + 1, 'C')) ? CALL_UTIL (strchr)(temp + 1, 'C') : (temp + 4), "ClkTck") == 0)
2099 { // sparc-Linux
2100 char *val = CALL_UTIL (strchr)(temp, ':');
2101 int mhz = 0;
2102 if (val)
2103 {
2104 unsigned long long freq;
2105 (*__collector_sscanfp) (val + 2, "%llx", &freq);
2106 mhz = (unsigned int) (((double) freq) / 1000000.0 + 0.5);
2107 }
2108 char *numend = CALL_UTIL (strchr)(temp + 1, 'C') ? CALL_UTIL (strchr)(temp + 1, 'C') : (temp + 4);
2109 *numend = '\0';
2110 cpu = CALL_UTIL (atoi)(temp + 3);
2111 __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu, mhz);
2112 }
2113 #elif defined(__aarch64__)
2114 if (__collector_strStartWith (temp, "processor") == 0)
2115 {
2116 char *val = CALL_UTIL (strchr)(temp, ':');
2117 cpu = val ? CALL_UTIL (atoi)(val + 1) : -1;
2118 if (cpu != -1)
2119 {
2120 unsigned int mhz;
2121 asm volatile("mrs %0, cntfrq_el0" : "=r" (mhz));
2122 __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu,
2123 mhz / 1000000);
2124 }
2125 }
2126 #endif
2127 }
2128 CALL_UTIL (fclose)(procf);
2129 }
2130 __collector_log_write ("</system>\n");
2131 __collector_log_write ("<process pid=\"%d\"></process>\n", getpid ());
2132 __collector_log_write ("<process ppid=\"%d\"></process>\n", getppid ());
2133 __collector_log_write ("<process pgrp=\"%d\"></process>\n", getpgrp ());
2134 __collector_log_write ("<process sid=\"%d\"></process>\n", getsid (0));
2135
2136 /* XXX -- cwd commented out
2137 It would be nice to get the current directory for the experiment,
2138 but neither method below will work--the /proc method returns a
2139 0-length string, and using getcwd will break collect on /bin/sh
2140 (as cuserid does) because of /bin/sh's private malloc
2141 omazur: readlink seems to work on Linux
2142 */
2143 /* write the current directory */
2144 char cwd[MAXPATHLEN + 1];
2145 int i = readlink ("/proc/self/cwd", cwd, sizeof (cwd));
2146 if (i >= 0)
2147 {
2148 cwd[i < sizeof (cwd) ? i : sizeof (cwd) - 1] = 0;
2149 (void) __collector_log_write ("<process cwd=\"%s\"></process>\n", cwd);
2150 }
2151 (void) __collector_log_write ("<process wsize=\"%d\"></process>\n", (int) (8 * sizeof (void *)));
2152
2153 ucontext_t ucp;
2154 ucp.uc_stack.ss_sp = NULL;
2155 ucp.uc_stack.ss_size = 0;
2156 if (getcontext (&ucp) == 0)
2157 {
2158 (void) __collector_log_write ("<process stackbase=\"0x%lx\"></process>\n",
2159 (unsigned long) ucp.uc_stack.ss_sp + ucp.uc_stack.ss_size);
2160 }
2161
2162 (void) __collector_log_write ("<process>%s</process>\n",
2163 origin == SP_ORIGIN_FORK ? "(fork)" : exp_progspec);
2164 __collector_libthread_T1 = 0;
2165 }
2166
2167 static void
2168 log_pause (void)
2169 {
2170 if (log_initted)
2171 log_enabled = 0;
2172 }
2173
2174 static void
2175 log_resume (void)
2176 {
2177 if (log_initted)
2178 log_enabled = 1;
2179 }
2180
2181 /* __collector_log_write -- write a line to the log file
2182 * return value:
2183 * 0 if OK
2184 * 1 if error (in creating or extending the log file)
2185 */
2186 int
2187 __collector_log_write (char *format, ...)
2188 {
2189 char buf[4096];
2190 va_list va;
2191 int rc = 0;
2192 static size_t loglen = 0;
2193
2194 va_start (va, format);
2195 char *bufptr = buf;
2196 int sz = __collector_xml_vsnprintf (bufptr, sizeof (buf), format, va);
2197 int allocated_sz = 0;
2198 va_end (va);
2199 if (sz >= sizeof (buf))
2200 {
2201 /* Allocate a new buffer.
2202 * We need this buffer only temporarily and locally.
2203 * But don't use the thread stack
2204 * since it already has buf
2205 * and is unlikely to have additonal room for something even larger than buf.
2206 */
2207 sz += 1; /* add the terminating null byte */
2208 bufptr = (char*) __collector_allocCSize (__collector_heap, sz, 0);
2209 if (bufptr)
2210 {
2211 allocated_sz = sz;
2212 va_start (va, format);
2213 sz = __collector_xml_vsnprintf (bufptr, sz, format, va);
2214 va_end (va);
2215 }
2216 }
2217 int newlen = CALL_UTIL (strlen)(bufptr);
2218 if (sz != newlen)
2219 // no need to free bufptr if we're going to abort anyhow
2220 abort ();
2221 bufptr[newlen + 1] = 0;
2222 loglen = loglen + newlen;
2223 TprintfT (DBG_LT2, "__collector_log_write len=%ld, loglen=%ld %s",
2224 (long) newlen, (long) loglen, bufptr);
2225 if (log_enabled <= 0)
2226 {
2227 #if 0
2228 /* XXX suppress log_write messages with no log file open
2229 * this is reached from SimApp dealing with the clock frequency, which it should
2230 * not be doing. For now, don't write a message.
2231 */
2232 CALL_UTIL (fprintf)(stderr, "__collector_log_write COL_ERROR_LOG_OPEN: %s", buf);
2233 #endif
2234 }
2235 else
2236 rc = __collector_write_string (log_hndl, bufptr, sz);
2237 if (allocated_sz)
2238 __collector_freeCSize (__collector_heap, (void *) bufptr, allocated_sz);
2239 return rc;
2240 }
2241
2242 static void
2243 log_close ()
2244 {
2245 log_enabled = 0;
2246 log_initted = 0;
2247 __collector_delete_handle (log_hndl);
2248 log_hndl = NULL;
2249 }
2250
2251 /*============================================================*/
2252 /*
2253 * Routines for handling the overview file
2254 */
2255 static void
2256 ovw_open ()
2257 {
2258 CALL_UTIL (strlcpy)(ovw_name, __collector_exp_dir_name, sizeof (ovw_name));
2259 CALL_UTIL (strlcat)(ovw_name, "/", sizeof (ovw_name));
2260 CALL_UTIL (strlcat)(ovw_name, SP_OVERVIEW_FILE, sizeof (ovw_name));
2261 int fd = CALL_UTIL (open)(ovw_name, O_WRONLY | O_CREAT | O_TRUNC,
2262 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
2263 if (fd < 0)
2264 {
2265 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2266 SP_JCMD_CERROR, COL_ERROR_OVWOPEN, errno, ovw_name);
2267 return;
2268 }
2269 CALL_UTIL (close)(fd);
2270 sample_mode = 1;
2271 }
2272
/* Convert the seconds/microseconds pair in *TVAL into the
   seconds/nanoseconds pair stored in *VALUE.  */
static __inline__ void
timeval_to_timespec(struct timeval *tval, struct timespec *value)
{
  /* whole seconds carry over unchanged; microseconds scale by 1000 */
  value->tv_sec = tval->tv_sec;
  value->tv_nsec = 1000 * tval->tv_usec;
}
2279
/*
 * Resource usage.  /proc/<pid>/usage /proc/<pid>/lwp/<lwpid>/lwpusage
 *
 * ovw_write() fills one of these from getrusage() and writes it to the
 * overview file as raw bytes, so the order and size of the fields are part
 * of the file format.  Fields that ovw_write() does not set stay zero.
 */
typedef struct prusage
{
  id_t pr_lwpid;            /* lwp id.  0: process or defunct */
  int pr_count;             /* number of contributing lwps */
  timestruc_t pr_tstamp;    /* current time stamp */
  timestruc_t pr_create;    /* process/lwp creation time stamp */
  timestruc_t pr_term;      /* process/lwp termination time stamp */
  timestruc_t pr_rtime;     /* total lwp real (elapsed) time */
  timestruc_t pr_utime;     /* user level cpu time */
  timestruc_t pr_stime;     /* system call cpu time */
  timestruc_t pr_ttime;     /* other system trap cpu time */
  timestruc_t pr_tftime;    /* text page fault sleep time */
  timestruc_t pr_dftime;    /* data page fault sleep time */
  timestruc_t pr_kftime;    /* kernel page fault sleep time */
  timestruc_t pr_ltime;     /* user lock wait sleep time */
  timestruc_t pr_slptime;   /* all other sleep time */
  timestruc_t pr_wtime;     /* wait-cpu (latency) time */
  timestruc_t pr_stoptime;  /* stopped time */
  timestruc_t filltime[6];  /* filler for future expansion */
  ulong_t pr_minf;          /* minor page faults */
  ulong_t pr_majf;          /* major page faults */
  ulong_t pr_nswap;         /* swaps */
  ulong_t pr_inblk;         /* input blocks */
  ulong_t pr_oublk;         /* output blocks */
  ulong_t pr_msnd;          /* messages sent */
  ulong_t pr_mrcv;          /* messages received */
  ulong_t pr_sigs;          /* signals received */
  ulong_t pr_vctx;          /* voluntary context switches */
  ulong_t pr_ictx;          /* involuntary context switches */
  ulong_t pr_sysc;          /* system calls */
  ulong_t pr_ioch;          /* chars read and written */
  ulong_t filler[10];       /* filler for future expansion */
} prusage_t;
2316
2317 static hrtime_t starttime = 0;
2318
2319 static hrtime_t
2320 ovw_write ()
2321 {
2322 if (sample_mode == 0)
2323 return 0;
2324 int fd;
2325 int res;
2326 struct prusage usage;
2327 struct rusage rusage;
2328 hrtime_t hrt, delta;
2329
2330 /* Fill in the prusage structure with info from getrusage() */
2331 hrt = collector_interface.getHiResTime ();
2332 if (starttime == 0)
2333 starttime = hrt;
2334 res = getrusage (RUSAGE_SELF, &rusage);
2335 if (res != 0)
2336 {
2337 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2338 SP_JCMD_CERROR, COL_ERROR_OVWREAD, errno, ovw_name);
2339 return ( hrt);
2340 }
2341
2342 CALL_UTIL (memset)(&usage, 0, sizeof (struct prusage));
2343 usage.pr_lwpid = getpid ();
2344 usage.pr_count = 1;
2345 usage.pr_tstamp.tv_sec = hrt / NANOSEC;
2346 usage.pr_tstamp.tv_nsec = hrt % NANOSEC;
2347 usage.pr_create.tv_sec = starttime / NANOSEC;
2348 usage.pr_create.tv_nsec = starttime % NANOSEC;
2349 delta = hrt - starttime;
2350 usage.pr_rtime.tv_sec = delta / NANOSEC;
2351 usage.pr_rtime.tv_nsec = delta % NANOSEC;
2352 timeval_to_timespec (&rusage.ru_utime, &usage.pr_utime);
2353 timeval_to_timespec (&rusage.ru_stime, &usage.pr_stime);
2354
2355 /* make sure that user- and system cpu time are not negative */
2356 if (ts2hrt (usage.pr_utime) < 0)
2357 {
2358 usage.pr_utime.tv_sec = 0;
2359 usage.pr_utime.tv_nsec = 0;
2360 }
2361 if (ts2hrt (usage.pr_stime) < 0)
2362 {
2363 usage.pr_stime.tv_sec = 0;
2364 usage.pr_stime.tv_nsec = 0;
2365 }
2366
2367 /* fill in other fields */
2368 usage.pr_minf = (ulong_t) rusage.ru_minflt;
2369 usage.pr_majf = (ulong_t) rusage.ru_majflt;
2370 usage.pr_nswap = (ulong_t) rusage.ru_nswap;
2371 usage.pr_inblk = (ulong_t) rusage.ru_inblock;
2372 usage.pr_oublk = (ulong_t) rusage.ru_oublock;
2373 usage.pr_msnd = (ulong_t) rusage.ru_msgsnd;
2374 usage.pr_mrcv = (ulong_t) rusage.ru_msgrcv;
2375 usage.pr_sigs = (ulong_t) rusage.ru_nsignals;
2376 usage.pr_vctx = (ulong_t) rusage.ru_nvcsw;
2377 usage.pr_ictx = (ulong_t) rusage.ru_nivcsw;
2378
2379 fd = CALL_UTIL (open)(ovw_name, O_WRONLY | O_APPEND);
2380 if (fd < 0)
2381 {
2382 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2383 SP_JCMD_CERROR, COL_ERROR_OVWOPEN, errno, ovw_name);
2384 return ( ts2hrt (usage.pr_tstamp));
2385 }
2386
2387 CALL_UTIL (lseek)(fd, 0, SEEK_END);
2388 res = CALL_UTIL (write)(fd, &usage, sizeof (usage));
2389 CALL_UTIL (close)(fd);
2390 if (res != sizeof (usage))
2391 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2392 SP_JCMD_CERROR, COL_ERROR_OVWWRITE, errno, ovw_name);
2393 return (hrt);
2394 }
2395
2396 void
2397 __collector_dlog (int tflag, int level, char *format, ...)
2398 {
2399 if ((tflag & SP_DUMP_FLAG) == 0)
2400 {
2401 if (level > __collector_tracelevel)
2402 return;
2403 }
2404 else if ((tflag & collector_debug_opt) == 0)
2405 return;
2406
2407 /* In most cases this allocation should suffice */
2408 int bufsz = CALL_UTIL (strlen)(format) + 128;
2409 char *buf = (char*) alloca (bufsz);
2410 char *p = buf;
2411 int left = bufsz;
2412 if ((tflag & SP_DUMP_NOHEADER) == 0)
2413 {
2414 p += CALL_UTIL (snprintf)(p, left, "P%d,L%02u,t%02lu",
2415 (int) getpid (),
2416 (unsigned int) __collector_lwp_self (),
2417 __collector_no_threads ? 0 : __collector_thr_self ());
2418 left = bufsz - (p - buf);
2419 if (tflag)
2420 {
2421 hrtime_t ts = GETRELTIME ();
2422 p += CALL_UTIL (snprintf)(p, left, " %u.%09u ", (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC));
2423 }
2424 else
2425 p += CALL_UTIL (snprintf)(p, left, ": ");
2426 left = bufsz - (p - buf);
2427 }
2428
2429 va_list va;
2430 va_start (va, format);
2431 int nbufsz = CALL_UTIL (vsnprintf)(p, left, format, va);
2432 va_end (va);
2433
2434 if (nbufsz >= left)
2435 {
2436 /* Allocate a new buffer */
2437 nbufsz += 1; /* add the terminating null byte */
2438 char *nbuf = (char*) alloca (nbufsz + (p - buf));
2439 __collector_memcpy (nbuf, buf, p - buf);
2440 p = nbuf + (p - buf);
2441
2442 va_start (va, format);
2443 nbufsz = CALL_UTIL (vsnprintf)(p, nbufsz, format, va);
2444 va_end (va);
2445 buf = nbuf;
2446 }
2447 CALL_UTIL (write)(2, buf, CALL_UTIL (strlen)(buf));
2448 }
2449
2450 /*============================================================*/
2451 #if ! ARCH(SPARC) /* !sparc-Linux */
2452 /*
2453 * Routines for handling _exit and _Exit
2454 */
2455 /*------------------------------------------------------------- _exit */
2456
/* Call through the saved pointer __real_<x>, cast to a function returning
   int */
#define CALL_REAL(x) (*(int(*)())__real_##x)
/* True while the saved pointer __real_<x> is still NULL */
#define NULL_PTR(x) ( __real_##x == NULL )

static void *__real__exit = NULL; /* libc only: _exit */
static void *__real__Exit = NULL; /* libc only: _Exit */
/* _exit and _Exit are weak aliases of the interposing functions
   __collector_exit and __collector_Exit */
void _exit () __attribute__ ((weak, alias ("__collector_exit")));
void _Exit () __attribute__ ((weak, alias ("__collector_Exit")));
2464
2465 void
2466 __collector_exit (int status)
2467 {
2468 if (NULL_PTR (_exit))
2469 {
2470 __real__exit = dlsym (RTLD_NEXT, "_exit");
2471 if (__real__exit == NULL)
2472 __real__exit = dlsym (RTLD_DEFAULT, "_exit");
2473 }
2474 TprintfT (DBG_LT1, "__collector_exit() interposing @0x%p __real__exit\n", __real__exit);
2475 __collector_terminate_expt ();
2476 TprintfT (DBG_LT1, "__collector_exit(): experiment terminated\n");
2477 CALL_REAL (_exit)(status); // this will exit the process
2478 }
2479
2480 void
2481 __collector_Exit (int status)
2482 {
2483 if (NULL_PTR (_Exit))
2484 {
2485 __real__Exit = dlsym (RTLD_NEXT, "_Exit");
2486 if (__real__Exit == NULL)
2487 __real__Exit = dlsym (RTLD_DEFAULT, "_exit");
2488 }
2489 TprintfT (DBG_LT1, "__collector_Exit() interposing @0x%p __real__Exit\n", __real__Exit);
2490 __collector_terminate_expt ();
2491 TprintfT (DBG_LT1, "__collector_Exit(): experiment terminated\n");
2492 CALL_REAL (_Exit)(status); // this will exit the process
2493 }
2494 #endif /* !sparc-Linux */