regen config
[binutils-gdb.git] / gprofng / libcollector / collector.c
1 /* Copyright (C) 2021-2023 Free Software Foundation, Inc.
2 Contributed by Oracle.
3
4 This file is part of GNU Binutils.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
20
21 #include "config.h"
22 #include <alloca.h>
23 #include <errno.h>
24 #include <signal.h>
25 #include <ucontext.h>
26 #include <stdlib.h> /* exit() */
27 #include <sys/param.h>
28 #include <sys/utsname.h> /* struct utsname */
29 #include <sys/resource.h>
30 #include <sys/syscall.h> /* system call fork() */
31
32 #include "gp-defs.h"
33 #include "collector.h"
34 #include "descendants.h"
35 #include "gp-experiment.h"
36 #include "memmgr.h"
37 #include "cc_libcollector.h"
38 #include "tsd.h"
39
40 typedef unsigned long ulong_t;
41
42 extern char **environ;
43 extern void __collector_close_experiment ();
44 extern int __collector_set_size_limit (char *par);
45
46 /* ------- internal function prototypes ---------- */
47 CollectorModule __collector_register_module (ModuleInterface *modint);
48 static void write_sample (char *name);
49 static const char *__collector_get_params ();
50 static const char *__collector_get_expdir ();
51 static FrameInfo __collector_getUserCtx (CollectorModule modl, HiResTime ts, int mode, void *arg);
52 static FrameInfo __collector_getUID1 (CM_Array *arg);
53 static int __collector_writeMetaData (CollectorModule modl, char *format, ...);
54 static int __collector_writeDataRecord (CollectorModule modl, struct Common_packet *pckt);
55 static int __collector_writeDataPacket (CollectorModule modl, struct CM_Packet *pckt);
56 static void *allocCSize (struct Heap*, unsigned, int);
57 static void freeCSize (struct Heap*, void*, unsigned);
58 static void *allocVSize (struct Heap*, unsigned);
59 static void *reallocVSize (struct Heap*, void*, unsigned);
60
61 static int collector_create_expr_dir (const char *new_exp_name);
62 static int collector_create_expr_dir_lineage (const char *parent_exp_name);
63 static int collector_exp_dir_append_x (int linenum, const char *parent_exp_name);
64 static int collector_tail_init (const char *parent_exp_name);
65 static int log_open ();
66 static void log_header_write (sp_origin_t origin);
67 static void log_pause ();
68 static void log_resume ();
69 static void fs_warn ();
70 static void log_close ();
71 static void get_progspec (char *cmdline, int tmp_sz, char *progname, int sz);
72 static void sample_handler (int, siginfo_t*, void*);
73 static int sample_set_interval (char *);
74 static int set_duration (char *);
75 static int sample_set_user_sig (char *);
76 static void pause_handler (int, siginfo_t*, void*);
77 static int pause_set_user_sig (char *);
78 static int set_user_sig_action (char*);
79 static void ovw_open ();
80 static hrtime_t ovw_write ();
81
82 /* ------- global data controlling the collector's behavior -------- */
83
84 static CollectorInterface collector_interface ={
85 __collector_register_module, /* registerModule */
86 __collector_get_params, /* getParams */
87 __collector_get_expdir, /* getExpDir */
88 __collector_log_write, /* writeLog */
89 __collector_getUserCtx, /* getFrameInfo */
90 __collector_getUID1, /* getUID */
91 __collector_getUID, /* getUID2 */
92 __collector_getStackTrace, /* getStackTrace */
93 __collector_writeMetaData, /* writeMetaData */
94 __collector_writeDataRecord, /* writeDataRecord */
95 __collector_writeDataPacket, /* writeDataPacket */
96 write_sample, /* write_sample */
97 get_progspec, /* get_progspec */
98 __collector_open_experiment, /* open_experiment */
99 NULL, /* getHiResTime */
100 __collector_newHeap, /* newHeap */
101 __collector_deleteHeap, /* deleteHeap */
102 allocCSize, /* allocCSize */
103 freeCSize, /* freeCSize */
104 allocVSize, /* allocVSize */
105 reallocVSize, /* reallocVSize */
106 __collector_tsd_create_key, /* createKey */
107 __collector_tsd_get_by_key, /* getKey */
108 __collector_dlog /* writeDebugInfo */
109 };
110
111 #define MAX_MODULES 32 /* capacity of the module registry arrays below */
112 static ModuleInterface *modules[MAX_MODULES]; /* registered module interfaces, indexed by the id returned from __collector_register_module */
113 static int modules_st[MAX_MODULES]; /* per-module status: 0 at registration, then the openExperiment result, or -1 if handle creation failed */
114 static void *modules_hndl[MAX_MODULES]; /* per-module handle obtained from __collector_create_handle */
115 static volatile int nmodules = 0; /* number of modules registered so far */
116
117 /* flag set non-zero, if data collected implies a filesystem warning is appropriate */
118 static int fs_matters = 0;
119 static const char *collector_params = NULL;
120 static const char *project_home = NULL;
121 Heap *__collector_heap = NULL;
122 int __collector_no_threads;
123 int __collector_libthread_T1 = -1;
124
125 static volatile int collector_paused = 0;
126
127 int __collector_tracelevel = -1;
128 static int collector_debug_opt = 0;
129
130 hrtime_t __collector_next_sample = 0;
131 int __collector_sample_period = 0; /* if non-zero, periodic sampling is enabled */
132
133 hrtime_t __collector_delay_start = 0; /* if non-zero, delay before starting data */
134 hrtime_t __collector_terminate_time = 0; /* if non-zero, fixed duration run */
135
136 static collector_mutex_t __collector_glob_lock = COLLECTOR_MUTEX_INITIALIZER;
137 static collector_mutex_t __collector_open_guard = COLLECTOR_MUTEX_INITIALIZER;
138 static collector_mutex_t __collector_close_guard = COLLECTOR_MUTEX_INITIALIZER;
139 static collector_mutex_t __collector_sample_guard = COLLECTOR_MUTEX_INITIALIZER;
140 static collector_mutex_t __collector_suspend_guard = COLLECTOR_MUTEX_INITIALIZER;
141 static collector_mutex_t __collector_resume_guard = COLLECTOR_MUTEX_INITIALIZER;
142 char __collector_exp_dir_name[MAXPATHLEN + 1] = ""; /* experiment directory */
143 int __collector_size_limit = 0;
144
145 static char *archive_mode = NULL;
146
147 volatile sp_state_t __collector_expstate = EXP_INIT;
148 static int exp_origin = SP_ORIGIN_LIBCOL_INIT;
149 static int exp_open = 0;
150 int __collector_exp_active = 0;
151 static int paused_when_suspended = 0;
152 static int exp_initted = 0;
153 static char exp_progspec[_POSIX_ARG_MAX + 1]; /* program cmdline. includes args */
154 static char exp_progname[_POSIX_ARG_MAX + 1]; /* program name == argv[0] */
155
156 hrtime_t __collector_start_time = 0;
157 static time_t start_sec_time = 0;
158
159 /* Sample related data */
160 static int sample_installed = 0; /* 1 if the sample signal handler installed */
161 static int sample_mode = 0; /* dynamically turns sample record writing on/off */
162 static int sample_number = 0; /* index of the current sample record */
163 static struct sigaction old_sample_handler;
164 int __collector_sample_sig = -1; /* user-specified sample signal */
165 int __collector_sample_sig_warn = 0; /* non-zero if warning already given */
166
167 /* Pause/resume related data */
168 static struct sigaction old_pause_handler;
169 int __collector_pause_sig = -1; /* user-specified pause signal */
170 int __collector_pause_sig_warn = 0; /* non-zero if warning already given */
171
172 static struct sigaction old_close_handler;
173 static struct sigaction old_exit_handler;
174
175 /* Experiment files */
176 static char ovw_name[MAXPATHLEN]; /* Overview data file name */
177
178 /* macro to convert a timestruc to hrtime_t */
179 #define ts2hrt(x) ((hrtime_t)(x).tv_sec*NANOSEC + (hrtime_t)(x).tv_nsec)
180
/* Pick up the debug trace settings from the environment (DEBUG builds only).
   SP_COLLECTOR_TRACELEVEL, if set, becomes __collector_tracelevel;
   SP_COLLECTOR_DEBUG, if set, becomes collector_debug_opt with the
   SP_DUMP_TIME and SP_DUMP_FLAG bits masked off.  When DEBUG is not
   enabled the function body is empty.  */
static void
init_tracelevel ()
{
#if DEBUG
  char *level_str = CALL_UTIL (getenv)("SP_COLLECTOR_TRACELEVEL");
  if (level_str != NULL)
    __collector_tracelevel = CALL_UTIL (atoi)(level_str);
  TprintfT (DBG_LT0, "collector: SP_COLLECTOR_TRACELEVEL=%d\n", __collector_tracelevel);

  char *debug_str = CALL_UTIL (getenv)("SP_COLLECTOR_DEBUG");
  if (debug_str != NULL)
    {
      int requested = CALL_UTIL (atoi)(debug_str);
      collector_debug_opt = requested & ~(SP_DUMP_TIME | SP_DUMP_FLAG);
    }
#endif
}
194
195 static CollectorInterface *
196 get_collector_interface ()
197 {
198 if (collector_interface.getHiResTime == NULL)
199 collector_interface.getHiResTime = __collector_gethrtime;
200 return &collector_interface;
201 }
202
203 /*
204 * __collector_module_init is an alternate method to initialize
205 * dynamic collector modules (er_heap, er_sync, er_iotrace, er_mpi, tha).
206 * Every module that needs to register itself with libcollector
207 * before the experiment is open implements its own global
208 * __collector_module_init and makes sure the next one is called.
209 */
210 static void
211 collector_module_init (CollectorInterface *col_intf)
212 {
213 int nmodules = 0;
214
215 ModuleInitFunc next_init = (ModuleInitFunc) dlsym (RTLD_DEFAULT, "__collector_module_init");
216 if (next_init != NULL)
217 {
218 nmodules++;
219 next_init (col_intf);
220 }
221 TprintfT (DBG_LT1, "collector_module_init: %d modules\n", nmodules);
222 }
223
224 /* Routines concerned with general experiment start and stop */
225
226 /* initialization -- init section routine -- called when libcollector loaded */
227 static void collector_init () __attribute__ ((constructor));
228
229 static void
230 collector_init ()
231 {
232 if (__collector_util_init () != 0)
233 /* we can't do anything without various utility functions */
234 abort ();
235 init_tracelevel ();
236
237 /*
238 * Unconditionally install the SIGPROF handler
239 * to process signals originated in dtracelets.
240 */
241 __collector_sigprof_install ();
242
243 /* Initialize all preloaded modules */
244 collector_module_init (get_collector_interface ());
245
246 /* determine experiment name */
247 char *exp = CALL_UTIL (getenv)("SP_COLLECTOR_EXPNAME");
248 if ((exp == NULL) || (CALL_UTIL (strlen)(exp) == 0))
249 {
250 TprintfT (DBG_LT0, "collector_init: SP_COLLECTOR_EXPNAME undefined - no experiment to start\n");
251 /* not set -- no experiment to run */
252 return;
253 }
254 else
255 TprintfT (DBG_LT1, "collector_init: found SP_COLLECTOR_EXPNAME = %s\n", exp);
256
257 /* determine the data descriptor for the experiment */
258 char *params = CALL_UTIL (getenv)("SP_COLLECTOR_PARAMS");
259 if (params == NULL)
260 {
261 TprintfT (0, "collector_init: SP_COLLECTOR_PARAMS undefined - no experiment to start\n");
262 return;
263 }
264
265 /* now do the real open of the experiment */
266 if (__collector_open_experiment (exp, params, SP_ORIGIN_LIBCOL_INIT))
267 {
268 TprintfT (0, "collector_init: __collector_open_experiment failed\n");
269 /* experiment open failed, close it */
270 __collector_close_experiment ();
271 return;
272 }
273 return;
274 }
275
276 CollectorModule
277 __collector_register_module (ModuleInterface *modint)
278 {
279 TprintfT (DBG_LT1, "collector: module %s calls for registration.\n",
280 modint->description == NULL ? "(null)" : modint->description);
281 if (modint == NULL)
282 return COLLECTOR_MODULE_ERR;
283 if (nmodules >= MAX_MODULES)
284 return COLLECTOR_MODULE_ERR;
285 if (modint->initInterface &&
286 modint->initInterface (get_collector_interface ()))
287 return COLLECTOR_MODULE_ERR;
288 int idx = nmodules++;
289 modules[idx] = modint;
290 modules_st[idx] = 0;
291
292 if (exp_open && modint->openExperiment)
293 {
294 modules_st[idx] = modint->openExperiment (__collector_exp_dir_name);
295 if (modules_st[idx] == COL_ERROR_NONE && modules[idx]->description != NULL)
296 {
297 modules_hndl[idx] = __collector_create_handle (modules[idx]->description);
298 if (modules_hndl[idx] == NULL)
299 modules_st[idx] = -1;
300 }
301 }
302 if (__collector_exp_active && collector_paused == 0 &&
303 modint->startDataCollection && modules_st[idx] == 0)
304 modint->startDataCollection ();
305 TprintfT (DBG_LT1, "collector: module %s (%d) registered.\n",
306 modint->description == NULL ? "(null)" : modint->description, idx);
307 return (CollectorModule) idx;
308 }
309
310 static const char *
311 __collector_get_params ()
312 {
313 return collector_params;
314 }
315
316 static const char *
317 __collector_get_expdir ()
318 {
319 return __collector_exp_dir_name;
320 }
321
322 static FrameInfo
323 __collector_getUserCtx (CollectorModule modl, HiResTime ts, int mode, void *arg)
324 {
325 return __collector_get_frame_info (ts, mode, arg);
326 }
327
328 static FrameInfo
329 __collector_getUID1 (CM_Array *arg)
330 {
331 return __collector_getUID (arg, (FrameInfo) 0);
332 }
333
334 static int
335 __collector_writeMetaData (CollectorModule modl, char *format, ...)
336 {
337 if (modl < 0 || modl >= nmodules || modules[modl]->description == NULL)
338 {
339 TprintfT (DBG_LT0, "__collector_writeMetaData(): bad module: %d\n", modl);
340 return 1;
341 }
342 char fname[MAXPATHLEN + 1];
343 CALL_UTIL (strlcpy)(fname, __collector_exp_dir_name, sizeof (fname));
344 CALL_UTIL (strlcat)(fname, "/metadata.", sizeof (fname));
345 CALL_UTIL (strlcat)(fname, modules[modl]->description, sizeof (fname));
346 CALL_UTIL (strlcat)(fname, ".xml", sizeof (fname));
347 int fd = CALL_UTIL (open)(fname, O_CREAT | O_WRONLY | O_APPEND,
348 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
349 if (fd < 0)
350 {
351 TprintfT (DBG_LT0, "__collector_writeMetaData(): can't open file: %s\n", fname);
352 return 1;
353 }
354 char buf[1024];
355 char *bufptr = buf;
356 va_list va;
357 va_start (va, format);
358 int sz = __collector_xml_vsnprintf (bufptr, sizeof (buf), format, va);
359 va_end (va);
360
361 if (sz >= sizeof (buf))
362 {
363 /* Allocate a new buffer */
364 sz += 1; /* add the terminating null byte */
365 bufptr = (char*) alloca (sz);
366
367 va_start (va, format);
368 sz = __collector_xml_vsnprintf (bufptr, sz, format, va);
369 va_end (va);
370 }
371 CALL_UTIL (write)(fd, bufptr, sz);
372 CALL_UTIL (close)(fd);
373 return COL_ERROR_NONE;
374 }
375
376 /* check that the header fields are filled-in, and then call __collector_writeDataPacket */
377 static int
378 __collector_writeDataRecord (CollectorModule modl, struct Common_packet *pckt)
379 {
380 return __collector_write_record (modules_hndl[modl], pckt);
381 }
382
383 static int
384 __collector_writeDataPacket (CollectorModule modl, struct CM_Packet *pckt)
385 {
386 return __collector_write_packet (modules_hndl[modl], pckt);
387 }
388
389 static void *
390 allocCSize (struct Heap *heap, unsigned sz, int log)
391 {
392 return __collector_allocCSize (heap ? heap : __collector_heap, sz, log);
393 }
394
395 static void
396 freeCSize (struct Heap *heap, void *ptr, unsigned sz)
397 {
398 __collector_freeCSize (heap ? heap : __collector_heap, ptr, sz);
399 }
400
401 static void *
402 allocVSize (struct Heap *heap, unsigned sz)
403 {
404 return __collector_allocVSize (heap ? heap : __collector_heap, sz);
405 }
406
407 static void *
408 reallocVSize (struct Heap *heap, void *ptr, unsigned sz)
409 {
410 return __collector_reallocVSize (heap ? heap : __collector_heap, ptr, sz);
411 }
412
413 static time_t
414 get_gm_time (struct tm *tp)
415 {
416 /*
417 Note that glibc contains a function of the same purpose named `timegm'.
418 But obviously, it is not universally available.
419
420 Some implementations of mktime return -1 for the nonexistent localtime hour
421 at the beginning of DST. In this event, use 'mktime(tm - 1hr) + 3600'.
422 nonexistent
423 tm_isdst is set to 0 to force mktime to introduce a consistent offset
424 (the non DST offset) since tm and tm+o might be on opposite sides of a DST change.
425
426 Schematically:
427 mktime(tm) --> t+o
428 gmtime_r(t+o) --> tm+o
429 mktime(tm+o) --> t+2o
430 t = t+o - (t+2o - t+o)
431 */
432 struct tm stm;
433 time_t tl = CALL_UTIL (mktime)(tp);
434 if (tl == -1)
435 {
436 stm = *tp;
437 stm.tm_hour--;
438 tl = CALL_UTIL (mktime)(&stm);
439 if (tl == -1)
440 return -1;
441 tl += 3600;
442 }
443
444 (void) (CALL_UTIL (gmtime_r)(&tl, &stm));
445 stm.tm_isdst = 0;
446 time_t tb = CALL_UTIL (mktime)(&stm);
447 if (tb == -1)
448 {
449 stm.tm_hour--;
450 tb = CALL_UTIL (mktime)(&stm);
451 if (tb == -1)
452 return -1;
453 tb += 3600;
454 }
455 return (tl - (tb - tl));
456 }
457
458 static void
459 log_write_event_run ()
460 {
461 /* get the gm and local time */
462 struct tm start_stm;
463 CALL_UTIL (gmtime_r)(&start_sec_time, &start_stm);
464 time_t start_gm_time = get_gm_time (&start_stm);
465 time_t lcl_time = CALL_UTIL (mktime)(&start_stm);
466 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" time=\"%lld\" tm_zone=\"%lld\"/>\n",
467 SP_JCMD_RUN,
468 (unsigned) (__collector_start_time / NANOSEC),
469 (unsigned) (__collector_start_time % NANOSEC),
470 (long long) start_gm_time,
471 (long long) (lcl_time - start_gm_time));
472 }
473
474 static void *
475 m_dlopen (const char *filename, int flag)
476 {
477 void *p = dlopen (filename, flag);
478 TprintfT (DBG_LT1, "collector.c: dlopen(%s, %d) returns %p\n", filename, flag, p);
479 return p;
480 }
481 /* real routine to open an experiment
482 * called by collector_init from libcollector init section
483 * called by __collector_start_experiment when a child is forked */
484 int
485 __collector_open_experiment (const char *exp, const char *params, sp_origin_t origin)
486 {
487 char *s;
488 char *buf = NULL;
489 char *duration_string = NULL;
490 int err;
491 int is_founder = 1;
492 int record_this_experiment = 1;
493 int seen_F_flag = 0;
494 static char buffer[32];
495 if (exp_open)
496 {
497 /* experiment already opened */
498 TprintfT (0, "collector: ERROR: Attempt to open opened experiment\n");
499 return COL_ERROR_EXPOPEN;
500 }
501 __collector_start_time = collector_interface.getHiResTime ();
502 TprintfT (DBG_LT1, "\n\t\t__collector_open_experiment(SP_COLLECTOR_EXPNAME=%s, params=%s, origin=%d); setting start_time\n",
503 exp, params, origin);
504 if (environ)
505 __collector_env_printall ("__collector_open_experiment", environ);
506 else
507 TprintfT (DBG_LT1, "collector_open_experiment found environ == NULL)\n");
508
509 /*
510 * Recheck sigprof handler
511 * XXXX Bug 18177509 - additional sigprof signal kills target program
512 */
513 __collector_sigprof_install ();
514 exp_origin = origin;
515 collector_params = params;
516
517 /* Determine which of the three possible threading models:
518 * singlethreaded
519 * multi-LWP (no threads)
520 * multithreaded
521 * is the one the target is actually using.
522 *
523 * we really only need to distinguish between first two
524 * and the third. The thr_main() trick does exactly that.
525 * is the one the target is actually using.
526 *
527 * __collector_no_threads applies to all signal handlers,
528 * and must be set before signal handlers are installed.
529 */
530 __collector_no_threads = 0;
531 __collector_exp_dir_name[0] = 0;
532 sample_mode = 0;
533 sample_number = 0;
534
535 /* create global heap */
536 if (__collector_heap == NULL)
537 {
538 __collector_heap = __collector_newHeap ();
539 if (__collector_heap == NULL)
540 {
541 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment COLERROR_NOZMEM 1\n");
542 return COL_ERROR_NOZMEM;
543 }
544 }
545 //check whether is origin is collect
546 char * envar = CALL_UTIL (getenv)("SP_COLLECTOR_ORIGIN_COLLECT");
547 TprintfT (DBG_LT1, "__collector_open_experiment SP_COLLECTOR_ORIGIN_COLLECT = '%s'\n",
548 (envar == NULL) ? "NULL" : envar);
549 if (envar)
550 exp_origin = SP_ORIGIN_COLLECT;
551
552 //check if this is the founder process
553 is_founder = getpid ();
554 if (origin != SP_ORIGIN_DBX_ATTACH)
555 {
556 envar = CALL_UTIL (getenv)("SP_COLLECTOR_FOUNDER");
557 if (envar)
558 is_founder = CALL_UTIL (atoi)(envar);
559 if (is_founder != 0)
560 {
561 if (is_founder != getpid ())
562 {
563 TprintfT (0, "__collector_open_experiment SP_COLLECTOR_FOUNDER=%d != pid(%d)\n",
564 is_founder, getpid ());
565 //CALL_UTIL(fprintf)(stderr, "__collector_open_experiment SP_COLLECTOR_FOUNDER=%d != pid(%d); not recording experiment\n",
566 //is_founder, getpid() );
567 //return COL_ERROR_UNEXP_FOUNDER;
568 is_founder = 0; // Special case (CR 22917352)
569 }
570 /* clear FOUNDER for descendant experiments */
571 TprintfT (0, "__collector_open_experiment setting SP_COLLECTOR_FOUNDER=0\n");
572 CALL_UTIL (strlcpy)(buffer, "SP_COLLECTOR_FOUNDER=0", sizeof (buffer));
573 CALL_UTIL (putenv)(buffer);
574 }
575 }
576
577 /* Set up fork/exec interposition (requires __collector_heap). */
578 /* Determine if "collect -F" specification enables this subexperiment */
579 get_progspec (exp_progspec, sizeof (exp_progspec), exp_progname, sizeof (exp_progname));
580
581 /* convert the returned exp_progname to a basename */
582 const char * base_name = __collector_strrchr (exp_progname, '/');
583 if (base_name == NULL)
584 base_name = exp_progname;
585 else
586 base_name = base_name + 1;
587 err = __collector_ext_line_init (&record_this_experiment, exp_progspec, base_name);
588 if (err != COL_ERROR_NONE)
589 {
590 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment COLERROR: %d\n", err);
591 return err;
592 }
593
594 /* Due to the fix of bug 15691122, we need to initialize unwind to make
595 * the function __collector_ext_return_address() work for dlopen interposition.
596 * */
597 if (!record_this_experiment && !is_founder)
598 {
599 TprintfT (DBG_LT0, "__collector_open_experiment: NOT creating experiment. (is_founder=%d, record=%d)\n",
600 is_founder, record_this_experiment);
601 return collector_tail_init (exp);
602 }
603 TprintfT (DBG_LT0, "__collector_open_experiment: is_founder=%d, record=%d\n",
604 is_founder, record_this_experiment);
605 if (is_founder || origin == SP_ORIGIN_FORK)
606 {
607 CALL_UTIL (strlcpy)(__collector_exp_dir_name, exp, sizeof (__collector_exp_dir_name));
608 if (origin == SP_ORIGIN_FORK)
609 { /*create exp dir for fork-child*/
610 if (collector_create_expr_dir (__collector_exp_dir_name))
611 {
612 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 1: `%s'\n", exp);
613 return COL_ERROR_BADDIR;
614 }
615 }
616 }
617 else
618 {/* founder/fork-child will already have created experiment dir, but exec/combo descendants must do so now */
619 if (collector_create_expr_dir_lineage (exp))
620 {
621 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 2: `%s'\n", exp);
622 return COL_ERROR_BADDIR;
623 }
624 static char exp_name_env[MAXPATHLEN + 1];
625 TprintfT (DBG_LT1, "collector_open_experiment: setting SP_COLLECTOR_EXPNAME to %s\n", __collector_exp_dir_name);
626 CALL_UTIL (snprintf)(exp_name_env, sizeof (exp_name_env), "SP_COLLECTOR_EXPNAME=%s", __collector_exp_dir_name);
627 CALL_UTIL (putenv)(exp_name_env);
628 }
629 /* Check that the name is that of a directory (new structure) */
630 DIR *expDir = CALL_UTIL (opendir)(__collector_exp_dir_name);
631 if (expDir == NULL)
632 {
633 /* can't open it */
634 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 3: `%s'\n", exp);
635 return COL_ERROR_BADDIR;
636 }
637 CALL_UTIL (closedir)(expDir);
638
639 if (CALL_UTIL (access)(__collector_exp_dir_name, W_OK))
640 {
641 TprintfT (0, "collector: ERROR: access error: errno=%d\n", errno);
642 if ((errno == EACCES) || (errno == EROFS))
643 {
644 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_DIRPERM: `%s'\n", exp);
645 TprintfT (DBG_LT0, "collector: ERROR: experiment directory `%s' is not writeable\n",
646 __collector_exp_dir_name);
647 return COL_ERROR_DIRPERM;
648 }
649 else
650 {
651 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_BADDIR 4: `%s'\n", exp);
652 return COL_ERROR_BADDIR;
653 }
654 }
655
656 /* reset the paused flag */
657 collector_paused = (origin == SP_ORIGIN_FORK ? paused_when_suspended : 0);
658
659 /* mark the experiment as opened */
660 __collector_expstate = EXP_OPEN;
661 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_OPEN\n");
662
663 /* open the log file */
664 err = log_open ();
665 if (err != COL_ERROR_NONE)
666 {
667 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_LOG_OPEN\n");
668 return COL_ERROR_LOG_OPEN;
669 }
670 if (origin != SP_ORIGIN_GENEXP && origin != SP_ORIGIN_KERNEL)
671 log_header_write (origin);
672
673 /* Make a copy of params so that we can modify the string */
674 int paramsz = CALL_UTIL (strlen)(params) + 1;
675 buf = (char*) alloca (paramsz);
676 if (buf == NULL)
677 {
678 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_ARGS2BIG: %s\n", params);
679 TprintfT (DBG_LT0, "collector: ERROR: experiment parameter `%s' is too long\n", params);
680 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n",
681 SP_JCMD_CERROR, COL_ERROR_ARGS2BIG);
682 return COL_ERROR_ARGS2BIG;
683 }
684 CALL_UTIL (strlcpy)(buf, params, paramsz);
685
686 /* create directory for archives (if founder) */
687 char archives[MAXPATHLEN];
688 CALL_UTIL (snprintf)(archives, MAXPATHLEN, "%s/%s", __collector_exp_dir_name,
689 SP_ARCHIVES_DIR);
690 if (is_founder)
691 {
692 mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
693 if ((CALL_UTIL (mkdir)(archives, dmode) != 0) && (errno != EEXIST))
694 {
695 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_MKDIR: %s: errno = %d\n", archives, errno);
696 TprintfT (0, "collector: ERROR: mkdir(%s) failed: errno = %d\n", archives, errno);
697 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">mkdir(%s): errno=%d</event>\n",
698 SP_JCMD_COMMENT, COL_COMMENT_NONE, archives, errno);
699 /* this is not a fatal error currently */
700 }
701 else
702 TprintfT (DBG_LT1, "collector: archive mkdir(%s) succeeded\n", archives);
703 }
704
705 /* initialize the segments map and mmap interposition */
706 if (origin != SP_ORIGIN_GENEXP && origin != SP_ORIGIN_KERNEL)
707 {
708 if ((err = __collector_ext_mmap_install (1)) != COL_ERROR_NONE)
709 {
710 __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", SP_JCMD_CERROR, err);
711 return err;
712 }
713 }
714
715 /* open the overview file for sample data */
716 if (origin != SP_ORIGIN_GENEXP)
717 ovw_open ();
718
719 /* initialize TSD module (note: relies on __collector_heap) */
720 if (__collector_tsd_init () != 0)
721 {
722 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_TSD_INIT\n");
723 __collector_log_write ("<event kind=\"%s\" id=\"%d\">TSD could not be initialized</event>\n", SP_JCMD_CERROR, COL_ERROR_TSD_INIT);
724 return COL_ERROR_TSD_INIT;
725 }
726
727 /* experiment is initialized; allow pause/resume/close */
728 exp_initted = 1;
729
730 // 24935305 should not use SIGPROF if collect -p -t and -S are all off
731 /* (check here if -t or -S is on; -p is checked later) */
732 if (((params[0] == 't' || params[0] == 'S') && params[1] == ':')
733 || CALL_UTIL (strstr)(params, ";t:")
734 || CALL_UTIL (strstr)(params, ";S:"))
735 {
736 /* set a default time to 100 ms.; use negative value to force setting */
737 TprintfT (DBG_LT1, "collector: open_experiment setting timer to 100000\n");
738 __collector_ext_itimer_set (-100000);
739 }
740
741 /* call open for all dynamic modules */
742 int i;
743 for (i = 0; i < nmodules; i++)
744 {
745 if (modules[i]->openExperiment != NULL)
746 {
747 modules_st[i] = modules[i]->openExperiment (__collector_exp_dir_name);
748 if (modules_st[i] == COL_ERROR_NONE && modules[i]->description != NULL)
749 {
750 modules_hndl[i] = __collector_create_handle (modules[i]->description);
751 if (modules_hndl[i] == NULL)
752 modules_st[i] = -1;
753 }
754 }
755 /* check to see if anyone closed the experiment */
756 if (!exp_initted)
757 {
758 CALL_UTIL (fprintf)(stderr, "__collector_open_experiment: COL_ERROR_EXP_OPEN\n");
759 __collector_log_write ("<event kind=\"%s\" id=\"%d\">Experiment closed prematurely</event>\n", SP_JCMD_CERROR, COL_ERROR_EXPOPEN);
760 return COL_ERROR_EXPOPEN;
761 }
762 }
763
764 /* initialize for subsequent stack unwinds */
765 __collector_ext_unwind_init (1);
766 TprintfT (DBG_LT0, "__collector_open_experiment(); module init done, params=%s\n",
767 buf);
768
769 /* now parse the data descriptor */
770 /* The parameter string is a series of specifiers,
771 * each of which is of the form:
772 * <key>:<param>;
773 * key is a single letter, the : and ; are mandatory,
774 * and param is a string which may be zero-length, and
775 * which contains any character except a null-byte or ;
776 * param is interpreted by the handler for the particular key
777 */
778
779 s = buf;
780
781 while (*s)
782 {
783 char *par;
784 char key = *s++;
785 /* ensure that it's followed by a colon */
786 if (*s++ != ':')
787 {
788 TprintfT (0, "collector: ERROR: parameter %c is not followed by a colon\n", key);
789 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, COL_ERROR_ARGS, params);
790 return COL_ERROR_ARGS;
791 }
792 /* find the semicolon terminator */
793 par = s;
794 while (*s && (*s != ';'))
795 s++;
796 if (*s != ';')
797 {
798 /* not followed by semicolon */
799 TprintfT (0, "collector: ERROR: parameter %c:%s is not terminated by a semicolon\n", key, par);
800 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, COL_ERROR_ARGS, params);
801 return COL_ERROR_ARGS;
802 }
803 /* terminate par, and position for next descriptor */
804 *s++ = 0;
805
806 /* now process that element of the data descriptor */
807 switch (key)
808 {
809 case 'g': /* g<sig>; */
810 if ((err = sample_set_user_sig (par)) != COL_ERROR_NONE)
811 {
812 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
813 return err;
814 }
815 break;
816 case 'd': /* d<sig>; -or- d<sig>p; */
817 if ((err = pause_set_user_sig (par)) != COL_ERROR_NONE)
818 {
819 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
820 return err;
821 }
822 break;
823 case 'H':
824 m_dlopen ("libgp-heap.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */
825 break;
826 case 's':
827 m_dlopen ("libgp-sync.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */
828 break;
829 case 'i':
830 m_dlopen ("libgp-iotrace.so", RTLD_LAZY); /* hack to force .so's constructor to be called (?) */
831 break;
832 case 'F': /* F; */
833 seen_F_flag = 1;
834 TprintfT (DBG_LT0, "__collector_open_experiment: calling __collector_ext_line_install (%s, %s)\n",
835 par, __collector_exp_dir_name);
836 if ((err = __collector_ext_line_install (par, __collector_exp_dir_name)) != COL_ERROR_NONE)
837 {
838 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
839 return err;
840 }
841 break;
842 case 'a': /* a; */
843 archive_mode = __collector_strdup (par);
844 break;
845 case 't': /* t:<expt-duration>; */
846 duration_string = par;
847 break;
848 case 'S': /* S:<sample-interval>; */
849 if ((err = sample_set_interval (par)) != COL_ERROR_NONE)
850 {
851 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
852 return err;
853 }
854 break;
855 case 'L': /* L:<experiment-size-limit>; */
856 if ((err = __collector_set_size_limit (par)) != COL_ERROR_NONE)
857 {
858 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
859 return err;
860 }
861 break;
862 case 'P': /* P:PROJECT_HOME; */
863 project_home = __collector_strdup (par);
864 break;
865 case 'h':
866 case 'p':
867 fs_matters = 1;
868 break;
869 case 'Y':
870 err = set_user_sig_action (par);
871 if (err != COL_ERROR_NONE)
872 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
873 break;
874 default:
875 /* Ignore unknown parameters; allow them to be handled by modules */
876 break;
877 }
878 }
879 /* end of data descriptor parsing */
880
881 if (!seen_F_flag)
882 {
883 char * par = "0"; // This will not happen when collect has no -F option
884 if ((err = __collector_ext_line_install (par, __collector_exp_dir_name)) != COL_ERROR_NONE)
885 {
886 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, par);
887 return err;
888 }
889 }
890
891 /* now that we know what data is being collected, we can set the filesystem warning */
892 fs_warn ();
893
894 // We have to create all tsd keys before __collector_tsd_allocate().
895 // With the pthreads-based implementation, this might no longer be necessary.
896 // In any case, we still have to create the key before a thread can use it.
897 __collector_ext_gettid_tsd_create_key ();
898 __collector_ext_dispatcher_tsd_create_key ();
899
900 /* allocate tsd for the current thread */
901 if (__collector_tsd_allocate () != 0)
902 {
903 __collector_log_write ("<event kind=\"%s\" id=\"%d\">TSD allocate failed</event>\n", SP_JCMD_CERROR, COL_ERROR_EXPOPEN);
904 return COL_ERROR_EXPOPEN;
905 }
906 /* init tsd for unwind, called right after __collector_tsd_allocate()*/
907 __collector_ext_unwind_key_init (1, NULL);
908
909 /* start java attach if suitable */
910 #if defined(GPROFNG_JAVA_PROFILING)
911 if (exp_origin == SP_ORIGIN_DBX_ATTACH)
912 __collector_jprofile_start_attach ();
913 #endif
914 start_sec_time = CALL_UTIL (time)(NULL);
915 __collector_start_time = collector_interface.getHiResTime ();
916 TprintfT (DBG_LT0, "\t__collector_open_experiment; resetting start_time\n");
917 if (duration_string != NULL && (err = set_duration (duration_string)) != COL_ERROR_NONE)
918 {
919 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR, err, duration_string);
920 return err;
921 }
922
923 /* install the common SIGPROF dispatcher (requires TSD) */
924 if ((err = __collector_ext_dispatcher_install ()) != COL_ERROR_NONE)
925 {
926 __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", SP_JCMD_CERROR, err);
927 return err;
928 }
929
930 /* mark the experiment open complete */
931 exp_open = 1;
932 if (exp_origin == SP_ORIGIN_DBX_ATTACH)
933 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" time=\"%lld\" tm_zone=\"%lld\"/>\n",
934 SP_JCMD_RUN,
935 (unsigned) (__collector_start_time / NANOSEC), (unsigned) (__collector_start_time % NANOSEC),
936 (long long) start_sec_time, (long long) 0);
937 else
938 log_write_event_run ();
939
940 /* schedule the first sample */
941 __collector_next_sample = __collector_start_time + ((hrtime_t) NANOSEC) * __collector_sample_period;
942 __collector_ext_usage_sample (MASTER_SMPL, "collector_open_experiment");
943
944 /* start data collection in dynamic modules */
945 if (collector_paused == 0)
946 {
947 for (i = 0; i < nmodules; i++)
948 if (modules[i]->startDataCollection != NULL && modules_st[i] == 0)
949 modules[i]->startDataCollection ();
950 }
951 else
952 {
953 hrtime_t ts = GETRELTIME ();
954 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n",
955 SP_JCMD_PAUSE, (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC));
956 }
957
958 /* mark the experiment active */
959 __collector_exp_active = 1;
960 return COL_ERROR_NONE;
961 }
962
963 /* prepare directory for new experiment of fork-child */
964
965 /* return 0 if successful */
966 static int
967 collector_create_expr_dir (const char *new_exp_name)
968 {
969 int ret = -1;
970 mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
971 TprintfT (DBG_LT1, "collector: __collector_create_expr_dir(%s)\n", new_exp_name);
972 if (CALL_UTIL (mkdir)(new_exp_name, dmode) < 0)
973 TprintfT (0, "__collector_create_expr_dir(%s) ERROR: errno=%d\n", new_exp_name, errno);
974 else
975 ret = 0;
976 return (ret);
977 }
978
979 /* append _xN to __collector_exp_dir_name*/
980 /* return 0 if successful */
981 static int
982 collector_exp_dir_append_x (int linenum, const char *parent_exp_name)
983 {
984 char buffer[MAXPATHLEN + 1];
985 char * p = __collector_strrchr (parent_exp_name, '/');
986 if (p == NULL || (*(p + 1) != '_'))
987 {
988 size_t sz = CALL_UTIL (strlen)(parent_exp_name);
989 const char * q = parent_exp_name + sz - 3;
990 if (sz < 3 || __collector_strncmp (q, ".er", CALL_UTIL (strlen)(q)) != 0
991 || CALL_UTIL (access)(parent_exp_name, F_OK) != 0)
992 {
993 TprintfT (0, "collector_exp_dir_append_x() ERROR: invalid parent_exp_name %s\n", parent_exp_name);
994 return -1;
995 }
996 CALL_UTIL (strlcpy)(buffer, parent_exp_name, sizeof (buffer));
997 CALL_UTIL (snprintf)(__collector_exp_dir_name, sizeof (__collector_exp_dir_name),
998 "%s/_x%d.er", buffer, linenum);
999 }
1000 else
1001 {
1002 p = __collector_strrchr (parent_exp_name, '.');
1003 if (p == NULL || *(p + 1) != 'e' || *(p + 2) != 'r')
1004 {
1005 TprintfT (0, "collector_exp_dir_append_x() ERROR: invalid parent_exp_name %s\n", parent_exp_name);
1006 return -1;
1007 }
1008 CALL_UTIL (strlcpy)(buffer, parent_exp_name,
1009 ((p - parent_exp_name + 1)<sizeof (buffer)) ? (p - parent_exp_name + 1) : sizeof (buffer));
1010 CALL_UTIL (snprintf)(__collector_exp_dir_name, sizeof (__collector_exp_dir_name),
1011 "%s_x%d.er", buffer, linenum);
1012 }
1013 return 0;
1014 }
1015
1016 /* prepare directory for new experiment of exec/combo child*/
1017
1018 /* return 0 if successful */
1019 static int
1020 collector_create_expr_dir_lineage (const char *parent_exp_name)
1021 {
1022 int ret = -1;
1023 mode_t dmode = S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
1024 int linenum = 1;
1025 while (linenum < INT_MAX)
1026 {
1027 if (collector_exp_dir_append_x (linenum, parent_exp_name) != 0)
1028 return -1;
1029 if (CALL_UTIL (access)(__collector_exp_dir_name, F_OK) != 0)
1030 {
1031 if (CALL_UTIL (mkdir)(__collector_exp_dir_name, dmode) == 0)
1032 return 0;
1033 }
1034 linenum++;
1035 TprintfT (DBG_LT0, "collector: collector_create_expr_dir_lineage(%s -> %s)\n", parent_exp_name, __collector_exp_dir_name);
1036 }
1037 return (ret);
1038 }
1039
1040 /* Finish the initializing work if we don't collect data while libcollector.so is preloaded. */
1041 /* return COL_ERROR_NONE if successful */
1042 static int
1043 collector_tail_init (const char *parent_exp_name)
1044 {
1045 int err = COL_ERROR_NONE;
1046 if (exp_origin != SP_ORIGIN_FORK)
1047 {
1048 /* For exec/combo descendants. Don't create dir for this subexp, but update lineage by appending "_x0". */
1049 /* Different children can have the same _x0 if their name don't match -F exp.
1050 * Assume their fork children inherit the program name, there will be no _x0_fN.er to create.
1051 * So we don't need to worry about the lineage messed up by _x0.
1052 */
1053 int linenum = 0;
1054 if (collector_exp_dir_append_x (linenum, parent_exp_name) != 0)
1055 return COL_ERROR_BADDIR;
1056 static char exp_name_env[MAXPATHLEN + 1];
1057 CALL_UTIL (snprintf)(exp_name_env, sizeof (exp_name_env), "SP_COLLECTOR_EXPNAME=%s", __collector_exp_dir_name);
1058 TprintfT (DBG_LT1, "collector_tail_init: setting SP_COLLECTOR_EXPNAME to %s\n", __collector_exp_dir_name);
1059 CALL_UTIL (putenv)(exp_name_env);
1060 }
1061 /* initialize the segments map and mmap interposition */
1062 if (exp_origin != SP_ORIGIN_GENEXP && exp_origin != SP_ORIGIN_KERNEL)
1063 if ((err = __collector_ext_mmap_install (0)) != COL_ERROR_NONE)
1064 return err;
1065
1066 /* initialize TSD module (note: relies on __collector_heap) */
1067 if (__collector_tsd_init () != 0)
1068 return COL_ERROR_EXPOPEN;
1069
1070 /* initialize for subsequent stack unwinds */
1071 __collector_ext_unwind_init (0);
1072
1073 char * buf = NULL;
1074 /* Make a copy of params so that we can modify the string */
1075 int paramsz = CALL_UTIL (strlen)(collector_params) + 1;
1076 buf = (char*) alloca (paramsz);
1077 CALL_UTIL (strlcpy)(buf, collector_params, paramsz);
1078
1079 char *par_F = "0";
1080 char *s;
1081 for (s = buf; *s;)
1082 {
1083 char key = *s++;
1084 /* ensure that it's followed by a colon */
1085 if (*s++ != ':')
1086 {
1087 TprintfT (DBG_LT0, "collector_tail_init: ERROR: parameter %c is not followed by a colon\n", key);
1088 return COL_ERROR_ARGS;
1089 }
1090
1091 /* find the semicolon terminator */
1092 char *par = s;
1093 while (*s && (*s != ';'))
1094 s++;
1095 if (*s != ';')
1096 {
1097 /* not followed by semicolon */
1098 TprintfT (0, "collector_tail_init: ERROR: parameter %c:%s is not terminated by a semicolon\n", key, par);
1099 return COL_ERROR_ARGS;
1100 }
1101 /* terminate par, and position for next descriptor */
1102 *s++ = 0;
1103 /* now process that element of the data descriptor */
1104 if (key == 'F')
1105 {
1106 par_F = par;
1107 break;
1108 }
1109 }
1110 if ((err = __collector_ext_line_install (par_F, __collector_exp_dir_name)) != COL_ERROR_NONE)
1111 return err;
1112
1113 /* allocate tsd for the current thread */
1114 if (__collector_tsd_allocate () != 0)
1115 return COL_ERROR_EXPOPEN;
1116 return COL_ERROR_NONE;
1117 }
1118
1119 /* routines concerning closing the experiment */
1120 /* close down -- fini section routine */
1121 static void collector_fini () __attribute__ ((destructor));
1122 static void
1123 collector_fini ()
1124 {
1125 TprintfT (DBG_LT0, "collector_fini: closing experiment\n");
1126 __collector_close_experiment ();
1127
1128 }
1129
1130 void collector_terminate_expt () __attribute__ ((weak, alias ("__collector_terminate_expt")));
1131
1132 /* __collector_terminate_expt called by user, or from dbx */
1133 void
1134 __collector_terminate_expt ()
1135 {
1136 TprintfT (DBG_LT0, "__collector_terminate_expt: %s; calling close\n", __collector_exp_dir_name);
1137 __collector_close_experiment ();
1138 TprintfT (DBG_LT0, "__collector_terminate_expt done\n\n");
1139 }
1140
1141 /*
1142 * We manage the SIGCHLD handler with sigaction and don't worry about signal or sigset().
1143 * This is in line with the comments in dispatcher.c
1144 * immediately preceding the wrapper function for (Linux) signal().
1145 */
1146 static struct sigaction original_sigchld_sigaction;
1147 static pid_t mychild_pid = -1;
1148
1149 /* __collector_SIGCHLD_signal_handler called when er_archive exits */
1150 static void
1151 __collector_SIGCHLD_signal_handler (int sig, siginfo_t *si, void *context)
1152 {
1153 pid_t calling_pid = si->si_pid;
1154 /* Potential race.
1155 * We get mychild_pid from the vfork() return value.
1156 * So there is an outside chance that the child completes and sends SIGCHLD
1157 * before the handler knows the value of mychild_pid.
1158 */
1159 if (calling_pid == mychild_pid)
1160 // er_archive has exited; so restore the user handler
1161 __collector_sigaction (SIGCHLD, &original_sigchld_sigaction, NULL);
1162 else
1163 {
1164 // if we can't identify the pid, the signal must be for the user's handler
1165 if (original_sigchld_sigaction.sa_handler != SIG_DFL
1166 && original_sigchld_sigaction.sa_handler != SIG_IGN)
1167 original_sigchld_sigaction.sa_sigaction (sig, si, context);
1168 }
1169 TprintfT (DBG_LT1, "__collector_SIGCHLD_signal_handler done\n\n");
1170 }
1171
1172 int
1173 collector_sigchld_sigaction (const struct sigaction *nact,
1174 struct sigaction *oact)
1175 {
1176 // get the current SIGCHLD handler
1177 struct sigaction cur_handler;
1178 __collector_sigaction (SIGCHLD, NULL, &cur_handler);
1179
1180 // if we have NOT installed our own handler, return an error
1181 // (force the caller to deal with this case)
1182 if (cur_handler.sa_sigaction != __collector_SIGCHLD_signal_handler)
1183 return -1;
1184
1185 // if we HAVE installed our own handler, act on the user's handler
1186 if (oact)
1187 __collector_memcpy (oact, &original_sigchld_sigaction, sizeof (struct sigaction));
1188 if (nact)
1189 __collector_memcpy (&original_sigchld_sigaction, nact, sizeof (struct sigaction));
1190 return 0;
1191 }
1192
1193 /*
1194 * __collector_close_experiment may be called either from
1195 * __collector_terminate_expt() or the .fini section
1196 */
1197 void
1198 __collector_close_experiment ()
1199 {
1200 hrtime_t ts;
1201 char *argv[10];
1202 int status;
1203 TprintfT (DBG_LT1, "collector: __collector_close_experiment(): %s\n", __collector_exp_dir_name);
1204 if (!exp_initted)
1205 return;
1206 /* The experiment may have been previously closed */
1207 if (!exp_open)
1208 return;
1209
1210 if (__collector_mutex_trylock (&__collector_close_guard))
1211 /* someone else is in the middle of closing the experiment */
1212 return;
1213
1214 /* record the termination of the experiment */
1215 ts = GETRELTIME ();
1216 collector_params = NULL;
1217
1218 /* tell all dynamic modules to stop data collection */
1219 int i;
1220 for (i = 0; i < nmodules; i++)
1221 if (modules[i]->stopDataCollection != NULL)
1222 modules[i]->stopDataCollection ();
1223
1224 /* notify all dynamic modules the experiment is being closed */
1225 for (i = 0; i < nmodules; i++)
1226 {
1227 if (modules[i]->closeExperiment != NULL)
1228 modules[i]->closeExperiment ();
1229 __collector_delete_handle (modules_hndl[i]);
1230 modules_hndl[i] = NULL;
1231 }
1232
1233 /* acquire the global lock -- only one close at a time */
1234 __collector_mutex_lock (&__collector_glob_lock);
1235 /* deinstall mmap tracing (with final update) */
1236 __collector_ext_mmap_deinstall (1);
1237
1238 /* deinstall common SIGPROF dispatcher */
1239 __collector_ext_dispatcher_deinstall ();
1240
1241 /* disable line interposition */
1242 __collector_ext_line_close ();
1243
1244 /* Other threads may be reading tsd now. */
1245 //__collector_tsd_fini();
1246
1247 /* delete global heap */
1248 /* omazur: do not delete the global heap
1249 * to avoid crashes in TSD. Need a better solution.
1250 __collector_deleteHeap( __collector_heap );
1251 __collector_heap = NULL;
1252 */
1253 __collector_mutex_unlock (&__collector_glob_lock);
1254
1255 /* take a final sample */
1256 __collector_ext_usage_sample (MASTER_SMPL, "collector_close_experiment");
1257 sample_mode = 0;
1258
1259 /* close the frameinfo file */
1260 __collector_ext_unwind_close ();
1261 if (exp_origin != SP_ORIGIN_DBX_ATTACH)
1262 log_write_event_run ();
1263
1264 /* mark the experiment as closed */
1265 __collector_expstate = EXP_CLOSED;
1266 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_CLOSED: project_home=%s\n",
1267 STR (project_home));
1268 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n",
1269 SP_JCMD_EXIT,
1270 (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC));
1271
1272 /* derive er_archive's absolute path from that of libcollector */
1273 argv[0] = NULL;
1274 if (project_home && archive_mode && __collector_strcmp (archive_mode, "off"))
1275 {
1276 /* construct a command to launch it */
1277 char *er_archive_name = "/bin/gp-archive";
1278 size_t cmdlen = CALL_UTIL (strlen)(project_home) + CALL_UTIL (strlen)(er_archive_name) + 1;
1279 char *command = (char*) alloca (cmdlen);
1280 CALL_UTIL (snprintf)(command, cmdlen, "%s%s", project_home, er_archive_name);
1281 if (CALL_UTIL (access)(command, F_OK) == 0)
1282 {
1283 // build the argument list
1284 int nargs = 0;
1285 argv[nargs++] = command;
1286 argv[nargs++] = "-n";
1287 argv[nargs++] = "-a";
1288 argv[nargs++] = archive_mode;
1289 size_t len = CALL_UTIL (strlen)(__collector_exp_dir_name) + 1;
1290 size_t len1 = CALL_UTIL (strlen)(SP_ARCHIVE_LOG_FILE) + 1;
1291 char *str = (char*) alloca (len + len1);
1292 CALL_UTIL (snprintf)(str, len + 15, "%s/%s", __collector_exp_dir_name, SP_ARCHIVE_LOG_FILE);
1293 argv[nargs++] = "--outfile";
1294 argv[nargs++] = str;
1295 str = (char*) alloca (len);
1296 CALL_UTIL (snprintf)(str, len, "%s", __collector_exp_dir_name);
1297 argv[nargs++] = str;
1298 argv[nargs] = NULL;
1299 }
1300 }
1301
1302 /* log the archive command to be run */
1303 if (argv[0] == NULL)
1304 {
1305 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n",
1306 SP_JCMD_COMMENT, COL_COMMENT_NONE, "No archive command run");
1307 TprintfT (DBG_LT1, "collector: No archive command run\n");
1308 }
1309 else
1310 {
1311 char cmdbuf[4096];
1312 int bufoffset = 0;
1313 int i;
1314 for (i = 0; argv[i] != NULL; i++)
1315 {
1316 bufoffset += CALL_UTIL (snprintf)(&cmdbuf[bufoffset], (sizeof (cmdbuf) - bufoffset),
1317 " %s", argv[i]);
1318 }
1319 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">Archive command `%s'</event>\n",
1320 SP_JCMD_COMMENT, COL_COMMENT_NONE, cmdbuf);
1321 TprintfT (DBG_LT1, "collector: running `%s'\n", cmdbuf);
1322 }
1323 log_close ();
1324 TprintfT (DBG_LT1, "__collector_close_experiment(%s) done\n", __collector_exp_dir_name);
1325 exp_open = 0; /* mark the experiment as closed */
1326 __collector_exp_active = 0; /* mark the experiment as inactive */
1327
1328 /* reset all experiment parameters */
1329 sample_mode = 0;
1330 collector_paused = 0;
1331 __collector_pause_sig = -1;
1332 __collector_pause_sig_warn = 0;
1333 __collector_sample_sig = -1;
1334 __collector_sample_sig_warn = 0;
1335 __collector_sample_period = 0;
1336 __collector_exp_dir_name[0] = 0;
1337
1338 /* uninstall the pause and sample signal handlers */
1339 /* XXXX -- not yet, because of potential race conditions in libthread */
1340 if (argv[0] == NULL)
1341 {
1342 /* er_archive command will not be run */
1343 __collector_mutex_unlock (&__collector_close_guard);
1344 return;
1345 }
1346
1347 struct sigaction sa;
1348 CALL_UTIL (memset)(&sa, 0, sizeof (struct sigaction));
1349 sa.sa_sigaction = __collector_SIGCHLD_signal_handler;
1350 sa.sa_flags = SA_SIGINFO;
1351 __collector_sigaction (SIGCHLD, &sa, &original_sigchld_sigaction);
1352
1353 /* linetrace interposition takes care of unsetting Environment variables */
1354 /* create a child process to invoke er_archive */
1355 pid_t pid = CALL_UTIL (vfork)();
1356 if (pid == 0)
1357 {
1358 /* pid is zero == child process -- invoke er_archive */
1359 /* Unset LD_PRELOAD environment variables */
1360 CALL_UTIL (unsetenv)("LD_PRELOAD_32");
1361 CALL_UTIL (unsetenv)("LD_PRELOAD_64");
1362 CALL_UTIL (unsetenv)("LD_PRELOAD");
1363 /* Invoke er_archive */
1364 CALL_UTIL (execv)(argv[0], argv);
1365 CALL_UTIL (exit)(1); /* exec failed -- child exits with an error */
1366 }
1367 else if (pid != -1)
1368 {
1369 mychild_pid = pid; // notify our signal handler who the child is
1370 pid_t w;
1371 /* copied from system.c */
1372 do
1373 {
1374 w = CALL_UTIL (waitpid)(pid, &status, 0);
1375 }
1376 while (w == -1 && errno == EINTR);
1377 TprintfT (DBG_LT1, "collector: creating archive done\n");
1378 // __collector_SIGCHLD_signal_handler should now be de-installed, but it does so itself
1379 }
1380 else
1381 /* child-process creation failed */
1382 TprintfT (DBG_LT0, "collector: creating archive process failed\n");
1383
1384 __collector_mutex_unlock (&__collector_close_guard);
1385 TprintfT (DBG_LT1, "collector: __collector_close_experiment done\n");
1386 return;
1387 }
1388
1389 /*
1390 * void __collector_clean_state()
1391 * Perform all necessary cleanup steps in child process after fork().
1392 */
1393 void
1394 __collector_clean_state ()
1395 {
1396 TprintfT (DBG_LT1, "collector: collector_clean_state()\n");
1397 int i;
1398 /*
1399 * We are in child process after fork().
1400 * First of all we have to reset all mutex locks in collector's subsystems.
1401 * After that we can reinitialize modules.
1402 */
1403 __collector_mmgr_init_mutex_locks (__collector_heap);
1404 __collector_mutex_init (&__collector_glob_lock);
1405 __collector_mutex_init (&__collector_open_guard);
1406 __collector_mutex_init (&__collector_close_guard);
1407 __collector_mutex_init (&__collector_sample_guard);
1408 __collector_mutex_init (&__collector_suspend_guard);
1409 __collector_mutex_init (&__collector_resume_guard);
1410
1411 if (__collector_mutex_trylock (&__collector_close_guard))
1412 /* someone else is in the middle of closing the experiment */
1413 return;
1414
1415 /* Stop data collection in all dynamic modules */
1416 for (i = 0; i < nmodules; i++)
1417 if (modules[i]->stopDataCollection != NULL)
1418 modules[i]->stopDataCollection ();
1419
1420 // Now we can reset modules
1421 for (i = 0; i < nmodules; i++)
1422 {
1423 if (modules[i]->detachExperiment != NULL && modules_st[i] == 0)
1424 modules[i]->detachExperiment ();
1425 __collector_delete_handle (modules_hndl[i]);
1426 modules_hndl[i] = NULL;
1427 }
1428
1429 /* acquire the global lock -- only one suspend at a time */
1430 __collector_mutex_lock (&__collector_glob_lock);
1431 {
1432
1433 /* stop any profile data writing */
1434 paused_when_suspended = collector_paused;
1435 collector_paused = 1;
1436
1437 /* deinstall common SIGPROF dispatcher */
1438 __collector_ext_dispatcher_suspend ();
1439
1440 /* mark the experiment as suspended */
1441 __collector_exp_active = 0;
1442
1443 /* XXXX mark the experiment as closed! */
1444 exp_open = 0; /* This is a hack to allow fork child to call__collector_open_experiment() */
1445
1446 /* mark the experiment log closed! */
1447 log_close ();
1448 }
1449 __collector_mutex_unlock (&__collector_glob_lock);
1450
1451 // Now we can reset subsystems.
1452 __collector_ext_dispatcher_fork_child_cleanup ();
1453 __collector_mmap_fork_child_cleanup ();
1454 __collector_tsd_fork_child_cleanup ();
1455 paused_when_suspended = 0;
1456 collector_paused = 0;
1457 __collector_expstate = EXP_INIT;
1458 TprintfT (DBG_LT1, "__collector_clean_slate: __collector_expstate->EXP_INIT\n");
1459 exp_origin = SP_ORIGIN_LIBCOL_INIT;
1460 exp_initted = 0;
1461 __collector_start_time = collector_interface.getHiResTime ();
1462 TprintfT (DBG_LT1, " -->__collector_clean_slate; resetting start_time\n");
1463 start_sec_time = 0;
1464
1465 /* Sample related data */
1466 sample_installed = 0; // 1 if the sample signal handler installed
1467 sample_mode = 0; // dynamically turns sample record writing on/off
1468 sample_number = 0; // index of the current sample record
1469 __collector_sample_sig = -1; // user-specified sample signal
1470 __collector_sample_sig_warn = 0; // non-zero if warning already given
1471
1472 /* Pause/resume related data */
1473 __collector_pause_sig = -1; // user-specified pause signal
1474 __collector_pause_sig_warn = 0; // non-zero if warning already given
1475 __collector_mutex_unlock (&__collector_close_guard);
1476 return;
1477 }
1478
1479 /* modelled on __collector_close_experiment */
1480 void
1481 __collector_suspend_experiment (char *why)
1482 {
1483 if (!exp_initted)
1484 return;
1485 /* The experiment may have been previously closed */
1486 if (!exp_open)
1487 return;
1488 /* The experiment may have been previously suspended */
1489 if (!__collector_exp_active)
1490 return;
1491 if (__collector_mutex_trylock (&__collector_suspend_guard))
1492 /* someone else is in the middle of suspending the experiment */
1493 return;
1494
1495 /* Stop data collection in all dynamic modules */
1496 int i;
1497 for (i = 0; i < nmodules; i++)
1498 if (modules[i]->stopDataCollection != NULL)
1499 modules[i]->stopDataCollection ();
1500
1501 /* take a pre-suspension sample */
1502 __collector_ext_usage_sample (MASTER_SMPL, why);
1503
1504 /* acquire the global lock -- only one suspend at a time */
1505 __collector_mutex_lock (&__collector_glob_lock);
1506 /* stop any profile data writing */
1507 paused_when_suspended = collector_paused;
1508 collector_paused = 1;
1509
1510 /* deinstall common SIGPROF dispatcher */
1511 __collector_ext_dispatcher_suspend ();
1512
1513 /* mark the experiment as suspended */
1514 __collector_exp_active = 0;
1515
1516 /* XXXX mark the experiment as closed! */
1517 exp_open = 0; // This is a hack to allow fork child to call __collector_open_experiment()
1518 log_pause (); // mark the experiment log closed!
1519 TprintfT (DBG_LT0, "collector: collector_suspend_experiment(%s, %d)\n\n", why, collector_paused);
1520 __collector_mutex_unlock (&__collector_glob_lock);
1521 __collector_mutex_unlock (&__collector_suspend_guard);
1522 return;
1523 }
1524
1525 void
1526 __collector_resume_experiment ()
1527 {
1528 if (!exp_initted)
1529 return;
1530
1531 /* The experiment may have been previously resumed */
1532 if (__collector_exp_active)
1533 return;
1534 if (__collector_mutex_trylock (&__collector_resume_guard))
1535 /* someone else is in the middle of resuming the experiment */
1536 return;
1537
1538 /* acquire the global lock -- only one resume at a time */
1539 __collector_mutex_lock (&__collector_glob_lock);
1540 /* mark the experiment as re-activated */
1541 __collector_exp_active = 1;
1542 /* XXXX mark the experiment as open! */
1543 exp_open = 1; // This is a hack to allow fork child to call__collector_open_experiment()
1544 log_resume (); // mark the experiment log re-opened!
1545 TprintfT (DBG_LT0, "collector: collector_resume_experiment(%d)\n", paused_when_suspended);
1546 /* resume any profile data writing */
1547 collector_paused = paused_when_suspended;
1548 /* restart common SIGPROF dispatcher */
1549 __collector_ext_dispatcher_restart ();
1550 __collector_mutex_unlock (&__collector_glob_lock);
1551
1552 /* take a post-suspension sample */
1553 __collector_ext_usage_sample (MASTER_SMPL, "collector_resume_experiment");
1554
1555 /* Resume data collection in all dynamic modules */
1556 if (collector_paused == 0)
1557 {
1558 int i;
1559 for (i = 0; i < nmodules; i++)
1560 if (modules[i]->startDataCollection != NULL && modules_st[i] == 0)
1561 modules[i]->startDataCollection ();
1562 }
1563
1564 if (__collector_sample_period != 0)
1565 {
1566 hrtime_t now = collector_interface.getHiResTime ();
1567 while (__collector_next_sample < now)
1568 __collector_next_sample += ((hrtime_t) NANOSEC) * __collector_sample_period;
1569 }
1570
1571 /* check for experiment past termination time */
1572 if (__collector_terminate_time != 0)
1573 {
1574 hrtime_t now = collector_interface.getHiResTime ();
1575 if (__collector_terminate_time < now)
1576 {
1577 TprintfT (DBG_LT0, "__collector_resume_experiment: now (%lld) > terminate_time (%lld); closing experiment\n",
1578 (now - __collector_start_time), (__collector_terminate_time - __collector_start_time));
1579 __collector_close_experiment ();
1580 }
1581 }
1582 __collector_mutex_unlock (&__collector_resume_guard);
1583 return;
1584 }
1585
1586 /* Code to support Samples and Pause/Resume */
1587 void collector_sample () __attribute__ ((weak, alias ("__collector_sample")));
1588 void
1589 __collector_sample (char *name)
1590 {
1591 __collector_ext_usage_sample (PROGRAM_SMPL, name);
1592 }
1593
1594 static void
1595 write_sample (char *name)
1596 {
1597 if (sample_mode == 0)
1598 return;
1599 /* make the sample timestamp relative to the start */
1600 hrtime_t ts, now = collector_interface.getHiResTime ();
1601
1602 /* update time for next periodic sample */
1603 /* since this is common to all LWPs, and only one (the first!) will
1604 update it to the next period, doing the update early will avoid
1605 the overhead/frustration of the other LWPs
1606 */
1607 if (__collector_sample_period != 0)
1608 {
1609 /* this update should only be done for periodic samples */
1610 while (__collector_next_sample < now)
1611 __collector_next_sample += ((hrtime_t) NANOSEC) * __collector_sample_period;
1612 }
1613
1614 /* take the sample and record it; use (return - __collector_start_time) for timestamp */
1615 now = ovw_write ();
1616 ts = now - __collector_start_time;
1617
1618 /* write sample records to log file */
1619 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" id=\"%d\" label=\"%s\"/>\n",
1620 SP_JCMD_SAMPLE,
1621 (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC),
1622 sample_number,
1623 name);
1624 /* increment the sample number */
1625 sample_number++;
1626 }
1627
1628 /*
1629 * __collector_ext_usage_sample
1630 *
1631 * Handle taking a process usage sample and recording it.
1632 * Common to all different types of sample:
1633 * libcollector master samples at initiation and close,
1634 * programmatic samples via libcollector API calls,
1635 * periodic samples originating in the dispatcher,
1636 * manual samples originating in the signal sample handler,
1637 * manual samples originating from the debugger
1638 * Differentiating type and name information is currently not recorded.
1639 */
1640 void
1641 __collector_ext_usage_sample (Smpl_type type, char *name)
1642 {
1643 /* name is optional */
1644 if (name == NULL)
1645 name = "";
1646 TprintfT (DBG_LT3, "collector: __collector_ext_usage_sample(%d,%s)\n", type, name);
1647 if (!exp_initted)
1648 return;
1649
1650 /* if paused, don't record periodic samples */
1651 if ((type == PERIOD_SMPL) && (collector_paused == 1))
1652 return;
1653
1654 /* There is a possibility of entering this function
1655 * from sample_handler, dbx direct call to __collector_sample,
1656 * and user called collector_sample. Since we are making a
1657 * new sample anyway just return.
1658 */
1659 if (__collector_mutex_trylock (&__collector_sample_guard))
1660 return;
1661 if (type != PERIOD_SMPL || __collector_sample_period != 0)
1662 write_sample (name);
1663 __collector_mutex_unlock (&__collector_sample_guard);
1664 }
1665
1666 /* set the sample period from the parameter */
1667 static int
1668 sample_set_interval (char *param)
1669 {
1670 if (!exp_initted)
1671 return COL_ERROR_SMPLINIT;
1672 __collector_sample_period = CALL_UTIL (strtol)(param, NULL, 0); /* seconds */
1673 TprintfT (DBG_LT1, "collector: collector_sample period set to %d seconds.\n",
1674 __collector_sample_period);
1675 if (__collector_sample_period > 0)
1676 (void) __collector_log_write ("<setting %s=\"%d\"/>\n",
1677 SP_JCMD_SAMPLE_PERIOD, __collector_sample_period);
1678 return COL_ERROR_NONE;
1679 }
1680
1681 /* set the experiment duration from the parameter */
1682
1683 /* parameter is of the form nnn:mmm, where nnn is the start delay in seconds,
1684 * and mmm is the terminate time in seconds; if nnn is zero,
1685 * data collection starts when the run starts. If mmm is zero,
1686 * data collection terminates when the run terminates. Otherwise,
1687 * nnn must be less than mmm
1688 */
1689 static int
1690 set_duration (char *param)
1691 {
1692 if (!exp_initted)
1693 return COL_ERROR_DURATION_INIT;
1694 int delay_start = CALL_UTIL (strtol)(param, &param, 0); /* seconds */
1695 int terminate_duration = 0;
1696 if (*param == 0)
1697 {
1698 /* we only have one parameter, the terminate time */
1699 terminate_duration = delay_start;
1700 delay_start = 0;
1701 }
1702 else if (*param == ':')
1703 {
1704 param++;
1705 terminate_duration = CALL_UTIL (strtol)(param, &param, 0); /* seconds */
1706 }
1707 else
1708 return COL_ERROR_DURATION_INIT;
1709 TprintfT (DBG_LT1, "collector: collector_delay_start duration set to %d seconds.\n",
1710 delay_start);
1711 TprintfT (DBG_LT1, "collector: collector_terminate duration set to %d seconds.\n",
1712 terminate_duration);
1713 if (terminate_duration > 0)
1714 __collector_log_write ("<setting %s=\"%d\"/>\n<setting %s=\"%d\"/>\n",
1715 SP_JCMD_DELAYSTART, delay_start,
1716 SP_JCMD_TERMINATE, terminate_duration);
1717 __collector_delay_start = (hrtime_t) 0;
1718 if (delay_start != 0)
1719 {
1720 __collector_delay_start = __collector_start_time + ((hrtime_t) NANOSEC) * delay_start;
1721 collector_paused = 1;
1722 }
1723 __collector_terminate_time = terminate_duration == 0 ? (hrtime_t) 0 :
1724 __collector_start_time + ((hrtime_t) NANOSEC) * terminate_duration;
1725 return COL_ERROR_NONE;
1726 }
1727
1728 static int
1729 sample_set_user_sig (char *par)
1730 {
1731 int sig = CALL_UTIL (strtol)(par, &par, 0);
1732 TprintfT (DBG_LT1, "collector: sample_set_user_sig(sig=%d,installed=%d)\n",
1733 sig, sample_installed);
1734 /* Installing the sampling signal handler more
1735 * than once is not good.
1736 */
1737 if (!sample_installed)
1738 {
1739 struct sigaction act;
1740 sigemptyset (&act.sa_mask);
1741 /* XXXX should any signals be blocked? */
1742 act.sa_sigaction = sample_handler;
1743 act.sa_flags = SA_RESTART | SA_SIGINFO;
1744 if (sigaction (sig, &act, &old_sample_handler) == -1)
1745 {
1746 TprintfT (DBG_LT0, "collector: ERROR: collector_sample_handler install failed (sig=%d).\n",
1747 __collector_sample_sig);
1748 return COL_ERROR_ARGS;
1749 }
1750 if (old_sample_handler.sa_handler == SIG_DFL ||
1751 old_sample_handler.sa_sigaction == sample_handler)
1752 old_sample_handler.sa_handler = SIG_IGN;
1753 TprintfT (DBG_LT1, "collector: collector_sample_handler installed (sig=%d,hndlr=0x%p).\n",
1754 sig, sample_handler);
1755 __collector_sample_sig = sig;
1756 sample_installed = 1;
1757 }
1758 (void) __collector_log_write ("<setting %s=\"%u\"/>\n", SP_JCMD_SAMPLE_SIG, __collector_sample_sig);
1759 return COL_ERROR_NONE;
1760 }
1761
1762 /* signal handler for sample signal */
1763 static void
1764 sample_handler (int sig, siginfo_t *sip, void *uap)
1765 {
1766 if (sip && sip->si_code == SI_USER)
1767 {
1768 TprintfT (DBG_LT1, "collector: collector_sample_handler sampling!\n");
1769 __collector_ext_usage_sample (MANUAL_SMPL, "signal");
1770 }
1771 else if (old_sample_handler.sa_handler != SIG_IGN)
1772 {
1773 TprintfT (DBG_LT1, "collector: collector_sample_handler forwarding signal.\n");
1774 (old_sample_handler.sa_sigaction)(sig, sip, uap);
1775 }
1776 }
1777
/* collector_pause is a weak alias for __collector_pause.  */
void collector_pause () __attribute__ ((weak, alias ("__collector_pause")));

/* API entry point: pause data collection, recording "API" as the reason.  */
void
__collector_pause ()
{
  char *why = "API";
  __collector_pause_m (why);
}
1785
1786 void
1787 __collector_pause_m (char *reason)
1788 {
1789 hrtime_t now;
1790 char xreason[MAXPATHLEN];
1791 TprintfT (DBG_LT0, "collector: __collector_pause_m(%s)\n", reason);
1792
1793 /* Stop data collection in all dynamic modules */
1794 for (int i = 0; i < nmodules; i++)
1795 if (modules[i]->stopDataCollection != NULL)
1796 modules[i]->stopDataCollection ();
1797
1798 /* Take a pause sample */
1799 CALL_UTIL (snprintf)(xreason, sizeof (xreason), "collector_pause(%s)", reason);
1800 __collector_ext_usage_sample (MASTER_SMPL, xreason);
1801
1802 /* Record the event in the log file */
1803 now = GETRELTIME ();
1804 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" name=\"%s\"/>\n", SP_JCMD_PAUSE,
1805 (unsigned) (now / NANOSEC), (unsigned) (now % NANOSEC), reason);
1806 __collector_expstate = EXP_PAUSED;
1807 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_PAUSED\n");
1808 collector_paused = 1;
1809 }
1810
1811 void collector_resume () __attribute__ ((weak, alias ("__collector_resume")));
1812
1813 void
1814 __collector_resume ()
1815 {
1816 TprintfT (DBG_LT0, "collector: __collector_resume()\n");
1817 __collector_expstate = EXP_OPEN;
1818 TprintfT (DBG_LT1, "collector: __collector_expstate->EXP_OPEN\n");
1819
1820 /* Record the event in the log file */
1821 hrtime_t now = GETRELTIME ();
1822 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n", SP_JCMD_RESUME,
1823 (unsigned) (now / NANOSEC), (unsigned) (now % NANOSEC));
1824 /* Take a resume sample */
1825 __collector_ext_usage_sample (MASTER_SMPL, "collector_resume");
1826
1827 /* Resume data collection in all dynamic modules */
1828 for (int i = 0; i < nmodules; i++)
1829 if (modules[i]->startDataCollection != NULL && modules_st[i] == 0)
1830 modules[i]->startDataCollection ();
1831 collector_paused = 0;
1832 }
1833
1834 static int
1835 pause_set_user_sig (char *par)
1836 {
1837 struct sigaction act;
1838 int sig = CALL_UTIL (strtol)(par, &par, 0);
1839 if (*par)
1840 {
1841 /* not end of the token */
1842 if (*par != 'p')
1843 {
1844 /* it should be a p */
1845 TprintfT (DBG_LT0, "collector: ERROR: collector_user_handler bad terminator (par=%p[0]=%d).\n",
1846 par, (int) *par);
1847 return COL_ERROR_ARGS;
1848
1849 }
1850 else
1851 {
1852 /*, it's a p, make sure next is end of token */
1853 par++;
1854 if (*par)
1855 {
1856 TprintfT (DBG_LT0, "collector: ERROR: collector_user_handler bad terminator (par=%p[0]=%d).\n",
1857 par, (int) *par);
1858 return COL_ERROR_ARGS;
1859 }
1860 else
1861 /* start off paused */
1862 collector_paused = 1;
1863 }
1864 }
1865 sigemptyset (&act.sa_mask);
1866 /* XXXX should any signals be blocked? */
1867 act.sa_sigaction = pause_handler;
1868 act.sa_flags = SA_RESTART | SA_SIGINFO;
1869 if (sigaction (sig, &act, &old_pause_handler) == -1)
1870 {
1871 TprintfT (DBG_LT0, "collector: ERROR: collector_pause_handler install failed (sig=%d).\n", sig);
1872 return COL_ERROR_ARGS;
1873 }
1874 if (old_pause_handler.sa_handler == SIG_DFL ||
1875 old_pause_handler.sa_sigaction == pause_handler)
1876 old_pause_handler.sa_handler = SIG_IGN;
1877 TprintfT (DBG_LT1, "collector: collector_pause_handler installed (sig=%d,hndlr=0x%p).\n",
1878 sig, pause_handler);
1879 __collector_pause_sig = sig;
1880 (void) __collector_log_write ("<setting %s=\"%u\"/>\n", SP_JCMD_PAUSE_SIG,
1881 __collector_pause_sig);
1882 return COL_ERROR_NONE;
1883 }
1884
1885 /* signal handler for pause/resume signal */
1886 static void
1887 pause_handler (int sig, siginfo_t *sip, void *uap)
1888 {
1889 if (sip && sip->si_code == SI_USER)
1890 {
1891 if (collector_paused == 1)
1892 {
1893 __collector_resume ();
1894 TprintfT (DBG_LT0, "collector: collector_pause_handler resumed!\n");
1895 }
1896 else
1897 {
1898 __collector_pause_m ("signal");
1899 TprintfT (DBG_LT0, "collector: collector_pause_handler paused!\n");
1900 }
1901 }
1902 else if (old_pause_handler.sa_handler != SIG_IGN)
1903 {
1904 TprintfT (DBG_LT0, "collector: collector_pause_handler forwarding signal.\n");
1905 (old_pause_handler.sa_sigaction)(sig, sip, uap);
1906 }
1907 }
1908
1909 static void
1910 get_progspec (char *retstr, int tmp_sz, char *name, int name_sz)
1911 {
1912 int procfd, count, i;
1913 *retstr = 0;
1914 tmp_sz--;
1915 *name = 0;
1916 name_sz--;
1917 procfd = CALL_UTIL (open)("/proc/self/cmdline", O_RDONLY);
1918 int getting_name = 0;
1919 if (procfd != -1)
1920 {
1921 count = CALL_UTIL (read)(procfd, retstr, tmp_sz);
1922 retstr[count] = '\0';
1923 for (i = 0; i < count; i++)
1924 {
1925 if (getting_name == 0)
1926 name[i] = retstr[i];
1927 if (retstr[i] == '\0')
1928 {
1929 getting_name = 1;
1930 if ((i + 1) < count)
1931 retstr[i] = ' ';
1932 }
1933 }
1934 CALL_UTIL (close)(procfd);
1935 }
1936 }
1937
1938 static void
1939 fs_warn ()
1940 {
1941 /* if data implies we don't care, just return */
1942 if (fs_matters == 0)
1943 return;
1944 }
1945
1946 static void
1947 close_handler (int sig, siginfo_t *sip, void *uap)
1948 {
1949 if (sip && sip->si_code == SI_USER)
1950 {
1951 TprintfT (DBG_LT0, "collector: close_handler: processing signal.\n");
1952 __collector_close_experiment ();
1953 }
1954 else if (old_close_handler.sa_handler != SIG_IGN)
1955 {
1956 TprintfT (DBG_LT0, "collector: close_handler forwarding signal.\n");
1957 (old_close_handler.sa_sigaction)(sig, sip, uap);
1958 }
1959 }
1960
1961 static void
1962 exit_handler (int sig, siginfo_t *sip, void *uap)
1963 {
1964 if (sip && sip->si_code == SI_USER)
1965 {
1966 TprintfT (DBG_LT0, "collector: exit_handler: processing signal.\n");
1967 CALL_UTIL (exit)(1);
1968 }
1969 else if (old_exit_handler.sa_handler != SIG_IGN)
1970 {
1971 TprintfT (DBG_LT0, "collector: exit_handler forwarding signal.\n");
1972 (old_exit_handler.sa_sigaction)(sig, sip, uap);
1973 }
1974 }
1975
1976 static int
1977 set_user_sig_action (char *par)
1978 {
1979 int sig = CALL_UTIL (strtol)(par, &par, 0);
1980 if (*par != '=')
1981 {
1982 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action bad separator: %s.\n", par);
1983 return COL_ERROR_ARGS;
1984 }
1985 par++;
1986 struct sigaction act;
1987 sigemptyset (&act.sa_mask);
1988 act.sa_flags = SA_RESTART | SA_SIGINFO;
1989 if (__collector_strcmp (par, "exit") == 0)
1990 {
1991 act.sa_sigaction = exit_handler;
1992 if (sigaction (sig, &act, &old_exit_handler) == -1)
1993 {
1994 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action failed: %d=%s.\n", sig, par);
1995 return COL_ERROR_ARGS;
1996 }
1997 }
1998 else if (__collector_strcmp (par, "close") == 0)
1999 {
2000 act.sa_sigaction = close_handler;
2001 if (sigaction (sig, &act, &old_close_handler) == -1)
2002 {
2003 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action failed: %d=%s.\n", sig, par);
2004 return COL_ERROR_ARGS;
2005 }
2006 }
2007 else
2008 {
2009 TprintfT (DBG_LT0, "collector: ERROR: set_user_sig_action unknown action: %d=%s.\n", sig, par);
2010 return COL_ERROR_ARGS;
2011 }
2012 __collector_log_write ("<setting signal=\"%u\" action=\"%s\"/>\n", sig, par);
2013 return COL_ERROR_NONE;
2014 }
2015
/*============================================================*/
/*
 * Routines for handling the log file
 */
/* Handle of the log file: created by log_open(), deleted by log_close().  */
static struct DataHandle *log_hndl = NULL;
/* Set to 1 by log_open() and back to 0 by log_close(); log_pause() and
   log_resume() only act while it is nonzero.  */
static int log_initted = 0;
/* __collector_log_write() writes to log_hndl only while this is > 0.  */
static int log_enabled = 0;
2023
2024 static int
2025 log_open ()
2026 {
2027 log_hndl = __collector_create_handle (SP_LOG_FILE);
2028 if (log_hndl == NULL)
2029 return COL_ERROR_LOG_OPEN;
2030 log_initted = 1;
2031 log_enabled = 1;
2032 TprintfT (DBG_LT1, "log_open()\n");
2033 return COL_ERROR_NONE;
2034 }
2035
2036 static void
2037 log_header_write (sp_origin_t origin)
2038 {
2039 __collector_log_write ("<experiment %s=\"%d.%d\">\n",
2040 SP_JCMD_VERSION, SUNPERF_VERNUM, SUNPERF_VERNUM_MINOR);
2041 __collector_log_write ("<collector>%s</collector>\n", VERSION);
2042 __collector_log_write ("</experiment>\n");
2043
2044 struct utsname sysinfo;
2045 if (uname (&sysinfo) < 0)
2046 {
2047 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\"/></event>\n", SP_JCMD_CERROR, COL_ERROR_SYSINFO, errno);
2048 __collector_log_write ("<system>\n");
2049 }
2050 else
2051 {
2052 long page_size = CALL_UTIL (sysconf)(_SC_PAGESIZE);
2053 long npages = CALL_UTIL (sysconf)(_SC_PHYS_PAGES);
2054 __collector_log_write ("<system hostname=\"%s\" arch=\"%s\" os=\"%s %s\" pagesz=\"%ld\" npages=\"%ld\">\n",
2055 sysinfo.nodename, sysinfo.machine, sysinfo.sysname, sysinfo.release, page_size, npages);
2056 }
2057
2058 //YXXX Updating this section? Check similar cut/paste code in:
2059 // collctrl.cc::Coll_Ctrl()
2060 // collector.c::log_header_write()
2061 // cpu_frequency.h::get_cpu_frequency()
2062
2063 FILE *procf = CALL_UTIL (fopen)("/proc/cpuinfo", "r");
2064 if (procf != NULL)
2065 {
2066 char temp[1024];
2067 int cpu = -1;
2068 while (CALL_UTIL (fgets)(temp, sizeof (temp), procf) != NULL)
2069 {
2070 #if ARCH(Intel)
2071 if (__collector_strStartWith (temp, "processor") == 0)
2072 {
2073 char *val = CALL_UTIL (strchr)(temp, ':');
2074 cpu = val ? CALL_UTIL (atoi)(val + 1) : -1;
2075 }
2076 // else if ( __collector_strStartWith(temp, "model") == 0
2077 // && CALL_UTIL(strstr)(temp, "name") == 0) {
2078 // char *val = CALL_UTIL(strchr)( temp, ':' );
2079 // int model = val ? CALL_UTIL(atoi)( val + 1 ) : -1;
2080 // }
2081 // else if ( __collector_strStartWith(temp, "cpu family") == 0 ) {
2082 // char *val = CALL_UTIL(strchr)( temp, ':' );
2083 // int family = val ? CALL_UTIL(atoi)( val + 1 ) : -1;
2084 // }
2085 else if (__collector_strStartWith (temp, "cpu MHz") == 0)
2086 {
2087 char *val = CALL_UTIL (strchr)(temp, ':');
2088 int mhz = val ? CALL_UTIL (atoi)(val + 1) : 0; /* reading it as int is fine */
2089 (void) __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu, mhz);
2090 }
2091 #elif ARCH(SPARC)
2092 if (__collector_strStartWith (temp, "Cpu") == 0 &&
2093 temp[3] != '\0' &&
2094 __collector_strStartWith ((CALL_UTIL (strchr)(temp + 1, 'C')) ? CALL_UTIL (strchr)(temp + 1, 'C') : (temp + 4), "ClkTck") == 0)
2095 { // sparc-Linux
2096 char *val = CALL_UTIL (strchr)(temp, ':');
2097 int mhz = 0;
2098 if (val)
2099 {
2100 unsigned long long freq;
2101 (*__collector_sscanfp) (val + 2, "%llx", &freq);
2102 mhz = (unsigned int) (((double) freq) / 1000000.0 + 0.5);
2103 }
2104 char *numend = CALL_UTIL (strchr)(temp + 1, 'C') ? CALL_UTIL (strchr)(temp + 1, 'C') : (temp + 4);
2105 *numend = '\0';
2106 cpu = CALL_UTIL (atoi)(temp + 3);
2107 __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu, mhz);
2108 }
2109 #elif defined(__aarch64__)
2110 if (__collector_strStartWith (temp, "processor") == 0)
2111 {
2112 char *val = CALL_UTIL (strchr)(temp, ':');
2113 cpu = val ? CALL_UTIL (atoi)(val + 1) : -1;
2114 if (cpu != -1)
2115 {
2116 unsigned int mhz;
2117 asm volatile("mrs %0, cntfrq_el0" : "=r" (mhz));
2118 __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu,
2119 mhz / 1000000);
2120 }
2121 }
2122 #endif
2123 }
2124 CALL_UTIL (fclose)(procf);
2125 }
2126 __collector_log_write ("</system>\n");
2127 __collector_log_write ("<process pid=\"%d\"></process>\n", getpid ());
2128 __collector_log_write ("<process ppid=\"%d\"></process>\n", getppid ());
2129 __collector_log_write ("<process pgrp=\"%d\"></process>\n", getpgrp ());
2130 __collector_log_write ("<process sid=\"%d\"></process>\n", getsid (0));
2131
2132 /* XXX -- cwd commented out
2133 It would be nice to get the current directory for the experiment,
2134 but neither method below will work--the /proc method returns a
2135 0-length string, and using getcwd will break collect on /bin/sh
2136 (as cuserid does) because of /bin/sh's private malloc
2137 omazur: readlink seems to work on Linux
2138 */
2139 /* write the current directory */
2140 char cwd[MAXPATHLEN + 1];
2141 int i = readlink ("/proc/self/cwd", cwd, sizeof (cwd));
2142 if (i >= 0)
2143 {
2144 cwd[i < sizeof (cwd) ? i : sizeof (cwd) - 1] = 0;
2145 (void) __collector_log_write ("<process cwd=\"%s\"></process>\n", cwd);
2146 }
2147 (void) __collector_log_write ("<process wsize=\"%d\"></process>\n", (int) (8 * sizeof (void *)));
2148
2149 ucontext_t ucp;
2150 ucp.uc_stack.ss_sp = NULL;
2151 ucp.uc_stack.ss_size = 0;
2152 if (CALL_UTIL (getcontext) (&ucp) == 0)
2153 {
2154 (void) __collector_log_write ("<process stackbase=\"0x%lx\"></process>\n",
2155 (unsigned long) ucp.uc_stack.ss_sp + ucp.uc_stack.ss_size);
2156 }
2157
2158 (void) __collector_log_write ("<process>%s</process>\n",
2159 origin == SP_ORIGIN_FORK ? "(fork)" : exp_progspec);
2160 __collector_libthread_T1 = 0;
2161 }
2162
2163 static void
2164 log_pause (void)
2165 {
2166 if (log_initted)
2167 log_enabled = 0;
2168 }
2169
2170 static void
2171 log_resume (void)
2172 {
2173 if (log_initted)
2174 log_enabled = 1;
2175 }
2176
2177 /* __collector_log_write -- write a line to the log file
2178 * return value:
2179 * 0 if OK
2180 * 1 if error (in creating or extending the log file)
2181 */
2182 int
2183 __collector_log_write (char *format, ...)
2184 {
2185 char buf[4096];
2186 va_list va;
2187 int rc = 0;
2188 static size_t loglen = 0;
2189
2190 va_start (va, format);
2191 char *bufptr = buf;
2192 int sz = __collector_xml_vsnprintf (bufptr, sizeof (buf), format, va);
2193 int allocated_sz = 0;
2194 va_end (va);
2195 if (sz >= sizeof (buf))
2196 {
2197 /* Allocate a new buffer.
2198 * We need this buffer only temporarily and locally.
2199 * But don't use the thread stack
2200 * since it already has buf
2201 * and is unlikely to have additonal room for something even larger than buf.
2202 */
2203 sz += 1; /* add the terminating null byte */
2204 bufptr = (char*) __collector_allocCSize (__collector_heap, sz, 0);
2205 if (bufptr)
2206 {
2207 allocated_sz = sz;
2208 va_start (va, format);
2209 sz = __collector_xml_vsnprintf (bufptr, sz, format, va);
2210 va_end (va);
2211 }
2212 }
2213 int newlen = CALL_UTIL (strlen)(bufptr);
2214 if (sz != newlen)
2215 // no need to free bufptr if we're going to abort anyhow
2216 abort ();
2217 bufptr[newlen + 1] = 0;
2218 loglen = loglen + newlen;
2219 TprintfT (DBG_LT2, "__collector_log_write len=%ld, loglen=%ld %s",
2220 (long) newlen, (long) loglen, bufptr);
2221 if (log_enabled <= 0)
2222 {
2223 #if 0
2224 /* XXX suppress log_write messages with no log file open
2225 * this is reached from SimApp dealing with the clock frequency, which it should
2226 * not be doing. For now, don't write a message.
2227 */
2228 CALL_UTIL (fprintf)(stderr, "__collector_log_write COL_ERROR_LOG_OPEN: %s", buf);
2229 #endif
2230 }
2231 else
2232 rc = __collector_write_string (log_hndl, bufptr, sz);
2233 if (allocated_sz)
2234 __collector_freeCSize (__collector_heap, (void *) bufptr, allocated_sz);
2235 return rc;
2236 }
2237
2238 static void
2239 log_close ()
2240 {
2241 log_enabled = 0;
2242 log_initted = 0;
2243 __collector_delete_handle (log_hndl);
2244 log_hndl = NULL;
2245 }
2246
2247 /*============================================================*/
2248 /*
2249 * Routines for handling the overview file
2250 */
2251 static void
2252 ovw_open ()
2253 {
2254 CALL_UTIL (strlcpy)(ovw_name, __collector_exp_dir_name, sizeof (ovw_name));
2255 CALL_UTIL (strlcat)(ovw_name, "/", sizeof (ovw_name));
2256 CALL_UTIL (strlcat)(ovw_name, SP_OVERVIEW_FILE, sizeof (ovw_name));
2257 int fd = CALL_UTIL (open)(ovw_name, O_WRONLY | O_CREAT | O_TRUNC,
2258 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
2259 if (fd < 0)
2260 {
2261 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2262 SP_JCMD_CERROR, COL_ERROR_OVWOPEN, errno, ovw_name);
2263 return;
2264 }
2265 CALL_UTIL (close)(fd);
2266 sample_mode = 1;
2267 }
2268
/* Convert a struct timeval (seconds + microseconds) into a struct timespec
 * (seconds + nanoseconds).
 *   tval  -- input
 *   value -- output; both of its fields are overwritten
 */
static __inline__ void
timeval_to_timespec(struct timeval *tval, struct timespec *value)
{
  value->tv_sec = tval->tv_sec;
  value->tv_nsec = 1000 * tval->tv_usec; /* usec -> nsec */
}
2275
/*
 * Resource usage.  /proc/<pid>/usage /proc/<pid>/lwp/<lwpid>/lwpusage
 *
 * ovw_write() zero-fills one of these, fills in the fields it can obtain
 * from getrusage() and the high-resolution clock, and appends the raw
 * structure to the overview file.  The field order and sizes therefore
 * define that file's record layout.
 * NOTE(review): the paths above suggest the layout follows the Solaris
 * /proc usage structure -- confirm before changing any field.
 */
typedef struct prusage
{
  id_t pr_lwpid;            /* lwp id.  0: process or defunct */
  int pr_count;             /* number of contributing lwps */
  timestruc_t pr_tstamp;    /* current time stamp */
  timestruc_t pr_create;    /* process/lwp creation time stamp */
  timestruc_t pr_term;      /* process/lwp termination time stamp */
  timestruc_t pr_rtime;     /* total lwp real (elapsed) time */
  timestruc_t pr_utime;     /* user level cpu time */
  timestruc_t pr_stime;     /* system call cpu time */
  timestruc_t pr_ttime;     /* other system trap cpu time */
  timestruc_t pr_tftime;    /* text page fault sleep time */
  timestruc_t pr_dftime;    /* data page fault sleep time */
  timestruc_t pr_kftime;    /* kernel page fault sleep time */
  timestruc_t pr_ltime;     /* user lock wait sleep time */
  timestruc_t pr_slptime;   /* all other sleep time */
  timestruc_t pr_wtime;     /* wait-cpu (latency) time */
  timestruc_t pr_stoptime;  /* stopped time */
  timestruc_t filltime[6];  /* filler for future expansion */
  ulong_t pr_minf;          /* minor page faults */
  ulong_t pr_majf;          /* major page faults */
  ulong_t pr_nswap;         /* swaps */
  ulong_t pr_inblk;         /* input blocks */
  ulong_t pr_oublk;         /* output blocks */
  ulong_t pr_msnd;          /* messages sent */
  ulong_t pr_mrcv;          /* messages received */
  ulong_t pr_sigs;          /* signals received */
  ulong_t pr_vctx;          /* voluntary context switches */
  ulong_t pr_ictx;          /* involuntary context switches */
  ulong_t pr_sysc;          /* system calls */
  ulong_t pr_ioch;          /* chars read and written */
  ulong_t filler[10];       /* filler for future expansion */
} prusage_t;
2312
2313 static hrtime_t starttime = 0;
2314
2315 static hrtime_t
2316 ovw_write ()
2317 {
2318 if (sample_mode == 0)
2319 return 0;
2320 int fd;
2321 int res;
2322 struct prusage usage;
2323 struct rusage rusage;
2324 hrtime_t hrt, delta;
2325
2326 /* Fill in the prusage structure with info from getrusage() */
2327 hrt = collector_interface.getHiResTime ();
2328 if (starttime == 0)
2329 starttime = hrt;
2330 res = getrusage (RUSAGE_SELF, &rusage);
2331 if (res != 0)
2332 {
2333 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2334 SP_JCMD_CERROR, COL_ERROR_OVWREAD, errno, ovw_name);
2335 return ( hrt);
2336 }
2337
2338 CALL_UTIL (memset)(&usage, 0, sizeof (struct prusage));
2339 usage.pr_lwpid = getpid ();
2340 usage.pr_count = 1;
2341 usage.pr_tstamp.tv_sec = hrt / NANOSEC;
2342 usage.pr_tstamp.tv_nsec = hrt % NANOSEC;
2343 usage.pr_create.tv_sec = starttime / NANOSEC;
2344 usage.pr_create.tv_nsec = starttime % NANOSEC;
2345 delta = hrt - starttime;
2346 usage.pr_rtime.tv_sec = delta / NANOSEC;
2347 usage.pr_rtime.tv_nsec = delta % NANOSEC;
2348 timeval_to_timespec (&rusage.ru_utime, &usage.pr_utime);
2349 timeval_to_timespec (&rusage.ru_stime, &usage.pr_stime);
2350
2351 /* make sure that user- and system cpu time are not negative */
2352 if (ts2hrt (usage.pr_utime) < 0)
2353 {
2354 usage.pr_utime.tv_sec = 0;
2355 usage.pr_utime.tv_nsec = 0;
2356 }
2357 if (ts2hrt (usage.pr_stime) < 0)
2358 {
2359 usage.pr_stime.tv_sec = 0;
2360 usage.pr_stime.tv_nsec = 0;
2361 }
2362
2363 /* fill in other fields */
2364 usage.pr_minf = (ulong_t) rusage.ru_minflt;
2365 usage.pr_majf = (ulong_t) rusage.ru_majflt;
2366 usage.pr_nswap = (ulong_t) rusage.ru_nswap;
2367 usage.pr_inblk = (ulong_t) rusage.ru_inblock;
2368 usage.pr_oublk = (ulong_t) rusage.ru_oublock;
2369 usage.pr_msnd = (ulong_t) rusage.ru_msgsnd;
2370 usage.pr_mrcv = (ulong_t) rusage.ru_msgrcv;
2371 usage.pr_sigs = (ulong_t) rusage.ru_nsignals;
2372 usage.pr_vctx = (ulong_t) rusage.ru_nvcsw;
2373 usage.pr_ictx = (ulong_t) rusage.ru_nivcsw;
2374
2375 fd = CALL_UTIL (open)(ovw_name, O_WRONLY | O_APPEND);
2376 if (fd < 0)
2377 {
2378 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2379 SP_JCMD_CERROR, COL_ERROR_OVWOPEN, errno, ovw_name);
2380 return ( ts2hrt (usage.pr_tstamp));
2381 }
2382
2383 CALL_UTIL (lseek)(fd, 0, SEEK_END);
2384 res = CALL_UTIL (write)(fd, &usage, sizeof (usage));
2385 CALL_UTIL (close)(fd);
2386 if (res != sizeof (usage))
2387 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2388 SP_JCMD_CERROR, COL_ERROR_OVWWRITE, errno, ovw_name);
2389 return (hrt);
2390 }
2391
2392 void
2393 __collector_dlog (int tflag, int level, char *format, ...)
2394 {
2395 if ((tflag & SP_DUMP_FLAG) == 0)
2396 {
2397 if (level > __collector_tracelevel)
2398 return;
2399 }
2400 else if ((tflag & collector_debug_opt) == 0)
2401 return;
2402
2403 /* In most cases this allocation should suffice */
2404 int bufsz = CALL_UTIL (strlen)(format) + 128;
2405 char *buf = (char*) alloca (bufsz);
2406 char *p = buf;
2407 int left = bufsz;
2408 if ((tflag & SP_DUMP_NOHEADER) == 0)
2409 {
2410 p += CALL_UTIL (snprintf) (p, left, "P%ld,L%02lu,t%02lu",
2411 (long) getpid (), (unsigned long) __collector_lwp_self (),
2412 (unsigned long) (__collector_no_threads ? 0 : __collector_thr_self ()));
2413 left = bufsz - (p - buf);
2414 if (tflag)
2415 {
2416 hrtime_t ts = GETRELTIME ();
2417 p += CALL_UTIL (snprintf)(p, left, " %u.%09u ", (unsigned) (ts / NANOSEC), (unsigned) (ts % NANOSEC));
2418 }
2419 else
2420 p += CALL_UTIL (snprintf)(p, left, ": ");
2421 left = bufsz - (p - buf);
2422 }
2423
2424 va_list va;
2425 va_start (va, format);
2426 int nbufsz = CALL_UTIL (vsnprintf)(p, left, format, va);
2427 va_end (va);
2428
2429 if (nbufsz >= left)
2430 {
2431 /* Allocate a new buffer */
2432 nbufsz += 1; /* add the terminating null byte */
2433 char *nbuf = (char*) alloca (nbufsz + (p - buf));
2434 __collector_memcpy (nbuf, buf, p - buf);
2435 p = nbuf + (p - buf);
2436
2437 va_start (va, format);
2438 nbufsz = CALL_UTIL (vsnprintf)(p, nbufsz, format, va);
2439 va_end (va);
2440 buf = nbuf;
2441 }
2442 CALL_UTIL (write)(2, buf, CALL_UTIL (strlen)(buf));
2443 }
2444
2445 /*============================================================*/
2446 #if ! ARCH(SPARC) /* !sparc-Linux */
2447 /*
2448 * Routines for handling _exit and _Exit
2449 */
2450 /*------------------------------------------------------------- _exit */
2451
/* Addresses of the underlying _exit/_Exit; looked up with dlsym() by
   __collector_exit() and __collector_Exit().  */
static void (*__real__exit) (int status) = NULL; /* libc only: _exit */
static void (*__real__Exit) (int status) = NULL; /* libc only: _Exit */
/* _exit and _Exit are declared as weak aliases of the collector's own
   versions.  */
void _exit () __attribute__ ((weak, alias ("__collector_exit")));
void _Exit () __attribute__ ((weak, alias ("__collector_Exit")));
2456
2457 void
2458 __collector_exit (int status)
2459 {
2460 if (NULL_PTR (_exit))
2461 {
2462 __real__exit = dlsym (RTLD_NEXT, "_exit");
2463 if (__real__exit == NULL)
2464 __real__exit = dlsym (RTLD_DEFAULT, "_exit");
2465 }
2466 TprintfT (DBG_LT1, "__collector_exit() interposing @0x%p __real__exit\n", __real__exit);
2467 __collector_terminate_expt ();
2468 TprintfT (DBG_LT1, "__collector_exit(): experiment terminated\n");
2469 CALL_REAL (_exit)(status); // this will exit the process
2470 }
2471
2472 void
2473 __collector_Exit (int status)
2474 {
2475 if (NULL_PTR (_Exit))
2476 {
2477 __real__Exit = dlsym (RTLD_NEXT, "_Exit");
2478 if (__real__Exit == NULL)
2479 __real__Exit = dlsym (RTLD_DEFAULT, "_exit");
2480 }
2481 TprintfT (DBG_LT1, "__collector_Exit() interposing @0x%p __real__Exit\n", __real__Exit);
2482 __collector_terminate_expt ();
2483 TprintfT (DBG_LT1, "__collector_Exit(): experiment terminated\n");
2484 CALL_REAL (_Exit)(status); // this will exit the process
2485 }
2486 #endif /* !sparc-Linux */