/* Copyright (C) 2021 Free Software Foundation, Inc.

   This file is part of GNU Binutils.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, 51 Franklin Street - Fifth Floor, Boston,
   MA 02110-1301, USA. */
26 #include <stdlib.h> /* exit() */
27 #include <sys/param.h>
28 #include <sys/utsname.h> /* struct utsname */
29 #include <sys/resource.h>
30 #include <sys/syscall.h> /* system call fork() */
33 #include "collector.h"
34 #include "descendants.h"
35 #include "gp-experiment.h"
37 #include "cc_libcollector.h"
40 /* TprintfT(<level>,...) definitions. Adjust per module as needed */
41 #define DBG_LT0 0 // for high-level configuration, unexpected errors/warnings
42 #define DBG_LT1 1 // for configuration details, warnings
46 typedef unsigned long ulong_t
;
48 extern char **environ
;
49 extern void __collector_close_experiment ();
50 extern int __collector_set_size_limit (char *par
);
52 /* ------- internal function prototypes ---------- */
53 CollectorModule
__collector_register_module (ModuleInterface
*modint
);
54 static void write_sample (char *name
);
55 static const char *__collector_get_params ();
56 static const char *__collector_get_expdir ();
57 static FrameInfo
__collector_getUserCtx (CollectorModule modl
, HiResTime ts
, int mode
, void *arg
);
58 static FrameInfo
__collector_getUID1 (CM_Array
*arg
);
59 static int __collector_writeMetaData (CollectorModule modl
, char *format
, ...);
60 static int __collector_writeDataRecord (CollectorModule modl
, struct Common_packet
*pckt
);
61 static int __collector_writeDataPacket (CollectorModule modl
, struct CM_Packet
*pckt
);
62 static void *allocCSize (struct Heap
*, unsigned, int);
63 static void freeCSize (struct Heap
*, void*, unsigned);
64 static void *allocVSize (struct Heap
*, unsigned);
65 static void *reallocVSize (struct Heap
*, void*, unsigned);
67 static int collector_create_expr_dir (const char *new_exp_name
);
68 static int collector_create_expr_dir_lineage (const char *parent_exp_name
);
69 static int collector_exp_dir_append_x (int linenum
, const char *parent_exp_name
);
70 static int collector_tail_init (const char *parent_exp_name
);
71 static int log_open ();
72 static void log_header_write (sp_origin_t origin
);
73 static void log_pause ();
74 static void log_resume ();
75 static void fs_warn ();
76 static void log_close ();
77 static void get_progspec (char *cmdline
, int tmp_sz
, char *progname
, int sz
);
78 static void sample_handler (int, siginfo_t
*, void*);
79 static int sample_set_interval (char *);
80 static int set_duration (char *);
81 static int sample_set_user_sig (char *);
82 static void pause_handler (int, siginfo_t
*, void*);
83 static int pause_set_user_sig (char *);
84 static int set_user_sig_action (char*);
85 static void ovw_open ();
86 static hrtime_t
ovw_write ();
88 /* ------- global data controlling the collector's behavior -------- */
90 static CollectorInterface collector_interface
={
91 __collector_register_module
, /* registerModule */
92 __collector_get_params
, /* getParams */
93 __collector_get_expdir
, /* getExpDir */
94 __collector_log_write
, /* writeLog */
95 __collector_getUserCtx
, /* getFrameInfo */
96 __collector_getUID1
, /* getUID */
97 __collector_getUID
, /* getUID2 */
98 __collector_getStackTrace
, /* getStackTrace */
99 __collector_writeMetaData
, /* writeMetaData */
100 __collector_writeDataRecord
, /* writeDataRecord */
101 __collector_writeDataPacket
, /* writeDataPacket */
102 write_sample
, /* write_sample */
103 get_progspec
, /* get_progspec */
104 __collector_open_experiment
, /* open_experiment */
105 NULL
, /* getHiResTime */
106 __collector_newHeap
, /* newHeap */
107 __collector_deleteHeap
, /* deleteHeap */
108 allocCSize
, /* allocCSize */
109 freeCSize
, /* freeCSize */
110 allocVSize
, /* allocVSize */
111 reallocVSize
, /* reallocVSize */
112 __collector_tsd_create_key
, /* createKey */
113 __collector_tsd_get_by_key
, /* getKey */
114 __collector_dlog
/* writeDebugInfo */
117 #define MAX_MODULES 32
118 static ModuleInterface
*modules
[MAX_MODULES
];
119 static int modules_st
[MAX_MODULES
];
120 static void *modules_hndl
[MAX_MODULES
];
121 static volatile int nmodules
= 0;
123 /* flag set non-zero, if data collected implies a filesystem warning is appropriate */
124 static int fs_matters
= 0;
125 static const char *collector_params
= NULL
;
126 static const char *project_home
= NULL
;
127 Heap
*__collector_heap
= NULL
;
128 int __collector_no_threads
;
129 int __collector_libthread_T1
= -1;
131 static volatile int collector_paused
= 0;
133 int __collector_tracelevel
= -1;
134 static int collector_debug_opt
= 0;
136 hrtime_t __collector_next_sample
= 0;
137 int __collector_sample_period
= 0; /* if non-zero, periodic sampling is enabled */
139 hrtime_t __collector_delay_start
= 0; /* if non-zero, delay before starting data */
140 hrtime_t __collector_terminate_time
= 0; /* if non-zero, fixed duration run */
142 static collector_mutex_t __collector_glob_lock
= COLLECTOR_MUTEX_INITIALIZER
;
143 static collector_mutex_t __collector_open_guard
= COLLECTOR_MUTEX_INITIALIZER
;
144 static collector_mutex_t __collector_close_guard
= COLLECTOR_MUTEX_INITIALIZER
;
145 static collector_mutex_t __collector_sample_guard
= COLLECTOR_MUTEX_INITIALIZER
;
146 static collector_mutex_t __collector_suspend_guard
= COLLECTOR_MUTEX_INITIALIZER
;
147 static collector_mutex_t __collector_resume_guard
= COLLECTOR_MUTEX_INITIALIZER
;
148 char __collector_exp_dir_name
[MAXPATHLEN
+ 1] = ""; /* experiment directory */
149 int __collector_size_limit
= 0;
151 static char *archive_mode
= NULL
;
153 volatile sp_state_t __collector_expstate
= EXP_INIT
;
154 static int exp_origin
= SP_ORIGIN_LIBCOL_INIT
;
155 static int exp_open
= 0;
156 int __collector_exp_active
= 0;
157 static int paused_when_suspended
= 0;
158 static int exp_initted
= 0;
159 static char exp_progspec
[_POSIX_ARG_MAX
+ 1]; /* program cmdline. includes args */
160 static char exp_progname
[_POSIX_ARG_MAX
+ 1]; /* program name == argv[0] */
162 hrtime_t __collector_start_time
= 0;
163 static time_t start_sec_time
= 0;
165 /* Sample related data */
166 static int sample_installed
= 0; /* 1 if the sample signal handler installed */
167 static int sample_mode
= 0; /* dynamically turns sample record writing on/off */
168 static int sample_number
= 0; /* index of the current sample record */
169 static struct sigaction old_sample_handler
;
170 int __collector_sample_sig
= -1; /* user-specified sample signal */
171 int __collector_sample_sig_warn
= 0; /* non-zero if warning already given */
173 /* Pause/resume related data */
174 static struct sigaction old_pause_handler
;
175 int __collector_pause_sig
= -1; /* user-specified pause signal */
176 int __collector_pause_sig_warn
= 0; /* non-zero if warning already given */
178 static struct sigaction old_close_handler
;
179 static struct sigaction old_exit_handler
;
181 /* Experiment files */
182 static char ovw_name
[MAXPATHLEN
]; /* Overview data file name */
184 /* macro to convert a timestruc to hrtime_t */
185 #define ts2hrt(x) ((hrtime_t)(x).tv_sec*NANOSEC + (hrtime_t)(x).tv_nsec)
191 char *s
= CALL_UTIL (getenv
)("SP_COLLECTOR_TRACELEVEL");
193 __collector_tracelevel
= CALL_UTIL (atoi
)(s
);
194 TprintfT (DBG_LT0
, "collector: SP_COLLECTOR_TRACELEVEL=%d\n", __collector_tracelevel
);
195 s
= CALL_UTIL (getenv
)("SP_COLLECTOR_DEBUG");
197 collector_debug_opt
= CALL_UTIL (atoi
)(s
) & ~(SP_DUMP_TIME
| SP_DUMP_FLAG
);
201 static CollectorInterface
*
202 get_collector_interface ()
204 if (collector_interface
.getHiResTime
== NULL
)
205 collector_interface
.getHiResTime
= __collector_gethrtime
;
206 return &collector_interface
;
/*
 * __collector_module_init is an alternate method to initialize
 * dynamic collector modules (er_heap, er_sync, er_iotrace, er_mpi, tha).
 * Every module that needs to register itself with libcollector
 * before the experiment is open implements its own global
 * __collector_module_init and makes sure the next one is called.
 */
217 collector_module_init (CollectorInterface
*col_intf
)
221 ModuleInitFunc next_init
= (ModuleInitFunc
) dlsym (RTLD_DEFAULT
, "__collector_module_init");
222 if (next_init
!= NULL
)
225 next_init (col_intf
);
227 TprintfT (DBG_LT1
, "collector_module_init: %d modules\n", nmodules
);
230 /* Routines concerned with general experiment start and stop */
232 /* initialization -- init section routine -- called when libcollector loaded */
233 static void collector_init () __attribute__ ((constructor
));
238 if (__collector_util_init () != 0)
239 /* we can't do anything without various utility functions */
244 * Unconditionally install the SIGPROF handler
245 * to process signals originated in dtracelets.
247 __collector_sigprof_install ();
249 /* Initialize all preloaded modules */
250 collector_module_init (get_collector_interface ());
252 /* determine experiment name */
253 char *exp
= CALL_UTIL (getenv
)("SP_COLLECTOR_EXPNAME");
254 if ((exp
== NULL
) || (CALL_UTIL (strlen
)(exp
) == 0))
256 TprintfT (DBG_LT0
, "collector_init: SP_COLLECTOR_EXPNAME undefined - no experiment to start\n");
257 /* not set -- no experiment to run */
261 TprintfT (DBG_LT1
, "collector_init: found SP_COLLECTOR_EXPNAME = %s\n", exp
);
263 /* determine the data descriptor for the experiment */
264 char *params
= CALL_UTIL (getenv
)("SP_COLLECTOR_PARAMS");
267 TprintfT (0, "collector_init: SP_COLLECTOR_PARAMS undefined - no experiment to start\n");
271 /* now do the real open of the experiment */
272 if (__collector_open_experiment (exp
, params
, SP_ORIGIN_LIBCOL_INIT
))
274 TprintfT (0, "collector_init: __collector_open_experiment failed\n");
275 /* experiment open failed, close it */
276 __collector_close_experiment ();
283 __collector_register_module (ModuleInterface
*modint
)
285 TprintfT (DBG_LT1
, "collector: module %s calls for registration.\n",
286 modint
->description
== NULL
? "(null)" : modint
->description
);
288 return COLLECTOR_MODULE_ERR
;
289 if (nmodules
>= MAX_MODULES
)
290 return COLLECTOR_MODULE_ERR
;
291 if (modint
->initInterface
&&
292 modint
->initInterface (get_collector_interface ()))
293 return COLLECTOR_MODULE_ERR
;
294 int idx
= nmodules
++;
295 modules
[idx
] = modint
;
298 if (exp_open
&& modint
->openExperiment
)
300 modules_st
[idx
] = modint
->openExperiment (__collector_exp_dir_name
);
301 if (modules_st
[idx
] == COL_ERROR_NONE
&& modules
[idx
]->description
!= NULL
)
303 modules_hndl
[idx
] = __collector_create_handle (modules
[idx
]->description
);
304 if (modules_hndl
[idx
] == NULL
)
305 modules_st
[idx
] = -1;
308 if (__collector_exp_active
&& collector_paused
== 0 &&
309 modint
->startDataCollection
&& modules_st
[idx
] == 0)
310 modint
->startDataCollection ();
311 TprintfT (DBG_LT1
, "collector: module %s (%d) registered.\n",
312 modint
->description
== NULL
? "(null)" : modint
->description
, idx
);
313 return (CollectorModule
) idx
;
317 __collector_get_params ()
319 return collector_params
;
323 __collector_get_expdir ()
325 return __collector_exp_dir_name
;
329 __collector_getUserCtx (CollectorModule modl
, HiResTime ts
, int mode
, void *arg
)
331 return __collector_get_frame_info (ts
, mode
, arg
);
335 __collector_getUID1 (CM_Array
*arg
)
337 return __collector_getUID (arg
, (FrameInfo
) 0);
341 __collector_writeMetaData (CollectorModule modl
, char *format
, ...)
343 if (modl
< 0 || modl
>= nmodules
|| modules
[modl
]->description
== NULL
)
345 TprintfT (DBG_LT0
, "__collector_writeMetaData(): bad module: %d\n", modl
);
348 char fname
[MAXPATHLEN
+ 1];
349 CALL_UTIL (strlcpy
)(fname
, __collector_exp_dir_name
, sizeof (fname
));
350 CALL_UTIL (strlcat
)(fname
, "/metadata.", sizeof (fname
));
351 CALL_UTIL (strlcat
)(fname
, modules
[modl
]->description
, sizeof (fname
));
352 CALL_UTIL (strlcat
)(fname
, ".xml", sizeof (fname
));
353 int fd
= CALL_UTIL (open
)(fname
, O_CREAT
| O_WRONLY
| O_APPEND
,
354 S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IROTH
);
357 TprintfT (DBG_LT0
, "__collector_writeMetaData(): can't open file: %s\n", fname
);
363 va_start (va
, format
);
364 int sz
= __collector_xml_vsnprintf (bufptr
, sizeof (buf
), format
, va
);
367 if (sz
>= sizeof (buf
))
369 /* Allocate a new buffer */
370 sz
+= 1; /* add the terminating null byte */
371 bufptr
= (char*) alloca (sz
);
373 va_start (va
, format
);
374 sz
= __collector_xml_vsnprintf (bufptr
, sz
, format
, va
);
377 CALL_UTIL (write
)(fd
, bufptr
, sz
);
378 CALL_UTIL (close
)(fd
);
379 return COL_ERROR_NONE
;
382 /* check that the header fields are filled-in, and then call __collector_writeDataPacket */
384 __collector_writeDataRecord (CollectorModule modl
, struct Common_packet
*pckt
)
386 return __collector_write_record (modules_hndl
[modl
], pckt
);
390 __collector_writeDataPacket (CollectorModule modl
, struct CM_Packet
*pckt
)
392 return __collector_write_packet (modules_hndl
[modl
], pckt
);
396 allocCSize (struct Heap
*heap
, unsigned sz
, int log
)
398 return __collector_allocCSize (heap
? heap
: __collector_heap
, sz
, log
);
402 freeCSize (struct Heap
*heap
, void *ptr
, unsigned sz
)
404 __collector_freeCSize (heap
? heap
: __collector_heap
, ptr
, sz
);
408 allocVSize (struct Heap
*heap
, unsigned sz
)
410 return __collector_allocVSize (heap
? heap
: __collector_heap
, sz
);
414 reallocVSize (struct Heap
*heap
, void *ptr
, unsigned sz
)
416 return __collector_reallocVSize (heap
? heap
: __collector_heap
, ptr
, sz
);
420 get_gm_time (struct tm
*tp
)
423 Note that glibc contains a function of the same purpose named `timegm'.
424 But obviously, it is not universally available.
426 Some implementations of mktime return -1 for the nonexistent localtime hour
427 at the beginning of DST. In this event, use 'mktime(tm - 1hr) + 3600'.
429 tm_isdst is set to 0 to force mktime to introduce a consistent offset
430 (the non DST offset) since tm and tm+o might be on opposite sides of a DST change.
434 gmtime_r(t+o) --> tm+o
435 mktime(tm+o) --> t+2o
436 t = t+o - (t+2o - t+o)
439 time_t tl
= CALL_UTIL (mktime
)(tp
);
444 tl
= CALL_UTIL (mktime
)(&stm
);
450 (void) (CALL_UTIL (gmtime_r
)(&tl
, &stm
));
452 time_t tb
= CALL_UTIL (mktime
)(&stm
);
456 tb
= CALL_UTIL (mktime
)(&stm
);
461 return (tl
- (tb
- tl
));
465 log_write_event_run ()
467 /* get the gm and local time */
469 CALL_UTIL (gmtime_r
)(&start_sec_time
, &start_stm
);
470 time_t start_gm_time
= get_gm_time (&start_stm
);
471 time_t lcl_time
= CALL_UTIL (mktime
)(&start_stm
);
472 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" time=\"%lld\" tm_zone=\"%lld\"/>\n",
474 (unsigned) (__collector_start_time
/ NANOSEC
),
475 (unsigned) (__collector_start_time
% NANOSEC
),
476 (long long) start_gm_time
,
477 (long long) (lcl_time
- start_gm_time
));
481 m_dlopen (const char *filename
, int flag
)
483 void *p
= dlopen (filename
, flag
);
484 TprintfT (DBG_LT1
, "collector.c: dlopen(%s, %d) returns %p\n", filename
, flag
, p
);
487 /* real routine to open an experiment
488 * called by collector_init from libcollector init section
489 * called by __collector_start_experiment when a child is forked */
491 __collector_open_experiment (const char *exp
, const char *params
, sp_origin_t origin
)
495 char *duration_string
= NULL
;
498 int record_this_experiment
= 1;
500 static char buffer
[32];
503 /* experiment already opened */
504 TprintfT (0, "collector: ERROR: Attempt to open opened experiment\n");
505 return COL_ERROR_EXPOPEN
;
507 __collector_start_time
= collector_interface
.getHiResTime ();
508 TprintfT (DBG_LT1
, "\n\t\t__collector_open_experiment(SP_COLLECTOR_EXPNAME=%s, params=%s, origin=%d); setting start_time\n",
509 exp
, params
, origin
);
511 __collector_env_printall ("__collector_open_experiment", environ
);
513 TprintfT (DBG_LT1
, "collector_open_experiment found environ == NULL)\n");
516 * Recheck sigprof handler
517 * XXXX Bug 18177509 - additional sigprof signal kills target program
519 __collector_sigprof_install ();
521 collector_params
= params
;
523 /* Determine which of the three possible threading models:
525 * multi-LWP (no threads)
527 * is the one the target is actually using.
529 * we really only need to distinguish between first two
530 * and the third. The thr_main() trick does exactly that.
531 * is the one the target is actually using.
533 * __collector_no_threads applies to all signal handlers,
534 * and must be set before signal handlers are installed.
536 __collector_no_threads
= 0;
537 __collector_exp_dir_name
[0] = 0;
541 /* create global heap */
542 if (__collector_heap
== NULL
)
544 __collector_heap
= __collector_newHeap ();
545 if (__collector_heap
== NULL
)
547 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment COLERROR_NOZMEM 1\n");
548 return COL_ERROR_NOZMEM
;
551 //check whether is origin is collect
552 char * envar
= CALL_UTIL (getenv
)("SP_COLLECTOR_ORIGIN_COLLECT");
553 TprintfT (DBG_LT1
, "__collector_open_experiment SP_COLLECTOR_ORIGIN_COLLECT = '%s'\n",
554 (envar
== NULL
) ? "NULL" : envar
);
556 exp_origin
= SP_ORIGIN_COLLECT
;
558 //check if this is the founder process
559 is_founder
= getpid ();
560 if (origin
!= SP_ORIGIN_DBX_ATTACH
)
562 envar
= CALL_UTIL (getenv
)("SP_COLLECTOR_FOUNDER");
564 is_founder
= CALL_UTIL (atoi
)(envar
);
567 if (is_founder
!= getpid ())
569 TprintfT (0, "__collector_open_experiment SP_COLLECTOR_FOUNDER=%d != pid(%d)\n",
570 is_founder
, getpid ());
571 //CALL_UTIL(fprintf)(stderr, "__collector_open_experiment SP_COLLECTOR_FOUNDER=%d != pid(%d); not recording experiment\n",
572 //is_founder, getpid() );
573 //return COL_ERROR_UNEXP_FOUNDER;
574 is_founder
= 0; // Special case (CR 22917352)
576 /* clear FOUNDER for descendant experiments */
577 TprintfT (0, "__collector_open_experiment setting SP_COLLECTOR_FOUNDER=0\n");
578 CALL_UTIL (strlcpy
)(buffer
, "SP_COLLECTOR_FOUNDER=0", sizeof (buffer
));
579 CALL_UTIL (putenv
)(buffer
);
583 /* Set up fork/exec interposition (requires __collector_heap). */
584 /* Determine if "collect -F" specification enables this subexperiment */
585 get_progspec (exp_progspec
, sizeof (exp_progspec
), exp_progname
, sizeof (exp_progname
));
587 /* convert the returned exp_progname to a basename */
588 const char * base_name
= __collector_strrchr (exp_progname
, '/');
589 if (base_name
== NULL
)
590 base_name
= exp_progname
;
592 base_name
= base_name
+ 1;
593 err
= __collector_ext_line_init (&record_this_experiment
, exp_progspec
, base_name
);
594 if (err
!= COL_ERROR_NONE
)
596 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment COLERROR: %d\n", err
);
600 /* Due to the fix of bug 15691122, we need to initialize unwind to make
601 * the function __collector_ext_return_address() work for dlopen interposition.
603 if (!record_this_experiment
&& !is_founder
)
605 TprintfT (DBG_LT0
, "__collector_open_experiment: NOT creating experiment. (is_founder=%d, record=%d)\n",
606 is_founder
, record_this_experiment
);
607 return collector_tail_init (exp
);
609 TprintfT (DBG_LT0
, "__collector_open_experiment: is_founder=%d, record=%d\n",
610 is_founder
, record_this_experiment
);
611 if (is_founder
|| origin
== SP_ORIGIN_FORK
)
613 CALL_UTIL (strlcpy
)(__collector_exp_dir_name
, exp
, sizeof (__collector_exp_dir_name
));
614 if (origin
== SP_ORIGIN_FORK
)
615 { /*create exp dir for fork-child*/
616 if (collector_create_expr_dir (__collector_exp_dir_name
))
618 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment: COL_ERROR_BADDIR 1: `%s'\n", exp
);
619 return COL_ERROR_BADDIR
;
624 {/* founder/fork-child will already have created experiment dir, but exec/combo descendants must do so now */
625 if (collector_create_expr_dir_lineage (exp
))
627 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment: COL_ERROR_BADDIR 2: `%s'\n", exp
);
628 return COL_ERROR_BADDIR
;
630 static char exp_name_env
[MAXPATHLEN
+ 1];
631 TprintfT (DBG_LT1
, "collector_open_experiment: setting SP_COLLECTOR_EXPNAME to %s\n", __collector_exp_dir_name
);
632 CALL_UTIL (snprintf
)(exp_name_env
, sizeof (exp_name_env
), "SP_COLLECTOR_EXPNAME=%s", __collector_exp_dir_name
);
633 CALL_UTIL (putenv
)(exp_name_env
);
635 /* Check that the name is that of a directory (new structure) */
636 DIR *expDir
= CALL_UTIL (opendir
)(__collector_exp_dir_name
);
640 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment: COL_ERROR_BADDIR 3: `%s'\n", exp
);
641 return COL_ERROR_BADDIR
;
643 CALL_UTIL (closedir
)(expDir
);
645 if (CALL_UTIL (access
)(__collector_exp_dir_name
, W_OK
))
647 TprintfT (0, "collector: ERROR: access error: errno=%d\n", errno
);
648 if ((errno
== EACCES
) || (errno
== EROFS
))
650 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment: COL_ERROR_DIRPERM: `%s'\n", exp
);
651 TprintfT (DBG_LT0
, "collector: ERROR: experiment directory `%s' is not writeable\n",
652 __collector_exp_dir_name
);
653 return COL_ERROR_DIRPERM
;
657 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment: COL_ERROR_BADDIR 4: `%s'\n", exp
);
658 return COL_ERROR_BADDIR
;
662 /* reset the paused flag */
663 collector_paused
= (origin
== SP_ORIGIN_FORK
? paused_when_suspended
: 0);
665 /* mark the experiment as opened */
666 __collector_expstate
= EXP_OPEN
;
667 TprintfT (DBG_LT1
, "collector: __collector_expstate->EXP_OPEN\n");
669 /* open the log file */
671 if (err
!= COL_ERROR_NONE
)
673 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment: COL_ERROR_LOG_OPEN\n");
674 return COL_ERROR_LOG_OPEN
;
676 if (origin
!= SP_ORIGIN_GENEXP
&& origin
!= SP_ORIGIN_KERNEL
)
677 log_header_write (origin
);
679 /* Make a copy of params so that we can modify the string */
680 int paramsz
= CALL_UTIL (strlen
)(params
) + 1;
681 buf
= (char*) alloca (paramsz
);
684 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment: COL_ERROR_ARGS2BIG: %s\n", params
);
685 TprintfT (DBG_LT0
, "collector: ERROR: experiment parameter `%s' is too long\n", params
);
686 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n",
687 SP_JCMD_CERROR
, COL_ERROR_ARGS2BIG
);
688 return COL_ERROR_ARGS2BIG
;
690 CALL_UTIL (strlcpy
)(buf
, params
, paramsz
);
692 /* create directory for archives (if founder) */
693 char archives
[MAXPATHLEN
];
694 CALL_UTIL (snprintf
)(archives
, MAXPATHLEN
, "%s/%s", __collector_exp_dir_name
,
698 mode_t dmode
= S_IRUSR
| S_IWUSR
| S_IXUSR
| S_IRGRP
| S_IXGRP
| S_IROTH
| S_IXOTH
;
699 if ((CALL_UTIL (mkdir
)(archives
, dmode
) != 0) && (errno
!= EEXIST
))
701 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment: COL_ERROR_MKDIR: %s: errno = %d\n", archives
, errno
);
702 TprintfT (0, "collector: ERROR: mkdir(%s) failed: errno = %d\n", archives
, errno
);
703 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">mkdir(%s): errno=%d</event>\n",
704 SP_JCMD_COMMENT
, COL_COMMENT_NONE
, archives
, errno
);
705 /* this is not a fatal error currently */
708 TprintfT (DBG_LT1
, "collector: archive mkdir(%s) succeeded\n", archives
);
711 /* initialize the segments map and mmap interposition */
712 if (origin
!= SP_ORIGIN_GENEXP
&& origin
!= SP_ORIGIN_KERNEL
)
714 if ((err
= __collector_ext_mmap_install (1)) != COL_ERROR_NONE
)
716 __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", SP_JCMD_CERROR
, err
);
721 /* open the overview file for sample data */
722 if (origin
!= SP_ORIGIN_GENEXP
)
725 /* initialize TSD module (note: relies on __collector_heap) */
726 if (__collector_tsd_init () != 0)
728 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment: COL_ERROR_TSD_INIT\n");
729 __collector_log_write ("<event kind=\"%s\" id=\"%d\">TSD could not be initialized</event>\n", SP_JCMD_CERROR
, COL_ERROR_TSD_INIT
);
730 return COL_ERROR_TSD_INIT
;
733 /* experiment is initialized; allow pause/resume/close */
736 // 24935305 should not use SIGPROF if collect -p -t and -S are all off
737 /* (check here if -t or -S is on; -p is checked later) */
738 if (((params
[0] == 't' || params
[0] == 'S') && params
[1] == ':')
739 || CALL_UTIL (strstr
)(params
, ";t:")
740 || CALL_UTIL (strstr
)(params
, ";S:"))
742 /* set a default time to 100 ms.; use negative value to force setting */
743 TprintfT (DBG_LT1
, "collector: open_experiment setting timer to 100000\n");
744 __collector_ext_itimer_set (-100000);
747 /* call open for all dynamic modules */
749 for (i
= 0; i
< nmodules
; i
++)
751 if (modules
[i
]->openExperiment
!= NULL
)
753 modules_st
[i
] = modules
[i
]->openExperiment (__collector_exp_dir_name
);
754 if (modules_st
[i
] == COL_ERROR_NONE
&& modules
[i
]->description
!= NULL
)
756 modules_hndl
[i
] = __collector_create_handle (modules
[i
]->description
);
757 if (modules_hndl
[i
] == NULL
)
761 /* check to see if anyone closed the experiment */
764 CALL_UTIL (fprintf
)(stderr
, "__collector_open_experiment: COL_ERROR_EXP_OPEN\n");
765 __collector_log_write ("<event kind=\"%s\" id=\"%d\">Experiment closed prematurely</event>\n", SP_JCMD_CERROR
, COL_ERROR_EXPOPEN
);
766 return COL_ERROR_EXPOPEN
;
770 /* initialize for subsequent stack unwinds */
771 __collector_ext_unwind_init (1);
772 TprintfT (DBG_LT0
, "__collector_open_experiment(); module init done, params=%s\n",
775 /* now parse the data descriptor */
776 /* The parameter string is a series of specifiers,
777 * each of which is of the form:
779 * key is a single letter, the : and ; are mandatory,
780 * and param is a string which may be zero-length, and
781 * which contains any character except a null-byte or ;
782 * param is interpreted by the handler for the particular key
791 /* ensure that it's followed by a colon */
794 TprintfT (0, "collector: ERROR: parameter %c is not followed by a colon\n", key
);
795 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR
, COL_ERROR_ARGS
, params
);
796 return COL_ERROR_ARGS
;
798 /* find the semicolon terminator */
800 while (*s
&& (*s
!= ';'))
804 /* not followed by semicolon */
805 TprintfT (0, "collector: ERROR: parameter %c:%s is not terminated by a semicolon\n", key
, par
);
806 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR
, COL_ERROR_ARGS
, params
);
807 return COL_ERROR_ARGS
;
809 /* terminate par, and position for next descriptor */
812 /* now process that element of the data descriptor */
815 case 'g': /* g<sig>; */
816 if ((err
= sample_set_user_sig (par
)) != COL_ERROR_NONE
)
818 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR
, err
, par
);
822 case 'd': /* d<sig>; -or- d<sig>p; */
823 if ((err
= pause_set_user_sig (par
)) != COL_ERROR_NONE
)
825 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR
, err
, par
);
830 m_dlopen ("libgp-heap.so", RTLD_LAZY
); /* hack to force .so's constructor to be called (?) */
833 m_dlopen ("libgp-sync.so", RTLD_LAZY
); /* hack to force .so's constructor to be called (?) */
836 m_dlopen ("libgp-iotrace.so", RTLD_LAZY
); /* hack to force .so's constructor to be called (?) */
840 TprintfT (DBG_LT0
, "__collector_open_experiment: calling __collector_ext_line_install (%s, %s)\n",
841 par
, __collector_exp_dir_name
);
842 if ((err
= __collector_ext_line_install (par
, __collector_exp_dir_name
)) != COL_ERROR_NONE
)
844 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR
, err
, par
);
849 archive_mode
= __collector_strdup (par
);
851 case 't': /* t:<expt-duration>; */
852 duration_string
= par
;
854 case 'S': /* S:<sample-interval>; */
855 if ((err
= sample_set_interval (par
)) != COL_ERROR_NONE
)
857 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR
, err
, par
);
861 case 'L': /* L:<experiment-size-limit>; */
862 if ((err
= __collector_set_size_limit (par
)) != COL_ERROR_NONE
)
864 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR
, err
, par
);
868 case 'P': /* P:PROJECT_HOME; */
869 project_home
= __collector_strdup (par
);
876 err
= set_user_sig_action (par
);
877 if (err
!= COL_ERROR_NONE
)
878 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR
, err
, par
);
881 /* Ignore unknown parameters; allow them to be handled by modules */
885 /* end of data descriptor parsing */
889 char * par
= "0"; // This will not happen when collect has no -F option
890 if ((err
= __collector_ext_line_install (par
, __collector_exp_dir_name
)) != COL_ERROR_NONE
)
892 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR
, err
, par
);
897 /* now that we know what data is being collected, we can set the filesystem warning */
900 // We have to create all tsd keys before __collector_tsd_allocate().
901 // With the pthreads-based implementation, this might no longer be necessary.
902 // In any case, we still have to create the key before a thread can use it.
903 __collector_ext_gettid_tsd_create_key ();
904 __collector_ext_dispatcher_tsd_create_key ();
906 /* allocate tsd for the current thread */
907 if (__collector_tsd_allocate () != 0)
909 __collector_log_write ("<event kind=\"%s\" id=\"%d\">TSD allocate failed</event>\n", SP_JCMD_CERROR
, COL_ERROR_EXPOPEN
);
910 return COL_ERROR_EXPOPEN
;
912 /* init tsd for unwind, called right after __collector_tsd_allocate()*/
913 __collector_ext_unwind_key_init (1, NULL
);
915 /* start java attach if suitable */
916 if (exp_origin
== SP_ORIGIN_DBX_ATTACH
)
917 __collector_jprofile_start_attach ();
918 start_sec_time
= CALL_UTIL (time
)(NULL
);
919 __collector_start_time
= collector_interface
.getHiResTime ();
920 TprintfT (DBG_LT0
, "\t__collector_open_experiment; resetting start_time\n");
921 if (duration_string
!= NULL
&& (err
= set_duration (duration_string
)) != COL_ERROR_NONE
)
923 __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n", SP_JCMD_CERROR
, err
, duration_string
);
927 /* install the common SIGPROF dispatcher (requires TSD) */
928 if ((err
= __collector_ext_dispatcher_install ()) != COL_ERROR_NONE
)
930 __collector_log_write ("<event kind=\"%s\" id=\"%d\"/></event>\n", SP_JCMD_CERROR
, err
);
934 /* mark the experiment open complete */
936 if (exp_origin
== SP_ORIGIN_DBX_ATTACH
)
937 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" time=\"%lld\" tm_zone=\"%lld\"/>\n",
939 (unsigned) (__collector_start_time
/ NANOSEC
), (unsigned) (__collector_start_time
% NANOSEC
),
940 (long long) start_sec_time
, (long long) 0);
942 log_write_event_run ();
944 /* schedule the first sample */
945 __collector_next_sample
= __collector_start_time
+ ((hrtime_t
) NANOSEC
) * __collector_sample_period
;
946 __collector_ext_usage_sample (MASTER_SMPL
, "collector_open_experiment");
948 /* start data collection in dynamic modules */
949 if (collector_paused
== 0)
951 for (i
= 0; i
< nmodules
; i
++)
952 if (modules
[i
]->startDataCollection
!= NULL
&& modules_st
[i
] == 0)
953 modules
[i
]->startDataCollection ();
957 hrtime_t ts
= GETRELTIME ();
958 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n",
959 SP_JCMD_PAUSE
, (unsigned) (ts
/ NANOSEC
), (unsigned) (ts
% NANOSEC
));
962 /* mark the experiment active */
963 __collector_exp_active
= 1;
964 return COL_ERROR_NONE
;
967 /* prepare directory for new experiment of fork-child */
969 /* return 0 if successful */
971 collector_create_expr_dir (const char *new_exp_name
)
974 mode_t dmode
= S_IRUSR
| S_IWUSR
| S_IXUSR
| S_IRGRP
| S_IXGRP
| S_IROTH
| S_IXOTH
;
975 TprintfT (DBG_LT1
, "collector: __collector_create_expr_dir(%s)\n", new_exp_name
);
976 if (CALL_UTIL (mkdir
)(new_exp_name
, dmode
) < 0)
977 TprintfT (0, "__collector_create_expr_dir(%s) ERROR: errno=%d\n", new_exp_name
, errno
);
983 /* append _xN to __collector_exp_dir_name*/
984 /* return 0 if successful */
986 collector_exp_dir_append_x (int linenum
, const char *parent_exp_name
)
988 char buffer
[MAXPATHLEN
+ 1];
989 char * p
= __collector_strrchr (parent_exp_name
, '/');
990 if (p
== NULL
|| (*(p
+ 1) != '_'))
992 size_t sz
= CALL_UTIL (strlen
)(parent_exp_name
);
993 const char * q
= parent_exp_name
+ sz
- 3;
994 if (sz
< 3 || __collector_strncmp (q
, ".er", CALL_UTIL (strlen
)(q
)) != 0
995 || CALL_UTIL (access
)(parent_exp_name
, F_OK
) != 0)
997 TprintfT (0, "collector_exp_dir_append_x() ERROR: invalid parent_exp_name %s\n", parent_exp_name
);
1000 CALL_UTIL (strlcpy
)(buffer
, parent_exp_name
, sizeof (buffer
));
1001 CALL_UTIL (snprintf
)(__collector_exp_dir_name
, sizeof (__collector_exp_dir_name
),
1002 "%s/_x%d.er", buffer
, linenum
);
1006 p
= __collector_strrchr (parent_exp_name
, '.');
1007 if (p
== NULL
|| *(p
+ 1) != 'e' || *(p
+ 2) != 'r')
1009 TprintfT (0, "collector_exp_dir_append_x() ERROR: invalid parent_exp_name %s\n", parent_exp_name
);
1012 CALL_UTIL (strlcpy
)(buffer
, parent_exp_name
,
1013 ((p
- parent_exp_name
+ 1)<sizeof (buffer
)) ? (p
- parent_exp_name
+ 1) : sizeof (buffer
));
1014 CALL_UTIL (snprintf
)(__collector_exp_dir_name
, sizeof (__collector_exp_dir_name
),
1015 "%s_x%d.er", buffer
, linenum
);
1020 /* prepare directory for new experiment of exec/combo child*/
1022 /* return 0 if successful */
1024 collector_create_expr_dir_lineage (const char *parent_exp_name
)
1027 mode_t dmode
= S_IRUSR
| S_IWUSR
| S_IXUSR
| S_IRGRP
| S_IXGRP
| S_IROTH
| S_IXOTH
;
1029 while (linenum
< INT_MAX
)
1031 if (collector_exp_dir_append_x (linenum
, parent_exp_name
) != 0)
1033 if (CALL_UTIL (access
)(__collector_exp_dir_name
, F_OK
) != 0)
1035 if (CALL_UTIL (mkdir
)(__collector_exp_dir_name
, dmode
) == 0)
1039 TprintfT (DBG_LT0
, "collector: collector_create_expr_dir_lineage(%s -> %s)\n", parent_exp_name
, __collector_exp_dir_name
);
1044 /* Finish the initializing work if we don't collect data while libcollector.so is preloaded. */
1045 /* return COL_ERROR_NONE if successful */
1047 collector_tail_init (const char *parent_exp_name
)
1049 int err
= COL_ERROR_NONE
;
1050 if (exp_origin
!= SP_ORIGIN_FORK
)
1052 /* For exec/combo descendants. Don't create dir for this subexp, but update lineage by appending "_x0". */
1053 /* Different children can have the same _x0 if their name don't match -F exp.
1054 * Assume their fork children inherit the program name, there will be no _x0_fN.er to create.
1055 * So we don't need to worry about the lineage messed up by _x0.
1058 if (collector_exp_dir_append_x (linenum
, parent_exp_name
) != 0)
1059 return COL_ERROR_BADDIR
;
1060 static char exp_name_env
[MAXPATHLEN
+ 1];
1061 CALL_UTIL (snprintf
)(exp_name_env
, sizeof (exp_name_env
), "SP_COLLECTOR_EXPNAME=%s", __collector_exp_dir_name
);
1062 TprintfT (DBG_LT1
, "collector_tail_init: setting SP_COLLECTOR_EXPNAME to %s\n", __collector_exp_dir_name
);
1063 CALL_UTIL (putenv
)(exp_name_env
);
1065 /* initialize the segments map and mmap interposition */
1066 if (exp_origin
!= SP_ORIGIN_GENEXP
&& exp_origin
!= SP_ORIGIN_KERNEL
)
1067 if ((err
= __collector_ext_mmap_install (0)) != COL_ERROR_NONE
)
1070 /* initialize TSD module (note: relies on __collector_heap) */
1071 if (__collector_tsd_init () != 0)
1072 return COL_ERROR_EXPOPEN
;
1074 /* initialize for subsequent stack unwinds */
1075 __collector_ext_unwind_init (0);
1078 /* Make a copy of params so that we can modify the string */
1079 int paramsz
= CALL_UTIL (strlen
)(collector_params
) + 1;
1080 buf
= (char*) alloca (paramsz
);
1081 CALL_UTIL (strlcpy
)(buf
, collector_params
, paramsz
);
1088 /* ensure that it's followed by a colon */
1091 TprintfT (DBG_LT0
, "collector_tail_init: ERROR: parameter %c is not followed by a colon\n", key
);
1092 return COL_ERROR_ARGS
;
1095 /* find the semicolon terminator */
1097 while (*s
&& (*s
!= ';'))
1101 /* not followed by semicolon */
1102 TprintfT (0, "collector_tail_init: ERROR: parameter %c:%s is not terminated by a semicolon\n", key
, par
);
1103 return COL_ERROR_ARGS
;
1105 /* terminate par, and position for next descriptor */
1107 /* now process that element of the data descriptor */
1114 if ((err
= __collector_ext_line_install (par_F
, __collector_exp_dir_name
)) != COL_ERROR_NONE
)
1117 /* allocate tsd for the current thread */
1118 if (__collector_tsd_allocate () != 0)
1119 return COL_ERROR_EXPOPEN
;
1120 return COL_ERROR_NONE
;
1123 /* routines concerning closing the experiment */
1124 /* close down -- fini section routine */
1125 static void collector_fini () __attribute__ ((destructor
));
1129 TprintfT (DBG_LT0
, "collector_fini: closing experiment\n");
1130 __collector_close_experiment ();
1134 void collector_terminate_expt () __attribute__ ((weak
, alias ("__collector_terminate_expt")));
1136 /* __collector_terminate_expt called by user, or from dbx */
1138 __collector_terminate_expt ()
1140 TprintfT (DBG_LT0
, "__collector_terminate_expt: %s; calling close\n", __collector_exp_dir_name
);
1141 __collector_close_experiment ();
1142 TprintfT (DBG_LT0
, "__collector_terminate_expt done\n\n");
1146 * We manage the SIGCHLD handler with sigaction and don't worry about signal or sigset().
1147 * This is in line with the comments in dispatcher.c
1148 * immediately preceding the wrapper function for (Linux) signal().
1150 static struct sigaction original_sigchld_sigaction
;
1151 static pid_t mychild_pid
= -1;
1153 /* __collector_SIGCHLD_signal_handler called when er_archive exits */
1155 __collector_SIGCHLD_signal_handler (int sig
, siginfo_t
*si
, void *context
)
1157 pid_t calling_pid
= si
->si_pid
;
1159 * We get mychild_pid from the vfork() return value.
1160 * So there is an outside chance that the child completes and sends SIGCHLD
1161 * before the handler knows the value of mychild_pid.
1163 if (calling_pid
== mychild_pid
)
1164 // er_archive has exited; so restore the user handler
1165 __collector_sigaction (SIGCHLD
, &original_sigchld_sigaction
, NULL
);
1168 // if we can't identify the pid, the signal must be for the user's handler
1169 if (original_sigchld_sigaction
.sa_handler
!= SIG_DFL
1170 && original_sigchld_sigaction
.sa_handler
!= SIG_IGN
)
1171 original_sigchld_sigaction
.sa_sigaction (sig
, si
, context
);
1173 TprintfT (DBG_LT1
, "__collector_SIGCHLD_signal_handler done\n\n");
1177 collector_sigchld_sigaction (const struct sigaction
*nact
,
1178 struct sigaction
*oact
)
1180 // get the current SIGCHLD handler
1181 struct sigaction cur_handler
;
1182 __collector_sigaction (SIGCHLD
, NULL
, &cur_handler
);
1184 // if we have NOT installed our own handler, return an error
1185 // (force the caller to deal with this case)
1186 if (cur_handler
.sa_sigaction
!= __collector_SIGCHLD_signal_handler
)
1189 // if we HAVE installed our own handler, act on the user's handler
1191 __collector_memcpy (oact
, &original_sigchld_sigaction
, sizeof (struct sigaction
));
1193 __collector_memcpy (&original_sigchld_sigaction
, nact
, sizeof (struct sigaction
));
1198 * __collector_close_experiment may be called either from
1199 * __collector_terminate_expt() or the .fini section
1202 __collector_close_experiment ()
1207 TprintfT (DBG_LT1
, "collector: __collector_close_experiment(): %s\n", __collector_exp_dir_name
);
1210 /* The experiment may have been previously closed */
1214 if (__collector_mutex_trylock (&__collector_close_guard
))
1215 /* someone else is in the middle of closing the experiment */
1218 /* record the termination of the experiment */
1220 collector_params
= NULL
;
1222 /* tell all dynamic modules to stop data collection */
1224 for (i
= 0; i
< nmodules
; i
++)
1225 if (modules
[i
]->stopDataCollection
!= NULL
)
1226 modules
[i
]->stopDataCollection ();
1228 /* notify all dynamic modules the experiment is being closed */
1229 for (i
= 0; i
< nmodules
; i
++)
1231 if (modules
[i
]->closeExperiment
!= NULL
)
1232 modules
[i
]->closeExperiment ();
1233 __collector_delete_handle (modules_hndl
[i
]);
1234 modules_hndl
[i
] = NULL
;
1237 /* acquire the global lock -- only one close at a time */
1238 __collector_mutex_lock (&__collector_glob_lock
);
1239 /* deinstall mmap tracing (with final update) */
1240 __collector_ext_mmap_deinstall (1);
1242 /* deinstall common SIGPROF dispatcher */
1243 __collector_ext_dispatcher_deinstall ();
1245 /* disable line interposition */
1246 __collector_ext_line_close ();
1248 /* Other threads may be reading tsd now. */
1249 //__collector_tsd_fini();
1251 /* delete global heap */
1252 /* omazur: do not delete the global heap
1253 * to avoid crashes in TSD. Need a better solution.
1254 __collector_deleteHeap( __collector_heap );
1255 __collector_heap = NULL;
1257 __collector_mutex_unlock (&__collector_glob_lock
);
1259 /* take a final sample */
1260 __collector_ext_usage_sample (MASTER_SMPL
, "collector_close_experiment");
1263 /* close the frameinfo file */
1264 __collector_ext_unwind_close ();
1265 if (exp_origin
!= SP_ORIGIN_DBX_ATTACH
)
1266 log_write_event_run ();
1268 /* mark the experiment as closed */
1269 __collector_expstate
= EXP_CLOSED
;
1270 TprintfT (DBG_LT1
, "collector: __collector_expstate->EXP_CLOSED: project_home=%s\n",
1271 STR (project_home
));
1272 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n",
1274 (unsigned) (ts
/ NANOSEC
), (unsigned) (ts
% NANOSEC
));
1276 /* derive er_archive's absolute path from that of libcollector */
1278 if (project_home
&& archive_mode
&& __collector_strcmp (archive_mode
, "off"))
1280 /* construct a command to launch it */
1281 char *er_archive_name
= "/bin/gp-archive";
1282 size_t cmdlen
= CALL_UTIL (strlen
)(project_home
) + CALL_UTIL (strlen
)(er_archive_name
) + 1;
1283 char *command
= (char*) alloca (cmdlen
);
1284 CALL_UTIL (snprintf
)(command
, cmdlen
, "%s%s", project_home
, er_archive_name
);
1285 if (CALL_UTIL (access
)(command
, F_OK
) == 0)
1287 // build the argument list
1289 argv
[nargs
++] = command
;
1290 argv
[nargs
++] = "-n";
1291 argv
[nargs
++] = "-a";
1292 argv
[nargs
++] = archive_mode
;
1293 size_t len
= CALL_UTIL (strlen
)(__collector_exp_dir_name
) + 1;
1294 size_t len1
= CALL_UTIL (strlen
)(SP_ARCHIVE_LOG_FILE
) + 1;
1295 char *str
= (char*) alloca (len
+ len1
);
1296 CALL_UTIL (snprintf
)(str
, len
+ 15, "%s/%s", __collector_exp_dir_name
, SP_ARCHIVE_LOG_FILE
);
1297 argv
[nargs
++] = "--outfile";
1298 argv
[nargs
++] = str
;
1299 str
= (char*) alloca (len
);
1300 CALL_UTIL (snprintf
)(str
, len
, "%s", __collector_exp_dir_name
);
1301 argv
[nargs
++] = str
;
1306 /* log the archive command to be run */
1307 if (argv
[0] == NULL
)
1309 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">%s</event>\n",
1310 SP_JCMD_COMMENT
, COL_COMMENT_NONE
, "No archive command run");
1311 TprintfT (DBG_LT1
, "collector: No archive command run\n");
1318 for (i
= 0; argv
[i
] != NULL
; i
++)
1320 bufoffset
+= CALL_UTIL (snprintf
)(&cmdbuf
[bufoffset
], (sizeof (cmdbuf
) - bufoffset
),
1323 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\">Archive command `%s'</event>\n",
1324 SP_JCMD_COMMENT
, COL_COMMENT_NONE
, cmdbuf
);
1325 TprintfT (DBG_LT1
, "collector: running `%s'\n", cmdbuf
);
1328 TprintfT (DBG_LT1
, "__collector_close_experiment(%s) done\n", __collector_exp_dir_name
);
1329 exp_open
= 0; /* mark the experiment as closed */
1330 __collector_exp_active
= 0; /* mark the experiment as inactive */
1332 /* reset all experiment parameters */
1334 collector_paused
= 0;
1335 __collector_pause_sig
= -1;
1336 __collector_pause_sig_warn
= 0;
1337 __collector_sample_sig
= -1;
1338 __collector_sample_sig_warn
= 0;
1339 __collector_sample_period
= 0;
1340 __collector_exp_dir_name
[0] = 0;
1342 /* uninstall the pause and sample signal handlers */
1343 /* XXXX -- not yet, because of potential race conditions in libthread */
1344 if (argv
[0] == NULL
)
1346 /* er_archive command will not be run */
1347 __collector_mutex_unlock (&__collector_close_guard
);
1351 struct sigaction sa
;
1352 CALL_UTIL (memset
)(&sa
, 0, sizeof (struct sigaction
));
1353 sa
.sa_sigaction
= __collector_SIGCHLD_signal_handler
;
1354 sa
.sa_flags
= SA_SIGINFO
;
1355 __collector_sigaction (SIGCHLD
, &sa
, &original_sigchld_sigaction
);
1357 /* linetrace interposition takes care of unsetting Environment variables */
1358 /* create a child process to invoke er_archive */
1359 pid_t pid
= CALL_UTIL (vfork
)();
1362 /* pid is zero == child process -- invoke er_archive */
1363 /* Unset LD_PRELOAD environment variables */
1364 CALL_UTIL (unsetenv
)("LD_PRELOAD_32");
1365 CALL_UTIL (unsetenv
)("LD_PRELOAD_64");
1366 CALL_UTIL (unsetenv
)("LD_PRELOAD");
1367 /* Invoke er_archive */
1368 CALL_UTIL (execv
)(argv
[0], argv
);
1369 CALL_UTIL (exit
)(1); /* exec failed -- child exits with an error */
1373 mychild_pid
= pid
; // notify our signal handler who the child is
1375 /* copied from system.c */
1378 w
= CALL_UTIL (waitpid
)(pid
, &status
, 0);
1380 while (w
== -1 && errno
== EINTR
);
1381 TprintfT (DBG_LT1
, "collector: creating archive done\n");
1382 // __collector_SIGCHLD_signal_handler should now be de-installed, but it does so itself
1385 /* child-process creation failed */
1386 TprintfT (DBG_LT0
, "collector: creating archive process failed\n");
1388 __collector_mutex_unlock (&__collector_close_guard
);
1389 TprintfT (DBG_LT1
, "collector: __collector_close_experiment done\n");
1394 * void __collector_clean_state()
1395 * Perform all necessary cleanup steps in child process after fork().
1398 __collector_clean_state ()
1400 TprintfT (DBG_LT1
, "collector: collector_clean_state()\n");
1403 * We are in child process after fork().
1404 * First of all we have to reset all mutex locks in collector's subsystems.
1405 * After that we can reinitialize modules.
1407 __collector_mmgr_init_mutex_locks (__collector_heap
);
1408 __collector_mutex_init (&__collector_glob_lock
);
1409 __collector_mutex_init (&__collector_open_guard
);
1410 __collector_mutex_init (&__collector_close_guard
);
1411 __collector_mutex_init (&__collector_sample_guard
);
1412 __collector_mutex_init (&__collector_suspend_guard
);
1413 __collector_mutex_init (&__collector_resume_guard
);
1415 if (__collector_mutex_trylock (&__collector_close_guard
))
1416 /* someone else is in the middle of closing the experiment */
1419 /* Stop data collection in all dynamic modules */
1420 for (i
= 0; i
< nmodules
; i
++)
1421 if (modules
[i
]->stopDataCollection
!= NULL
)
1422 modules
[i
]->stopDataCollection ();
1424 // Now we can reset modules
1425 for (i
= 0; i
< nmodules
; i
++)
1427 if (modules
[i
]->detachExperiment
!= NULL
&& modules_st
[i
] == 0)
1428 modules
[i
]->detachExperiment ();
1429 __collector_delete_handle (modules_hndl
[i
]);
1430 modules_hndl
[i
] = NULL
;
1433 /* acquire the global lock -- only one suspend at a time */
1434 __collector_mutex_lock (&__collector_glob_lock
);
1437 /* stop any profile data writing */
1438 paused_when_suspended
= collector_paused
;
1439 collector_paused
= 1;
1441 /* deinstall common SIGPROF dispatcher */
1442 __collector_ext_dispatcher_suspend ();
1444 /* mark the experiment as suspended */
1445 __collector_exp_active
= 0;
1447 /* XXXX mark the experiment as closed! */
1448 exp_open
= 0; /* This is a hack to allow fork child to call__collector_open_experiment() */
1450 /* mark the experiment log closed! */
1453 __collector_mutex_unlock (&__collector_glob_lock
);
1455 // Now we can reset subsystems.
1456 __collector_ext_dispatcher_fork_child_cleanup ();
1457 __collector_mmap_fork_child_cleanup ();
1458 __collector_tsd_fork_child_cleanup ();
1459 paused_when_suspended
= 0;
1460 collector_paused
= 0;
1461 __collector_expstate
= EXP_INIT
;
1462 TprintfT (DBG_LT1
, "__collector_clean_slate: __collector_expstate->EXP_INIT\n");
1463 exp_origin
= SP_ORIGIN_LIBCOL_INIT
;
1465 __collector_start_time
= collector_interface
.getHiResTime ();
1466 TprintfT (DBG_LT1
, " -->__collector_clean_slate; resetting start_time\n");
1469 /* Sample related data */
1470 sample_installed
= 0; // 1 if the sample signal handler installed
1471 sample_mode
= 0; // dynamically turns sample record writing on/off
1472 sample_number
= 0; // index of the current sample record
1473 __collector_sample_sig
= -1; // user-specified sample signal
1474 __collector_sample_sig_warn
= 0; // non-zero if warning already given
1476 /* Pause/resume related data */
1477 __collector_pause_sig
= -1; // user-specified pause signal
1478 __collector_pause_sig_warn
= 0; // non-zero if warning already given
1479 __collector_mutex_unlock (&__collector_close_guard
);
1483 /* modelled on __collector_close_experiment */
1485 __collector_suspend_experiment (char *why
)
1489 /* The experiment may have been previously closed */
1492 /* The experiment may have been previously suspended */
1493 if (!__collector_exp_active
)
1495 if (__collector_mutex_trylock (&__collector_suspend_guard
))
1496 /* someone else is in the middle of suspending the experiment */
1499 /* Stop data collection in all dynamic modules */
1501 for (i
= 0; i
< nmodules
; i
++)
1502 if (modules
[i
]->stopDataCollection
!= NULL
)
1503 modules
[i
]->stopDataCollection ();
1505 /* take a pre-suspension sample */
1506 __collector_ext_usage_sample (MASTER_SMPL
, why
);
1508 /* acquire the global lock -- only one suspend at a time */
1509 __collector_mutex_lock (&__collector_glob_lock
);
1510 /* stop any profile data writing */
1511 paused_when_suspended
= collector_paused
;
1512 collector_paused
= 1;
1514 /* deinstall common SIGPROF dispatcher */
1515 __collector_ext_dispatcher_suspend ();
1517 /* mark the experiment as suspended */
1518 __collector_exp_active
= 0;
1520 /* XXXX mark the experiment as closed! */
1521 exp_open
= 0; // This is a hack to allow fork child to call __collector_open_experiment()
1522 log_pause (); // mark the experiment log closed!
1523 TprintfT (DBG_LT0
, "collector: collector_suspend_experiment(%s, %d)\n\n", why
, collector_paused
);
1524 __collector_mutex_unlock (&__collector_glob_lock
);
1525 __collector_mutex_unlock (&__collector_suspend_guard
);
1530 __collector_resume_experiment ()
1535 /* The experiment may have been previously resumed */
1536 if (__collector_exp_active
)
1538 if (__collector_mutex_trylock (&__collector_resume_guard
))
1539 /* someone else is in the middle of resuming the experiment */
1542 /* acquire the global lock -- only one resume at a time */
1543 __collector_mutex_lock (&__collector_glob_lock
);
1544 /* mark the experiment as re-activated */
1545 __collector_exp_active
= 1;
1546 /* XXXX mark the experiment as open! */
1547 exp_open
= 1; // This is a hack to allow fork child to call__collector_open_experiment()
1548 log_resume (); // mark the experiment log re-opened!
1549 TprintfT (DBG_LT0
, "collector: collector_resume_experiment(%d)\n", paused_when_suspended
);
1550 /* resume any profile data writing */
1551 collector_paused
= paused_when_suspended
;
1552 /* restart common SIGPROF dispatcher */
1553 __collector_ext_dispatcher_restart ();
1554 __collector_mutex_unlock (&__collector_glob_lock
);
1556 /* take a post-suspension sample */
1557 __collector_ext_usage_sample (MASTER_SMPL
, "collector_resume_experiment");
1559 /* Resume data collection in all dynamic modules */
1560 if (collector_paused
== 0)
1563 for (i
= 0; i
< nmodules
; i
++)
1564 if (modules
[i
]->startDataCollection
!= NULL
&& modules_st
[i
] == 0)
1565 modules
[i
]->startDataCollection ();
1568 if (__collector_sample_period
!= 0)
1570 hrtime_t now
= collector_interface
.getHiResTime ();
1571 while (__collector_next_sample
< now
)
1572 __collector_next_sample
+= ((hrtime_t
) NANOSEC
) * __collector_sample_period
;
1575 /* check for experiment past termination time */
1576 if (__collector_terminate_time
!= 0)
1578 hrtime_t now
= collector_interface
.getHiResTime ();
1579 if (__collector_terminate_time
< now
)
1581 TprintfT (DBG_LT0
, "__collector_resume_experiment: now (%lld) > terminate_time (%lld); closing experiment\n",
1582 (now
- __collector_start_time
), (__collector_terminate_time
- __collector_start_time
));
1583 __collector_close_experiment ();
1586 __collector_mutex_unlock (&__collector_resume_guard
);
1590 /* Code to support Samples and Pause/Resume */
1591 void collector_sample () __attribute__ ((weak
, alias ("__collector_sample")));
1593 __collector_sample (char *name
)
1595 __collector_ext_usage_sample (PROGRAM_SMPL
, name
);
1599 write_sample (char *name
)
1601 if (sample_mode
== 0)
1603 /* make the sample timestamp relative to the start */
1604 hrtime_t ts
, now
= collector_interface
.getHiResTime ();
1606 /* update time for next periodic sample */
1607 /* since this is common to all LWPs, and only one (the first!) will
1608 update it to the next period, doing the update early will avoid
1609 the overhead/frustration of the other LWPs
1611 if (__collector_sample_period
!= 0)
1613 /* this update should only be done for periodic samples */
1614 while (__collector_next_sample
< now
)
1615 __collector_next_sample
+= ((hrtime_t
) NANOSEC
) * __collector_sample_period
;
1618 /* take the sample and record it; use (return - __collector_start_time) for timestamp */
1620 ts
= now
- __collector_start_time
;
1622 /* write sample records to log file */
1623 __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" id=\"%d\" label=\"%s\"/>\n",
1625 (unsigned) (ts
/ NANOSEC
), (unsigned) (ts
% NANOSEC
),
1628 /* increment the sample number */
1633 * __collector_ext_usage_sample
1635 * Handle taking a process usage sample and recording it.
1636 * Common to all different types of sample:
1637 * libcollector master samples at initiation and close,
1638 * programmatic samples via libcollector API calls,
1639 * periodic samples originating in the dispatcher,
1640 * manual samples originating in the signal sample handler,
1641 * manual samples originating from the debugger
1642 * Differentiating type and name information is currently not recorded.
1645 __collector_ext_usage_sample (Smpl_type type
, char *name
)
1647 /* name is optional */
1650 TprintfT (DBG_LT3
, "collector: __collector_ext_usage_sample(%d,%s)\n", type
, name
);
1654 /* if paused, don't record periodic samples */
1655 if ((type
== PERIOD_SMPL
) && (collector_paused
== 1))
1658 /* There is a possibility of entering this function
1659 * from sample_handler, dbx direct call to __collector_sample,
1660 * and user called collector_sample. Since we are making a
1661 * new sample anyway just return.
1663 if (__collector_mutex_trylock (&__collector_sample_guard
))
1665 if (type
!= PERIOD_SMPL
|| __collector_sample_period
!= 0)
1666 write_sample (name
);
1667 __collector_mutex_unlock (&__collector_sample_guard
);
1670 /* set the sample period from the parameter */
1672 sample_set_interval (char *param
)
1675 return COL_ERROR_SMPLINIT
;
1676 __collector_sample_period
= CALL_UTIL (strtol
)(param
, NULL
, 0); /* seconds */
1677 TprintfT (DBG_LT1
, "collector: collector_sample period set to %d seconds.\n",
1678 __collector_sample_period
);
1679 if (__collector_sample_period
> 0)
1680 (void) __collector_log_write ("<setting %s=\"%d\"/>\n",
1681 SP_JCMD_SAMPLE_PERIOD
, __collector_sample_period
);
1682 return COL_ERROR_NONE
;
1685 /* set the experiment duration from the parameter */
1687 /* parameter is of the form nnn:mmm, where nnn is the start delay in seconds,
1688 * and mmm is the terminate time in seconds; if nnn is zero,
1689 * data collection starts when the run starts. If mmm is zero,
1690 * data collection terminates when the run terminates. Otherwise,
1691 * nnn must be less than mmm
1694 set_duration (char *param
)
1697 return COL_ERROR_DURATION_INIT
;
1698 int delay_start
= CALL_UTIL (strtol
)(param
, ¶m
, 0); /* seconds */
1699 int terminate_duration
= 0;
1702 /* we only have one parameter, the terminate time */
1703 terminate_duration
= delay_start
;
1706 else if (*param
== ':')
1709 terminate_duration
= CALL_UTIL (strtol
)(param
, ¶m
, 0); /* seconds */
1712 return COL_ERROR_DURATION_INIT
;
1713 TprintfT (DBG_LT1
, "collector: collector_delay_start duration set to %d seconds.\n",
1715 TprintfT (DBG_LT1
, "collector: collector_terminate duration set to %d seconds.\n",
1716 terminate_duration
);
1717 if (terminate_duration
> 0)
1718 __collector_log_write ("<setting %s=\"%d\"/>\n<setting %s=\"%d\"/>\n",
1719 SP_JCMD_DELAYSTART
, delay_start
,
1720 SP_JCMD_TERMINATE
, terminate_duration
);
1721 __collector_delay_start
= (hrtime_t
) 0;
1722 if (delay_start
!= 0)
1724 __collector_delay_start
= __collector_start_time
+ ((hrtime_t
) NANOSEC
) * delay_start
;
1725 collector_paused
= 1;
1727 __collector_terminate_time
= terminate_duration
== 0 ? (hrtime_t
) 0 :
1728 __collector_start_time
+ ((hrtime_t
) NANOSEC
) * terminate_duration
;
1729 return COL_ERROR_NONE
;
1733 sample_set_user_sig (char *par
)
1735 int sig
= CALL_UTIL (strtol
)(par
, &par
, 0);
1736 TprintfT (DBG_LT1
, "collector: sample_set_user_sig(sig=%d,installed=%d)\n",
1737 sig
, sample_installed
);
1738 /* Installing the sampling signal handler more
1739 * than once is not good.
1741 if (!sample_installed
)
1743 struct sigaction act
;
1744 sigemptyset (&act
.sa_mask
);
1745 /* XXXX should any signals be blocked? */
1746 act
.sa_sigaction
= sample_handler
;
1747 act
.sa_flags
= SA_RESTART
| SA_SIGINFO
;
1748 if (sigaction (sig
, &act
, &old_sample_handler
) == -1)
1750 TprintfT (DBG_LT0
, "collector: ERROR: collector_sample_handler install failed (sig=%d).\n",
1751 __collector_sample_sig
);
1752 return COL_ERROR_ARGS
;
1754 if (old_sample_handler
.sa_handler
== SIG_DFL
||
1755 old_sample_handler
.sa_sigaction
== sample_handler
)
1756 old_sample_handler
.sa_handler
= SIG_IGN
;
1757 TprintfT (DBG_LT1
, "collector: collector_sample_handler installed (sig=%d,hndlr=0x%p).\n",
1758 sig
, sample_handler
);
1759 __collector_sample_sig
= sig
;
1760 sample_installed
= 1;
1762 (void) __collector_log_write ("<setting %s=\"%u\"/>\n", SP_JCMD_SAMPLE_SIG
, __collector_sample_sig
);
1763 return COL_ERROR_NONE
;
1766 /* signal handler for sample signal */
1768 sample_handler (int sig
, siginfo_t
*sip
, void *uap
)
1770 if (sip
&& sip
->si_code
== SI_USER
)
1772 TprintfT (DBG_LT1
, "collector: collector_sample_handler sampling!\n");
1773 __collector_ext_usage_sample (MANUAL_SMPL
, "signal");
1775 else if (old_sample_handler
.sa_handler
!= SIG_IGN
)
1777 TprintfT (DBG_LT1
, "collector: collector_sample_handler forwarding signal.\n");
1778 (old_sample_handler
.sa_sigaction
)(sig
, sip
, uap
);
/* collector_pause is a weak alias of __collector_pause. */
void collector_pause () __attribute__ ((weak, alias ("__collector_pause")));

/* Pause data collection, recording "API" as the reason.  */
void
__collector_pause ()
{
  __collector_pause_m ("API");
}
1791 __collector_pause_m (char *reason
)
1794 char xreason
[MAXPATHLEN
];
1795 TprintfT (DBG_LT0
, "collector: __collector_pause_m(%s)\n", reason
);
1797 /* Stop data collection in all dynamic modules */
1798 for (int i
= 0; i
< nmodules
; i
++)
1799 if (modules
[i
]->stopDataCollection
!= NULL
)
1800 modules
[i
]->stopDataCollection ();
1802 /* Take a pause sample */
1803 CALL_UTIL (snprintf
)(xreason
, sizeof (xreason
), "collector_pause(%s)", reason
);
1804 __collector_ext_usage_sample (MASTER_SMPL
, xreason
);
1806 /* Record the event in the log file */
1807 now
= GETRELTIME ();
1808 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\" name=\"%s\"/>\n", SP_JCMD_PAUSE
,
1809 (unsigned) (now
/ NANOSEC
), (unsigned) (now
% NANOSEC
), reason
);
1810 __collector_expstate
= EXP_PAUSED
;
1811 TprintfT (DBG_LT1
, "collector: __collector_expstate->EXP_PAUSED\n");
1812 collector_paused
= 1;
1815 void collector_resume () __attribute__ ((weak
, alias ("__collector_resume")));
1818 __collector_resume ()
1820 TprintfT (DBG_LT0
, "collector: __collector_resume()\n");
1821 __collector_expstate
= EXP_OPEN
;
1822 TprintfT (DBG_LT1
, "collector: __collector_expstate->EXP_OPEN\n");
1824 /* Record the event in the log file */
1825 hrtime_t now
= GETRELTIME ();
1826 (void) __collector_log_write ("<event kind=\"%s\" tstamp=\"%u.%09u\"/>\n", SP_JCMD_RESUME
,
1827 (unsigned) (now
/ NANOSEC
), (unsigned) (now
% NANOSEC
));
1828 /* Take a resume sample */
1829 __collector_ext_usage_sample (MASTER_SMPL
, "collector_resume");
1831 /* Resume data collection in all dynamic modules */
1832 for (int i
= 0; i
< nmodules
; i
++)
1833 if (modules
[i
]->startDataCollection
!= NULL
&& modules_st
[i
] == 0)
1834 modules
[i
]->startDataCollection ();
1835 collector_paused
= 0;
1839 pause_set_user_sig (char *par
)
1841 struct sigaction act
;
1842 int sig
= CALL_UTIL (strtol
)(par
, &par
, 0);
1845 /* not end of the token */
1848 /* it should be a p */
1849 TprintfT (DBG_LT0
, "collector: ERROR: collector_user_handler bad terminator (par=%p[0]=%d).\n",
1851 return COL_ERROR_ARGS
;
1856 /*, it's a p, make sure next is end of token */
1860 TprintfT (DBG_LT0
, "collector: ERROR: collector_user_handler bad terminator (par=%p[0]=%d).\n",
1862 return COL_ERROR_ARGS
;
1865 /* start off paused */
1866 collector_paused
= 1;
1869 sigemptyset (&act
.sa_mask
);
1870 /* XXXX should any signals be blocked? */
1871 act
.sa_sigaction
= pause_handler
;
1872 act
.sa_flags
= SA_RESTART
| SA_SIGINFO
;
1873 if (sigaction (sig
, &act
, &old_pause_handler
) == -1)
1875 TprintfT (DBG_LT0
, "collector: ERROR: collector_pause_handler install failed (sig=%d).\n", sig
);
1876 return COL_ERROR_ARGS
;
1878 if (old_pause_handler
.sa_handler
== SIG_DFL
||
1879 old_pause_handler
.sa_sigaction
== pause_handler
)
1880 old_pause_handler
.sa_handler
= SIG_IGN
;
1881 TprintfT (DBG_LT1
, "collector: collector_pause_handler installed (sig=%d,hndlr=0x%p).\n",
1882 sig
, pause_handler
);
1883 __collector_pause_sig
= sig
;
1884 (void) __collector_log_write ("<setting %s=\"%u\"/>\n", SP_JCMD_PAUSE_SIG
,
1885 __collector_pause_sig
);
1886 return COL_ERROR_NONE
;
1889 /* signal handler for pause/resume signal */
1891 pause_handler (int sig
, siginfo_t
*sip
, void *uap
)
1893 if (sip
&& sip
->si_code
== SI_USER
)
1895 if (collector_paused
== 1)
1897 __collector_resume ();
1898 TprintfT (DBG_LT0
, "collector: collector_pause_handler resumed!\n");
1902 __collector_pause_m ("signal");
1903 TprintfT (DBG_LT0
, "collector: collector_pause_handler paused!\n");
1906 else if (old_pause_handler
.sa_handler
!= SIG_IGN
)
1908 TprintfT (DBG_LT0
, "collector: collector_pause_handler forwarding signal.\n");
1909 (old_pause_handler
.sa_sigaction
)(sig
, sip
, uap
);
1914 get_progspec (char *retstr
, int tmp_sz
, char *name
, int name_sz
)
1916 int procfd
, count
, i
;
1921 procfd
= CALL_UTIL (open
)("/proc/self/cmdline", O_RDONLY
);
1922 int getting_name
= 0;
1925 count
= CALL_UTIL (read
)(procfd
, retstr
, tmp_sz
);
1926 retstr
[count
] = '\0';
1927 for (i
= 0; i
< count
; i
++)
1929 if (getting_name
== 0)
1930 name
[i
] = retstr
[i
];
1931 if (retstr
[i
] == '\0')
1934 if ((i
+ 1) < count
)
1938 CALL_UTIL (close
)(procfd
);
1945 /* if data implies we don't care, just return */
1946 if (fs_matters
== 0)
1951 close_handler (int sig
, siginfo_t
*sip
, void *uap
)
1953 if (sip
&& sip
->si_code
== SI_USER
)
1955 TprintfT (DBG_LT0
, "collector: close_handler: processing signal.\n");
1956 __collector_close_experiment ();
1958 else if (old_close_handler
.sa_handler
!= SIG_IGN
)
1960 TprintfT (DBG_LT0
, "collector: close_handler forwarding signal.\n");
1961 (old_close_handler
.sa_sigaction
)(sig
, sip
, uap
);
1966 exit_handler (int sig
, siginfo_t
*sip
, void *uap
)
1968 if (sip
&& sip
->si_code
== SI_USER
)
1970 TprintfT (DBG_LT0
, "collector: exit_handler: processing signal.\n");
1971 CALL_UTIL (exit
)(1);
1973 else if (old_exit_handler
.sa_handler
!= SIG_IGN
)
1975 TprintfT (DBG_LT0
, "collector: exit_handler forwarding signal.\n");
1976 (old_exit_handler
.sa_sigaction
)(sig
, sip
, uap
);
1981 set_user_sig_action (char *par
)
1983 int sig
= CALL_UTIL (strtol
)(par
, &par
, 0);
1986 TprintfT (DBG_LT0
, "collector: ERROR: set_user_sig_action bad separator: %s.\n", par
);
1987 return COL_ERROR_ARGS
;
1990 struct sigaction act
;
1991 sigemptyset (&act
.sa_mask
);
1992 act
.sa_flags
= SA_RESTART
| SA_SIGINFO
;
1993 if (__collector_strcmp (par
, "exit") == 0)
1995 act
.sa_sigaction
= exit_handler
;
1996 if (sigaction (sig
, &act
, &old_exit_handler
) == -1)
1998 TprintfT (DBG_LT0
, "collector: ERROR: set_user_sig_action failed: %d=%s.\n", sig
, par
);
1999 return COL_ERROR_ARGS
;
2002 else if (__collector_strcmp (par
, "close") == 0)
2004 act
.sa_sigaction
= close_handler
;
2005 if (sigaction (sig
, &act
, &old_close_handler
) == -1)
2007 TprintfT (DBG_LT0
, "collector: ERROR: set_user_sig_action failed: %d=%s.\n", sig
, par
);
2008 return COL_ERROR_ARGS
;
2013 TprintfT (DBG_LT0
, "collector: ERROR: set_user_sig_action unknown action: %d=%s.\n", sig
, par
);
2014 return COL_ERROR_ARGS
;
2016 __collector_log_write ("<setting signal=\"%u\" action=\"%s\"/>\n", sig
, par
);
2017 return COL_ERROR_NONE
;
2020 /*============================================================*/
/*
 * Routines for handling the log file
 */

/* Handle of the log file; set from __collector_create_handle (SP_LOG_FILE).  */
static struct DataHandle *log_hndl = NULL;

/* Log state flags.  __collector_log_write tests log_enabled <= 0 before
   writing to the log handle.  */
static int log_initted = 0;
static int log_enabled = 0;
2031 log_hndl
= __collector_create_handle (SP_LOG_FILE
);
2032 if (log_hndl
== NULL
)
2033 return COL_ERROR_LOG_OPEN
;
2036 TprintfT (DBG_LT1
, "log_open()\n");
2037 return COL_ERROR_NONE
;
2041 log_header_write (sp_origin_t origin
)
2043 __collector_log_write ("<experiment %s=\"%d.%d\">\n",
2044 SP_JCMD_VERSION
, SUNPERF_VERNUM
, SUNPERF_VERNUM_MINOR
);
2045 __collector_log_write ("<collector>%s</collector>\n", VERSION
);
2046 __collector_log_write ("</experiment>\n");
2048 struct utsname sysinfo
;
2049 if (uname (&sysinfo
) < 0)
2051 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\"/></event>\n", SP_JCMD_CERROR
, COL_ERROR_SYSINFO
, errno
);
2052 __collector_log_write ("<system>\n");
2056 long page_size
= CALL_UTIL (sysconf
)(_SC_PAGESIZE
);
2057 long npages
= CALL_UTIL (sysconf
)(_SC_PHYS_PAGES
);
2058 __collector_log_write ("<system hostname=\"%s\" arch=\"%s\" os=\"%s %s\" pagesz=\"%ld\" npages=\"%ld\">\n",
2059 sysinfo
.nodename
, sysinfo
.machine
, sysinfo
.sysname
, sysinfo
.release
, page_size
, npages
);
2062 //YXXX Updating this section? Check similar cut/paste code in:
2063 // collctrl.cc::Coll_Ctrl()
2064 // collector.c::log_header_write()
2065 // cpu_frequency.h::get_cpu_frequency()
2067 FILE *procf
= CALL_UTIL (fopen
)("/proc/cpuinfo", "r");
2072 while (CALL_UTIL (fgets
)(temp
, sizeof (temp
), procf
) != NULL
)
2075 if (__collector_strStartWith (temp
, "processor") == 0)
2077 char *val
= CALL_UTIL (strchr
)(temp
, ':');
2078 cpu
= val
? CALL_UTIL (atoi
)(val
+ 1) : -1;
2080 // else if ( __collector_strStartWith(temp, "model") == 0
2081 // && CALL_UTIL(strstr)(temp, "name") == 0) {
2082 // char *val = CALL_UTIL(strchr)( temp, ':' );
2083 // int model = val ? CALL_UTIL(atoi)( val + 1 ) : -1;
2085 // else if ( __collector_strStartWith(temp, "cpu family") == 0 ) {
2086 // char *val = CALL_UTIL(strchr)( temp, ':' );
2087 // int family = val ? CALL_UTIL(atoi)( val + 1 ) : -1;
2089 else if (__collector_strStartWith (temp
, "cpu MHz") == 0)
2091 char *val
= CALL_UTIL (strchr
)(temp
, ':');
2092 int mhz
= val
? CALL_UTIL (atoi
)(val
+ 1) : 0; /* reading it as int is fine */
2093 (void) __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu
, mhz
);
2096 if (__collector_strStartWith (temp
, "Cpu") == 0 &&
2098 __collector_strStartWith ((CALL_UTIL (strchr
)(temp
+ 1, 'C')) ? CALL_UTIL (strchr
)(temp
+ 1, 'C') : (temp
+ 4), "ClkTck") == 0)
2100 char *val
= CALL_UTIL (strchr
)(temp
, ':');
2104 unsigned long long freq
;
2105 (*__collector_sscanfp
) (val
+ 2, "%llx", &freq
);
2106 mhz
= (unsigned int) (((double) freq
) / 1000000.0 + 0.5);
2108 char *numend
= CALL_UTIL (strchr
)(temp
+ 1, 'C') ? CALL_UTIL (strchr
)(temp
+ 1, 'C') : (temp
+ 4);
2110 cpu
= CALL_UTIL (atoi
)(temp
+ 3);
2111 __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu
, mhz
);
2113 #elif defined(__aarch64__)
2114 if (__collector_strStartWith (temp
, "processor") == 0)
2116 char *val
= CALL_UTIL (strchr
)(temp
, ':');
2117 cpu
= val
? CALL_UTIL (atoi
)(val
+ 1) : -1;
2121 asm volatile("mrs %0, cntfrq_el0" : "=r" (mhz
));
2122 __collector_log_write (" <cpu id=\"%d\" clk=\"%d\"/>\n", cpu
,
2128 CALL_UTIL (fclose
)(procf
);
2130 __collector_log_write ("</system>\n");
2131 __collector_log_write ("<process pid=\"%d\"></process>\n", getpid ());
2132 __collector_log_write ("<process ppid=\"%d\"></process>\n", getppid ());
2133 __collector_log_write ("<process pgrp=\"%d\"></process>\n", getpgrp ());
2134 __collector_log_write ("<process sid=\"%d\"></process>\n", getsid (0));
/* XXX -- cwd commented out
   It would be nice to get the current directory for the experiment,
   but neither method below will work--the /proc method returns a
   0-length string, and using getcwd will break collect on /bin/sh
   (as cuserid does) because of /bin/sh's private malloc
   omazur: readlink seems to work on Linux
 */
2143 /* write the current directory */
2144 char cwd
[MAXPATHLEN
+ 1];
2145 int i
= readlink ("/proc/self/cwd", cwd
, sizeof (cwd
));
2148 cwd
[i
< sizeof (cwd
) ? i
: sizeof (cwd
) - 1] = 0;
2149 (void) __collector_log_write ("<process cwd=\"%s\"></process>\n", cwd
);
2151 (void) __collector_log_write ("<process wsize=\"%d\"></process>\n", (int) (8 * sizeof (void *)));
2154 ucp
.uc_stack
.ss_sp
= NULL
;
2155 ucp
.uc_stack
.ss_size
= 0;
2156 if (getcontext (&ucp
) == 0)
2158 (void) __collector_log_write ("<process stackbase=\"0x%lx\"></process>\n",
2159 (unsigned long) ucp
.uc_stack
.ss_sp
+ ucp
.uc_stack
.ss_size
);
2162 (void) __collector_log_write ("<process>%s</process>\n",
2163 origin
== SP_ORIGIN_FORK
? "(fork)" : exp_progspec
);
2164 __collector_libthread_T1
= 0;
/* __collector_log_write -- write a line to the log file
 *      return value:
 *          1 if error (in creating or extending the log file)
 */
2187 __collector_log_write (char *format
, ...)
2192 static size_t loglen
= 0;
2194 va_start (va
, format
);
2196 int sz
= __collector_xml_vsnprintf (bufptr
, sizeof (buf
), format
, va
);
2197 int allocated_sz
= 0;
2199 if (sz
>= sizeof (buf
))
/* Allocate a new buffer.
 * We need this buffer only temporarily and locally.
 * But don't use the thread stack
 * since it already has buf
 * and is unlikely to have additional room for something even larger than buf.
 */
2207 sz
+= 1; /* add the terminating null byte */
2208 bufptr
= (char*) __collector_allocCSize (__collector_heap
, sz
, 0);
2212 va_start (va
, format
);
2213 sz
= __collector_xml_vsnprintf (bufptr
, sz
, format
, va
);
2217 int newlen
= CALL_UTIL (strlen
)(bufptr
);
2219 // no need to free bufptr if we're going to abort anyhow
2221 bufptr
[newlen
+ 1] = 0;
2222 loglen
= loglen
+ newlen
;
2223 TprintfT (DBG_LT2
, "__collector_log_write len=%ld, loglen=%ld %s",
2224 (long) newlen
, (long) loglen
, bufptr
);
2225 if (log_enabled
<= 0)
/* XXX suppress log_write messages with no log file open
 * this is reached from SimApp dealing with the clock frequency, which it should
 * not be doing.  For now, don't write a message.
 */
2232 CALL_UTIL (fprintf
)(stderr
, "__collector_log_write COL_ERROR_LOG_OPEN: %s", buf
);
2236 rc
= __collector_write_string (log_hndl
, bufptr
, sz
);
2238 __collector_freeCSize (__collector_heap
, (void *) bufptr
, allocated_sz
);
2247 __collector_delete_handle (log_hndl
);
2251 /*============================================================*/
/*
 * Routines for handling the overview file
 */
2258 CALL_UTIL (strlcpy
)(ovw_name
, __collector_exp_dir_name
, sizeof (ovw_name
));
2259 CALL_UTIL (strlcat
)(ovw_name
, "/", sizeof (ovw_name
));
2260 CALL_UTIL (strlcat
)(ovw_name
, SP_OVERVIEW_FILE
, sizeof (ovw_name
));
2261 int fd
= CALL_UTIL (open
)(ovw_name
, O_WRONLY
| O_CREAT
| O_TRUNC
,
2262 S_IRUSR
| S_IWUSR
| S_IRGRP
| S_IROTH
);
2265 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2266 SP_JCMD_CERROR
, COL_ERROR_OVWOPEN
, errno
, ovw_name
);
2269 CALL_UTIL (close
)(fd
);
/* Convert a struct timeval (seconds + microseconds) into a struct
 * timespec (seconds + nanoseconds).
 *
 * tval:  source value, read only.
 * value: destination, both fields are overwritten.
 */
static __inline__ void
timeval_to_timespec (struct timeval *tval, struct timespec *value)
{
  /* Seconds carry over unchanged; microseconds are scaled to nanoseconds.  */
  value->tv_sec = tval->tv_sec;
  value->tv_nsec = tval->tv_usec * 1000;
}
2281 * Resource usage. /proc/<pid>/usage /proc/<pid>/lwp/<lwpid>/lwpusage
2283 typedef struct prusage
2285 id_t pr_lwpid
; /* lwp id. 0: process or defunct */
2286 int pr_count
; /* number of contributing lwps */
2287 timestruc_t pr_tstamp
; /* current time stamp */
2288 timestruc_t pr_create
; /* process/lwp creation time stamp */
2289 timestruc_t pr_term
; /* process/lwp termination time stamp */
2290 timestruc_t pr_rtime
; /* total lwp real (elapsed) time */
2291 timestruc_t pr_utime
; /* user level cpu time */
2292 timestruc_t pr_stime
; /* system call cpu time */
2293 timestruc_t pr_ttime
; /* other system trap cpu time */
2294 timestruc_t pr_tftime
; /* text page fault sleep time */
2295 timestruc_t pr_dftime
; /* data page fault sleep time */
2296 timestruc_t pr_kftime
; /* kernel page fault sleep time */
2297 timestruc_t pr_ltime
; /* user lock wait sleep time */
2298 timestruc_t pr_slptime
; /* all other sleep time */
2299 timestruc_t pr_wtime
; /* wait-cpu (latency) time */
2300 timestruc_t pr_stoptime
; /* stopped time */
2301 timestruc_t filltime
[6]; /* filler for future expansion */
2302 ulong_t pr_minf
; /* minor page faults */
2303 ulong_t pr_majf
; /* major page faults */
2304 ulong_t pr_nswap
; /* swaps */
2305 ulong_t pr_inblk
; /* input blocks */
2306 ulong_t pr_oublk
; /* output blocks */
2307 ulong_t pr_msnd
; /* messages sent */
2308 ulong_t pr_mrcv
; /* messages received */
2309 ulong_t pr_sigs
; /* signals received */
2310 ulong_t pr_vctx
; /* voluntary context switches */
2311 ulong_t pr_ictx
; /* involuntary context switches */
2312 ulong_t pr_sysc
; /* system calls */
2313 ulong_t pr_ioch
; /* chars read and written */
2314 ulong_t filler
[10]; /* filler for future expansion */
2317 static hrtime_t starttime
= 0;
2322 if (sample_mode
== 0)
2326 struct prusage usage
;
2327 struct rusage rusage
;
2328 hrtime_t hrt
, delta
;
2330 /* Fill in the prusage structure with info from getrusage() */
2331 hrt
= collector_interface
.getHiResTime ();
2334 res
= getrusage (RUSAGE_SELF
, &rusage
);
2337 (void) __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2338 SP_JCMD_CERROR
, COL_ERROR_OVWREAD
, errno
, ovw_name
);
2342 CALL_UTIL (memset
)(&usage
, 0, sizeof (struct prusage
));
2343 usage
.pr_lwpid
= getpid ();
2345 usage
.pr_tstamp
.tv_sec
= hrt
/ NANOSEC
;
2346 usage
.pr_tstamp
.tv_nsec
= hrt
% NANOSEC
;
2347 usage
.pr_create
.tv_sec
= starttime
/ NANOSEC
;
2348 usage
.pr_create
.tv_nsec
= starttime
% NANOSEC
;
2349 delta
= hrt
- starttime
;
2350 usage
.pr_rtime
.tv_sec
= delta
/ NANOSEC
;
2351 usage
.pr_rtime
.tv_nsec
= delta
% NANOSEC
;
2352 timeval_to_timespec (&rusage
.ru_utime
, &usage
.pr_utime
);
2353 timeval_to_timespec (&rusage
.ru_stime
, &usage
.pr_stime
);
2355 /* make sure that user- and system cpu time are not negative */
2356 if (ts2hrt (usage
.pr_utime
) < 0)
2358 usage
.pr_utime
.tv_sec
= 0;
2359 usage
.pr_utime
.tv_nsec
= 0;
2361 if (ts2hrt (usage
.pr_stime
) < 0)
2363 usage
.pr_stime
.tv_sec
= 0;
2364 usage
.pr_stime
.tv_nsec
= 0;
2367 /* fill in other fields */
2368 usage
.pr_minf
= (ulong_t
) rusage
.ru_minflt
;
2369 usage
.pr_majf
= (ulong_t
) rusage
.ru_majflt
;
2370 usage
.pr_nswap
= (ulong_t
) rusage
.ru_nswap
;
2371 usage
.pr_inblk
= (ulong_t
) rusage
.ru_inblock
;
2372 usage
.pr_oublk
= (ulong_t
) rusage
.ru_oublock
;
2373 usage
.pr_msnd
= (ulong_t
) rusage
.ru_msgsnd
;
2374 usage
.pr_mrcv
= (ulong_t
) rusage
.ru_msgrcv
;
2375 usage
.pr_sigs
= (ulong_t
) rusage
.ru_nsignals
;
2376 usage
.pr_vctx
= (ulong_t
) rusage
.ru_nvcsw
;
2377 usage
.pr_ictx
= (ulong_t
) rusage
.ru_nivcsw
;
2379 fd
= CALL_UTIL (open
)(ovw_name
, O_WRONLY
| O_APPEND
);
2382 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2383 SP_JCMD_CERROR
, COL_ERROR_OVWOPEN
, errno
, ovw_name
);
2384 return ( ts2hrt (usage
.pr_tstamp
));
2387 CALL_UTIL (lseek
)(fd
, 0, SEEK_END
);
2388 res
= CALL_UTIL (write
)(fd
, &usage
, sizeof (usage
));
2389 CALL_UTIL (close
)(fd
);
2390 if (res
!= sizeof (usage
))
2391 __collector_log_write ("<event kind=\"%s\" id=\"%d\" ec=\"%d\">%s</event>\n",
2392 SP_JCMD_CERROR
, COL_ERROR_OVWWRITE
, errno
, ovw_name
);
2397 __collector_dlog (int tflag
, int level
, char *format
, ...)
2399 if ((tflag
& SP_DUMP_FLAG
) == 0)
2401 if (level
> __collector_tracelevel
)
2404 else if ((tflag
& collector_debug_opt
) == 0)
2407 /* In most cases this allocation should suffice */
2408 int bufsz
= CALL_UTIL (strlen
)(format
) + 128;
2409 char *buf
= (char*) alloca (bufsz
);
2412 if ((tflag
& SP_DUMP_NOHEADER
) == 0)
2414 p
+= CALL_UTIL (snprintf
)(p
, left
, "P%d,L%02u,t%02lu",
2416 (unsigned int) __collector_lwp_self (),
2417 __collector_no_threads
? 0 : __collector_thr_self ());
2418 left
= bufsz
- (p
- buf
);
2421 hrtime_t ts
= GETRELTIME ();
2422 p
+= CALL_UTIL (snprintf
)(p
, left
, " %u.%09u ", (unsigned) (ts
/ NANOSEC
), (unsigned) (ts
% NANOSEC
));
2425 p
+= CALL_UTIL (snprintf
)(p
, left
, ": ");
2426 left
= bufsz
- (p
- buf
);
2430 va_start (va
, format
);
2431 int nbufsz
= CALL_UTIL (vsnprintf
)(p
, left
, format
, va
);
2436 /* Allocate a new buffer */
2437 nbufsz
+= 1; /* add the terminating null byte */
2438 char *nbuf
= (char*) alloca (nbufsz
+ (p
- buf
));
2439 __collector_memcpy (nbuf
, buf
, p
- buf
);
2440 p
= nbuf
+ (p
- buf
);
2442 va_start (va
, format
);
2443 nbufsz
= CALL_UTIL (vsnprintf
)(p
, nbufsz
, format
, va
);
2447 CALL_UTIL (write
)(2, buf
, CALL_UTIL (strlen
)(buf
));
2450 /*============================================================*/
2451 #if ! ARCH(SPARC) /* !sparc-Linux */
/*
 * Routines for handling _exit and _Exit
 */
/*------------------------------------------------------------- _exit */

/* CALL_REAL(x) calls through the saved pointer __real_<x>;
   NULL_PTR(x) tests whether that pointer is still unset.  */
#define CALL_REAL(x)    (*(int(*)())__real_##x)
#define NULL_PTR(x)     ( __real_##x == NULL )

/* Saved addresses of the underlying functions, looked up with dlsym
   on first use.  */
static void *__real__exit = NULL; /* libc only: _exit */
static void *__real__Exit = NULL; /* libc only: _Exit */

/* Weak aliases that route _exit and _Exit to the collector's versions.  */
void _exit () __attribute__ ((weak, alias ("__collector_exit")));
void _Exit () __attribute__ ((weak, alias ("__collector_Exit")));
2466 __collector_exit (int status
)
2468 if (NULL_PTR (_exit
))
2470 __real__exit
= dlsym (RTLD_NEXT
, "_exit");
2471 if (__real__exit
== NULL
)
2472 __real__exit
= dlsym (RTLD_DEFAULT
, "_exit");
2474 TprintfT (DBG_LT1
, "__collector_exit() interposing @0x%p __real__exit\n", __real__exit
);
2475 __collector_terminate_expt ();
2476 TprintfT (DBG_LT1
, "__collector_exit(): experiment terminated\n");
2477 CALL_REAL (_exit
)(status
); // this will exit the process
2481 __collector_Exit (int status
)
2483 if (NULL_PTR (_Exit
))
2485 __real__Exit
= dlsym (RTLD_NEXT
, "_Exit");
2486 if (__real__Exit
== NULL
)
2487 __real__Exit
= dlsym (RTLD_DEFAULT
, "_exit");
2489 TprintfT (DBG_LT1
, "__collector_Exit() interposing @0x%p __real__Exit\n", __real__Exit
);
2490 __collector_terminate_expt ();
2491 TprintfT (DBG_LT1
, "__collector_Exit(): experiment terminated\n");
2492 CALL_REAL (_Exit
)(status
); // this will exit the process
2494 #endif /* !sparc-Linux */