/* binutils-gdb: gprofng/libcollector/unwind.c */
1 /* Copyright (C) 2021 Free Software Foundation, Inc.
2 Contributed by Oracle.
3
4 This file is part of GNU Binutils.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, 51 Franklin Street - Fifth Floor, Boston,
19 MA 02110-1301, USA. */
20
21 #include "config.h"
22 #include <alloca.h>
23 #include <dlfcn.h>
24 #include <stdlib.h>
25 #include <signal.h>
26 #include <unistd.h>
27 #include <pthread.h>
28
29 #include "gp-defs.h"
30 #include "collector.h"
31 #include "gp-experiment.h"
32 #include "memmgr.h"
33 #include "tsd.h"
34
35 /* Get dynamic module interface*/
36 #include "collector_module.h"
37
38 /* Get definitions for SP_LEAF_CHECK_MARKER, SP_TRUNC_STACK_MARKER */
39 #include "data_pckts.h"
40
#if ARCH(SPARC)
/* Layout of a SPARC stack frame as saved by the 'save' instruction;
   used to follow the saved frame-pointer / return-address chain.  */
struct frame
{
  long fr_local[8];         /* saved locals */
  long fr_arg[6];           /* saved arguments [0 - 5] */
  struct frame *fr_savfp;   /* saved frame pointer */
  long fr_savpc;            /* saved program counter */
#if WSIZE(32)
  char *fr_stret;           /* struct return addr */
#endif
  long fr_argd[6];          /* arg dump area */
  long fr_argx[1];          /* array of args past the sixth */
};

#elif ARCH(Intel)
/* Minimal x86 frame record: the saved frame pointer followed by the
   return address, as laid out by "push %ebp/%rbp; mov %esp/%rsp,%ebp/%rbp".  */
struct frame
{
  unsigned long fr_savfp;   /* caller's frame pointer */
  unsigned long fr_savpc;   /* return address */
};
#endif
62
/* Set the debug trace level */
#define DBG_LT0 0
#define DBG_LT1 1
#define DBG_LT2 2
#define DBG_LT3 3

/* Optional hook for reading bytes out of JVM-managed (JIT) code;
   resolved at init time via dlsym("Async_VM_ReadByteInstruction").  */
int (*__collector_VM_ReadByteInstruction)(unsigned char *) = NULL;
#define VM_NO_ACCESS (-1)
#define VM_NOT_VM_MEMORY (-2)
#define VM_NOT_X_SEGMENT (-3)

/* True if p lies in the half-open range [bgn, end).  */
#define isInside(p, bgn, end) ((p) >= (bgn) && (p) < (end))
75
76 /*
77 * Weed through all the arch dependent stuff to get the right definition
 * for 'pc' in the ucontext structure. The system header files are a mess
79 * dealing with all the arch (just look for PC, R_PC, REG_PC).
80 *
81 */
82
#if ARCH(SPARC)

/* True if x (a PC) lies inside the parallel-loop barrier code between
   __mt_EndOfTask_Barrier_ and __mt_EndOfTask_Barrier_Dummy_
   (see the barrier-unwind notes further down in this file).  */
#define IN_BARRIER(x) \
  ( barrier_hdl && \
    (unsigned long)x >= barrier_hdl && \
    (unsigned long)x < barrier_hdlx )
static unsigned long barrier_hdl = 0;
static unsigned long barrier_hdlx = 0;

#if WSIZE(64)
/* On 64-bit SPARC %sp/%fp point STACK_BIAS bytes below the real frame.  */
#define STACK_BIAS 2047
/* True if x (a PC) lies inside __misalign_trap_handler.  */
#define IN_TRAP_HANDLER(x) \
  ( misalign_hdl && \
    (unsigned long)x >= misalign_hdl && \
    (unsigned long)x < misalign_hdlx )
static unsigned long misalign_hdl = 0;
static unsigned long misalign_hdlx = 0;
#elif WSIZE(32)
#define STACK_BIAS 0
#endif

/* Accessors for general registers, SP and PC in a ucontext.  */
#if WSIZE(64)
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.mc_gregs[MC_PC])
#else
#define GET_GREG(ctx,reg) (((ucontext_t*)ctx)->uc_mcontext.gregs[(reg)])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_O6])
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_PC])
#endif
113
114 #elif ARCH(Intel)
115 #include "opcodes/disassemble.h"
116
/* disassemble_info fprintf callback: discard all text output — the
   unwinder only needs the disassembler's decoding, not its printing.  */
static int
fprintf_func (void *arg ATTRIBUTE_UNUSED, const char *fmt ATTRIBUTE_UNUSED, ...)
{
  return 0;
}
122
/* Get LENGTH bytes from info's buffer, at target address memaddr.
   Transfer them to myaddr.  Returns 0 on success, -1 if any part of the
   requested range falls outside [buffer_vma, buffer_vma + buffer_length)
   or at/after stop_vma when that is set.  */
static int
read_memory_func (bfd_vma memaddr, bfd_byte *myaddr, unsigned int length,
		  disassemble_info *info)
{
  unsigned int opb = info->octets_per_byte;
  size_t end_addr_offset = length / opb;
  size_t max_addr_offset = info->buffer_length / opb;
  /* NOTE: octets may wrap if memaddr < buffer_vma, but it is only
     used after the range checks below have rejected that case.  */
  size_t octets = (memaddr - info->buffer_vma) * opb;
  if (memaddr < info->buffer_vma
      || memaddr - info->buffer_vma > max_addr_offset
      || memaddr - info->buffer_vma + end_addr_offset > max_addr_offset
      || (info->stop_vma && (memaddr >= info->stop_vma
			     || memaddr + end_addr_offset > info->stop_vma)))
    return -1;
  memcpy (myaddr, info->buffer + octets, length);
  return 0;
}
142
/* disassemble_info print_address callback: no-op; addresses are not needed.  */
static void
print_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
		    disassemble_info *info ATTRIBUTE_UNUSED) { }
146
/* disassemble_info symbol lookup callback: we have no symbol table here.  */
static asymbol *
symbol_at_address_func (bfd_vma addr ATTRIBUTE_UNUSED,
			disassemble_info *info ATTRIBUTE_UNUSED)
{
  return NULL;
}
153
/* disassemble_info symbol validity callback: accept every symbol.  */
static bfd_boolean
symbol_is_valid (asymbol *sym ATTRIBUTE_UNUSED,
		 disassemble_info *info ATTRIBUTE_UNUSED)
{
  return TRUE;
}
160
/* disassemble_info memory-error callback: ignore read failures; the
   caller handles a failed decode.  */
static void
memory_error_func (int status ATTRIBUTE_UNUSED, bfd_vma addr ATTRIBUTE_UNUSED,
		   disassemble_info *info ATTRIBUTE_UNUSED) { }
164
165
#if WSIZE(32)
/* IA-32: program counter, stack pointer, frame pointer from a ucontext.  */
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EIP])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_ESP])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_EBP])

#elif WSIZE(64)
/* x86-64: program counter, stack pointer, frame pointer from a ucontext.  */
#define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RIP])
#define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RSP])
#define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.gregs[REG_RBP])
#endif /* WSIZE() */
176
177 #elif ARCH(Aarch64)
178 #define GET_PC(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[15])
179 #define GET_SP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[13])
180 #define GET_FP(ctx) (((ucontext_t*)ctx)->uc_mcontext.regs[14])
181 #endif /* ARCH() */
182
183 /*
184 * FILL_CONTEXT() for all platforms
185 * Could use getcontext() except:
186 * - it's not guaranteed to be async signal safe
187 * - it's a system call and not that lightweight
188 * - it's not portable as of POSIX.1-2008
189 * So we just use low-level mechanisms to fill in the few fields we need.
190 */
#if ARCH(SPARC)
#if WSIZE(32)
/* Capture SP from %i6 and force the register windows out to the stack
   with trap 3 ("ta 3", flush windows) so the walker can read saved
   frames from memory.  PC is left 0; the walk starts from saved frames.  */
#define FILL_CONTEXT(context) \
	{ \
	  greg_t fp; \
	  __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
	  __asm__ __volatile__( "ta 3" ); \
	  GET_SP(context) = fp; \
	  GET_PC(context) = (greg_t)0; \
	}

#elif WSIZE(64)
/* Same as the 32-bit variant, but v9 has the "flushw" instruction.  */
#define FILL_CONTEXT(context) \
	{ \
	  greg_t fp; \
	  __asm__ __volatile__( "mov %%i6, %0" : "=r" (fp) ); \
	  __asm__ __volatile__( "flushw" ); \
	  GET_SP(context) = fp; \
	  GET_PC(context) = (greg_t)0; \
	}
#endif /* WSIZE() */

#elif ARCH(Intel)
/* Capture SP/FP/PC via small collector asm helpers; uc_stack gets a
   nominal 1MB extent so consumers see a plausible stack record.  */
#define FILL_CONTEXT(context) \
	{ \
	  context->uc_link = NULL; \
	  void *sp = __collector_getsp(); \
	  GET_SP(context) = (greg_t)sp; \
	  GET_FP(context) = (greg_t)__collector_getfp(); \
	  GET_PC(context) = (greg_t)__collector_getpc(); \
	  context->uc_stack.ss_sp = sp; \
	  context->uc_stack.ss_size = 0x100000; \
	}

#elif ARCH(Aarch64)
/* Uses getcontext() despite the caveats above, then overrides SP with
   this frame's address so the walk starts at the caller's frame.  */
#define FILL_CONTEXT(context) \
	{ getcontext(context);  \
	  context->uc_mcontext.sp = (__u64) __builtin_frame_address(0); \
	}

#endif /* ARCH() */
232
233 static int
234 getByteInstruction (unsigned char *p)
235 {
236 if (__collector_VM_ReadByteInstruction)
237 {
238 int v = __collector_VM_ReadByteInstruction (p);
239 if (v != VM_NOT_VM_MEMORY)
240 return v;
241 }
242 return *p;
243 }
244
/* Handle used to emit frame-info packets into the SP_FRINFO_FILE data file.  */
struct DataHandle *dhndl = NULL;

/* TSD key holding each thread's end-of-stack address (see
   __collector_ext_unwind_key_init).  */
static unsigned unwind_key = COLLECTOR_TSD_INVALID_KEY;

/* To support two OpenMP API's we use a pointer
 * to the actual function.
 */
int (*__collector_omp_stack_trace)(char*, int, hrtime_t, void*) = NULL;
int (*__collector_mpi_stack_trace)(char*, int, hrtime_t) = NULL;

/* Recorded stack-depth limits; overridable via the
   GPROFNG_MAX_CALL_STACK_DEPTH / GPROFNG_JAVA_MAX_CALL_STACK_DEPTH
   environment variables (see __collector_ext_unwind_init).  */
#define DEFAULT_MAX_NFRAMES 256
static int max_native_nframes = DEFAULT_MAX_NFRAMES;
static int max_java_nframes = DEFAULT_MAX_NFRAMES;

/* Buffer sizes (bytes) needed to hold nframes native/Java frames plus
   markers, and the fixed per-packet overhead.  */
#define NATIVE_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long) )
#define JAVA_FRAME_BYTES(nframes) ( ((nframes)+1) * sizeof(long) * 2 + 16 )
#define OVERHEAD_BYTES ( 2 * sizeof(long) + 2 * sizeof(Stack_info) )

/* Multiplicative hash constants for stack UIDs; the *_INV values are
   the inverses used to peel frames back off the hash during compression.  */
#define ROOT_UID 801425552975190205ULL
#define ROOT_UID_INV 92251691606677ULL
#define ROOT_IDX 13907816567264074199ULL
#define ROOT_IDX_INV 2075111ULL
#define UIDTableSize 1048576
/* Hash table of UIDs already written, to avoid re-emitting packets.  */
static volatile uint64_t *UIDTable = NULL;
static volatile int seen_omp = 0;

static int stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode);
static FrameInfo compute_uid (Frame_packet *frp);
static int omp_no_walk = 0;
274
#if ARCH(Intel)
#define ValTableSize 1048576
#define OmpValTableSize 65536
static unsigned long *AddrTable_RA_FROMFP = NULL; // Cache for RA_FROMFP pcs
static unsigned long *AddrTable_RA_EOSTCK = NULL; // Cache for RA_EOSTCK pcs
/* OpenMP walk caches; allocated in unwind_init only when an OpenMP or
   MPI stack-trace callback is registered and omp_no_walk is set.  */
static struct WalkContext *OmpCurCtxs = NULL;
static struct WalkContext *OmpCtxs = NULL;
static uint32_t *OmpVals = NULL;
static unsigned long *OmpRAs = NULL;
static unsigned long adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend);
static int parse_x86_AVX_instruction (unsigned char *pc);

/* State of the x86 stack walker while traversing one stack.  */
struct WalkContext
{
  unsigned long pc;
  unsigned long sp;
  unsigned long fp;
  unsigned long ln;
  unsigned long sbase; /* stack boundary */
  unsigned long tbgn; /* current memory segment start */
  unsigned long tend; /* current memory segment end */
};
#endif
298
299 #if defined(DEBUG) && ARCH(Intel)
300 #include <execinfo.h>
301
302 static void
303 dump_stack (int nline)
304 {
305 if ((__collector_tracelevel & SP_DUMP_STACK) == 0)
306 return;
307
308 enum Constexpr { MAX_SIZE = 1024 };
309 void *array[MAX_SIZE];
310 size_t sz = backtrace (array, MAX_SIZE);
311 char **strings = backtrace_symbols (array, sz);
312 DprintfT (SP_DUMP_STACK, "\ndump_stack: %d size=%d\n", nline, (int) sz);
313 for (int i = 0; i < sz; i++)
314 DprintfT (SP_DUMP_STACK, " %3d: %p %s\n", i, array[i],
315 strings[i] ? strings[i] : "???");
316 }
317
318 #define dump_targets(nline, ntrg, targets) \
319 if ((__collector_tracelevel & SP_DUMP_UNWIND) != 0) \
320 for(int i = 0; i < ntrg; i++) \
321 DprintfT (SP_DUMP_UNWIND, " %2d: 0x%lx\n", i, (long) targets[i])
322 #else
323 #define dump_stack(x)
324 #define dump_targets(nline, ntrg, targets)
325 #endif
326
/* Record the current thread's end-of-stack address in TSD so the stack
   walker knows where to stop.  For pthreads the address is queried from
   the thread attributes; for raw clone()d threads the caller supplies
   it in STACK.  */
void
__collector_ext_unwind_key_init (int isPthread, void * stack)
{
  void * ptr = __collector_tsd_get_by_key (unwind_key);
  if (ptr == NULL)
    {
      TprintfT (DBG_LT2, "__collector_ext_unwind_key_init: cannot get tsd\n");
      return;
    }
  if (isPthread)
    {
      size_t stack_size = 0;
      void *stack_addr = 0;
      pthread_t pthread = pthread_self ();
      pthread_attr_t attr;
      int err = pthread_getattr_np (pthread, &attr);
      TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: pthread: 0x%lx err: %d\n", pthread, err);
      if (err == 0)
	{
	  err = pthread_attr_getstack (&attr, &stack_addr, &stack_size);
	  if (err == 0)
	    /* pthread_attr_getstack returns the lowest stack address;
	       the unwinder wants the end (highest address).  */
	    stack_addr = (char*) stack_addr + stack_size;
	  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: stack_size=0x%lx eos=%p err=%d\n",
		    (long) stack_size, stack_addr, err);
	  err = pthread_attr_destroy (&attr);
	  TprintfT (DBG_LT1, "__collector_ext_unwind_key_init: destroy: %d\n", err);
	}
      /* NOTE: if the attribute queries failed, 0 is stored — the walker
	 then has no end-of-stack bound for this thread.  */
      *(void**) ptr = stack_addr;
    }
  else
    *(void**) ptr = stack; // cloned thread
}
359
/* One-time initialization of the unwinder: allocate the UID hash table
   and per-arch caches, read stack-depth limits from the environment,
   resolve optional JVM / trap-handler / barrier symbols, and — when
   RECORD is nonzero — open the frame-info output handle.  Terminates
   the experiment on allocation failure.  */
void
__collector_ext_unwind_init (int record)
{
  int sz = UIDTableSize * sizeof (*UIDTable);
  UIDTable = (uint64_t*) __collector_allocCSize (__collector_heap, sz, 1);
  if (UIDTable == NULL)
    {
      __collector_terminate_expt ();
      return;
    }
  CALL_UTIL (memset)((void*) UIDTable, 0, sz);

  /* Optional override of the Java stack-depth limit,
     clamped to [5, MAX_STACKDEPTH].  */
  char *str = CALL_UTIL (getenv)("GPROFNG_JAVA_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_java_nframes = n;
	}
    }

  /* Optional override of the native stack-depth limit; same clamping.  */
  str = CALL_UTIL (getenv)("GPROFNG_MAX_CALL_STACK_DEPTH");
  if (str != NULL && *str != 0)
    {
      char *endptr = str;
      int n = CALL_UTIL (strtol)(str, &endptr, 0);
      if (endptr != str && n >= 0)
	{
	  if (n < 5)
	    n = 5;
	  if (n > MAX_STACKDEPTH)
	    n = MAX_STACKDEPTH;
	  max_native_nframes = n;
	}
    }

  TprintfT (DBG_LT0, "GPROFNG_MAX_CALL_STACK_DEPTH=%d GPROFNG_JAVA_MAX_CALL_STACK_DEPTH=%d\n",
	    max_native_nframes, max_java_nframes);
  omp_no_walk = 1;

  /* Hook for safely reading JIT-managed memory, if the JVM provides it.  */
  if (__collector_VM_ReadByteInstruction == NULL)
    __collector_VM_ReadByteInstruction = (int(*)()) dlsym (RTLD_DEFAULT, "Async_VM_ReadByteInstruction");

#if ARCH(SPARC)
#if WSIZE(64)
  /* Bounds of the v9 misaligned-access trap handler (see the trap
     handler notes later in this file); if the end symbol is absent,
     assume a fixed 292-byte handler.  */
  misalign_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler");
  misalign_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__misalign_trap_handler_end");
  if (misalign_hdlx == 0)
    misalign_hdlx = misalign_hdl + 292;
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;
#else
  /* Bounds of the parallel-loop barrier code; both symbols are needed.  */
  barrier_hdl = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_");
  barrier_hdlx = (unsigned long) dlsym (RTLD_DEFAULT, "__mt_EndOfTask_Barrier_Dummy_");
  if (barrier_hdlx == 0)
    barrier_hdl = 0;
#endif /* WSIZE() */

#elif ARCH(Intel)
  /* Caches of PCs already classified by the x86 walker.  */
  sz = ValTableSize * sizeof (*AddrTable_RA_FROMFP);
  AddrTable_RA_FROMFP = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  sz = ValTableSize * sizeof (*AddrTable_RA_EOSTCK);
  AddrTable_RA_EOSTCK = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
  if (omp_no_walk && (__collector_omp_stack_trace != NULL || __collector_mpi_stack_trace != NULL))
    {
      sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
      if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
	{
	  TprintfT (0, "unwind_init() ERROR: failed; terminating experiment\n");
	  __collector_terminate_expt ();
	  return;
	}
    }
#endif /* ARCH() */

  if (record)
    {
      dhndl = __collector_create_handle (SP_FRINFO_FILE);
      __collector_log_write ("<%s name=\"%s\" format=\"binary\"/>\n", SP_TAG_DATAPTR, SP_FRINFO_FILE);
    }

  unwind_key = __collector_tsd_create_key (sizeof (void*), NULL, NULL);
  if (unwind_key == COLLECTOR_TSD_INVALID_KEY)
    {
      TprintfT (0, "unwind_init: ERROR: TSD key create failed.\n");
      __collector_log_write ("<%s kind=\"%s\" id=\"%d\">TSD key not created</%s>\n",
			     SP_TAG_EVENT, SP_JCMD_CERROR, COL_ERROR_GENERAL, SP_TAG_EVENT);
      return;
    }
  TprintfT (0, "unwind_init() completed normally\n");
  return;
}
467
468 void
469 __collector_ext_unwind_close ()
470 {
471 __collector_delete_handle (dhndl);
472 dhndl = NULL;
473 }
474
475 void*
476 __collector_ext_return_address (unsigned level)
477 {
478 if (NULL == UIDTable) //unwind not initialized yet
479 return NULL;
480 unsigned size = (level + 4) * sizeof (long); // need to strip __collector_get_return_address and its caller
481 ucontext_t context;
482 FILL_CONTEXT ((&context));
483 char* buf = (char*) alloca (size);
484 if (buf == NULL)
485 {
486 TprintfT (DBG_LT0, "__collector_get_return_address: ERROR: alloca(%d) fails\n", size);
487 return NULL;
488 }
489 int sz = stack_unwind (buf, size, NULL, NULL, &context, 0);
490 if (sz < (level + 3) * sizeof (long))
491 {
492 TprintfT (DBG_LT0, "__collector_get_return_address: size=%d, but stack_unwind returns %d\n", size, sz);
493 return NULL;
494 }
495 long *lbuf = (long*) buf;
496 TprintfT (DBG_LT2, "__collector_get_return_address: return %lx\n", lbuf[level + 2]);
497 return (void *) (lbuf[level + 2]);
498 }
/*
 * Collector interface method getFrameInfo
 *
 * Build a Frame_packet for the event described by MODE/ARG — a stack
 * boundary pointer, a ucontext, or a pre-built CM_Array — optionally
 * preceded by a Java stack section, and return its unique FrameInfo id.
 */
FrameInfo
__collector_get_frame_info (hrtime_t ts, int mode, void *arg)
{
  ucontext_t *context = NULL;
  void *bptr = NULL;
  CM_Array *array = NULL;

  int unwind_mode = 0;
  int do_walk = 1;

  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  int bmode = mode & 0xffff;     /* low bits select the argument kind */
  int pseudo_context = 0;
  if (bmode == FRINFO_FROM_STACK_ARG || bmode == FRINFO_FROM_STACK)
    {
      /* Walk the current stack; ARG is passed through to stack_unwind
	 as the boundary pointer.  */
      bptr = arg;
      context = (ucontext_t*) alloca (sizeof (ucontext_t));
      FILL_CONTEXT (context);
      unwind_mode |= bmode;
    }
  else if (bmode == FRINFO_FROM_UC)
    {
      context = (ucontext_t*) arg;
      if (context == NULL)
	return (FrameInfo) 0;
      /* SP == 0 marks a synthetic context with no real stack behind it.  */
      if (GET_SP (context) == 0)
	pseudo_context = 1;
    }
  else if (bmode == FRINFO_FROM_ARRAY)
    {
      array = (CM_Array*) arg;
      if (array == NULL || array->length <= 0)
	return (FrameInfo) 0;
    }
  else
    return (FrameInfo) 0;

  /* Size the packet for the native stack, plus a Java section when the
     JVM agent is active and we have a real context.  */
  int max_frame_size = OVERHEAD_BYTES + NATIVE_FRAME_BYTES (max_native_nframes);
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    max_frame_size += JAVA_FRAME_BYTES (max_java_nframes);

  Frame_packet *frpckt = alloca (sizeof (Frame_packet) + max_frame_size);
  frpckt->type = FRAME_PCKT;
  frpckt->hsize = sizeof (Frame_packet);

  /* d/size track the write cursor and remaining room in the packet.  */
  char *d = (char*) (frpckt + 1);
  int size = max_frame_size;

#define MIN(a,b) ((a)<(b)?(a):(b))
  /* get Java info */
  if (__collector_java_mode && __collector_java_asyncgetcalltrace_loaded && context && !pseudo_context)
    {
      /* use only 2/3 of the buffer and leave the rest for the native stack */
      int tmpsz = MIN (size, JAVA_FRAME_BYTES (max_java_nframes));
      if (tmpsz > 0)
	{
	  int sz = __collector_ext_jstack_unwind (d, tmpsz, context);
	  d += sz;
	  size -= sz;
	}
    }

  /* get native stack */
  if (context)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
#if ARCH(Intel)
      if (omp_no_walk == 0)
	do_walk = 1;
#endif
      if (do_walk == 0)
	unwind_mode |= FRINFO_NO_WALK;

      int tmpsz = MIN (size, NATIVE_FRAME_BYTES (max_native_nframes));
      if (tmpsz > 0)
	{
	  sz = stack_unwind (d, tmpsz, bptr, NULL, context, unwind_mode);
	  d += sz;
	  size -= sz;
	}
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }

  /* create a stack image from user data */
  if (array && array->length > 0)
    {
      Stack_info *sinfo = (Stack_info*) d;
      int sz = sizeof (Stack_info);
      d += sz;
      size -= sz;
      sz = array->length;
      if (sz > size)
	sz = size; // YXXX should we mark this with truncation frame?
      __collector_memcpy (d, array->bytes, sz);
      d += sz;
      size -= sz;
      sinfo->kind = STACK_INFO;
      sinfo->hsize = (d - (char*) sinfo);
    }

  /* Compute the total size */
  frpckt->tsize = d - (char*) frpckt;
  FrameInfo uid = compute_uid (frpckt);
  return uid;
}
612
/* Compute a unique id (a multiplicative hash) over the stack sections
   in FRP, write the packet the first time each id is seen, and return
   the id.  Previously-seen stack suffixes are compressed into an 8-byte
   back-reference before writing.  Contains benign data races by design
   (see the inline comments).  */
FrameInfo
compute_uid (Frame_packet *frp)
{
  uint64_t idxs[LAST_INFO];
  uint64_t uid = ROOT_UID;
  uint64_t idx = ROOT_IDX;

  /* First pass: hash every info section, innermost frame last.  */
  Common_info *cinfo = (Common_info*) ((char*) frp + frp->hsize);
  char *end = (char*) frp + frp->tsize;
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;

      /* Start with a different value to avoid matching with uid */
      uint64_t uidt = 1;
      uint64_t idxt = 1;
      long *ptr = (long*) ((char*) cinfo + cinfo->hsize);
      long *bnd = (long*) ((char*) cinfo + sizeof (Common_info));
      TprintfT (DBG_LT2, "compute_uid: Cnt=%ld: ", (long) cinfo->hsize);
      /* Fold the frame words into both the per-section hash (uidt/idxt)
	 and the whole-packet hash (uid/idx).  */
      while (ptr > bnd)
	{
	  long val = *(--ptr);
	  tprintf (DBG_LT2, "0x%8.8llx ", (unsigned long long) val);
	  uidt = (uidt + val) * ROOT_UID;
	  idxt = (idxt + val) * ROOT_IDX;
	  uid = (uid + val) * ROOT_UID;
	  idx = (idx + val) * ROOT_IDX;
	}
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  cinfo->uid = uidt;
	  idxs[cinfo->kind] = idxt;
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  tprintf (DBG_LT2, "\n");

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;
  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;

  /* Already recorded at any of the three probe slots: nothing to write.  */
  if (found1 || found2 || found3)
    return (FrameInfo) uid;
  frp->uid = uid;

  /* Compress info's */
  cinfo = (Common_info*) ((char*) frp + frp->hsize);
  for (;;)
    {
      if ((char*) cinfo >= end || cinfo->hsize == 0 ||
	  (char*) cinfo + cinfo->hsize > end)
	break;
      if (cinfo->kind == STACK_INFO || cinfo->kind == JAVA_INFO)
	{
	  long *ptr = (long*) ((char*) cinfo + sizeof (Common_info));
	  long *bnd = (long*) ((char*) cinfo + cinfo->hsize);
	  uint64_t uidt = cinfo->uid;
	  uint64_t idxt = idxs[cinfo->kind];
	  int found = 0;
	  int first = 1;
	  /* Peel leading frames off the hash one at a time (using the
	     modular inverses) looking for a suffix already recorded.  */
	  while (ptr < bnd - 1)
	    {
	      int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	      if (UIDTable[idx1] == uidt)
		{
		  found = 1;
		  break;
		}
	      else if (first)
		{
		  first = 0;
		  UIDTable[idx1] = uidt;
		}
	      long val = *ptr++;
	      uidt = uidt * ROOT_UID_INV - val;
	      idxt = idxt * ROOT_IDX_INV - val;
	    }
	  if (found)
	    {
	      /* Replace the matched suffix with its uid (one byte at a
		 time, little-endian) and shift the packet tail down.  */
	      char *d = (char*) ptr;
	      char *s = (char*) bnd;
	      if (!first)
		{
		  int i;
		  for (i = 0; i<sizeof (uidt); i++)
		    {
		      *d++ = (char) uidt;
		      uidt = uidt >> 8;
		    }
		}
	      int delta = s - d;
	      while (s < end)
		*d++ = *s++;
	      cinfo->kind |= COMPRESSED_INFO;
	      cinfo->hsize -= delta;
	      frp->tsize -= delta;
	      end -= delta;
	    }
	}
      cinfo = (Common_info*) ((char*) cinfo + cinfo->hsize);
    }
  __collector_write_packet (dhndl, (CM_Packet*) frp);
  return (FrameInfo) uid;
}
739
/* Compute (or reuse) a UID for the raw stack image in ARG, chained to
   SUID when given.  Writes a Uid_packet the first time a uid is seen;
   as in compute_uid, all reads from UIDTable must precede writes
   (benign races).  Returns the uid, or (FrameInfo)-1 if ARG is not
   long-aligned.  */
FrameInfo
__collector_getUID (CM_Array *arg, FrameInfo suid)
{
  /* Both the length and the data pointer must be long-aligned.  */
  if (arg->length % sizeof (long) != 0 ||
      (long) arg->bytes % sizeof (long) != 0)
    return (FrameInfo) - 1;
  if (arg->length == 0)
    return suid;

  uint64_t uid = suid ? suid : 1;
  uint64_t idx = suid ? suid : 1;
  long *ptr = (long*) ((char*) arg->bytes + arg->length);
  long *bnd = (long*) (arg->bytes);
  /* Fold the words in from the end, same scheme as compute_uid.  */
  while (ptr > bnd)
    {
      long val = *(--ptr);
      uid = (uid + val) * ROOT_UID;
      idx = (idx + val) * ROOT_IDX;
    }

  /* Check if we have already recorded that uid.
   * The following fragment contains benign data races.
   * It's important, though, that all reads from UIDTable
   * happen before writes.
   */
  int found1 = 0;
  int idx1 = (int) ((idx >> 44) % UIDTableSize);
  if (UIDTable[idx1] == uid)
    found1 = 1;
  int found2 = 0;
  int idx2 = (int) ((idx >> 24) % UIDTableSize);
  if (UIDTable[idx2] == uid)
    found2 = 1;
  int found3 = 0;
  int idx3 = (int) ((idx >> 4) % UIDTableSize);
  if (UIDTable[idx3] == uid)
    found3 = 1;

  if (!found1)
    UIDTable[idx1] = uid;
  if (!found2)
    UIDTable[idx2] = uid;
  if (!found3)
    UIDTable[idx3] = uid;
  if (found1 || found2 || found3)
    return (FrameInfo) uid;

  /* Not seen before: build and emit a Uid_packet.  */
  int sz = sizeof (Uid_packet) + arg->length;
  if (suid)
    sz += sizeof (suid);
  Uid_packet *uidp = alloca (sz);
  uidp->tsize = sz;
  uidp->type = UID_PCKT;
  uidp->flags = 0;
  uidp->uid = uid;

  /* Compress */
  ptr = (long*) (arg->bytes);
  bnd = (long*) ((char*) arg->bytes + arg->length);
  long *dst = (long*) (uidp + 1);
  uint64_t uidt = uid;
  uint64_t idxt = idx;
  uint64_t luid = suid; /* link uid */

  /* Copy words while peeling them off the hash; stop early if the
     remaining suffix is already in UIDTable and link to it instead.  */
  while (ptr < bnd)
    {

      long val = *ptr++;
      *dst++ = val;

      if ((bnd - ptr) > sizeof (uidt))
	{
	  uidt = uidt * ROOT_UID_INV - val;
	  idxt = idxt * ROOT_IDX_INV - val;
	  int idx1 = (int) ((idxt >> 44) % UIDTableSize);
	  if (UIDTable[idx1] == uidt)
	    {
	      luid = uidt;
	      break;
	    }
	}
    }
  if (luid)
    {
      /* Append the link uid byte-by-byte (little-endian) and shrink
	 the packet to the compressed size.  */
      char *d = (char*) dst;
      for (int i = 0; i<sizeof (luid); i++)
	{
	  *d++ = (char) luid;
	  luid = luid >> 8;
	}
      uidp->flags |= COMPRESSED_INFO;
      uidp->tsize = d - (char*) uidp;
    }
  __collector_write_packet (dhndl, (CM_Packet*) uidp);

  return (FrameInfo) uid;
}
837
838 int
839 __collector_getStackTrace (void *buf, int size, void *bptr, void *eptr, void *arg)
840 {
841 if (arg == (void*) __collector_omp_stack_trace)
842 seen_omp = 1;
843 int do_walk = 1;
844 if (arg == NULL || arg == (void*) __collector_omp_stack_trace)
845 {
846 do_walk = (arg == (void*) __collector_omp_stack_trace && omp_no_walk) ? 0 : 1;
847 ucontext_t *context = (ucontext_t*) alloca (sizeof (ucontext_t));
848 FILL_CONTEXT (context);
849 arg = context;
850 }
851 int unwind_mode = 0;
852 if (do_walk == 0)
853 unwind_mode |= FRINFO_NO_WALK;
854 return stack_unwind (buf, size, bptr, eptr, arg, unwind_mode);
855 }
856
857 #if ARCH(SPARC)
858 /*
859 * These are important data structures taken from the header files reg.h and
860 * ucontext.h. They are used for the stack trace algorithm explained below.
861 *
862 * typedef struct ucontext {
863 * u_long uc_flags;
864 * struct ucontext *uc_link;
865 * usigset_t uc_sigmask;
866 * stack_t uc_stack;
867 * mcontext_t uc_mcontext;
868 * long uc_filler[23];
869 * } ucontext_t;
870 *
871 * #define SPARC_MAXREGWINDOW 31
872 *
873 * struct rwindow {
874 * greg_t rw_local[8];
875 * greg_t rw_in[8];
876 * };
877 *
878 * #define rw_fp rw_in[6]
879 * #define rw_rtn rw_in[7]
880 *
881 * struct gwindows {
882 * int wbcnt;
883 * int *spbuf[SPARC_MAXREGWINDOW];
884 * struct rwindow wbuf[SPARC_MAXREGWINDOW];
885 * };
886 *
887 * typedef struct gwindows gwindows_t;
888 *
889 * typedef struct {
890 * gregset_t gregs;
891 * gwindows_t *gwins;
892 * fpregset_t fpregs;
893 * long filler[21];
894 * } mcontext_t;
895 *
 * The stack would look like this when SIGPROF occurs.
897 *
898 * ------------------------- <- high memory
899 * | |
900 * | |
901 * -------------------------
902 * | |
903 * ------------------------- <- fp' <-|
904 * | | |
905 * : : |
906 * | | |
907 * ------------------------- |
908 * | fp |----------|
909 * | |
910 * ------------------------- <- sp'
911 * | | | |
912 * | gwins | <- saved stack pointers & | |
913 * | | register windows | |- mcontext
914 * ------------------------- | |
915 * | gregs | <- saved registers | |
916 * ------------------------- |
917 * | | |- ucontext
918 * ------------------------- <- ucp (ucontext pointer) |
919 * | | |
920 * | | |- siginfo
921 * ------------------------- <- sip (siginfo pointer) |
922 * | |
923 * ------------------------- <- sp
924 *
925 * Then the signal handler is called with:
926 * handler( signo, sip, uip );
927 * When gwins is null, all the stack frames are saved in the user stack.
928 * In that case we can find sp' from gregs and walk the stack for a backtrace.
929 * However, if gwins is not null we will have a more complicated case.
930 * Wbcnt(in gwins) tells you how many saved register windows are valid.
931 * This is important because the kernel does not allocate the entire array.
932 * And the top most frame is saved in the lowest index element. The next
933 * paragraph explains the possible causes.
934 *
935 * There are two routines in the kernel to flush out user register windows.
936 * flush_user_windows and flush_user_windows_to_stack
937 * The first routine will not cause a page fault. Therefore if the user
938 * stack is not in memory, the register windows will be saved to the pcb.
939 * This can happen when the kernel is trying to deliver a signal and
 * the user stack got swapped out. The kernel will then build a new context for
941 * the signal handler and the saved register windows will
 * be copied to the ucontext as shown above. On the other hand,
943 * flush_user_windows_to_stack can cause a page fault, and if it failed
944 * then there is something wrong (stack overflow, misalign).
 * The first saved register window does not necessarily correspond to the
 * first stack frame. So the current stack pointer must be compared with
947 * the stack pointers in spbuf to find a match.
948 *
949 * We will also follow the uc_link field in ucontext to trace also nested
950 * signal stack frames.
951 *
952 */
953
954 /* Dealing with trap handlers.
955 * When a user defined trap handler is invoked the return address
956 * (or actually the address of an instruction that raised the trap)
957 * is passed to the trap handler in %l6, whereas saved %o7 contains
958 * garbage. First, we need to find out if a particular pc belongs
959 * to the trap handler, and if so, take the %l6 value from the stack rather
960 * than %o7 from either the stack or the register.
961 * There are three possible situations represented
962 * by the following stacks:
963 *
964 * MARKER MARKER MARKER
965 * trap handler pc __func pc before 'save' __func pc after 'save'
966 * %l6 %o7 from reg %o7 (garbage)
967 * ... %l6 trap handler pc
968 * ... %l6
969 * ...
970 * where __func is a function called from the trap handler.
971 *
972 * Currently this is implemented to only deal with __misalign_trap_handler
973 * set for v9 FORTRAN applications. Implementation of IN_TRAP_HANDLER
974 * macro shows it. A general solution is postponed.
975 */
976
977 /* Special handling of unwind through the parallel loop barrier code:
978 *
979 * The library defines two symbols, __mt_EndOfTask_Barrier_ and
980 * __mt_EndOfTask_Barrier_Dummy_ representing the first word of
 * the barrier synchronization code, and the first word following
982 * it. Whenever the leaf PC is between these two symbols,
983 * the unwind code is special-cased as follows:
984 * The __mt_EndOfTask_Barrier_ function is guaranteed to be a leaf
985 * function, so its return address is in a register, not saved on
986 * the stack.
987 *
988 * MARKER
989 * __mt_EndOfTask_Barrier_ PC -- the leaf PC
990 * loop body function address for the task -- implied caller of __mt_EndOfTask_Barrier_
991 * this address is taken from the %O0 register
992 * {mt_master or mt_slave} -- real caller of __mt_EndOfTask_Barrier_
993 * ...
994 *
995 * With this trick, the analyzer will show the time in the barrier
996 * attributed to the loop at the end of which the barrier synchronization
997 * is taking place. That loop body routine, will be shown as called
998 * from the function from which it was extracted, which will be shown
999 * as called from the real caller, either the slave or master library routine.
1000 */
1001
1002 /*
1003 * These no-fault-load (0x82) assembly functions are courtesy of Rob Gardner.
1004 *
1005 * Note that 0x82 is ASI_PNF. See
1006 * http://lxr.free-electrons.com/source/arch/sparc/include/uapi/asm/asi.h#L134
1007 * ASI address space identifier; PNF primary no fault
1008 */
1009
/* Load an int from an address using a no-fault load (ASI 0x82 = ASI_PNF,
   primary no-fault — see the note above).

   If the address is illegal, the load silently returns 0 to this
   process instead of faulting.  */
static int
SPARC_no_fault_load_int (void *addr)
{
  int val;
  __asm__ __volatile__(
		       "lda [%1] 0x82, %0\n\t"
		       : "=r" (val)
		       : "r" (addr)
		       );

  return val;
}
1025
1026 /* check if an address is invalid
1027 *
1028 * A no-fault load of an illegal address still faults, but it does so silently to the calling process.
1029 * It returns a 0, but so could a load of a legal address.
1030 * So, we time the load. A "fast" load must be a successful load.
1031 * A "slow" load is probably a fault.
1032 * Since it could also be a cache/TLB miss or other abnormality,
1033 * it's safest to retry a slow load.
1034 * The cost of trying a valid address should be some nanosecs.
1035 * The cost of trying an invalid address up to 10 times could be some microsecs.
1036 */
#if 0
/* Heuristically decide whether ADDR is an invalid address by timing a
   no-fault load: a fast load (< 100 ticks) is assumed valid; a slow one
   is retried up to 10 times before declaring the address invalid.
   Returns 1 if ADDR appears invalid, 0 otherwise.
   Currently disabled (see the discussion in the comment above).  */
static
int invalid_SPARC_addr(void *addr)
{
    long t1, t2;
    int i;

    for (i=0; i<10; i++) {
	/* read %tick, no-fault load a byte from addr, read %tick again */
	__asm__ __volatile__(
	     "rd %%tick, %0\n\t"
	     "lduba [%2] 0x82, %%g0\n\t"
	     "rd %%tick, %1\n\t"
	     : "=r" (t1), "=r" (t2)
	     : "r" (addr) );
	if ( (t2 - t1) < 100 )
	    return 0;
    }
    return 1;
}
#endif
1057
1058 /*
1059 * The standard SPARC procedure-calling convention is that the
1060 * calling PC (for determining the return address when the procedure
1061 * is finished) is placed in register %o7. A called procedure
1062 * typically executes a "save" instruction that shifts the register
1063 * window, and %o7 becomes %i7.
1064 *
1065 * Optimized leaf procedures do not shift the register window.
1066 * They assume the return address will remain %o7. So when
1067 * we process a leaf PC, we walk instructions to see if there
1068 * is a call, restore, or other instruction that would indicate
1069 * we can IGNORE %o7 because this is NOT a leaf procedure.
1070 *
1071 * If a limited instruction walk uncovers no such hint, we save
1072 * not only the PC but the %o7 value as well... just to be safe.
1073 * Later, in DBE post-processing of the call stacks, we decide
1074 * whether any recorded %o7 value should be used as a caller
1075 * frame or should be discarded.
1076 */
1077
1078 #define IS_ILLTRAP(x) (((x) & 0xc1c00000) == 0)
1079 #define IS_SAVE(x) (((x) & 0xc1f80000) == 0x81e00000)
1080 #define IS_MOVO7R(x) (((x) & 0xc1f8201f) == 0x8160000f)
1081 #define IS_MOVRO7(x) (((x) & 0xfff82000) == 0x9f600000)
1082 #define IS_ORRG0O7(x) (((x) & 0xff78201f) == 0x9e100000)
1083 #define IS_ORG0RO7(x) (((x) & 0xff7fe000) == 0x9e100000)
1084 #define IS_ORG0O7R(x) (((x) & 0xc17fe01f) == 0x8010000f)
1085 #define IS_ORO7G0R(x) (((x) & 0xc17fe01f) == 0x8013c000)
1086 #define IS_RESTORE(x) (((x) & 0xc1f80000) == 0x81e80000)
1087 #define IS_RET(x) ((x) == 0x81c7e008)
1088 #define IS_RETL(x) ((x) == 0x81c3e008)
1089 #define IS_RETURN(x) (((x) & 0xc1f80000) == 0x81c80000)
1090 #define IS_BRANCH(x) ((((x) & 0xc0000000) == 0) && (((x) & 0x01c00000) != 0x01000000))
1091 #define IS_CALL(x) (((x) & 0xc0000000) == 0x40000000)
1092 #define IS_LDO7(x) (((x) & 0xfff80000) == 0xde000000)
1093
1094 static long pagesize = 0;
1095
/* Record the leaf frame(s) for the interrupted PC into LBUF starting at
   index IND (LBUF holds at most LSIZE entries).  CONTEXT is the signal
   ucontext.  Walks forward up to 20 instructions from PC looking for
   evidence (save/restore/ret/…) that decides whether %o7 still holds the
   leaf's return address; if undecided, records SP_LEAF_CHECK_MARKER, PC
   and %o7 so post-processing can decide.  Returns the new index.  */
static int
process_leaf (long *lbuf, int ind, int lsize, void *context)
{
  greg_t pc = GET_PC (context);
  greg_t o7 = GET_GREG (context, REG_O7);

  /* omazur: TBR START -- not used */
  /* Special case: inside the parallel-loop barrier code, record the leaf
     PC plus the loop-body address taken from %o0 (see file comment).  */
  if (IN_BARRIER (pc))
    {
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = GET_GREG (context, REG_O0);
      return ind;
    }
  /* omazur: TBR END */
#if WSIZE(64)
  /* Inside the trap handler the leaf PC is recorded as-is; the real
     return address is recovered later from %l6 (see file comment).  */
  if (IN_TRAP_HANDLER (pc))
    {
      if (ind < lsize)
	lbuf[ind++] = pc;
      return ind;
    }
#endif
  /* Walk forward through at most 20 instructions to classify the leaf.  */
  unsigned *instrp = (unsigned *) pc;
  unsigned *end_addr = instrp + 20;
  while (instrp < end_addr)
    {
      unsigned instr = *instrp++;
      if (IS_ILLTRAP (instr))
	break;
      else if (IS_SAVE (instr))
	{
	  /* A save ahead means the register window has NOT shifted yet:
	     %o7 is still the caller's return address — trust it.  */
	  if (ind < lsize)
	    lbuf[ind++] = pc;
	  if (o7 && ind < lsize)
	    lbuf[ind++] = o7;
	  return ind;
	}
      else if (IS_MOVO7R (instr) || IS_ORG0O7R (instr) || IS_ORO7G0R (instr))
	break;
      else if (IS_MOVRO7 (instr) || IS_ORG0RO7 (instr))
	{
	  /* %o7 is about to be overwritten from rs2; use that value
	     (when rs2 is a readable global/out register) instead.  */
	  int rs2 = (instr & 0x1f) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_ORRG0O7 (instr))
	{
	  int rs2 = ((instr & 0x7c000) >> 14) + REG_G1 - 1;
	  o7 = (rs2 <= REG_O7) ? GET_GREG (context, rs2) : 0;
	  break;
	}
      else if (IS_RESTORE (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETURN (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RET (instr))
	{
	  o7 = 0;
	  break;
	}
      else if (IS_RETL (instr))
	{
	  /* process delay slot */
	  instr = *instrp++;
	  if (IS_RESTORE (instr))
	    o7 = 0;
	  break;
	}
      else if (IS_BRANCH (instr))
	{
	  /* Forward walk is inconclusive at a branch; scan backwards a
	     few instructions for hints that this is not a leaf.  */
	  unsigned *backbegin = ((unsigned *) pc - 1);
	  unsigned *backend = backbegin - 12 + (instrp - (unsigned *) pc);
	  while (backbegin > backend)
	    {
	      // 21920143 stack unwind: SPARC process_leaf backtracks too far
	      /*
	       * We've already dereferenced backbegin+1.
	       * So if backbegin is on the same page, we're fine.
	       * If we've gone to a different page, possibly things are not fine.
	       * We don't really know how to test that.
	       * Let's just assume the worst: that dereferencing backbegin would segv.
	       * We won't know if we're in a leaf function or not.
	       */
	      if (pagesize == 0)
		pagesize = CALL_UTIL (sysconf)(_SC_PAGESIZE);
	      if ((((long) (backbegin + 1)) & (pagesize - 1)) < sizeof (unsigned*))
		break;
	      unsigned backinstr = *backbegin--;
	      if (IS_LDO7 (backinstr))
		{
		  o7 = 0;
		  break;
		}
	      else if (IS_ILLTRAP (backinstr))
		break;
	      else if (IS_RETURN (backinstr))
		break;
	      else if (IS_RET (backinstr))
		break;
	      else if (IS_RETL (backinstr))
		break;
	      else if (IS_CALL (backinstr))
		break;
	      else if (IS_SAVE (backinstr))
		{
		  o7 = 0;
		  break;
		}
	    }
	  break;
	}
      else if (IS_CALL (instr))
	o7 = 0;
    }

#if WSIZE(64)
  if (o7 != 0 && ((long) o7) < 32 && ((long) o7) > -32)
    {
      /* 20924821 SEGV in unwind code on SPARC/Linux
       * We've seen this condition in some SPARC-Linux runs.
       * o7 is non-zero but not a valid address.
       * Values like 4 or -7 have been seen.
       * Let's check if o7 is unreasonably small.
       * If so, set to 0 so that it won't be recorded.
       * Otherwise, there is risk of it being dereferenced in process_sigreturn().
       */
      // __collector_log_write("<event kind=\"%s\" id=\"%d\">time %lld, internal debug unwind at leaf; o7 = %ld, pc = %x</event>\n",
      // SP_JCMD_COMMENT, COL_COMMENT_NONE, __collector_gethrtime() - __collector_start_time, (long) o7, pc );
      o7 = 0;
    }
#endif

  /* Undecided: record the marker so the DBE can later accept or discard
     the %o7 value as a caller frame.  */
  if (o7)
    {
      if (ind < lsize)
	lbuf[ind++] = SP_LEAF_CHECK_MARKER;
      if (ind < lsize)
	lbuf[ind++] = pc;
      if (ind < lsize)
	lbuf[ind++] = o7;
    }
  else if (ind < lsize)
    lbuf[ind++] = pc;
  return ind;
}
1249
1250 #if WSIZE(64)
1251 // detect signal handler
/* Detect a signal-handler frame: if TPC points at the kernel's
   __rt_sigreturn_stub, pull the interrupted pc/fp out of the saved
   rt_signal_frame on the stack, append them to LBUF at IND (bounded by
   LSIZE), and update *PFP to the pre-signal frame pointer.  BPTR and
   EXTRA_FRAME have the same meaning as in stack_unwind().  Returns the
   (possibly unchanged) new index — unchanged means "not a sigreturn".
   NOTE(review): the 192/128/112/120/136 offsets mirror the Linux kernel's
   sparc64 rt_signal_frame layout — verify against the running kernel.  */
static int
process_sigreturn (long *lbuf, int ind, int lsize, unsigned char * tpc,
		   struct frame **pfp, void * bptr, int extra_frame)
{
  // cheap checks whether tpc is obviously not an instruction address
  if ((4096 > (unsigned long) tpc) // the first page is off limits
      || (3 & (unsigned long) tpc))
    return ind; // the address is not aligned

  // get the instruction at tpc, skipping over as many as 7 nop's (0x01000000)
  int insn, i;
  for (i = 0; i < 7; i++)
    {
      insn = SPARC_no_fault_load_int ((void *) tpc);
      if (insn != 0x01000000)
	break;
      tpc += 4;
    }

  // we're not expecting 0 (and it could mean an illegal address)
  if (insn == 0)
    return ind;

  // We are looking for __rt_sigreturn_stub with the instruction
  // 0x82102065 : mov 0x65 /* __NR_rt_sigreturn */, %g1
  if (insn == 0x82102065)
    {
      /*
       * according to linux kernel source code,
       * syscall(_NR_rt_sigreturn) uses the following data in stack:
       * struct rt_signal_frame {
       * struct sparc_stackf ss;
       * siginfo_t info;
       * struct pt_regs regs;
       * ....};
       * sizeof(struct sparc_stackf) is 192;
       * sizeof(siginfo_t) is 128;
       * we need to get the register values from regs, which is defined as:
       * struct pt_regs {
       * unsigned long u_regs[16];
       * unsigned long tstate;
       * unsigned long tpc;
       * unsigned long tnpc;
       * ....};
       * pc and fp register has offset of 120 and 112;
       * the pc of kill() is stored in tnpc, whose offset is 136.
       */
      greg_t pc = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 136));
      greg_t pc1 = *((unsigned long*) ((char*) ((*pfp)) + 192 + 128 + 120));
      (*pfp) = *((struct frame**) ((char*) ((*pfp)) + 192 + 128 + 112));
      if (pc && pc1)
	{
	  /* Record the extra caller frame requested by FRINFO_FROM_STACK
	     callers when we are still below bptr.  */
	  if (bptr != NULL && extra_frame && ((char*) (*pfp) + STACK_BIAS) < (char*) bptr && ind < 2)
	    {
	      lbuf[0] = pc1;
	      if (ind == 0)
		ind++;
	    }
	  if (bptr == NULL || ((char*) (*pfp) + STACK_BIAS) >= (char*) bptr)
	    {
	      if (ind < lsize)
		lbuf[ind++] = (unsigned long) tpc;
	      if (ind < lsize)
		lbuf[ind++] = pc;
	      if (ind < lsize)
		lbuf[ind++] = pc1;
	    }
	}
      DprintfT (SP_DUMP_UNWIND, "unwind.c: resolved sigreturn pc=0x%lx, pc1=0x%lx, fp=0x%lx\n", pc, pc1, *(pfp));
    }
  return ind;
}
1324 #endif
1325
1326 /*
1327 * int stack_unwind( char *buf, int size, ucontext_t *context )
1328 * This routine looks into the mcontext and
1329 * trace stack frames to record return addresses.
1330 */
1331 int
1332 stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
1333 {
1334 /*
1335 * trace the stack frames from user stack.
1336 * We are assuming that the frame pointer and return address
1337 * are null when we are at the top level.
1338 */
1339 long *lbuf = (long*) buf;
1340 int lsize = size / sizeof (long);
1341 struct frame *fp = (struct frame *) GET_SP (context); /* frame pointer */
1342 greg_t pc; /* program counter */
1343 int extra_frame = 0;
1344 if ((mode & 0xffff) == FRINFO_FROM_STACK)
1345 extra_frame = 1;
1346
1347 int ind = 0;
1348 if (bptr == NULL)
1349 ind = process_leaf (lbuf, ind, lsize, context);
1350
1351 int extra_frame = 0;
1352 if ((mode & 0xffff) == FRINFO_FROM_STACK)
1353 extra_frame = 1;
1354 int ind = 0;
1355 if (bptr == NULL)
1356 ind = process_leaf (lbuf, ind, lsize, context);
1357
1358 while (fp)
1359 {
1360 if (ind >= lsize)
1361 break;
1362 fp = (struct frame *) ((char *) fp + STACK_BIAS);
1363 if (eptr && fp >= (struct frame *) eptr)
1364 {
1365 ind = ind >= 2 ? ind - 2 : 0;
1366 break;
1367 }
1368 #if WSIZE(64) // detect signal handler
1369 unsigned char * tpc = ((unsigned char*) (fp->fr_savpc));
1370 struct frame * tfp = (struct frame*) ((char*) (fp->fr_savfp) + STACK_BIAS);
1371 int old_ind = ind;
1372 ind = process_sigreturn (lbuf, old_ind, lsize, tpc, &tfp, bptr, extra_frame);
1373 if (ind != old_ind)
1374 {
1375 pc = (greg_t) tpc;
1376 fp = tfp;
1377 }
1378 else
1379 #endif
1380 {
1381 #if WSIZE(64)
1382 if (IN_TRAP_HANDLER (lbuf[ind - 1]))
1383 pc = fp->fr_local[6];
1384 else
1385 pc = fp->fr_savpc;
1386 #else
1387 pc = fp->fr_savpc;
1388 #endif
1389 fp = fp->fr_savfp;
1390 if (pc)
1391 {
1392 if (bptr != NULL && extra_frame && ((char*) fp + STACK_BIAS) < (char*) bptr && ind < 2)
1393 {
1394 lbuf[0] = pc;
1395 if (ind == 0)
1396 ind++;
1397 }
1398 if (bptr == NULL || ((char*) fp + STACK_BIAS) >= (char*) bptr)
1399 lbuf[ind++] = pc;
1400 }
1401 }
1402
1403 /* 4616238: _door_return may have a frame that has non-zero
1404 * saved stack pointer and zero pc
1405 */
1406 if (pc == (greg_t) NULL)
1407 break;
1408 }
1409
1410 if (ind >= lsize)
1411 { /* truncated stack handling */
1412 ind = lsize - 1;
1413 lbuf[ind++] = SP_TRUNC_STACK_MARKER;
1414 }
1415 return ind * sizeof (long);
1416 }
1417
1418 #elif ARCH(Intel)
1419
1420 /* get __NR_<syscall_name> constants */
1421 #include <syscall.h>
1422
1423 /*
1424 * From uts/intel/ia32/os/sendsig.c:
1425 *
1426 * An amd64 signal frame looks like this on the stack:
1427 *
1428 * old %rsp:
1429 * <128 bytes of untouched stack space>
1430 * <a siginfo_t [optional]>
1431 * <a ucontext_t>
1432 * <siginfo_t *>
1433 * <signal number>
1434 * new %rsp: <return address (deliberately invalid)>
1435 *
1436 * The signal number and siginfo_t pointer are only pushed onto the stack in
1437 * order to allow stack backtraces. The actual signal handling code expects the
1438 * arguments in registers.
1439 *
1440 * An i386 SVR4/ABI signal frame looks like this on the stack:
1441 *
1442 * old %esp:
1443 * <a siginfo32_t [optional]>
1444 * <a ucontext32_t>
1445 * <pointer to that ucontext32_t>
1446 * <pointer to that siginfo32_t>
1447 * <signo>
1448 * new %esp: <return address (deliberately invalid)>
1449 */
1450
1451 #if WSIZE(32)
1452 #define OPC_REG(x) ((x)&0x7)
1453 #define MRM_REGD(x) (((x)>>3)&0x7)
1454 #define MRM_REGS(x) ((x)&0x7)
1455 #define RED_ZONE 0
1456 #elif WSIZE(64)
1457 #define OPC_REG(x) (B|((x)&0x7))
1458 #define MRM_REGD(x) (R|(((x)>>3)&0x7))
1459 #define MRM_REGS(x) (B|((x)&0x7))
1460 #define RED_ZONE 16
1461 #endif
1462 #define MRM_EXT(x) (((x)>>3)&0x7)
1463 #define MRM_MOD(x) ((x)&0xc0)
1464
1465 #define RAX 0
1466 #define RDX 2
1467 #define RSP 4
1468 #define RBP 5
1469
/* State for one speculative x86 instruction-walk context.  The walker
   forks a copy of this at every conditional branch it follows.  */
struct AdvWalkContext
{
  unsigned char *pc;            /* current instruction pointer of the walk */
  unsigned long *sp;            /* simulated stack pointer */
  unsigned long *sp_safe;       /* lowest stack address safe to read */
  unsigned long *fp;            /* simulated frame pointer */
  unsigned long *fp_sav;        /* frame pointer saved by push %rbp */
  unsigned long *fp_loc;        /* stack slot where fp was pushed */
  unsigned long rax;            /* tracked %rax value */
  unsigned long rdx;            /* tracked %rdx value */
  unsigned long ra_sav;         /* return address saved off-stack */
  unsigned long *ra_loc;        /* stack slot holding the return address */
  unsigned long regs[16];       /* tracked general-purpose register values */
  int tidx; /* targets table index */
  uint32_t cval; /* cache value */
};
1486
1487 static unsigned long
1488 getRegVal (struct AdvWalkContext *cur, int r, int *undefRez)
1489 {
1490 if (cur->regs[r] == 0)
1491 {
1492 if (r == RBP)
1493 {
1494 tprintf (DBG_LT3, "getRegVal: returns cur->regs[RBP]=0x%lx cur->pc=0x%lx\n",
1495 (unsigned long) cur->fp, (unsigned long) cur->pc);
1496 return (unsigned long) cur->fp;
1497 }
1498 *undefRez = 1;
1499 }
1500 tprintf (DBG_LT3, "getRegVal: cur->regs[%d]=0x%lx cur->pc=0x%lx\n",
1501 r, (unsigned long) cur->regs[r], (unsigned long) cur->pc);
1502 return cur->regs[r];
1503 }
1504
1505 static unsigned char *
1506 check_modrm (unsigned char *pc)
1507 {
1508 unsigned char modrm = *pc++;
1509 unsigned char mod = MRM_MOD (modrm);
1510 if (mod == 0xc0)
1511 return pc;
1512 unsigned char regs = modrm & 0x07;
1513 if (regs == RSP)
1514 {
1515 if (mod == 0x40)
1516 return pc + 2; // SIB + disp8
1517 if (mod == 0x80)
1518 return pc + 5; // SIB + disp32
1519 return pc + 1; // SIB
1520 }
1521 if (mod == 0x0)
1522 {
1523 if (regs == RBP)
1524 pc += 4; // disp32
1525 }
1526 else if (mod == 0x40)
1527 pc += 1; /* byte */
1528 else if (mod == 0x80)
1529 pc += 4; /* word */
1530 return pc;
1531 }
1532
/* Read a signed immediate of width W bytes (1, 2, or otherwise 4)
   from the instruction stream at PC.  */
static int
read_int (unsigned char *pc, int w)
{
  switch (w)
    {
    case 1:
      return *((char *) pc);
    case 2:
      return *(short*) pc;
    default:
      return *(int*) pc;
    }
}
1542
1543 /* Return codes */
1544 enum
1545 {
1546 RA_FAILURE = 0,
1547 RA_SUCCESS,
1548 RA_END_OF_STACK,
1549 RA_SIGRETURN,
1550 RA_RT_SIGRETURN
1551 };
1552
1553 /* Cache value encodings */
1554 static const uint32_t RA_FROMFP = (uint32_t) - 1; /* get the RA from the frame pointer */
1555 static const uint32_t RA_EOSTCK = (uint32_t) - 2; /* end-of-stack */
1556
1557
1558 #define MAXCTX 16
1559 #define MAXTRGTS 64
1560 #define MAXJMPREG 2
1561 #define MAXJMPREGCTX 3
1562
1563 #define DELETE_CURCTX() __collector_memcpy (cur, buf + (--nctx), sizeof (*cur))
1564
1565 /**
1566 * Look for pc in AddrTable_RA_FROMFP and in AddrTable_RA_EOSTCK
1567 * @param wctx
1568 * @return
1569 */
/**
 * Look for pc in AddrTable_RA_FROMFP and in AddrTable_RA_EOSTCK
 * @param wctx
 * @return RA_SUCCESS (wctx updated from the frame pointer),
 *         RA_END_OF_STACK, or RA_FAILURE when pc is not cached or the
 *         cached frame pointer fails validation.
 */
static int
cache_get (struct WalkContext *wctx)
{
  unsigned long addr;
  if (AddrTable_RA_FROMFP != NULL)
    {
      uint64_t idx = wctx->pc % ValTableSize;
      addr = AddrTable_RA_FROMFP[ idx ];
      if (addr == wctx->pc)
	{ // Found in AddrTable_RA_FROMFP
	  unsigned long *sp = NULL;
	  unsigned long fp = wctx->fp;
	  /* validate fp before use */
	  if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
	    return RA_FAILURE;
	  sp = (unsigned long *) fp;
	  fp = *sp++;
	  unsigned long ra = *sp++;
	  unsigned long tbgn = wctx->tbgn;
	  unsigned long tend = wctx->tend;
	  /* The return address must lie in a known text segment.  */
	  if (ra < tbgn || ra >= tend)
	    if (!__collector_check_segment (ra, &tbgn, &tend, 0))
	      return RA_FAILURE;
	  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
	  if (npc == 0)
	    return RA_FAILURE;
	  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached pc=0x%lX\n", __LINE__, npc);
	  wctx->pc = npc;
	  wctx->sp = (unsigned long) sp;
	  wctx->fp = fp;
	  wctx->tbgn = tbgn;
	  wctx->tend = tend;
	  return RA_SUCCESS;
	}
    }
  /* Not a from-fp hit; check the end-of-stack cache.  */
  if (NULL == AddrTable_RA_EOSTCK)
    return RA_FAILURE;
  uint64_t idx = wctx->pc % ValTableSize;
  addr = AddrTable_RA_EOSTCK[ idx ];
  if (addr != wctx->pc)
    return RA_FAILURE;
  DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cached RA_END_OF_STACK\n", __LINE__);
  return RA_END_OF_STACK;
}
1614 /**
1615 * Save pc in RA_FROMFP or RA_EOSTCK cache depending on val
1616 * @param wctx
1617 */
1618 static void
1619 cache_put (struct WalkContext *wctx, const uint32_t val)
1620 {
1621 if (RA_FROMFP == val)
1622 {
1623 // save pc in RA_FROMFP cache
1624 if (NULL != AddrTable_RA_FROMFP)
1625 {
1626 uint64_t idx = wctx->pc % ValTableSize;
1627 AddrTable_RA_FROMFP[ idx ] = wctx->pc;
1628 if (NULL != AddrTable_RA_EOSTCK)
1629 if (AddrTable_RA_EOSTCK[ idx ] == wctx->pc)
1630 // invalidate pc in RA_EOSTCK cache
1631 AddrTable_RA_EOSTCK[ idx ] = 0;
1632 }
1633 return;
1634 }
1635 if (RA_EOSTCK == val)
1636 {
1637 // save pc in RA_EOSTCK cache
1638 if (NULL != AddrTable_RA_EOSTCK)
1639 {
1640 uint64_t idx = wctx->pc % ValTableSize;
1641 AddrTable_RA_EOSTCK[ idx ] = wctx->pc;
1642 if (NULL != AddrTable_RA_FROMFP)
1643 {
1644 if (AddrTable_RA_FROMFP[ idx ] == wctx->pc)
1645 // invalidate pc in RA_FROMFP cache
1646 AddrTable_RA_FROMFP[ idx ] = 0;
1647 }
1648 }
1649 return;
1650 }
1651 }
1652
/* Pop a return address from CUR's simulated stack and commit it to WCTX.
   Validates that the stack slot lies within [wctx->sp, wctx->sbase) and
   above cur->sp_safe, that the popped address lies in a known text
   segment, and adjusts it to the call site.  When CACHE_ON, successful
   results are recorded via cache_put().  Returns RA_SUCCESS,
   RA_END_OF_STACK (popped address is 0), or RA_FAILURE.  */
static int
process_return_real (struct WalkContext *wctx, struct AdvWalkContext *cur, int cache_on)
{
  if ((unsigned long) cur->sp >= wctx->sbase ||
      (unsigned long) cur->sp < wctx->sp)
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not in stack: %p [0x%lX-0x%lX]\n",
		cur->sp, wctx->sp, wctx->sbase);
      return RA_FAILURE;
    }

  unsigned long ra;
  /* Prefer the off-stack copy of the return address if the walk saved one
     for exactly this slot.  */
  if (cur->sp == cur->ra_loc)
    {
      ra = cur->ra_sav;
      cur->sp++;
    }
  else if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
    ra = *cur->sp++;
  else
    {
      DprintfT (SP_DUMP_UNWIND, "unwind.c: not safe: %p >= %p\n", cur->sp, cur->sp_safe);
      return RA_FAILURE;
    }
  if (ra == 0)
    {
      /* A zero return address marks the outermost frame.  */
      if (cache_on)
	cache_put (wctx, RA_EOSTCK);
      wctx->pc = ra;
      wctx->sp = (unsigned long) cur->sp;
      wctx->fp = (unsigned long) cur->fp;
      DprintfT (SP_DUMP_UNWIND, "unwind.c:%d RA_END_OF_STACK\n", __LINE__);
      return RA_END_OF_STACK;
    }

  unsigned long tbgn = wctx->tbgn;
  unsigned long tend = wctx->tend;
  if (ra < tbgn || ra >= tend)
    {
      if (!__collector_check_segment (ra, &tbgn, &tend, 0))
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: not in segment: 0x%lX [0x%lX-0x%lX]\n",
		    ra, wctx->tbgn, wctx->tend);
	  return RA_FAILURE;
	}
    }

  if (cur->cval == RA_FROMFP)
    {
      /* Only trust the from-fp hint when the frame pointer actually
	 pointed at the slot pair we just popped.  */
      if (wctx->fp == (unsigned long) (cur->sp - 2))
	{
	  if (cache_on)
	    cache_put (wctx, RA_FROMFP);
	}
      else
	cur->cval = 0;
    }

  unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
  if (npc == 0)
    {
      if (cur->cval == RA_FROMFP)
	{
	  /* We have another evidence that we can trust this RA */
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: trusted fp, pc = 0x%lX\n", wctx->pc);
	  wctx->pc = ra;
	}
      else
	{
	  DprintfT (SP_DUMP_UNWIND, "unwind.c: 0 after adjustment\n");
	  return RA_FAILURE;
	}
    }
  else
    wctx->pc = npc;
  wctx->sp = (unsigned long) cur->sp;
  wctx->fp = (unsigned long) cur->fp;
  wctx->tbgn = tbgn;
  wctx->tend = tend;
  return RA_SUCCESS;
}
1734
/* Resolve the return address for CUR into WCTX with result caching on.  */
static int
process_return (struct WalkContext *wctx, struct AdvWalkContext *cur)
{
  return process_return_real (wctx, cur, 1);
}
1740
/* Record a resolved unwind step in the OpenMP walk cache: key is the
   starting context WCTX_PC_SAVE, value is VAL plus the resulting context
   WCTX; the return address found at the frame (or at wctx->sp - 1) is
   stored as a validity check for later lookups.  CUR_SP_SAFE bounds the
   lowest stack address safe to read.  Tables are lazily allocated on
   first use when omp_no_walk is set.  */
static void
omp_cache_put (unsigned long *cur_sp_safe, struct WalkContext * wctx_pc_save,
	       struct WalkContext *wctx, uint32_t val)
{
  if (omp_no_walk && (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL))
    {
      size_t sz = OmpValTableSize * sizeof (*OmpCurCtxs);
      OmpCurCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpCtxs);
      OmpCtxs = (struct WalkContext *) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpVals);
      OmpVals = (uint32_t*) __collector_allocCSize (__collector_heap, sz, 1);
      sz = OmpValTableSize * sizeof (*OmpRAs);
      OmpRAs = (unsigned long*) __collector_allocCSize (__collector_heap, sz, 1);
    }
  if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
    return;

#define USE_18434988_OMP_CACHE_WORKAROUND
#ifndef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx = wctx_pc_save->pc * ROOT_IDX;
  OmpVals[ idx % OmpValTableSize ] = val;
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCurCtxs[ idx % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  idx = (idx + val) * ROOT_IDX;
  __collector_memcpy (&(OmpCtxs[ idx % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
#endif
  /* Locate the return address to store as the entry's checksum: from the
     frame pointer when it looks valid, otherwise just above wctx->sp.  */
  unsigned long *sp = NULL;
  unsigned long fp = wctx_pc_save->fp;
  int from_fp = 0;
  if (val == RA_END_OF_STACK)
    {
      sp = (unsigned long *) (wctx->sp);
      sp--;
      TprintfT (DBG_LT1, "omp_cache_put: get sp from EOS, sp=%p\n", sp);
    }
  else
    {
      if (fp < wctx_pc_save->sp || fp >= wctx_pc_save->sbase - sizeof (*sp))
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from sp, sp=%p\n", sp);
	}
      else
	{
	  TprintfT (DBG_LT1, "omp_cache_put: get sp from fp=0x%lx\n", fp);
	  sp = (unsigned long *) fp;
	  from_fp = 1;
	}
    }

  if (sp < cur_sp_safe || ((unsigned long) sp >= wctx->sbase))
    return;

  unsigned long ra = *sp++;
  if (from_fp)
    {
      /* The fp-derived RA is not in the caller's text segment; fall back
	 to the slot above wctx->sp.  */
      unsigned long tbgn = wctx_pc_save->tbgn;
      unsigned long tend = wctx_pc_save->tend;
      if (ra < tbgn || ra >= tend)
	{
	  sp = (unsigned long *) (wctx->sp);
	  sp--;
	  ra = *sp++;
	}
    }
#ifdef USE_18434988_OMP_CACHE_WORKAROUND
  uint64_t idx1 = wctx_pc_save->pc * ROOT_IDX;
  uint64_t idx2 = (idx1 + val) * ROOT_IDX;
  uint64_t idx3 = (idx2 + val) * ROOT_IDX;
  uint64_t idx4 = (idx3 + val) * ROOT_IDX;
  OmpRAs [ idx4 % OmpValTableSize ] = 0; // lock
  OmpVals[ idx1 % OmpValTableSize ] = val;
  __collector_memcpy (&(OmpCurCtxs[ idx2 % OmpValTableSize ]), wctx_pc_save, sizeof (struct WalkContext));
  __collector_memcpy (&(OmpCtxs [ idx3 % OmpValTableSize ]), wctx, sizeof (struct WalkContext));
  OmpRAs [ idx4 % OmpValTableSize ] = ra;
#else
  idx = (idx + val) * ROOT_IDX;
  OmpRAs[ idx % OmpValTableSize ] = ra;
#endif
  TprintfT (DBG_LT1, "omp_cache_put: pc=0x%lx\n", wctx_pc_save->pc);
}
1824
1825 /*
1826 * See bug 17166877 - malloc_internal unwind failure.
1827 * Sometimes there are several calls right after ret, like:
1828 * leave
1829 * ret
1830 * call xxx
1831 * call xxxx
1832 * call xxxxx
1833 * If they are also jump targets, we should better not
1834 * create new jump context for those, since they may
1835 * end up into some other function.
1836 */
/*
 * See bug 17166877 - malloc_internal unwind failure.
 * A call instruction (0xe8) that sits in a run of at least three
 * consecutive calls immediately preceded by "leave; ret" (0xc9 0xc3)
 * is very likely a jump target right after a return, not fall-through
 * code — the walker should not open a new context there.
 * Returns 1 for that pattern, 0 otherwise.
 */
static int
is_after_ret (unsigned char * npc)
{
  if (*npc != 0xe8)
    return 0;
  unsigned char *fwd = npc;
  int calls_seen = 1;
  const int step_limit = 10;
  const int call_threshold = 3;
  int steps;
  /* Scan backwards over consecutive 5-byte call instructions.  */
  for (steps = 0; steps < step_limit && *(npc - 5) == 0xe8; steps++)
    {
      npc -= 5;
      calls_seen++;
    }
  /* The run must be preceded by "leave; ret".  */
  if (*(npc - 1) != 0xc3 || *(npc - 2) != 0xc9)
    return 0;
  /* Scan forward over consecutive call instructions as well.  */
  for (steps = 0; steps < step_limit && *(fwd + 5) == 0xe8; steps++)
    {
      fwd += 5;
      calls_seen++;
    }
  return calls_seen >= call_threshold ? 1 : 0;
}
1866
1867 static int
1868 find_i386_ret_addr (struct WalkContext *wctx, int do_walk)
1869 {
1870 if (wctx->sp == 0)
1871 // Some artificial contexts may have %sp set to 0. See SETFUNCTIONCONTEXT()
1872 return RA_FAILURE;
1873
1874 /* Check cached values */
1875 int retc = cache_get (wctx);
1876 if (retc != RA_FAILURE)
1877 return retc;
1878
1879 /* An attempt to perform code analysis for call stack tracing */
1880 unsigned char opcode;
1881 unsigned char extop;
1882 unsigned char extop2;
1883 unsigned char modrm;
1884 int imm8; /* immediate operand, byte */
1885 int immv; /* immediate operand, word(2) or doubleword(4) */
1886 int reg; /* register code */
1887
1888 /* Buffer for branch targets (analysis stoppers) */
1889 unsigned char *targets[MAXTRGTS];
1890 int ntrg = 0; /* number of entries in the table */
1891 targets[ntrg++] = (unsigned char*) wctx->pc;
1892 targets[ntrg++] = (unsigned char*) - 1;
1893
1894 struct AdvWalkContext buf[MAXCTX];
1895 struct AdvWalkContext *cur = buf;
1896 CALL_UTIL (memset)((void*) cur, 0, sizeof (*cur));
1897
1898 cur->pc = (unsigned char*) wctx->pc;
1899 cur->sp = (unsigned long*) wctx->sp;
1900 cur->sp_safe = cur->sp - RED_ZONE; /* allow for the 128-byte red zone on amd64 */
1901 cur->fp = (unsigned long*) wctx->fp;
1902 cur->tidx = 1;
1903 DprintfT (SP_DUMP_UNWIND, "\nstack_unwind (x86 walk):%d %p start\n", __LINE__, cur->pc);
1904
1905 int nctx = 1; /* number of contexts being processed */
1906 int cnt = 8192; /* number of instructions to analyse */
1907
1908 /*
1909 * The basic idea of our x86 stack unwind is that we don't know
1910 * if we can trust the frame-pointer register. So we walk
1911 * instructions to find a return instruction, at which point
1912 * we know the return address is on the top of the stack, etc.
1913 *
1914 * A severe challenge to walking x86 instructions is when we
1915 * encounter "jmp *(reg)" instructions, where we are expected
1916 * to jump to the (unknown-to-us) contents of a register.
1917 *
1918 * The "jmp_reg" code here attempts to keep track of the
1919 * context for such a jump, deferring any handling of such
1920 * a difficult case. We continue with other contexts, hoping
1921 * that some other walk will take us to a return instruction.
1922 *
1923 * If no other walk helps, we return to "jmp_reg" contexts.
1924 * While we don't know the jump target, it is possible that the
1925 * bytes immediately following the jmp_reg instruction represent
1926 * one possible target, as might be the case when a "switch"
1927 * statement is compiled.
1928 *
1929 * Unfortunately, the bytes following a "jmp_reg" instruction might
1930 * instead be a jump target from somewhere else -- execution might
1931 * never "fall through" from the preceding "jmp_reg". Those bytes
1932 * might not even be instructions at all. There are many uses of
1933 * jmp_reg instructions beyond just compiling switch statements.
1934 *
1935 * So walking the bytes after a "jmp_reg" instruction can lead
1936 * to bugs and undefined behavior, including SEGV and core dump.
1937 *
1938 * We currently do not really understand the "jmp_reg" code below.
1939 */
1940 int jmp_reg_switch_mode = 0;
1941 int num_jmp_reg = 0; // number of jmp *reg met when switch mode is off or when in current switch case
1942 int total_num_jmp_reg = 0; // number of total jmp *reg met
1943 struct AdvWalkContext * jmp_reg_ctx[MAXJMPREG]; // context of jmp *reg met when switch mode is off or when in current switch case
1944 struct AdvWalkContext * jmp_reg_switch_ctx[MAXJMPREG]; // context of jmp *reg used in switch cases
1945 struct AdvWalkContext * jmp_reg_switch_backup_ctx = NULL; // context of the first jmp *reg used in switch cases
1946
1947 int cur_jmp_reg_switch = 0; // current switch table
1948 int num_jmp_reg_switch = 0; // number of switch table
1949 int jmp_reg_switch_case = 0; // case number in current switch table
1950 unsigned char * jmp_reg_switch_pc = NULL; // the start pc of current switch case
1951 unsigned char * jmp_reg_switch_pc_old = NULL; // backup for deleteing context of jump target
1952 unsigned char * jmp_reg_switch_base = NULL; // start pc for checking offsets
1953 int max_jmp_reg_switch_case = 2;
1954 #if WSIZE(32)
1955 int max_switch_pc_offset = 512;
1956 #else // WSIZE(64)
1957 int max_switch_pc_offset = 1024;
1958 #endif
1959 int expected_num_jmp_reg = 1; // should be smaller than MAXJMPREG
1960 int max_num_jmp_reg_seen = 4; // try to resolve return if there are so many such instructions
1961
1962
1963 int save_ctx = 0; // flag to save walk context in the cache to speed up unwind
1964 struct WalkContext wctx_pc_save;
1965 if (do_walk == 0)
1966 // do_walk is the flag indicating not walking through the instructions, resolving the RA from the stack fp first
1967 __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
1968
1969 startWalk:
1970 if (do_walk == 0)
1971 { // try to resolve RA from stack frame pointer
1972 if (OmpCurCtxs == NULL || OmpCtxs == NULL || OmpVals == NULL || OmpRAs == NULL)
1973 {
1974 do_walk = 1;
1975 goto startWalk;
1976 }
1977 // before goto checkFP, try the RA from cache (key: WalkContext -> value: caller's WalkContext))
1978 uint64_t idx = wctx->pc * ROOT_IDX;
1979 uint32_t val = OmpVals[idx % OmpValTableSize];
1980 idx = (idx + val) * ROOT_IDX;
1981 #ifdef USE_18434988_OMP_CACHE_WORKAROUND
1982 // Check ra: if it is 0 - then cache is invalid
1983 uint64_t idx4;
1984 idx4 = (idx + val) * ROOT_IDX;
1985 idx4 = (idx4 + val) * ROOT_IDX;
1986 if (0 == OmpRAs[ idx4 % OmpValTableSize ]) // Invalid cache
1987 goto checkFP;
1988 #endif
1989 struct WalkContext saved_ctx;
1990 __collector_memcpy (&saved_ctx, &OmpCurCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
1991 if (wctx->pc == saved_ctx.pc
1992 && wctx->sp == saved_ctx.sp
1993 && wctx->fp == saved_ctx.fp
1994 && wctx->tbgn == saved_ctx.tbgn
1995 && wctx->tend == saved_ctx.tend)
1996 { // key match, RA may be valid
1997 idx = (idx + val) * ROOT_IDX;
1998 unsigned long *sp = NULL;
1999 unsigned long fp = wctx->fp;
2000 int from_fp = 0;
2001 if (val == RA_END_OF_STACK)
2002 {
2003 DprintfT (SP_DUMP_UNWIND, "find_i386_ret_addr:%d -- RA_END_OF_STACK: pc=0x%lx\n", __LINE__, wctx->pc);
2004 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2005 return val;
2006 }
2007 else
2008 {
2009 if (fp < wctx->sp || fp >= wctx->sbase - sizeof (*sp))
2010 {
2011 TprintfT (DBG_LT1, "omp_cache_get -- wrong fp: pc=0x%lx\n", wctx->pc);
2012 sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2013 sp--;
2014 if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2015 {
2016 goto checkFP;
2017 }
2018 unsigned long ra = *sp;
2019 uint64_t idx2 = (idx + val) * ROOT_IDX;
2020 if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2021 {
2022 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2023 TprintfT (DBG_LT1, "omp_cache_get -- ra match with target sp: pc=0x%lx, ra=0x%lx, val=%d\n", wctx->pc, ra, val);
2024 return val;
2025 }
2026 TprintfT (DBG_LT1, "omp_cache_get -- ra mismatch: ra=0x%lx, expected ra=0x%lx, val=%d\n", ra, OmpRAs[ idx2 % OmpValTableSize ], val);
2027 goto checkFP;
2028 }
2029 sp = (unsigned long *) fp;
2030 from_fp = 1;
2031 }
2032
2033 uint64_t idx2 = (idx + val) * ROOT_IDX;
2034 unsigned long ra = *sp++;
2035 if (from_fp)
2036 {
2037 unsigned long tbgn = wctx->tbgn;
2038 unsigned long tend = wctx->tend;
2039 if (ra < tbgn || ra >= tend)
2040 {
2041 sp = (unsigned long *) (OmpCtxs[ idx % OmpValTableSize ].sp);
2042 sp--;
2043 //if (sp < cur->sp_safe - 16 || (unsigned long)sp >= wctx->sbase - sizeof(*sp)) {
2044 // The check above was replaced with the check below,
2045 // because we do not know why "- 16" and "- sizeof(*sp)" was used.
2046 if (sp < cur->sp_safe || (unsigned long) sp >= wctx->sbase)
2047 goto checkFP;
2048 else
2049 ra = *sp;
2050 }
2051 }
2052 if (OmpRAs[ idx2 % OmpValTableSize ] == ra)
2053 {
2054 TprintfT (DBG_LT1, "omp_cache_get -- ra match: pc=0x%lx\n", wctx->pc);
2055 __collector_memcpy (wctx, &OmpCtxs[ idx % OmpValTableSize ], sizeof (struct WalkContext));
2056 return val;
2057 }
2058 }
2059 goto checkFP;
2060 }
2061 else
2062 {
2063 CALL_UTIL (memset)(jmp_reg_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2064 CALL_UTIL (memset)(jmp_reg_switch_ctx, 0, MAXJMPREG * sizeof (struct AdvWalkContext *));
2065 }
2066 while (cnt--)
2067 {
2068 if (nctx == 0 && (num_jmp_reg == expected_num_jmp_reg || jmp_reg_switch_mode == 1))
2069 { // no context available, try jmp switch mode
2070 int i = 0;
2071 if (num_jmp_reg == expected_num_jmp_reg)
2072 jmp_reg_switch_mode = 0; // first jmp reg expected, restart switch mode
2073 DprintfT (SP_DUMP_UNWIND, "unwind.c: begin switch mode, num_jmp_reg = %d, jmp_reg_switch_backup_ctx=%p, jmp_reg_switch_case=%d, jmp_reg_switch_mode=%d.\n",
2074 num_jmp_reg, jmp_reg_switch_backup_ctx, jmp_reg_switch_case, jmp_reg_switch_mode);
2075 // the ideal asm of switch is
2076 // jmp reg
2077 // ...//case 1
2078 // ret
2079 // ...//case 2
2080 // ret
2081 // ...//etc
2082 if (jmp_reg_switch_mode == 0)
2083 {
2084 num_jmp_reg_switch = num_jmp_reg; // backup num_jmp_reg
2085 jmp_reg_switch_mode = 1; // begin switch mode
2086 for (i = 0; i < num_jmp_reg_switch; i++)
2087 {
2088 if (jmp_reg_switch_ctx[i] == NULL)
2089 jmp_reg_switch_ctx[i] = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_ctx[i]));
2090 if (jmp_reg_switch_ctx[i] != NULL)
2091 { // backup jmp_reg_ctx
2092 __collector_memcpy (jmp_reg_switch_ctx[i], jmp_reg_ctx[i], sizeof (*jmp_reg_switch_ctx[i]));
2093 cur_jmp_reg_switch = 0; // reset the current switch table
2094 jmp_reg_switch_case = 0; // reset the case number in current switch table
2095 }
2096 }
2097 if (jmp_reg_switch_backup_ctx == NULL)
2098 { // only backup when the first jmp *reg is met for restoring later, if switch mode fails to resolve RA
2099 jmp_reg_switch_backup_ctx = (struct AdvWalkContext*) alloca (sizeof (*jmp_reg_switch_backup_ctx));
2100 if (jmp_reg_switch_backup_ctx != NULL)
2101 __collector_memcpy (jmp_reg_switch_backup_ctx, cur, sizeof (*cur));
2102 DprintfT (SP_DUMP_UNWIND, "unwind.c: back up context for switch mode.\n");
2103 }
2104 }
2105 if (jmp_reg_switch_mode == 1)
2106 { // in the process of trying switch cases
2107 if (cur_jmp_reg_switch == num_jmp_reg_switch)
2108 {
2109 DprintfT (SP_DUMP_UNWIND, "unwind.c: have tried all switch with max_jmp_reg_switch_case for each\n");
2110 if (jmp_reg_switch_backup_ctx != NULL)
2111 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2112 int rc = process_return_real (wctx, cur, 0);
2113 if (rc == RA_SUCCESS)
2114 {
2115 if (save_ctx)
2116 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2117 return rc;
2118 }
2119 break; // have tried all switch with max_jmp_reg_switch_case for each, goto checkFP
2120 }
2121 unsigned char *npc = jmp_reg_switch_ctx[cur_jmp_reg_switch]->pc;
2122 if (jmp_reg_switch_case == 0)
2123 // first switch case
2124 npc = check_modrm (npc); // pc next to "jmp reg" instruction
2125 else if (jmp_reg_switch_pc != NULL)
2126 npc = jmp_reg_switch_pc; // pc next to "ret" instruction of previous case
2127 else
2128 {
2129 DprintfT (SP_DUMP_UNWIND, "unwind.c: unexpected jum switch mode situation, jmp_reg_switch_case=%d, jmp_reg_switch_pc=%p\n",
2130 jmp_reg_switch_case, jmp_reg_switch_pc);
2131 break; //goto checkFP
2132 }
2133 jmp_reg_switch_base = npc;
2134 struct AdvWalkContext *new = buf + nctx;
2135 nctx += 1;
2136 __collector_memcpy (new, jmp_reg_switch_ctx[cur_jmp_reg_switch], sizeof (*new));
2137 new->pc = npc;
2138 cur = new; /* advance the new context first */
2139 jmp_reg_switch_pc = NULL;
2140 jmp_reg_switch_case++;
2141 if (jmp_reg_switch_case == max_jmp_reg_switch_case)
2142 { // done many cases, change to another switch table
2143 cur_jmp_reg_switch++;
2144 jmp_reg_switch_case = 0;
2145 }
2146 }
2147 num_jmp_reg = 0;
2148 }
2149 if (jmp_reg_switch_mode == 1)
2150 { // when processing switch cases, check pc each time
2151 unsigned long tbgn = wctx->tbgn;
2152 unsigned long tend = wctx->tend;
2153 if ((unsigned long) (cur->pc) < tbgn || (unsigned long) (cur->pc) >= tend)
2154 {
2155 DprintfT (SP_DUMP_UNWIND, "unwind.c: pc out of range, pc=0x%lx\n", (unsigned long) (cur->pc));
2156 break;
2157 }
2158 if (jmp_reg_switch_base != NULL && cur->pc > jmp_reg_switch_base + max_switch_pc_offset)
2159 {
2160 DprintfT (SP_DUMP_UNWIND, "unwind.c: limit the walk offset after jmp reg instruction\n");
2161 if (jmp_reg_switch_backup_ctx != NULL)
2162 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
2163 int rc = process_return_real (wctx, cur, 0);
2164 if (rc == RA_SUCCESS)
2165 {
2166 if (save_ctx)
2167 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
2168 return rc;
2169 }
2170 break; // limit the walk offset after jmp reg instruction, goto checkFP
2171 }
2172 }
2173
2174 if (nctx == 0)
2175 break;
2176 // dump_targets (__LINE__, ntrg, targets);
2177 while (cur->pc > targets[cur->tidx])
2178 cur->tidx += 1;
2179 if (cur->pc == targets[cur->tidx])
2180 {
2181 /* Stop analysis. Delete context. */
2182 if (jmp_reg_switch_mode == 0 || cur->pc != jmp_reg_switch_pc_old)
2183 {
2184 if (jmp_reg_switch_mode == 1 && nctx == 1 && jmp_reg_switch_pc == NULL)
2185 {
2186 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d old target, cur->pc=%p, jmp_reg_switch_pc=%p, nctx=%d\n",
2187 __LINE__, cur->pc, jmp_reg_switch_pc, nctx);
2188 jmp_reg_switch_pc = cur->pc; // save pc before delete context, may be used as a start of switch case
2189 jmp_reg_switch_pc_old = jmp_reg_switch_pc;
2190 }
2191 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, old target.\n", __LINE__);
2192 DELETE_CURCTX ();
2193 if (cur >= buf + nctx)
2194 cur = buf;
2195 continue;
2196 }
2197 if (jmp_reg_switch_mode == 1 && cur->pc == jmp_reg_switch_pc_old)
2198 jmp_reg_switch_pc_old = NULL; // reset jmp_reg_switch_pc_old to delete the context later when cur->pc != jmp_reg_switch_pc_old
2199 }
2200
2201 /* let's walk the next x86 instruction */
2202 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d cur:%ld pc=0x%lx %02x %02x %02x %02x %02x %02x %02x sp=0x%lx\n",
2203 __LINE__, (long) (cur - buf), (unsigned long) cur->pc,
2204 (int) cur->pc[0], (int) cur->pc[1], (int) cur->pc[2],
2205 (int) cur->pc[3], (int) cur->pc[4], (int) cur->pc[5],
2206 (int) cur->pc[6], (unsigned long) cur->sp);
2207 int v = 4; /* Operand size */
2208 int a = 4; /* Address size */
2209 /* int W = 0; REX.W bit */
2210 #if WSIZE(64)
2211 int R = 0; /* REX.R bit */
2212 #endif
2213 int X = 0; /* REX.X bit */
2214 int B = 0; /* REX.B bit */
2215 /* Check prefixes */
2216 int done = 0;
2217 while (!done)
2218 {
2219 opcode = *cur->pc++;
2220 switch (opcode)
2221 {
2222 case 0x66: /* opd size override */
2223 v = 2;
2224 break;
2225 case 0x67: /*addr size override */
2226 a = 2;
2227 break;
2228 #if WSIZE(64)
2229 case 0x40: /* REX */
2230 case 0x41:
2231 case 0x42:
2232 case 0x43:
2233 case 0x44:
2234 case 0x45:
2235 case 0x46:
2236 case 0x47:
2237 case 0x48:
2238 case 0x49:
2239 case 0x4a:
2240 case 0x4b:
2241 case 0x4c:
2242 case 0x4d:
2243 case 0x4e:
2244 case 0x4f:
2245 B = (opcode & 0x1) ? 8 : 0;
2246 X = (opcode & 0x2) ? 8 : 0;
2247 R = (opcode & 0x4) ? 8 : 0;
2248 if (opcode & 0x8) /* 64 bit operand size */
2249 v = 8;
2250 opcode = *cur->pc++;
2251 done = 1;
2252 break;
2253 #endif
2254 default:
2255 done = 1;
2256 break;
2257 }
2258 }
2259 int z = (v == 8) ? 4 : v;
2260 switch (opcode)
2261 {
2262 case 0x0: /* add Eb,Gb */
2263 case 0x01: /* add Ev,Gv */
2264 case 0x02: /* add Gb,Eb */
2265 case 0x03: /* add Gv,Ev */
2266 cur->pc = check_modrm (cur->pc);
2267 break;
2268 case 0x04: /* add %al,Ib */
2269 cur->pc += 1;
2270 break;
2271 case 0x05: /* add %eax,Iz */
2272 cur->pc += z;
2273 break;
2274 case 0x06: /* push es */
2275 cur->sp -= 1;
2276 break;
2277 case 0x07: /* pop es */
2278 cur->sp += 1;
2279 if (cur->sp - RED_ZONE > cur->sp_safe)
2280 cur->sp_safe = cur->sp - RED_ZONE;
2281 break;
2282 case 0x08: /* or Eb,Gb */
2283 case 0x09: /* or Ev,Gv */
2284 case 0x0a: /* or Gb,Eb */
2285 case 0x0b: /* or Gv,Ev */
2286 cur->pc = check_modrm (cur->pc);
2287 break;
2288 case 0x0c: /* or %al,Ib */
2289 cur->pc += 1;
2290 break;
2291 case 0x0d: /* or %eax,Iz */
2292 cur->pc += z;
2293 break;
2294 case 0x0e: /* push cs */
2295 cur->sp -= 1;
2296 break;
2297 case 0x0f: /* two-byte opcodes */
2298 extop = *cur->pc++;
2299 switch (extop)
2300 { /* RTM or HLE */
2301 case 0x01:
2302 extop2 = *cur->pc;
2303 switch (extop2)
2304 {
2305 case 0xd5: /* xend */
2306 case 0xd6: /* xtest */
2307 cur->pc++;
2308 break;
2309 default:
2310 break;
2311 }
2312 break;
2313 case 0x03:
2314 cur->pc = check_modrm (cur->pc);
2315 break;
2316 case 0x0b:
2317 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, undefined instruction. opcode=0x%02x\n",
2318 __LINE__, (int) opcode);
2319 DELETE_CURCTX ();
2320 break;
2321 case 0x05: /* syscall */
2322 case 0x34: /* sysenter */
2323 if (cur->rax == __NR_exit)
2324 {
2325 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2326 __LINE__, (int) opcode);
2327 DELETE_CURCTX ();
2328 break;
2329 }
2330 else if (cur->rax == __NR_rt_sigreturn)
2331 {
2332 if (jmp_reg_switch_mode == 1)
2333 {
2334 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0x%02x\n",
2335 __LINE__, (int) opcode);
2336 goto checkFP;
2337 }
2338 wctx->sp = (unsigned long) cur->sp;
2339 if (save_ctx)
2340 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
2341 return RA_RT_SIGRETURN;
2342 }
2343 #if WSIZE(32)
2344 else if (cur->rax == __NR_sigreturn)
2345 {
2346 if (jmp_reg_switch_mode == 1)
2347 {
2348 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0x34\n");
2349 goto checkFP;
2350 }
2351 wctx->sp = (unsigned long) cur->sp;
2352 if (save_ctx)
2353 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
2354 return RA_SIGRETURN;
2355 }
2356 #endif
2357 /* Check for Linus' trick in the vsyscall page */
2358 while (*cur->pc == 0x90) /* nop */
2359 cur->pc++;
2360 if (*cur->pc == 0xeb) /* jmp imm8 */
2361 cur->pc += 2;
2362 break;
2363 case 0x0d: /* nop Ev */
2364 cur->pc = check_modrm (cur->pc);
2365 break;
2366 case 0x10: /* xmm Vq,Wq */
2367 case 0x11:
2368 case 0x12:
2369 case 0x13:
2370 case 0x14:
2371 case 0x15:
2372 case 0x16:
2373 case 0x17:
2374 cur->pc = check_modrm (cur->pc);
2375 break;
2376 case 0x18: /* prefetch */
2377 cur->pc = check_modrm (cur->pc);
2378 break;
2379 case 0x1E: /* endbr64/endbr32 (f3 0f 1e .. ) is parsing as repz nop edx */
2380 cur->pc += 2;
2381 break;
2382 case 0x1f: /* nop Ev */
2383 cur->pc = check_modrm (cur->pc);
2384 break;
2385 case 0x28: /* xmm Vq,Wq */
2386 case 0x29:
2387 case 0x2a:
2388 case 0x2b:
2389 case 0x2c:
2390 case 0x2d:
2391 case 0x2e:
2392 case 0x2f:
2393 cur->pc = check_modrm (cur->pc);
2394 break;
2395 case 0x30: /* wrmsr */
2396 case 0x31: /* rdtsc */
2397 case 0x32: /* rdmsr */
2398 case 0x33: /* rdpmc */
2399 break;
2400 /* case 0x34: sysenter (see above) */
2401 case 0x38: case 0x3a:
2402 extop2 = *cur->pc++;
2403 cur->pc = check_modrm (cur->pc);
2404 // 21275311 Unwind failure in native stack for java application running on jdk8
2405 // Three-byte opcodes "66 0f 3a ??" should consume an additional "immediate" byte.
2406 if (extop == 0x3a)
2407 cur->pc++;
2408 break;
2409 case 0x40: case 0x41: case 0x42: case 0x43: /* CMOVcc Gv,Ev */
2410 case 0x44: case 0x45: case 0x46: case 0x47:
2411 case 0x48: case 0x49: case 0x4a: case 0x4b:
2412 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
2413 cur->pc = check_modrm (cur->pc);
2414 break;
2415 case 0x50: case 0x51: case 0x52: case 0x53:
2416 case 0x54: case 0x55: case 0x56: case 0x57:
2417 case 0x58: case 0x59: case 0x5a: case 0x5b:
2418 case 0x5c: case 0x5d: case 0x5e: case 0x5f:
2419 case 0x60: case 0x61: case 0x62: case 0x63:
2420 case 0x64: case 0x65: case 0x66: case 0x67:
2421 case 0x68: case 0x69: case 0x6a: case 0x6b:
2422 case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2423 cur->pc = check_modrm (cur->pc);
2424 break;
2425 case 0x70: case 0x71: case 0x72: case 0x73:
2426 cur->pc = check_modrm (cur->pc) + 1;
2427 break;
2428 case 0x74: case 0x75: case 0x76:
2429 cur->pc = check_modrm (cur->pc);
2430 break;
2431 case 0x77:
2432 break;
2433 case 0x7c: case 0x7d: case 0x7e: case 0x7f:
2434 cur->pc = check_modrm (cur->pc);
2435 break;
2436 case 0x80: case 0x81: case 0x82: case 0x83: /* Jcc Jz */
2437 case 0x84: case 0x85: case 0x86: case 0x87:
2438 case 0x88: case 0x89: case 0x8a: case 0x8b:
2439 case 0x8c: case 0x8d: case 0x8e: case 0x8f:
2440 immv = read_int (cur->pc, z);
2441 cur->pc += z;
2442 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2443 {
2444 int tidx = 0;
2445 unsigned char *npc = cur->pc + immv;
2446 if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
2447 {
2448 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode=0x%02x\n",
2449 __LINE__, (int) opcode);
2450 DELETE_CURCTX ();
2451 break;
2452 }
2453 if (is_after_ret (npc))
2454 break;
2455 while (npc > targets[tidx])
2456 tidx += 1;
2457 if (npc != targets[tidx])
2458 {
2459 if (ntrg < MAXTRGTS)
2460 {
2461 for (int i = 0; i < nctx; i++)
2462 if (buf[i].tidx >= tidx)
2463 buf[i].tidx++;
2464
2465 /* insert a new target */
2466 for (int i = ntrg; i > tidx; i--)
2467 targets[i] = targets[i - 1];
2468 ntrg += 1;
2469 targets[tidx++] = npc;
2470 }
2471 else
2472 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg=max(%d)\n",
2473 __LINE__, ntrg);
2474 struct AdvWalkContext *new = buf + nctx;
2475 nctx += 1;
2476 __collector_memcpy (new, cur, sizeof (*new));
2477 new->pc = npc;
2478 new->tidx = tidx;
2479 cur = new; /* advance the new context first */
2480 continue;
2481 }
2482 }
2483 else
2484 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx=max(%d)\n",
2485 __LINE__, ntrg);
2486 break;
2487 case 0x90: case 0x91: case 0x92: case 0x93: /* setcc Eb */
2488 case 0x94: case 0x95: case 0x96: case 0x97:
2489 case 0x98: case 0x99: case 0x9a: case 0x9b:
2490 case 0x9c: case 0x9d: case 0x9e: case 0x9f:
2491 cur->pc = check_modrm (cur->pc);
2492 break;
2493 case 0xa0: /* push fs */
2494 cur->sp -= 1;
2495 break;
2496 case 0xa1: /* pop fs */
2497 cur->sp += 1;
2498 if (cur->sp - RED_ZONE > cur->sp_safe)
2499 cur->sp_safe = cur->sp - RED_ZONE;
2500 break;
2501 case 0xa2: /* cpuid */
2502 break;
2503 case 0xa3: /* bt Ev,Gv */
2504 cur->pc = check_modrm (cur->pc);
2505 break;
2506 case 0xa4: /* shld Ev,Gv,Ib */
2507 cur->pc = check_modrm (cur->pc);
2508 cur->pc += 1;
2509 break;
2510 case 0xa5: /* shld Ev,Gv,%cl */
2511 cur->pc = check_modrm (cur->pc);
2512 break;
2513 case 0xa8: /* push gs */
2514 cur->sp -= 1;
2515 break;
2516 case 0xa9: /* pop gs */
2517 cur->sp += 1;
2518 if (cur->sp - RED_ZONE > cur->sp_safe)
2519 cur->sp_safe = cur->sp - RED_ZONE;
2520 break;
2521 case 0xaa: /* rsm */
2522 break;
2523 case 0xab: /* bts Ev,Gv */
2524 cur->pc = check_modrm (cur->pc);
2525 break;
2526 case 0xac: /* shrd Ev,Gv,Ib */
2527 cur->pc = check_modrm (cur->pc);
2528 cur->pc += 1;
2529 break;
2530 case 0xad: /* shrd Ev,Gv,%cl */
2531 cur->pc = check_modrm (cur->pc);
2532 break;
2533 case 0xae: /* group15 */
2534 cur->pc = check_modrm (cur->pc);
2535 break;
2536 case 0xaf: /* imul Gv,Ev */
2537 cur->pc = check_modrm (cur->pc);
2538 break;
2539 case 0xb1: /* cmpxchg Ev,Gv */
2540 cur->pc = check_modrm (cur->pc);
2541 break;
2542 case 0xb3:
2543 case 0xb6: /* movzx Gv,Eb */
2544 case 0xb7: /* movzx Gv,Ew */
2545 cur->pc = check_modrm (cur->pc);
2546 break;
2547 case 0xba: /* group8 Ev,Ib */
2548 cur->pc = check_modrm (cur->pc);
2549 cur->pc += 1;
2550 break;
2551 case 0xbb: /* btc Ev,Gv */
2552 case 0xbc: /* bsf Gv,Ev */
2553 case 0xbd: /* bsr Gv,Ev */
2554 cur->pc = check_modrm (cur->pc);
2555 break;
2556 case 0xbe: /* movsx Gv,Eb */
2557 case 0xbf: /* movsx Gv,Ew */
2558 cur->pc = check_modrm (cur->pc);
2559 break;
2560 case 0xc0: /* xadd Eb,Gb */
2561 case 0xc1: /* xadd Ev,Gv */
2562 cur->pc = check_modrm (cur->pc);
2563 break;
2564 case 0xc2: /* cmpps V,W,Ib */
2565 cur->pc = check_modrm (cur->pc);
2566 cur->pc += 1;
2567 break;
2568 case 0xc3: /* movnti M,G */
2569 cur->pc = check_modrm (cur->pc);
2570 break;
2571 case 0xc6: /* shufps V,W,Ib */
2572 cur->pc = check_modrm (cur->pc);
2573 cur->pc += 1;
2574 break;
2575 case 0xc7: /* RDRAND */
2576 cur->pc = check_modrm (cur->pc);
2577 break;
2578 case 0xc8: case 0xc9: case 0xca: case 0xcb: /* bswap */
2579 case 0xcc: case 0xcd: case 0xce: case 0xcf:
2580 break;
2581 case 0xd0: case 0xd1: case 0xd2: case 0xd3:
2582 case 0xd4: case 0xd5: case 0xd6: case 0xd7:
2583 case 0xd8: case 0xd9: case 0xda: case 0xdb:
2584 case 0xdc: case 0xdd: case 0xde: case 0xdf:
2585 case 0xe0: case 0xe1: case 0xe2: case 0xe3:
2586 case 0xe4: case 0xe5: case 0xe6: case 0xe7:
2587 case 0xe8: case 0xe9: case 0xea: case 0xeb:
2588 case 0xec: case 0xed: case 0xee: case 0xef:
2589 case 0xf0: case 0xf1: case 0xf2: case 0xf3:
2590 case 0xf4: case 0xf5: case 0xf6: case 0xf7:
2591 case 0xf8: case 0xf9: case 0xfa: case 0xfb:
2592 case 0xfc: case 0xfd: case 0xfe: case 0xff:
2593 cur->pc = check_modrm (cur->pc);
2594 break;
2595 default:
2596 if (jmp_reg_switch_mode == 1 && extop == 0x0b)
2597 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d invalid opcode ub2: 0x0f %x jmp_reg_switch_mode=%d\n",
2598 __LINE__, (int) extop, jmp_reg_switch_mode);
2599 else
2600 {
2601 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x0f %x jmp_reg_switch_mode=%d\n",
2602 __LINE__, (int) extop, jmp_reg_switch_mode);
2603 DELETE_CURCTX ();
2604 }
2605 break;
2606 }
2607 break;
2608 case 0x10: /* adc Eb,Gb */
2609 case 0x11: /* adc Ev,Gv */
2610 case 0x12: /* adc Gb,Eb */
2611 case 0x13: /* adc Gv,Ev */
2612 cur->pc = check_modrm (cur->pc);
2613 break;
2614 case 0x14: /* adc %al,Ib */
2615 cur->pc += 1;
2616 break;
2617 case 0x15: /* adc %eax,Iz */
2618 cur->pc += z;
2619 break;
2620 case 0x16: /* push ss */
2621 cur->sp -= 1;
2622 break;
2623 case 0x17: /* pop ss */
2624 cur->sp += 1;
2625 if (cur->sp - RED_ZONE > cur->sp_safe)
2626 cur->sp_safe = cur->sp - RED_ZONE;
2627 break;
2628 case 0x18: /* sbb Eb,Gb */
2629 case 0x19: /* sbb Ev,Gv */
2630 case 0x1a: /* sbb Gb,Eb */
2631 case 0x1b: /* sbb Gv,Ev */
2632 cur->pc = check_modrm (cur->pc);
2633 break;
2634 case 0x1c: /* sbb %al,Ib */
2635 cur->pc += 1;
2636 break;
2637 case 0x1d: /* sbb %eax,Iz */
2638 cur->pc += z;
2639 break;
2640 case 0x1e: /* push ds */
2641 cur->sp -= 1;
2642 break;
2643 case 0x1f: /* pop ds */
2644 cur->sp += 1;
2645 if (cur->sp - RED_ZONE > cur->sp_safe)
2646 cur->sp_safe = cur->sp - RED_ZONE;
2647 break;
2648 case 0x20: /* and Eb,Gb */
2649 case 0x21: /* and Ev,Gv */
2650 case 0x22: /* and Gb,Eb */
2651 case 0x23: /* and Gv,Ev */
2652 cur->pc = check_modrm (cur->pc);
2653 break;
2654 case 0x24: /* and %al,Ib */
2655 cur->pc += 1;
2656 break;
2657 case 0x25: /* and %eax,Iz */
2658 cur->pc += z;
2659 break;
2660 case 0x26: /* seg=es prefix */
2661 break;
2662 case 0x27: /* daa */
2663 break;
2664 case 0x28: /* sub Eb,Gb */
2665 case 0x29: /* sub Ev,Gv */
2666 case 0x2a: /* sub Gb,Eb */
2667 case 0x2b: /* sub Gv,Ev */
2668 cur->pc = check_modrm (cur->pc);
2669 break;
2670 case 0x2c: /* sub %al,Ib */
2671 cur->pc += 1;
2672 break;
2673 case 0x2d: /* sub %eax,Iz */
2674 cur->pc += z;
2675 break;
2676 case 0x2e: /* seg=cs prefix */
2677 break;
2678 case 0x2f: /* das */
2679 break;
2680 case 0x30: /* xor Eb,Gb */
2681 case 0x31: /* xor Ev,Gv */
2682 case 0x32: /* xor Gb,Eb */
2683 case 0x33: /* xor Gv,Ev */
2684 cur->pc = check_modrm (cur->pc);
2685 break;
2686 case 0x34: /* xor %al,Ib */
2687 cur->pc += 1;
2688 break;
2689 case 0x35: /* xor %eax,Iz */
2690 cur->pc += z;
2691 break;
2692 case 0x36: /* seg=ss prefix */
2693 break;
2694 case 0x37: /* aaa */
2695 break;
2696 case 0x38: /* cmp Eb,Gb */
2697 case 0x39: /* cmp Ev,Gv */
2698 case 0x3a: /* cmp Gb,Eb */
2699 case 0x3b: /* cmp Gv,Ev */
2700 cur->pc = check_modrm (cur->pc);
2701 break;
2702 case 0x3c: /* cmp %al,Ib */
2703 cur->pc += 1;
2704 break;
2705 case 0x3d: /* cmp %eax,Iz */
2706 cur->pc += z;
2707 break;
2708 case 0x3e: /* seg=ds prefix */
2709 break;
2710 case 0x3f: /* aas */
2711 break;
2712 #if WSIZE(32)
2713 case 0x40: /* inc %eax */
2714 case 0x41: /* inc %ecx */
2715 case 0x42: /* inc %edx */
2716 case 0x43: /* inc %ebx */
2717 break;
2718 case 0x44: /* inc %esp */
2719 /* Can't be a valid stack pointer - delete context */
2720 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x44.\n", __LINE__);
2721 DELETE_CURCTX ();
2722 break;
2723 case 0x45: /* inc %ebp */
2724 case 0x46: /* inc %esi */
2725 case 0x47: /* inc %edi */
2726 case 0x48: /* dec %eax */
2727 case 0x49: /* dec %ecx */
2728 case 0x4a: /* dec %edx */
2729 case 0x4b: /* dec %ebx */
2730 break;
2731 case 0x4c: /* dec %esp */
2732 /* Can't be a valid stack pointer - delete context */
2733 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0x4c.\n", __LINE__);
2734 DELETE_CURCTX ();
2735 break;
2736 case 0x4d: /* dec %ebp */
2737 case 0x4e: /* dec %esi */
2738 case 0x4f: /* dec %edi */
2739 break;
2740 #endif
2741 case 0x50: /* push %eax */
2742 case 0x51: /* push %ecx */
2743 case 0x52: /* push %edx */
2744 case 0x53: /* push %ebx */
2745 case 0x54: /* push %esp */
2746 case 0x55: /* push %ebp */
2747 case 0x56: /* push %esi */
2748 case 0x57: /* push %edi */
2749 cur->sp -= 1;
2750 reg = OPC_REG (opcode);
2751 if (reg == RBP)
2752 {
2753 #if 0
2754 /* Don't do this check yet. Affects tail calls. */
2755 /* avoid other function's prologue */
2756 if ((cur->pc[0] == 0x89 && cur->pc[1] == 0xe5) ||
2757 (cur->pc[0] == 0x8b && cur->pc[1] == 0xec))
2758 {
2759 /* mov %esp,%ebp */
2760 DELETE_CURCTX ();
2761 break;
2762 }
2763 #endif
2764 if (cur->fp_loc == NULL)
2765 {
2766 cur->fp_loc = cur->sp;
2767 cur->fp_sav = cur->fp;
2768 }
2769 }
2770 break;
2771 case 0x58: /* pop %eax */
2772 case 0x59: /* pop %ecx */
2773 case 0x5a: /* pop %edx */
2774 case 0x5b: /* pop %ebx */
2775 case 0x5c: /* pop %esp */
2776 case 0x5d: /* pop %ebp */
2777 case 0x5e: /* pop %esi */
2778 case 0x5f: /* pop %edi */
2779 reg = OPC_REG (opcode);
2780 cur->regs[reg] = 0;
2781 if (isInside ((unsigned long) cur->sp, (unsigned long) cur->sp_safe, wctx->sbase))
2782 cur->regs[reg] = *cur->sp;
2783 DprintfT (SP_DUMP_UNWIND, "stack_unwind:%d cur->regs[%d]=0x%lx\n",
2784 __LINE__, reg, (unsigned long) cur->regs[reg]);
2785 if (reg == RDX)
2786 {
2787 if (cur->sp >= cur->sp_safe &&
2788 (unsigned long) cur->sp < wctx->sbase)
2789 cur->rdx = *cur->sp;
2790 }
2791 else if (reg == RBP)
2792 {
2793 if (cur->fp_loc == cur->sp)
2794 {
2795 cur->fp = cur->fp_sav;
2796 cur->fp_loc = NULL;
2797 }
2798 else if (cur->sp >= cur->sp_safe &&
2799 (unsigned long) cur->sp < wctx->sbase)
2800 cur->fp = (unsigned long*) (*cur->sp);
2801 }
2802 else if (reg == RSP)
2803 {
2804 /* f.e. JVM I2CAdapter */
2805 if (cur->sp >= cur->sp_safe && (unsigned long) cur->sp < wctx->sbase)
2806 {
2807 unsigned long *nsp = (unsigned long*) (*cur->sp);
2808 if (nsp >= cur->sp && nsp <= cur->fp)
2809 {
2810 cur->sp = nsp;
2811 }
2812 else
2813 {
2814 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address, opcode=0x%02x\n",
2815 __LINE__, opcode);
2816 goto checkFP;
2817 }
2818 }
2819 else
2820 {
2821 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode=0x%02x\n",
2822 __LINE__, opcode);
2823 goto checkFP;
2824 }
2825 break;
2826 }
2827 cur->sp += 1;
2828 if (cur->sp - RED_ZONE > cur->sp_safe)
2829 {
2830 cur->sp_safe = cur->sp - RED_ZONE;
2831 }
2832 break;
2833 case 0x60: /* pusha(d) */
2834 cur->sp -= 8;
2835 break;
2836 case 0x61: /* popa(d) */
2837 cur->sp += 8;
2838 if (cur->sp - RED_ZONE > cur->sp_safe)
2839 cur->sp_safe = cur->sp - RED_ZONE;
2840 break;
2841 case 0x62: /* group AVX, 4-bytes EVEX prefix */
2842 {
2843 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
2844 int len = parse_x86_AVX_instruction (pc);
2845 if (len < 4)
2846 {
2847 DELETE_CURCTX ();
2848 }
2849 else
2850 {
2851 pc += len;
2852 cur->pc = pc;
2853 }
2854 }
2855 break;
2856 case 0x63: /* arpl Ew,Gw (32) movsxd Gv,Ev (64)*/
2857 cur->pc = check_modrm (cur->pc);
2858 break;
2859 case 0x64: /* seg=fs prefix */
2860 case 0x65: /* seg=gs prefix */
2861 break;
2862 case 0x66: /* opd size override */
2863 case 0x67: /* addr size override */
2864 break;
2865 case 0x68: /* push Iz */
2866 cur->sp = (unsigned long*) ((long) cur->sp - z);
2867 cur->pc += z;
2868 break;
2869 case 0x69: /* imul Gv,Ev,Iz */
2870 cur->pc = check_modrm (cur->pc);
2871 cur->pc += z;
2872 break;
2873 case 0x6a: /* push Ib */
2874 cur->sp = (unsigned long*) ((long) cur->sp - v);
2875 cur->pc += 1;
2876 break;
2877 case 0x6b: /* imul Gv,Ev,Ib */
2878 cur->pc = check_modrm (cur->pc);
2879 cur->pc += 1;
2880 break;
2881 case 0x6c: case 0x6d: case 0x6e: case 0x6f:
2882 cur->pc = check_modrm (cur->pc);
2883 break;
2884 case 0x70: /* jo Jb */
2885 case 0x71: /* jno Jb */
2886 case 0x72: /* jb Jb */
2887 case 0x73: /* jnb Jb */
2888 case 0x74: /* jz Jb */
2889 case 0x75: /* jnz Jb */
2890 case 0x76: /* jna Jb */
2891 case 0x77: /* ja Jb */
2892 case 0x78: /* js Jb */
2893 case 0x79: /* jns Jb */
2894 case 0x7a: /* jp Jb */
2895 case 0x7b: /* jnp Jb */
2896 case 0x7c: /* jl Jb */
2897 case 0x7d: /* jge Jb */
2898 case 0x7e: /* jle Jb */
2899 case 0x7f: /* jg Jb */
2900 imm8 = *(char*) cur->pc++;
2901 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
2902 {
2903 int tidx = 0;
2904 unsigned char *npc = cur->pc + imm8;
2905 if (is_after_ret (npc))
2906 break;
2907 while (npc > targets[tidx])
2908 tidx += 1;
2909 if (npc != targets[tidx])
2910 {
2911 if (ntrg < MAXTRGTS)
2912 {
2913 for (int i = 0; i < nctx; i++)
2914 if (buf[i].tidx >= tidx)
2915 buf[i].tidx++;
2916
2917 /* insert a new target */
2918 for (int i = ntrg; i > tidx; i--)
2919 targets[i] = targets[i - 1];
2920 ntrg += 1;
2921 targets[tidx++] = npc;
2922 }
2923 else
2924 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d ntrg(%d)=max\n", __LINE__, ntrg);
2925 struct AdvWalkContext *new = buf + nctx;
2926 nctx += 1;
2927 __collector_memcpy (new, cur, sizeof (*new));
2928 new->pc = npc;
2929 new->tidx = tidx;
2930 cur = new; /* advance the new context first */
2931 continue;
2932 }
2933 }
2934 else
2935 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d nctx(%d)=max\n", __LINE__, nctx);
2936 break;
2937 case 0x80: /* group1 Eb,Ib */
2938 cur->pc = check_modrm (cur->pc);
2939 cur->pc += 1;
2940 break;
2941 case 0x81: /* group1 Ev,Iz */
2942 modrm = *cur->pc;
2943 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2944 {
2945 int immz = read_int (cur->pc + 1, z);
2946 extop = MRM_EXT (modrm);
2947 if (extop == 0) /* add imm32,%esp */
2948 cur->sp = (unsigned long*) ((long) cur->sp + immz);
2949 else if (extop == 4) /* and imm32,%esp */
2950 cur->sp = (unsigned long*) ((long) cur->sp & immz);
2951 else if (extop == 5) /* sub imm32,%esp */
2952 cur->sp = (unsigned long*) ((long) cur->sp - immz);
2953 if (cur->sp - RED_ZONE > cur->sp_safe)
2954 cur->sp_safe = cur->sp - RED_ZONE;
2955 }
2956 cur->pc = check_modrm (cur->pc);
2957 cur->pc += z;
2958 break;
2959 case 0x82: /* group1 Eb,Ib */
2960 cur->pc = check_modrm (cur->pc);
2961 cur->pc += 1;
2962 break;
2963 case 0x83: /* group1 Ev,Ib */
2964 modrm = *cur->pc;
2965 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RSP)
2966 {
2967 imm8 = (char) cur->pc[1]; /* sign extension */
2968 extop = MRM_EXT (modrm);
2969 if (extop == 0) /* add imm8,%esp */
2970 cur->sp = (unsigned long*) ((long) cur->sp + imm8);
2971 else if (extop == 4) /* and imm8,%esp */
2972 cur->sp = (unsigned long*) ((long) cur->sp & imm8);
2973 else if (extop == 5) /* sub imm8,%esp */
2974 cur->sp = (unsigned long*) ((long) cur->sp - imm8);
2975 if (cur->sp - RED_ZONE > cur->sp_safe)
2976 cur->sp_safe = cur->sp - RED_ZONE;
2977 }
2978 cur->pc = check_modrm (cur->pc);
2979 cur->pc += 1;
2980 break;
2981 case 0x84: /* test Eb,Gb */
2982 case 0x85: /* test Ev,Gv */
2983 case 0x86: /* xchg Eb,Gb */
2984 case 0x87: /* xchg Ev,Gv */
2985 cur->pc = check_modrm (cur->pc);
2986 break;
2987 case 0x88: /* mov Eb,Gb */
2988 cur->pc = check_modrm (cur->pc);
2989 break;
2990 case 0x89: /* mov Ev,Gv */
2991 modrm = *cur->pc;
2992 if (MRM_MOD (modrm) == 0xc0)
2993 {
2994 if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
2995 /* movl %esp,%ebp */
2996 cur->fp = cur->sp;
2997 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
2998 { /* mov %ebp,%esp */
2999 cur->sp = cur->fp;
3000 if (cur->sp - RED_ZONE > cur->sp_safe)
3001 cur->sp_safe = cur->sp - RED_ZONE;
3002 if (wctx->fp == (unsigned long) cur->sp)
3003 cur->cval = RA_FROMFP;
3004 }
3005 }
3006 else if (MRM_MOD (modrm) == 0x80)
3007 {
3008 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3009 {
3010 if (cur->pc[1] == 0x24)
3011 { /* mov %ebp,disp32(%esp) - JVM */
3012 immv = read_int (cur->pc + 2, 4);
3013 cur->fp_loc = (unsigned long*) ((char*) cur->sp + immv);
3014 cur->fp_sav = cur->fp;
3015 }
3016 }
3017 }
3018 else if (MRM_MOD (modrm) == 0x40)
3019 {
3020 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3021 {
3022 if (cur->pc[1] == 0x24 && cur->pc[2] == 0x0)
3023 { /* movl %edx,0(%esp) */
3024 cur->ra_loc = cur->sp;
3025 cur->ra_sav = cur->rdx;
3026 }
3027 }
3028 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3029 {
3030 if (cur->pc[1] == 0x24)
3031 { /* mov %ebp,disp8(%esp) - JVM */
3032 imm8 = ((char*) (cur->pc))[2];
3033 cur->fp_loc = (unsigned long*) ((char*) cur->sp + imm8);
3034 cur->fp_sav = cur->fp;
3035 }
3036 }
3037 }
3038 else if (MRM_MOD (modrm) == 0x0)
3039 {
3040 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3041 {
3042 if (cur->pc[1] == 0x24)
3043 { /* mov %ebp,(%esp) */
3044 cur->fp_loc = cur->sp;
3045 cur->fp_sav = cur->fp;
3046 }
3047 }
3048 else if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RDX)
3049 {
3050 if (cur->pc[1] == 0x24)
3051 { /* movl %edx,(%esp) */
3052 cur->ra_loc = cur->sp;
3053 cur->ra_sav = cur->rdx;
3054 }
3055 }
3056 }
3057 cur->pc = check_modrm (cur->pc);
3058 break;
3059 case 0x8a: /* mov Gb,Eb */
3060 cur->pc = check_modrm (cur->pc);
3061 break;
3062 case 0x8b: /* mov Gv,Ev */
3063 modrm = *cur->pc;
3064 if (MRM_MOD (modrm) == 0xc0)
3065 {
3066 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3067 /* mov %esp,%ebp */
3068 cur->fp = cur->sp;
3069 else if (MRM_REGS (modrm) == RBP && MRM_REGD (modrm) == RSP)
3070 { /* mov %ebp,%esp */
3071 cur->sp = cur->fp;
3072 if (cur->sp - RED_ZONE > cur->sp_safe)
3073 cur->sp_safe = cur->sp - RED_ZONE;
3074 if (wctx->fp == (unsigned long) cur->sp)
3075 cur->cval = RA_FROMFP;
3076 }
3077 }
3078 else if (MRM_MOD (modrm) == 0x80)
3079 {
3080 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3081 {
3082 if (cur->pc[1] == 0x24)
3083 { /* mov disp32(%esp),%ebp */
3084 immv = read_int (cur->pc + 2, 4);
3085 unsigned long *ptr = (unsigned long*) ((char*) cur->sp + immv);
3086 if (cur->fp_loc == ptr)
3087 {
3088 cur->fp = cur->fp_sav;
3089 cur->fp_loc = NULL;
3090 }
3091 else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3092 cur->fp = (unsigned long*) (*ptr);
3093 }
3094 }
3095 }
3096 else if (MRM_MOD (modrm) == 0x40)
3097 {
3098 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3099 {
3100 if (cur->pc[1] == 0x24)
3101 { /* mov disp8(%esp),%ebp - JVM */
3102 imm8 = ((char*) (cur->pc))[2];
3103 unsigned long *ptr = (unsigned long*) ((char*) cur->sp + imm8);
3104 if (cur->fp_loc == ptr)
3105 {
3106 cur->fp = cur->fp_sav;
3107 cur->fp_loc = NULL;
3108 }
3109 else if (ptr >= cur->sp_safe && (unsigned long) ptr < wctx->sbase)
3110 cur->fp = (unsigned long*) (*ptr);
3111 }
3112 }
3113 }
3114 else if (MRM_MOD (modrm) == 0x0)
3115 {
3116 if (MRM_REGS (modrm) == RSP && MRM_REGD (modrm) == RBP)
3117 {
3118 if (cur->pc[1] == 0x24)
3119 { /* mov (%esp),%ebp */
3120 if (cur->fp_loc == cur->sp)
3121 {
3122 cur->fp = cur->fp_sav;
3123 cur->fp_loc = NULL;
3124 }
3125 else if (cur->sp >= cur->sp_safe &&
3126 (unsigned long) cur->sp < wctx->sbase)
3127 cur->fp = (unsigned long*) *cur->sp;
3128 }
3129 }
3130 }
3131 cur->pc = check_modrm (cur->pc);
3132 break;
3133 case 0x8c: /* mov Mw,Sw */
3134 cur->pc = check_modrm (cur->pc);
3135 break;
3136 case 0x8d: /* lea Gv,M */
3137 modrm = *cur->pc;
3138 if (MRM_REGD (modrm) == RSP)
3139 {
3140 unsigned char *pc = cur->pc;
3141 // Mez: need to use always regs[RSP/RBP] instead cur->sp(or fp):
3142 cur->regs[RSP] = (unsigned long) cur->sp;
3143 cur->regs[RBP] = (unsigned long) cur->fp;
3144 cur->pc++;
3145 int mod = (modrm >> 6) & 3;
3146 int r_m = modrm & 7;
3147 long val = 0;
3148 int undefRez = 0;
3149 if (mod == 0x3)
3150 val = getRegVal (cur, MRM_REGS (modrm), &undefRez);
3151 else if (r_m == 4)
3152 { // SP or R12. Decode SIB-byte.
3153 int sib = *cur->pc++;
3154 int scale = 1 << (sib >> 6);
3155 int index = X | ((sib >> 3) & 7);
3156 int base = B | (sib & 7);
3157 if (mod == 0)
3158 {
3159 if ((base & 7) == 5)
3160 { // BP or R13
3161 if (index != 4) // SP
3162 val += getRegVal (cur, index, &undefRez) * scale;
3163 val += read_int (cur->pc, 4);
3164 cur->pc += 4;
3165 }
3166 else
3167 {
3168 val += getRegVal (cur, base, &undefRez);
3169 if (index != 4) // SP
3170 val += getRegVal (cur, index, &undefRez) * scale;
3171 }
3172 }
3173 else
3174 {
3175 val += getRegVal (cur, base, &undefRez);
3176 if (index != 4) // SP
3177 val += getRegVal (cur, index, &undefRez) * scale;
3178 if (mod == 1)
3179 {
3180 val += read_int (cur->pc, 1);
3181 cur->pc++;
3182 }
3183 else
3184 { // mod == 2
3185 val += read_int (cur->pc, 4);
3186 cur->pc += 4;
3187 }
3188 }
3189 }
3190 else if (mod == 0)
3191 {
3192 if (r_m == 5)
3193 { // BP or R13
3194 val += read_int (cur->pc, 4);
3195 cur->pc += 4;
3196 }
3197 else
3198 val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3199 }
3200 else
3201 { // mod == 1 || mod == 2
3202 val += getRegVal (cur, MRM_REGS (modrm), &undefRez);
3203 if (mod == 1)
3204 {
3205 val += read_int (cur->pc, 1);
3206 cur->pc++;
3207 }
3208 else
3209 { // mod == 2
3210 val += read_int (cur->pc, 4);
3211 cur->pc += 4;
3212 }
3213 }
3214 if (undefRez)
3215 {
3216 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx val=0x%lx\n",
3217 __LINE__, (unsigned long) cur->pc, (unsigned long) val);
3218 goto checkFP;
3219 }
3220 cur->regs[MRM_REGD (modrm)] = val;
3221 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cur->pc=0x%lx val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3222 __LINE__, (unsigned long) cur->pc, (unsigned long) val,
3223 (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3224 if (cur->pc != check_modrm (pc))
3225 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d ERROR: cur->pc=0x%lx != check_modrm(0x%lx)=0x%lx\n",
3226 __LINE__, (unsigned long) cur->pc, (unsigned long) pc,
3227 (unsigned long) check_modrm (pc));
3228 if (MRM_REGD (modrm) == RSP)
3229 {
3230 if (!isInside ((unsigned long) val, wctx->sp, wctx->sbase))
3231 {
3232 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d cannot calculate RSP. cur->pc=0x%lx opcode=0x%02x val=0x%lx wctx->sp=0x%lx wctx->sbase=0x%lx\n",
3233 __LINE__, (unsigned long) cur->pc, opcode, (unsigned long) val,
3234 (unsigned long) wctx->sp, (unsigned long) wctx->sbase);
3235 goto checkFP;
3236 }
3237 cur->sp = (unsigned long *) val;
3238 if (cur->sp - RED_ZONE > cur->sp_safe)
3239 cur->sp_safe = cur->sp - RED_ZONE;
3240 }
3241 }
3242 else
3243 cur->pc = check_modrm (cur->pc);
3244 break;
3245 case 0x8e: /* mov Sw,Ew */
3246 cur->pc = check_modrm (cur->pc);
3247 break;
3248 case 0x8f: /* pop Ev */
3249 cur->pc = check_modrm (cur->pc);
3250 cur->sp += 1;
3251 if (cur->sp - RED_ZONE > cur->sp_safe)
3252 cur->sp_safe = cur->sp - RED_ZONE;
3253 break;
3254 case 0x90: /* nop */
3255 break;
3256 case 0x91: /* xchg %eax,%ecx */
3257 case 0x92: /* xchg %eax,%edx */
3258 case 0x93: /* xchg %eax,%ebx */
3259 case 0x94: /* xchg %eax,%esp XXXX */
3260 case 0x95: /* xchg %eax,%ebp XXXX */
3261 case 0x96: /* xchg %eax,%esi */
3262 case 0x97: /* xchg %eax,%edi */
3263 break;
3264 case 0x98: /* cbw/cwde */
3265 case 0x99: /* cwd/cwq */
3266 break;
3267 case 0x9a: /* callf Ap */
3268 if (jmp_reg_switch_mode == 1)
3269 {
3270 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3271 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3272 int rc = process_return (wctx, tmpctx);
3273 if (rc != RA_FAILURE)
3274 {
3275 if (save_ctx)
3276 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3277 return rc;
3278 }
3279 }
3280 cur->pc += 2 + a;
3281 break;
3282 case 0x9b: /* fwait */
3283 case 0x9c: /* pushf Fv */
3284 case 0x9d: /* popf Fv */
3285 case 0x9e: /* sahf */
3286 case 0x9f: /* lahf */
3287 break;
3288 case 0xa0: /* mov al,Ob */
3289 case 0xa1: /* mov eax,Ov */
3290 case 0xa2: /* mov Ob,al */
3291 case 0xa3: /* mov Ov,eax */
3292 cur->pc += a;
3293 break;
3294 case 0xa4: /* movsb Yb,Xb */
3295 case 0xa5: /* movsd Yv,Xv */
3296 case 0xa6: /* cmpsb Yb,Xb */
3297 case 0xa7: /* cmpsd Xv,Yv */
3298 break;
3299 case 0xa8: /* test al,Ib */
3300 cur->pc += 1;
3301 break;
3302 case 0xa9: /* test eax,Iz */
3303 cur->pc += z;
3304 break;
3305 case 0xaa: /* stosb Yb,%al */
3306 case 0xab: /* stosd Yv,%eax */
3307 case 0xac: /* lodsb %al,Xb */
3308 case 0xad: /* lodsd %eax,Xv */
3309 case 0xae: /* scasb %al,Yb */
3310 case 0xaf: /* scasd %eax,Yv */
3311 break;
3312 case 0xb0: /* mov %al,Ib */
3313 case 0xb1: /* mov %cl,Ib */
3314 case 0xb2: /* mov %dl,Ib */
3315 case 0xb3: /* mov %bl,Ib */
3316 case 0xb4: /* mov %ah,Ib */
3317 case 0xb5: /* mov %ch,Ib */
3318 case 0xb6: /* mov %dh,Ib */
3319 case 0xb7: /* mov %bh,Ib */
3320 cur->pc += 1;
3321 break;
3322 case 0xb8: /* mov Iv,%eax */
3323 case 0xb9: /* mov Iv,%ecx */
3324 case 0xba: /* mov Iv,%edx */
3325 case 0xbb: /* mov Iv,%ebx */
3326 case 0xbc: /* mov Iv,%esp */
3327        case 0xbd: /* mov Iv,%ebp */
3328        case 0xbe: /* mov Iv,%esi */
3329 case 0xbf: /* mov Iv,%edi */
3330 reg = OPC_REG (opcode);
3331 if (reg == RAX)
3332 cur->rax = read_int (cur->pc, v);
3333 cur->pc += v;
3334 break;
3335 case 0xc0: /* group2 Eb,Ib */
3336 case 0xc1: /* group2 Ev,Ib */
3337 cur->pc = check_modrm (cur->pc) + 1;
3338 break;
3339 case 0xc2: /* ret Iw */
3340 /* In the dynamic linker we may see that
3341 * the actual return address is at sp+immv,
3342 * while sp points to the resolved address.
3343 */
3344 {
3345 immv = read_int (cur->pc, 2);
3346 int rc = process_return (wctx, cur);
3347 if (rc != RA_FAILURE)
3348 {
3349 if (jmp_reg_switch_mode == 1)
3350 {
3351 DprintfT (SP_DUMP_UNWIND, "stack_unwind%d give up return address under jmp switch mode, opcode = 0xc2\n", __LINE__);
3352 goto checkFP;
3353 }
3354 wctx->sp += immv;
3355 if (save_ctx)
3356 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3357 return rc;
3358 }
3359 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc2.\n", __LINE__);
3360 DELETE_CURCTX ();
3361 }
3362 break;
3363 case 0xc3: /* ret */
3364 {
3365 int rc = process_return (wctx, cur);
3366 if (rc != RA_FAILURE)
3367 {
3368 if (save_ctx)
3369 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3370 return rc;
3371 }
3372 if (jmp_reg_switch_mode == 1)
3373 jmp_reg_switch_pc = cur->pc;
3374 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xc3.\n", __LINE__);
3375 DELETE_CURCTX ();
3376 }
3377 break;
3378 case 0xc4: /* group AVX, 3-bytes VEX prefix */
3379 {
3380 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3381 int len = parse_x86_AVX_instruction (pc);
3382 if (len < 3)
3383 DELETE_CURCTX ();
3384 else
3385 {
3386 pc += len;
3387 cur->pc = pc;
3388 }
3389 }
3390 break;
3391 case 0xc5: /* group AVX, 2-bytes VEX prefix */
3392 {
3393 unsigned char *pc = cur->pc - 1; // points to the beginning of the instruction
3394 int len = parse_x86_AVX_instruction (pc);
3395 if (len < 2)
3396 DELETE_CURCTX ();
3397 else
3398 {
3399 pc += len;
3400 cur->pc = pc;
3401 }
3402 }
3403 break;
3404 case 0xc6:
3405 modrm = *cur->pc;
3406 if (modrm == 0xf8) /* xabort */
3407 cur->pc += 2;
3408 else /* mov Eb,Ib */
3409 cur->pc = check_modrm (cur->pc) + 1;
3410 break;
3411 case 0xc7:
3412 modrm = *cur->pc;
3413 if (modrm == 0xf8) /* xbegin */
3414 cur->pc += v + 1;
3415 else
3416 { /* mov Ev,Iz */
3417 extop = MRM_EXT (modrm);
3418 if (extop != 0)
3419 {
3420 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xc7\n", __LINE__);
3421 goto checkFP;
3422 }
3423 if (MRM_MOD (modrm) == 0xc0 && MRM_REGS (modrm) == RAX)
3424 cur->rax = read_int (cur->pc + 1, z);
3425 cur->pc = check_modrm (cur->pc) + z;
3426 }
3427 break;
3428 case 0xc8: /* enter Iw,Ib */
3429 cur->pc += 3;
3430 break;
3431 case 0xc9: /* leave */
3432 /* mov %ebp,%esp */
3433 cur->sp = cur->fp;
3434 /* pop %ebp */
3435 if (cur->fp_loc == cur->sp)
3436 {
3437 cur->fp = cur->fp_sav;
3438 cur->fp_loc = NULL;
3439 }
3440 else if (cur->sp >= cur->sp_safe &&
3441 (unsigned long) cur->sp < wctx->sbase)
3442 {
3443 cur->fp = (unsigned long*) (*cur->sp);
3444 if (wctx->fp == (unsigned long) cur->sp)
3445 cur->cval = RA_FROMFP;
3446 }
3447 cur->sp += 1;
3448 if (cur->sp - RED_ZONE > cur->sp_safe)
3449 cur->sp_safe = cur->sp - RED_ZONE;
3450 break;
3451 case 0xca: /* retf Iw */
3452 cur->pc += 2; /* XXXX process return */
3453 break;
3454 case 0xcb: /* retf */
3455 break; /* XXXX process return */
3456 case 0xcc: /* int 3 */
3457 break;
3458 case 0xcd: /* int Ib */
3459 if (*cur->pc == 0x80)
3460 {
3461 if (cur->rax == __NR_exit)
3462 {
3463 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xcd.\n", __LINE__);
3464 DELETE_CURCTX ();
3465 break;
3466 }
3467 else if (cur->rax == __NR_rt_sigreturn)
3468 {
3469 if (jmp_reg_switch_mode == 1)
3470 {
3471 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode=0xcd\n",
3472 __LINE__);
3473 goto checkFP;
3474 }
3475 wctx->sp = (unsigned long) cur->sp;
3476 if (save_ctx)
3477 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_RT_SIGRETURN);
3478 return RA_RT_SIGRETURN;
3479 }
3480 #if WSIZE(32)
3481 else if (cur->rax == __NR_sigreturn)
3482 {
3483 if (jmp_reg_switch_mode == 1)
3484 {
3485 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address under jmp switch mode, opcode = 0xc2\n",
3486 __LINE__);
3487 goto checkFP;
3488 }
3489 wctx->sp = (unsigned long) cur->sp;
3490 if (save_ctx)
3491 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SIGRETURN);
3492 return RA_SIGRETURN;
3493 }
3494 #endif
3495 }
3496 cur->pc += 1;
3497 break;
3498 case 0xce: /* into */
3499 case 0xcf: /* iret */
3500 break;
3501 case 0xd0: /* shift group2 Eb,1 */
3502 case 0xd1: /* shift group2 Ev,1 */
3503 case 0xd2: /* shift group2 Eb,%cl */
3504 case 0xd3: /* shift group2 Ev,%cl */
3505 cur->pc = check_modrm (cur->pc);
3506 break;
3507 case 0xd4: /* aam Ib */
3508 cur->pc += 1;
3509 break;
3510 case 0xd5: /* aad Ib */
3511 cur->pc += 1;
3512 break;
3513 case 0xd6: /* falc? */
3514 break;
3515 case 0xd7:
3516 cur->pc = check_modrm (cur->pc);
3517 cur->pc++;
3518 break;
3519 case 0xd8: /* esc instructions */
3520 case 0xd9:
3521 case 0xda:
3522 case 0xdb:
3523 case 0xdc:
3524 case 0xdd:
3525 case 0xde:
3526 case 0xdf:
3527 cur->pc = check_modrm (cur->pc);
3528 break;
3529 case 0xe0: /* loopne Jb */
3530 case 0xe1: /* loope Jb */
3531 case 0xe2: /* loop Jb */
3532 case 0xe3: /* jcxz Jb */
3533 imm8 = *(char*) cur->pc++;
3534 if (nctx < (jmp_reg_switch_mode ? MAXJMPREGCTX : MAXCTX))
3535 {
3536 int tidx = 0;
3537 unsigned char *npc = cur->pc + imm8;
3538 if (is_after_ret (npc))
3539 break;
3540 while (npc > targets[tidx])
3541 tidx += 1;
3542 if (npc != targets[tidx])
3543 {
3544 if (ntrg < MAXTRGTS)
3545 {
3546 for (int i = 0; i < nctx; i++)
3547 if (buf[i].tidx >= tidx)
3548 buf[i].tidx++;
3549 /* insert a new target */
3550 for (int i = ntrg; i > tidx; i--)
3551 targets[i] = targets[i - 1];
3552 ntrg += 1;
3553 targets[tidx++] = npc;
3554 }
3555 else
3556 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3557 struct AdvWalkContext *new = buf + nctx;
3558 nctx += 1;
3559 __collector_memcpy (new, cur, sizeof (*new));
3560 new->pc = npc;
3561 new->tidx = tidx;
3562 cur = new; /* advance the new context first */
3563 continue;
3564 }
3565 }
3566 else
3567 DprintfT (SP_DUMP_UNWIND, "unwind.c: nctx = max\n");
3568 break;
3569 case 0xe4: case 0xe5:
3570 cur->pc = check_modrm (cur->pc);
3571 cur->pc++;
3572 break;
3573 case 0xe6: case 0xe7:
3574 cur->pc++;
3575 cur->pc = check_modrm (cur->pc);
3576 break;
3577 case 0xec: case 0xed: case 0xee: case 0xef:
3578 cur->pc = check_modrm (cur->pc);
3579 break;
3580 case 0xe8: /* call Jz (f64) */
3581 {
3582 if (jmp_reg_switch_mode == 1)
3583 {
3584 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3585 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3586 int rc = process_return (wctx, tmpctx);
3587 if (rc != RA_FAILURE)
3588 {
3589 if (save_ctx)
3590 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3591 return rc;
3592 }
3593 }
3594 int immz = read_int (cur->pc, z);
3595 if (immz == 0)
3596 /* special case in PIC code */
3597 cur->sp -= 1;
3598 cur->pc += z;
3599 }
3600 break;
3601 case 0xe9: /* jump Jz */
3602 {
3603 int immz = read_int (cur->pc, z);
3604 unsigned char *npc = cur->pc + z + immz;
3605 if ((unsigned long) npc < wctx->tbgn || (unsigned long) npc >= wctx->tend)
3606 {
3607 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3608 DELETE_CURCTX ();
3609 break;
3610 }
3611 int tidx = 0;
3612 while (npc > targets[tidx])
3613 tidx += 1;
3614 if (npc != targets[tidx])
3615 {
3616 if (ntrg < MAXTRGTS)
3617 {
3618 for (int i = 0; i < nctx; i++)
3619 if (buf[i].tidx >= tidx)
3620 buf[i].tidx++;
3621 /* insert a new target */
3622 for (int i = ntrg; i > tidx; i--)
3623 targets[i] = targets[i - 1];
3624 ntrg += 1;
3625 targets[tidx++] = npc;
3626 }
3627 else
3628 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3629 cur->pc = npc;
3630 cur->tidx = tidx;
3631 continue; /* advance this context first */
3632 }
3633 else
3634 {
3635 /* Delete context */
3636 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xe9.\n", __LINE__);
3637 DELETE_CURCTX ();
3638 }
3639 }
3640 break;
3641 case 0xeb: /* jump imm8 */
3642 {
3643 imm8 = *(char*) cur->pc++;
3644 int tidx = 0;
3645 unsigned char *npc = cur->pc + imm8;
3646 while (npc > targets[tidx])
3647 tidx += 1;
3648 if (npc != targets[tidx])
3649 {
3650 if (ntrg < MAXTRGTS)
3651 {
3652 for (int i = 0; i < nctx; i++)
3653 if (buf[i].tidx >= tidx)
3654 buf[i].tidx++;
3655 /* insert a new target */
3656 for (int i = ntrg; i > tidx; i--)
3657 targets[i] = targets[i - 1];
3658 ntrg += 1;
3659 targets[tidx++] = npc;
3660 }
3661 else
3662 DprintfT (SP_DUMP_UNWIND, "unwind.c: ntrg = max\n");
3663 cur->pc = npc;
3664 cur->tidx = tidx;
3665 continue; /* advance this context first */
3666 }
3667 else
3668 {
3669 /* Delete context */
3670 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xeb.\n", __LINE__);
3671 DELETE_CURCTX ();
3672 }
3673 }
3674 break;
3675 case 0xf0: /* lock prefix */
3676 case 0xf2: /* repne prefix */
3677 case 0xf3: /* repz prefix */
3678 break;
3679 case 0xf4: /* hlt */
3680 extop2 = *(cur->pc - 3);
3681 if (extop2 == 0x90)
3682 {
3683 // 17851712 occasional SEGV in find_i386_ret_addr in unwind.c during attach
3684 if (save_ctx)
3685 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
3686 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
3687 return RA_END_OF_STACK;
3688 }
3689 /* We see 'hlt' in _start. Stop analysis, revert to FP */
3690 /* A workaround for the Linux main stack */
3691 if (nctx > 1)
3692 {
3693 DELETE_CURCTX ();
3694 break;
3695 }
3696 if (cur->fp == 0)
3697 {
3698 if (jmp_reg_switch_mode == 1)
3699 {
3700 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xf4\n");
3701 goto checkFP;
3702 }
3703 cache_put (wctx, RA_EOSTCK);
3704 wctx->pc = 0;
3705 wctx->sp = 0;
3706 wctx->fp = 0;
3707 if (save_ctx)
3708 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
3709 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK\n", __LINE__);
3710 return RA_END_OF_STACK;
3711 }
3712 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d give up return address, opcode = 0xf4\n", __LINE__);
3713 goto checkFP;
3714 case 0xf5: /* cmc */
3715 break;
3716 case 0xf6: /* group3 Eb */
3717 modrm = *cur->pc;
3718 extop = MRM_EXT (modrm);
3719 cur->pc = check_modrm (cur->pc);
3720 if (extop == 0x0) /* test Ib */
3721 cur->pc += 1;
3722 break;
3723 case 0xf7: /* group3 Ev */
3724 modrm = *cur->pc;
3725 extop = MRM_EXT (modrm);
3726 cur->pc = check_modrm (cur->pc);
3727 if (extop == 0x0) /* test Iz */
3728 cur->pc += z;
3729 break;
3730 case 0xf8: /* clc */
3731 case 0xf9: /* stc */
3732 case 0xfa: /* cli */
3733 case 0xfb: /* sti */
3734 case 0xfc: /* cld */
3735 case 0xfd: /* std */
3736 break;
3737 case 0xfe: /* group4 */
3738 modrm = *cur->pc;
3739 extop = MRM_EXT (modrm);
3740 switch (extop)
3741 {
3742 case 0x0: /* inc Eb */
3743 case 0x1: /* dec Eb */
3744 cur->pc = check_modrm (cur->pc);
3745 break;
3746 case 0x7:
3747 cur->pc = check_modrm (cur->pc);
3748 break;
3749 default:
3750 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xfe %x\n",
3751 __LINE__, extop);
3752 DELETE_CURCTX ();
3753 break;
3754 }
3755 break;
3756 case 0xff: /* group5 */
3757 modrm = *cur->pc;
3758 extop = MRM_EXT (modrm);
3759 switch (extop)
3760 {
3761 case 0x0: /* inc Ev */
3762 case 0x1: /* dec Ev */
3763 cur->pc = check_modrm (cur->pc);
3764 break;
3765 case 0x2: /* calln Ev */
3766 if (jmp_reg_switch_mode == 1)
3767 {
3768 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3769 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3770 int rc = process_return (wctx, tmpctx);
3771 if (rc != RA_FAILURE)
3772 {
3773 if (save_ctx)
3774 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3775 return rc;
3776 }
3777 }
3778 cur->pc = check_modrm (cur->pc);
3779 break;
3780 case 0x3: /* callf Ep */
3781 if (jmp_reg_switch_mode == 1)
3782 {
3783 struct AdvWalkContext* tmpctx = (struct AdvWalkContext *) alloca (sizeof (*cur));
3784 __collector_memcpy (tmpctx, cur, sizeof (*cur));
3785 int rc = process_return (wctx, tmpctx);
3786 if (rc != RA_FAILURE)
3787 {
3788 if (save_ctx)
3789 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3790 return rc;
3791 }
3792 }
3793 cur->pc = check_modrm (cur->pc); /* XXXX */
3794 break;
3795 case 0x4: /* jumpn Ev */
3796 /* This instruction appears in PLT or
3797 * in tail call optimization.
3798 * In both cases treat it as return.
3799 * Save jump *(reg) - switch, etc, for later use when no ctx left
3800 */
3801 if (modrm == 0x25 || /* jumpn *disp32 */
3802 MRM_MOD (modrm) == 0x40 || /* jumpn byte(reg) */
3803 MRM_MOD (modrm) == 0x80) /* jumpn word(reg) */
3804 {
3805 DprintfT (SP_DUMP_UNWIND, "unwind.c: PLT or tail call: %p\n", cur->pc - 1);
3806 int rc = process_return (wctx, cur);
3807 if (rc != RA_FAILURE)
3808 {
3809 if (jmp_reg_switch_mode == 1 && total_num_jmp_reg < max_num_jmp_reg_seen)
3810 {
3811 DprintfT (SP_DUMP_UNWIND, "unwind.c: give up return address under jmp switch mode, opcode = 0xff\n");
3812 goto checkFP;
3813 }
3814 if (save_ctx)
3815 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3816 return rc;
3817 }
3818 }
3819 else if (modrm != 0x24 /*ignore SIB*/) /* jumpn *(reg) or jumpn reg */
3820 {
3821 // 22846120 stack unwind does not find caller of __memcpy_ssse3_back with B64 intel-Linux
3822 /*
3823 * For now, let's deal rather narrowly with this scenario. If:
3824 * - we are in the middle of an "ff e2" instruction, and
3825 * - the next instruction is undefined ( 0f 0b == ud2 )
3826 * then test return. (Might eventually have to broaden the scope
3827 * of this fix to other registers/etc.)
3828 */
3829 if (cur->pc[0] == 0xe2 && cur->pc[1] == 0x0f && cur->pc[2] == 0x0b)
3830 {
3831 int rc = process_return_real (wctx, cur, 0);
3832 if (rc == RA_SUCCESS)
3833 {
3834 if (save_ctx)
3835 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3836 return rc;
3837 }
3838 }
3839
3840 // 22691241 shjsynprog, jsynprog core dump from find_i386_ret_addr
3841 /*
3842 * Here is another oddity. Java 9 seems to emit dynamically generated
3843 * code where a code block ends with a "jmp *reg" and then padding to a
3844 * multiple-of-16 boundary and then a bunch of 0s. In this case, let's
3845 * not continue to walk bytes since we would be walking off the end of
3846 * the instructions into ... something. Treating them as instructions
3847 * can lead to unexpected results, including SEGV.
3848 */
3849 /*
3850 * While the general problem deserves a better solution, let's look
3851 * here only for one particular case:
3852 * 0xff 0xe7 jmp *reg
3853 * nop to bring us to a multiple-of-16 boundary
3854 * 0x0000000000000a00 something that does not look like an instruction
3855 *
3856 * A different nop might be used depending on how much padding is needed
3857 * to reach that multiple-of-16 boundary. We've seen two:
3858 * 0x90 one byte
3859 * 0x0f 0x1f 0x40 0x00 four bytes
3860 */
3861 // confirm the instruction is 0xff 0xe7
3862 if (cur->pc[0] == 0xe7)
3863 {
3864 // check for correct-length nop and find next 16-byte boundary
3865 int found_nop = 0;
3866 unsigned long long *boundary = 0;
3867 switch ((((unsigned long) (cur->pc)) & 0xf))
3868 {
3869 case 0xb: // look for 4-byte nop
3870 if (*((unsigned *) (cur->pc + 1)) == 0x00401f0f)
3871 found_nop = 1;
3872 boundary = (unsigned long long *) (cur->pc + 5);
3873 break;
3874 case 0xe: // look for 1-byte nop
3875 if (cur->pc[1] == 0x90)
3876 found_nop = 1;
3877 boundary = (unsigned long long *) (cur->pc + 2);
3878 break;
3879 default:
3880 break;
3881 }
3882
3883 // if nop is found, check what's at the boundary
3884 if (found_nop && *boundary == 0x000000000a00)
3885 {
3886 DELETE_CURCTX ();
3887 break;
3888 }
3889 }
3890
3891 DprintfT (SP_DUMP_UNWIND, "unwind.c: probably PLT or tail call or switch table: %p\n",
3892 cur->pc - 1);
3893 if (num_jmp_reg < expected_num_jmp_reg)
3894 {
3895 if (jmp_reg_ctx[num_jmp_reg] == NULL)
3896 jmp_reg_ctx[num_jmp_reg] = (struct AdvWalkContext *) alloca (sizeof (*cur));
3897 if (jmp_reg_ctx[num_jmp_reg] != NULL)
3898 __collector_memcpy (jmp_reg_ctx[num_jmp_reg], cur, sizeof (*cur));
3899 }
3900 if (num_jmp_reg < expected_num_jmp_reg ||
3901 (num_jmp_reg >= expected_num_jmp_reg &&
3902 jmp_reg_ctx[expected_num_jmp_reg - 1] != NULL &&
3903 cur->pc != jmp_reg_ctx[expected_num_jmp_reg - 1]->pc))
3904 {
3905 num_jmp_reg++;
3906 total_num_jmp_reg++;
3907 }
3908 if (jmp_reg_switch_mode == 1 && total_num_jmp_reg >= max_num_jmp_reg_seen)
3909 {
3910 int rc = process_return_real (wctx, cur, 0);
3911 if (rc == RA_SUCCESS)
3912 {
3913 if (save_ctx)
3914 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3915 return rc;
3916 }
3917 }
3918 }
3919 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d delete context, opcode 0xff.\n", __LINE__);
3920 DELETE_CURCTX ();
3921 break;
3922 case 0x5: /* jmpf Ep */
3923 cur->pc = check_modrm (cur->pc); /* XXXX */
3924 break;
3925 case 0x6: /* push Ev */
3926 cur->pc = check_modrm (cur->pc);
3927 cur->sp -= 1;
3928 break;
3929 case 0x7:
3930 cur->pc = check_modrm (cur->pc); /* XXXX */
3931 if (jmp_reg_switch_mode == 1)
3932 {
3933 int rc = process_return_real (wctx, cur, 0);
3934 if (rc == RA_SUCCESS)
3935 {
3936 if (save_ctx)
3937 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, rc);
3938 return rc;
3939 }
3940 }
3941 break;
3942 default:
3943 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0xff %x\n",
3944 __LINE__, (int) extop);
3945 DELETE_CURCTX ();
3946 break;
3947 }
3948 break;
3949 default:
3950 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d unknown opcode: 0x%x\n",
3951 __LINE__, (int) opcode);
3952 DELETE_CURCTX ();
3953 break;
3954 }
3955
3956 /* switch to next context */
3957 if (++cur >= buf + nctx)
3958 cur = buf;
3959 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d switch context: cur=0x%lx(%ld) nctx=%d cnt=%d\n",
3960 __LINE__, (unsigned long) cur, (long) (cur - buf), (int) nctx, (int) cnt);
3961 }
3962
3963 checkFP:
3964 Tprintf (DBG_LT3, "find_i386_ret_addr:%d checkFP: wctx=0x%lx fp=0x%lx ln=0x%lx pc=0x%lx sbase=0x%lx sp=0x%lx tbgn=0x%lx tend=0x%lx\n",
3965 __LINE__, (unsigned long) wctx, (unsigned long) wctx->fp,
3966 (unsigned long) wctx->ln, (unsigned long) wctx->pc, (unsigned long) wctx->sbase,
3967 (unsigned long) wctx->sp, (unsigned long) wctx->tbgn, (unsigned long) wctx->tend);
3968
3969 if (jmp_reg_switch_mode == 1)
3970 { // not deal with switch cases not ending with ret
3971 if (jmp_reg_switch_backup_ctx != NULL)
3972 __collector_memcpy (cur, jmp_reg_switch_backup_ctx, sizeof (*cur));
3973 DprintfT (SP_DUMP_UNWIND, "stack_unwind jmp reg mode on: pc = 0x%lx cnt = %d, nctx = %d\n", wctx->pc, cnt, nctx);
3974 }
3975
3976 unsigned long *cur_fp = cur->fp;
3977 unsigned long *cur_sp = cur->sp;
3978 if (do_walk == 0)
3979 __collector_memcpy (&wctx_pc_save, wctx, sizeof (struct WalkContext));
3980
3981 /* Resort to the frame pointer */
3982 if (cur->fp_loc)
3983 cur->fp = cur->fp_sav;
3984 cur->sp = cur->fp;
3985 if ((unsigned long) cur->sp >= wctx->sbase ||
3986 (unsigned long) cur->sp < wctx->sp)
3987 {
3988 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d do_walk=%d cur->sp=0x%p out of range. wctx->sbase=0x%lx wctx->sp=0x%lx wctx->pc=0x%lx\n",
3989 __LINE__, (int) do_walk, cur->sp, (unsigned long) wctx->sbase,
3990 (unsigned long) wctx->sp, (unsigned long) wctx->pc);
3991 if (do_walk == 0)
3992 {
3993 cur->sp = cur_sp;
3994 cur->fp = cur_fp;
3995 do_walk = 1;
3996 save_ctx = 1;
3997 goto startWalk;
3998 }
3999 if (save_ctx)
4000 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4001 return RA_FAILURE;
4002 }
4003
4004 unsigned long fp = *cur->sp++;
4005 if (fp <= (unsigned long) cur->sp || fp >= wctx->sbase)
4006 {
4007 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d fp=0x%016llx out of range. cur->sp=%p wctx->sbase=0x%lx wctx->pc=0x%lx\n",
4008 __LINE__, (unsigned long long) fp, cur->sp,
4009 (unsigned long) wctx->sbase, (unsigned long) wctx->pc);
4010 if (do_walk == 0)
4011 {
4012 cur->sp = cur_sp;
4013 cur->fp = cur_fp;
4014 do_walk = 1;
4015 save_ctx = 1;
4016 goto startWalk;
4017 }
4018 if (save_ctx)
4019 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4020 return RA_FAILURE;
4021 }
4022
4023 unsigned long ra = *cur->sp++;
4024 if (ra == 0)
4025 {
4026 cache_put (wctx, RA_EOSTCK);
4027 DprintfT (SP_DUMP_UNWIND, "unwind.c:%d returns RA_END_OF_STACK wctx->pc = 0x%lx\n", __LINE__, wctx->pc);
4028 if (save_ctx)
4029 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_END_OF_STACK);
4030 return RA_END_OF_STACK;
4031 }
4032
4033 unsigned long tbgn = wctx->tbgn;
4034 unsigned long tend = wctx->tend;
4035 if (ra < tbgn || ra >= tend)
4036 {
4037 // We do not know yet if update_map_segments is really needed
4038 if (!__collector_check_segment (ra, &tbgn, &tend, 0))
4039 {
4040 DprintfT (SP_DUMP_UNWIND, "unwind.c: __collector_check_segment fail. wctx->pc = 0x%lx\n", wctx->pc);
4041 if (do_walk == 0)
4042 {
4043 cur->sp = cur_sp;
4044 cur->fp = cur_fp;
4045 do_walk = 1;
4046 save_ctx = 1;
4047 goto startWalk;
4048 }
4049 if (save_ctx)
4050 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4051 return RA_FAILURE;
4052 }
4053 }
4054
4055 unsigned long npc = adjust_ret_addr (ra, ra - tbgn, tend);
4056 if (npc == 0)
4057 {
4058 DprintfT (SP_DUMP_UNWIND, "unwind.c: adjust_ret_addr fail. wctx->pc = 0x%lx\n", wctx->pc);
4059 if (do_walk == 0)
4060 {
4061 cur->sp = cur_sp;
4062 cur->fp = cur_fp;
4063 do_walk = 1;
4064 save_ctx = 1;
4065 goto startWalk;
4066 }
4067 if (save_ctx)
4068 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_FAILURE);
4069 return RA_FAILURE;
4070 }
4071 wctx->pc = npc;
4072 wctx->sp = (unsigned long) cur->sp;
4073 wctx->fp = fp;
4074 wctx->tbgn = tbgn;
4075 wctx->tend = tend;
4076
4077 if (save_ctx)
4078 {
4079 omp_cache_put (cur->sp_safe, &wctx_pc_save, wctx, RA_SUCCESS);
4080 DprintfT (SP_DUMP_UNWIND, "unwind.c: cache walk context. wctx_pc_save->pc = 0x%lx\n", wctx_pc_save.pc);
4081 }
4082 return RA_SUCCESS;
4083 }
4084
4085 /*
4086 * We have the return address, but we would like to report to the user
4087 * the calling PC, which is the instruction immediately preceding the
4088 * return address. Unfortunately, x86 instructions can have variable
4089 * length. So we back up 8 bytes and try to figure out where the
4090 * calling PC starts. (FWIW, call instructions are often 5-bytes long.)
4091 */
4092 unsigned long
4093 adjust_ret_addr (unsigned long ra, unsigned long segoff, unsigned long tend)
4094 {
4095 unsigned long npc = 0;
4096 int i = segoff < 8 ? segoff : 8;
4097 for (; i > 1; i--)
4098 {
4099 unsigned char *ptr = (unsigned char*) ra - i;
4100 int z = 4;
4101 int a = 4;
4102 int done = 0;
4103 int bVal;
4104 while (!done)
4105 {
4106 bVal = getByteInstruction (ptr);
4107 if (bVal < 0)
4108 return 0;
4109 switch (bVal)
4110 {
4111 case 0x26:
4112 case 0x36:
4113 #if WSIZE(64)
4114 ptr += 1;
4115 break;
4116 #endif
4117 case 0x64:
4118 case 0x65:
4119 bVal = getByteInstruction (ptr + 1);
4120 if (bVal < 0)
4121 return 0;
4122 if (bVal == 0xe8)
4123 // a workaround for bug 16193041, assuming "call Jz" has no segment override prefix
4124 done = 1;
4125 else
4126 ptr += 1;
4127 break;
4128 case 0x66:
4129 z = 2;
4130 ptr += 1;
4131 break;
4132 case 0x67:
4133 a = 2;
4134 ptr += 1;
4135 break;
4136 default:
4137 done = 1;
4138 break;
4139 }
4140 }
4141 #if WSIZE(64)
4142 bVal = getByteInstruction (ptr);
4143 if (bVal < 0)
4144 return 0;
4145 if (bVal >= 0x40 && bVal <= 0x4f)
4146 { /* XXXX not all REX codes applicable */
4147 if (bVal & 0x8)
4148 z = 4;
4149 ptr += 1;
4150 }
4151 #endif
4152 int opcode = getByteInstruction (ptr);
4153 if (opcode < 0)
4154 return 0;
4155 ptr++;
4156 switch (opcode)
4157 {
4158 case 0xe8: /* call Jz (f64) */
4159 ptr += z;
4160 break;
4161 case 0x9a: /* callf Ap */
4162 ptr += 2 + a;
4163 break;
4164 case 0xff: /* calln Ev , callf Ep */
4165 {
4166 int extop = MRM_EXT (*ptr);
4167 if (extop == 2 || extop == 3)
4168 ptr = check_modrm (ptr);
4169 }
4170 break;
4171 default:
4172 continue;
4173 }
4174 if ((unsigned long) ptr == ra)
4175 {
4176 npc = ra - i;
4177 break;
4178 }
4179 }
4180 if (npc == 0)
4181 {
4182 unsigned char * ptr = (unsigned char *) ra;
4183 #if WSIZE(32)
4184 // test __kernel_sigreturn or __kernel_rt_sigreturn
4185 if ((ra + 7 < tend && getByteInstruction (ptr) == 0x58
4186 && getByteInstruction (ptr + 1) == 0xb8
4187 && getByteInstruction (ptr + 6) == 0xcd
4188 && getByteInstruction (ptr + 7) == 0x80) /* pop %eax; mov $NNNN, %eax; int */
4189 || (ra + 7 < tend && getByteInstruction (ptr) == 0x58
4190 && getByteInstruction (ptr + 1) == 0xb8
4191 && getByteInstruction (ptr + 6) == 0x0f
4192 && getByteInstruction (ptr + 7) == 0x05) /* pop %eax; mov $NNNN, %eax; syscall */
4193 || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
4194 && getByteInstruction (ptr + 5) == 0xcd
4195 && getByteInstruction (ptr + 6) == 0x80) /* mov $NNNN, %eax; int */
4196 || (ra + 6 < tend && getByteInstruction (ptr) == 0xb8
4197 && getByteInstruction (ptr + 5) == 0x0f
4198 && getByteInstruction (ptr + 6) == 0x05)) /* mov $NNNN, %eax; syscall */
4199 #else //WSIZE(64)
4200 // test __restore_rt
4201 if (ra + 8 < tend && getByteInstruction (ptr) == 0x48
4202 && getByteInstruction (ptr + 7) == 0x0f
4203 && getByteInstruction (ptr + 8) == 0x05) /* mov $NNNNNNNN, %rax; syscall */
4204 #endif
4205 {
4206 npc = ra;
4207 }
4208 }
4209 if (npc == 0 && __collector_java_mode
4210 && __collector_java_asyncgetcalltrace_loaded)
4211 { // detect jvm interpreter code for java user threads
4212 unsigned char * ptr = (unsigned char *) ra;
4213 #if WSIZE(32)
4214 // up to J170
4215 /*
4216 * ff 24 9d e0 64 02 f5 jmp *-0xafd9b20(,%ebx,4)
4217 * 8b 4e 01 movl 1(%esi),%ecx
4218 * f7 d1 notl %ecx
4219 * 8b 5d ec movl -0x14(%ebp),%ebx
4220 * c1 e1 02 shll $2,%ecx
4221 * eb d8 jmp .-0x26 [ 0x92a ]
4222 * 83 ec 08 subl $8,%esp || 8b 65 f8 movl -8(%ebp),%esp
4223 * */
4224 if (ra - 20 >= (ra - segoff) && ((*ptr == 0x83 && *(ptr + 1) == 0xec) || (*ptr == 0x8b && *(ptr + 1) == 0x65))
4225 && *(ptr - 2) == 0xeb
4226 && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
4227 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
4228 && *(ptr - 10) == 0xf7 && *(ptr - 9) == 0xd1
4229 && *(ptr - 13) == 0x8b && *(ptr - 12) == 0x4e
4230 && *(ptr - 20) == 0xff && *(ptr - 19) == 0x24 && *(ptr - 18) == 0x9d)
4231 {
4232 npc = ra - 20;
4233 }
4234 // J180 J190
4235 // ff 24 9d ** ** ** ** jmp *-0x*******(,%ebx,4)
4236 if (npc == 0
4237 && ra - 7 >= (ra - segoff)
4238 && *(ptr - 7) == 0xff
4239 && *(ptr - 6) == 0x24
4240 && *(ptr - 5) == 0x9d)
4241 {
4242 npc = ra - 7;
4243 }
4244 #else //WSIZE(64)
4245 // up to J170
4246 /*
4247 * 41 ff 24 da jmp *(%r10,%rbx,8)
4248 * 41 8b 4d 01 movl 1(%r13),%ecx
4249 * f7 d1 notl %ecx
4250 * 48 8b 5d d8 movq -0x28(%rbp),%rbx
4251 * c1 e1 02 shll $2,%ecx
4252 * eb cc jmp .-0x32 [ 0xd23 ]
4253 * 48 8b 65 f0 movq -0x10(%rbp),%rsp
4254 */
4255 if (ra - 19 >= (ra - segoff) && *ptr == 0x48 && ((*(ptr + 1) == 0x8b && *(ptr + 2) == 0x65) || (*(ptr + 1) == 0x83 && *(ptr + 2) == 0xec))
4256 && *(ptr - 2) == 0xeb
4257 && *(ptr - 5) == 0xc1 && *(ptr - 4) == 0xe1
4258 && *(ptr - 9) == 0x48 && *(ptr - 8) == 0x8b && *(ptr - 7) == 0x5d
4259 && *(ptr - 11) == 0xf7 && *(ptr - 10) == 0xd1
4260 && *(ptr - 15) == 0x41 && *(ptr - 14) == 0x8b && *(ptr - 13) == 0x4d
4261 && *(ptr - 19) == 0x41 && *(ptr - 18) == 0xff)
4262 npc = ra - 19;
4263 // J180 J190
4264 // 41 ff 24 da jmp *(%r10,%rbx,8)
4265 if (npc == 0
4266 && ra - 4 >= (ra - segoff)
4267 && *(ptr - 4) == 0x41
4268 && *(ptr - 3) == 0xff
4269 && *(ptr - 2) == 0x24
4270 && *(ptr - 1) == 0xda)
4271 npc = ra - 4;
4272 #endif
4273 }
4274
4275 return npc;
4276 }
4277
4278 /*
4279 * Parses AVX instruction and returns its length.
4280 * Returns 0 if parsing failed.
4281 * https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
4282 */
4283 static int
4284 parse_x86_AVX_instruction (unsigned char *pc)
4285 {
4286 /*
4287 * VEX prefix has a two-byte form (0xc5) and a three byte form (0xc4).
4288 * If an instruction syntax can be encoded using the two-byte form,
4289 * it can also be encoded using the three byte form of VEX.
4290 * The latter increases the length of the instruction by one byte.
4291 * This may be helpful in some situations for code alignment.
4292 *
4293 Byte 0 Byte 1 Byte 2 Byte 3
4294 (Bit Position) 7 0 7 6 5 4 0 7 6 3 2 10
4295 3-byte VEX [ 11000100 ] [ R X B | m-mmmm ] [ W | vvvv | L | pp ]
4296 7 0 7 6 3 2 10
4297 2-byte VEX [ 11000101 ] [ R | vvvv | L | pp ]
4298 7 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0 7 6 5 4 3 2 1 0
4299 4-byte EVEX [ 01100010 ] [ R X B R1 0 0 m m ] [ W v v v v 1 p p ] [ z L1 L B1 V1 a a a ]
4300
4301 R: REX.R in 1's complement (inverted) form
4302 0: Same as REX.R=1 (64-bit mode only)
4303 1: Same as REX.R=0 (must be 1 in 32-bit mode)
4304
4305 X: REX.X in 1's complement (inverted) form
4306 0: Same as REX.X=1 (64-bit mode only)
4307 1: Same as REX.X=0 (must be 1 in 32-bit mode)
4308
4309 B: REX.B in 1's complement (inverted) form
4310 0: Same as REX.B=1 (64-bit mode only)
4311 1: Same as REX.B=0 (Ignored in 32-bit mode).
4312
4313 W: opcode specific (use like REX.W, or used for opcode
4314 extension, or ignored, depending on the opcode byte)
4315
4316 m-mmmm:
4317 00000: Reserved for future use (will #UD)
4318 00001: implied 0F leading opcode byte
4319 00010: implied 0F 38 leading opcode bytes
4320 00011: implied 0F 3A leading opcode bytes
4321 00100-11111: Reserved for future use (will #UD)
4322
4323 vvvv: a register specifier (in 1's complement form) or 1111 if unused.
4324
4325 L: Vector Length
4326 0: scalar or 128-bit vector
4327 1: 256-bit vector
4328
4329 pp: opcode extension providing equivalent functionality of a SIMD prefix
4330 00: None
4331 01: 66
4332 10: F3
4333 11: F2
4334 *
4335 * Example: 0xc5f877L vzeroupper
4336 * VEX prefix: 0xc5 0x77
4337 * Opcode: 0xf8
4338 *
4339 */
4340 int len = 0;
4341 disassemble_info dis_info;
4342 dis_info.arch = bfd_arch_i386;
4343 dis_info.mach = bfd_mach_x86_64;
4344 dis_info.flavour = bfd_target_unknown_flavour;
4345 dis_info.endian = BFD_ENDIAN_UNKNOWN;
4346 dis_info.endian_code = dis_info.endian;
4347 dis_info.octets_per_byte = 1;
4348 dis_info.disassembler_needs_relocs = FALSE;
4349 dis_info.fprintf_func = fprintf_func;
4350 dis_info.stream = NULL;
4351 dis_info.disassembler_options = NULL;
4352 dis_info.read_memory_func = read_memory_func;
4353 dis_info.memory_error_func = memory_error_func;
4354 dis_info.print_address_func = print_address_func;
4355 dis_info.symbol_at_address_func = symbol_at_address_func;
4356 dis_info.symbol_is_valid = symbol_is_valid;
4357 dis_info.display_endian = BFD_ENDIAN_UNKNOWN;
4358 dis_info.symtab = NULL;
4359 dis_info.symtab_size = 0;
4360 dis_info.buffer_vma = 0;
4361 dis_info.buffer = pc;
4362 dis_info.buffer_length = 8;
4363
4364 disassembler_ftype disassemble = print_insn_i386;
4365 if (disassemble == NULL)
4366 {
4367 DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction ERROR: unsupported disassemble\n");
4368 return 0;
4369 }
4370 len = disassemble (0, &dis_info);
4371 DprintfT (SP_DUMP_UNWIND, "parse_x86_AVX_instruction: returned %d pc: %p\n", len, pc);
4372 return len;
4373 }
4374
4375 /*
4376 * In the Intel world, a stack frame looks like this:
4377 *
4378 * %fp0->| |
4379 * |-------------------------------|
4380 * | Args to next subroutine |
4381 * |-------------------------------|-\
4382 * %sp0->| One word struct-ret address | |
4383 * |-------------------------------| > minimum stack frame (8 bytes)
4384 * | Previous frame pointer (%fp0)| |
4385 * %fp1->|-------------------------------|-/
4386 * | Local variables |
4387 * %sp1->|-------------------------------|
4388 *
4389 */
4390
/*
 * Walk the x86/x86_64 call stack described by CONTEXT and store one
 * return address (as a long) per frame into BUF.
 *   buf     - output array of PCs
 *   size    - size of buf in bytes
 *   bptr    - if non-NULL, frames whose SP is at or below bptr are not
 *             recorded normally; at most the first such PC is kept in
 *             lbuf[0] when extra_frame is set
 *   eptr    - if non-NULL, the walk stops once SP reaches eptr and the
 *             last two recorded frames are dropped
 *   context - machine context (PC/SP/FP) to start the walk from
 *   mode    - FRINFO_NO_WALK disables instruction-level walking in
 *             find_i386_ret_addr; FRINFO_FROM_STACK requests the extra
 *             leading frame
 * Returns the number of bytes stored in buf.  If buf overflows, the
 * last slot is overwritten with SP_TRUNC_STACK_MARKER.
 */
int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  long *lbuf = (long*) buf;
  int lsize = size / sizeof (long);
  int ind = 0;
  int do_walk = 1;
  int extra_frame = 0;
  if (mode & FRINFO_NO_WALK)
    do_walk = 0;
  if ((mode & 0xffff) == FRINFO_FROM_STACK)
    extra_frame = 1;

  /*
   * trace the stack frames from user stack.
   * We are assuming that the frame pointer and return address
   * are null when we are at the top level.
   */
  struct WalkContext wctx;
  wctx.pc = GET_PC (context);
  wctx.sp = GET_SP (context);
  wctx.fp = GET_FP (context);
  wctx.ln = (unsigned long) context->uc_link;
  /* Per-thread stack base recorded at thread start; fall back to a 1MB
     window above the current SP when it is missing or inconsistent.  */
  unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
  if (sbase && *sbase > wctx.sp)
    wctx.sbase = *sbase;
  else
    {
      wctx.sbase = wctx.sp + 0x100000;
      if (wctx.sbase < wctx.sp) /* overflow */
	wctx.sbase = (unsigned long) - 1;
    }
  // We do not know yet if update_map_segments is really needed
  __collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0);

  /* Outer loop: re-entered only after a sigreturn frame resets wctx.
     The inner loop below does the actual frame-by-frame walk.  */
  for (;;)
    {
      if (ind >= lsize || wctx.pc == 0)
	break;
      /* Frame below bptr: keep (at most) its PC in slot 0 when the
	 caller asked for the extra leading frame.  */
      if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
	{
	  lbuf[0] = wctx.pc;
	  if (ind == 0)
	    {
	      ind++;
	      if (ind >= lsize)
		break;
	    }
	}
      /* Normal case: record the current PC.  */
      if (bptr == NULL || wctx.sp > (unsigned long) bptr)
	{
	  lbuf[ind++] = wctx.pc;
	  if (ind >= lsize)
	    break;
	}

      for (;;)
	{
	  /* Reached the caller-imposed end of the interesting region:
	     drop the two frames belonging to the boundary itself.  */
	  if (eptr != NULL && wctx.sp >= (unsigned long) eptr)
	    {
	      ind = ind >= 2 ? ind - 2 : 0;
	      goto exit;
	    }
	  /* Advance wctx one frame up the stack.  */
	  int ret = find_i386_ret_addr (&wctx, do_walk);
	  DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d find_i386_ret_addr returns %d\n", __LINE__, ret);
	  if (ret == RA_FAILURE)
	    {
	      /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
	      goto exit;
	    }

	  if (ret == RA_END_OF_STACK)
	    goto exit;
#if WSIZE(32)
	  if (ret == RA_RT_SIGRETURN)
	    {
	      /* rt_sigreturn frame: arg2 points at the saved ucontext;
		 resume the walk from the interrupted context.  */
	      struct SigFrame
	      {
		unsigned long arg0;
		unsigned long arg1;
		unsigned long arg2;
	      } *sframe = (struct SigFrame*) wctx.sp;
	      ucontext_t *ncontext = (ucontext_t*) sframe->arg2;
	      wctx.pc = GET_PC (ncontext);
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      unsigned long nsp = GET_SP (ncontext);
	      /* Check the new stack pointer */
	      if (nsp <= sframe->arg2 || nsp > sframe->arg2 + sizeof (ucontext_t) + 1024)
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = nsp;
	      wctx.fp = GET_FP (ncontext);
	      break; /* restart the outer loop with the restored context */
	    }
	  else if (ret == RA_SIGRETURN)
	    {
	      /* Old-style sigreturn: a struct sigcontext sits directly
		 on the stack.  */
	      struct sigcontext *sctx = (struct sigcontext*) wctx.sp;
	      wctx.pc = sctx->eip;
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = sctx->esp;
	      wctx.fp = sctx->ebp;
	      break; /* restart the outer loop with the restored context */
	    }
#elif WSIZE(64)
	  if (ret == RA_RT_SIGRETURN)
	    {
	      /* On x86_64 the ucontext is at the top of the signal frame.  */
	      ucontext_t *ncontext = (ucontext_t*) wctx.sp;
	      wctx.pc = GET_PC (ncontext);
	      if (!__collector_check_segment (wctx.pc, &wctx.tbgn, &wctx.tend, 0))
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      unsigned long nsp = GET_SP (ncontext);
	      /* Check the new stack pointer */
	      if (nsp <= wctx.sp || nsp > wctx.sp + sizeof (ucontext_t) + 1024)
		{
		  /* lbuf[ind++] = SP_FAILED_UNWIND_MARKER; */
		  goto exit;
		}
	      wctx.sp = nsp;
	      wctx.fp = GET_FP (ncontext);
	      break; /* restart the outer loop with the restored context */
	    }
#endif /* WSIZE() */
	  /* Same bptr/extra_frame bookkeeping as in the outer loop,
	     applied to the frame we just walked to.  */
	  if (bptr != NULL && extra_frame && wctx.sp <= (unsigned long) bptr && ind < 2)
	    {
	      lbuf[0] = wctx.pc;
	      if (ind == 0)
		{
		  ind++;
		  if (ind >= lsize)
		    break;
		}
	    }
	  if (bptr == NULL || wctx.sp > (unsigned long) bptr)
	    {
	      lbuf[ind++] = wctx.pc;
	      if (ind >= lsize)
		goto exit;
	    }
	}
    }

exit:
#if defined(DEBUG)
  if ((SP_DUMP_UNWIND & __collector_tracelevel) != 0)
    {
      DprintfT (SP_DUMP_UNWIND, "stack_unwind (x86 walk):%d found %d frames\n\n", __LINE__, ind);
      for (int i = 0; i < ind; i++)
	DprintfT (SP_DUMP_UNWIND, "  %3d: 0x%lx\n", i, (unsigned long) lbuf[i]);
    }
#endif
  dump_stack (__LINE__);
  /* Buffer overflowed: mark truncation in the final slot.  */
  if (ind >= lsize)
    {
      ind = lsize - 1;
      lbuf[ind++] = (unsigned long) SP_TRUNC_STACK_MARKER;
    }
  return ind * sizeof (long);
}
4562
4563 #elif ARCH(Aarch64)
4564
/*
 * Aarch64 stack walker: follow the frame-record chain starting at
 * CONTEXT's pc/sp and store one PC (as a __u64) per frame into BUF.
 * Returns the number of bytes stored; the last slot is overwritten
 * with SP_TRUNC_STACK_MARKER when buf overflows.
 * bptr/eptr/mode do not influence the walk on this architecture.
 */
static int
stack_unwind (char *buf, int size, void *bptr, void *eptr, ucontext_t *context, int mode)
{
  /* NOTE(review): this guarded call has no effect on the result; it
     appears to exist only to reference the otherwise-unused parameters
     and getByteInstruction so the compiler does not warn -- confirm.  */
  if (buf && bptr && eptr && context && size + mode > 0)
    getByteInstruction ((unsigned char *) eptr);
  int ind = 0;
  __u64 *lbuf = (void *) buf;
  int lsize = size / sizeof (__u64);
  __u64 pc = context->uc_mcontext.pc;
  __u64 sp = context->uc_mcontext.sp;
  __u64 stack_base;
  unsigned long tbgn = 0;  /* cached text-segment bounds for pc checks */
  unsigned long tend = 0;

  /* Per-thread stack base recorded at thread start; fall back to a 1MB
     window above the current SP when it is missing or inconsistent.  */
  unsigned long *sbase = (unsigned long*) __collector_tsd_get_by_key (unwind_key);
  if (sbase && *sbase > sp)
    stack_base = *sbase;
  else
    {
      stack_base = sp + 0x100000;
      if (stack_base < sp) // overflow
	stack_base = (__u64) -1;
    }
  DprintfT (SP_DUMP_UNWIND,
	    "unwind.c:%d stack_unwind %2d pc=0x%llx sp=0x%llx stack_base=0x%llx\n",
	    __LINE__, ind, (unsigned long long) pc, (unsigned long long) sp,
	    (unsigned long long) stack_base);

  while (sp && pc)
    {
      DprintfT (SP_DUMP_UNWIND,
		"unwind.c:%d stack_unwind %2d pc=0x%llx sp=0x%llx\n",
		__LINE__, ind, (unsigned long long) pc, (unsigned long long) sp);
      lbuf[ind++] = pc;
      /* Stop on a full buffer, on leaving the stack region, or on a
	 frame pointer that is not 16-byte aligned.  */
      if (ind >= lsize || sp >= stack_base || (sp & 15) != 0)
	break;
      /* Validate pc against the (cached) executable segment bounds.  */
      if (pc < tbgn || pc >= tend)
	if (!__collector_check_segment ((unsigned long) pc, &tbgn, &tend, 0))
	  {
	    DprintfT (SP_DUMP_UNWIND,
		      "unwind.c:%d __collector_check_segment failed. sp=0x%lx\n",
		      __LINE__, (unsigned long) sp);
	    break;
	  }
      /* Frame record: sp[0] holds the previous frame pointer, sp[1] the
	 saved return address (AArch64 frame-record layout).  */
      pc = ((__u64 *) sp)[1];
      __u64 old_sp = sp;
      sp = ((__u64 *) sp)[0];
      /* The chain must move toward higher addresses; anything else
	 indicates a corrupt or cyclic frame chain.  */
      if (sp < old_sp)
	break;
    }
  if (ind >= lsize)
    {
      ind = lsize - 1;
      lbuf[ind++] = (__u64) SP_TRUNC_STACK_MARKER;
    }
  return ind * sizeof (__u64);
}
4630 #endif /* ARCH() */