1 /* rx.c --- opcode semantics for stand-alone RX simulator.
3 Copyright (C) 2008, 2009, 2010 Free Software Foundation, Inc.
4 Contributed by Red Hat, Inc.
6 This file is part of the GNU simulators.
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
27 #include "opcode/rx.h"
36 static const char * id_names
[] = {
38 "RXO_mov", /* d = s (signed) */
39 "RXO_movbi", /* d = [s,s2] (signed) */
40 "RXO_movbir", /* [s,s2] = d (signed) */
41 "RXO_pushm", /* s..s2 */
42 "RXO_popm", /* s..s2 */
43 "RXO_xchg", /* s <-> d */
44 "RXO_stcc", /* d = s if cond(s2) */
45 "RXO_rtsd", /* rtsd, 1=imm, 2-0 = reg if reg type */
47 /* These are all either d OP= s or, if s2 is set, d = s OP s2. Note
48 that d may be "None". */
61 "RXO_adc", /* d = d + s + carry */
62 "RXO_sbb", /* d = d - s - ~carry */
63 "RXO_abs", /* d = |s| */
64 "RXO_max", /* d = max(d,s) */
65 "RXO_min", /* d = min(d,s) */
66 "RXO_emul", /* d:64 = d:32 * s */
67 "RXO_emulu", /* d:64 = d:32 * s (unsigned) */
68 "RXO_ediv", /* d:64 / s; d = quot, d+1 = rem */
69 "RXO_edivu", /* d:64 / s; d = quot, d+1 = rem */
71 "RXO_rolc", /* d <<= 1 through carry */
72 "RXO_rorc", /* d >>= 1 through carry*/
73 "RXO_rotl", /* d <<= #s without carry */
74 "RXO_rotr", /* d >>= #s without carry*/
75 "RXO_revw", /* d = revw(s) */
76 "RXO_revl", /* d = revl(s) */
77 "RXO_branch", /* pc = d if cond(s) */
78 "RXO_branchrel",/* pc += d if cond(s) */
79 "RXO_jsr", /* pc = d */
80 "RXO_jsrrel", /* pc += d */
106 "RXO_sat", /* sat(d) */
109 "RXO_fadd", /* d op= s */
118 "RXO_bset", /* d |= (1<<s) */
119 "RXO_bclr", /* d &= ~(1<<s) */
120 "RXO_btst", /* s & (1<<s2) */
121 "RXO_bnot", /* d ^= (1<<s) */
122 "RXO_bmcc", /* d<s> = cond(s2) */
124 "RXO_clrpsw", /* flag index in d */
125 "RXO_setpsw", /* flag index in d */
126 "RXO_mvtipl", /* new IPL in s */
130 "RXO_rtd", /* undocumented */
132 "RXO_dbt", /* undocumented */
133 "RXO_int", /* vector id in s */
137 "RXO_sccnd", /* d = cond(s) ? 1 : 0 */
140 static const char * optype_names
[] = {
142 "#Imm", /* #addend */
144 "[Rn]", /* [Rn + addend] */
147 " cc ", /* eq, gtu, etc */
148 "Flag" /* [UIOSZC] */
/* Entry counts for the opcode-id and operand-type name tables above.  */
#define N_RXO (sizeof(id_names)/sizeof(id_names[0]))
#define N_RXT (sizeof(optype_names)/sizeof(optype_names[0]))
155 static unsigned long long benchmark_start_cycle
;
156 static unsigned long long benchmark_end_cycle
;
158 static int op_cache
[N_RXT
][N_RXT
][N_RXT
];
159 static int op_cache_rev
[N_MAP
];
160 static int op_cache_idx
= 0;
163 op_lookup (int a
, int b
, int c
)
165 if (op_cache
[a
][b
][c
])
166 return op_cache
[a
][b
][c
];
168 if (op_cache_idx
>= N_MAP
)
170 printf("op_cache_idx exceeds %d\n", N_MAP
);
173 op_cache
[a
][b
][c
] = op_cache_idx
;
174 op_cache_rev
[op_cache_idx
] = (a
<<8) | (b
<<4) | c
;
179 op_cache_string (int map
)
182 static char cb
[5][20];
185 map
= op_cache_rev
[map
];
190 sprintf(cb
[ci
], "%s %s %s", optype_names
[a
], optype_names
[b
], optype_names
[c
]);
194 static unsigned long long cycles_per_id
[N_RXO
][N_MAP
];
195 static unsigned long long times_per_id
[N_RXO
][N_MAP
];
196 static unsigned long long memory_stalls
;
197 static unsigned long long register_stalls
;
198 static unsigned long long branch_stalls
;
199 static unsigned long long branch_alignment_stalls
;
200 static unsigned long long fast_returns
;
202 static unsigned long times_per_pair
[N_RXO
][N_MAP
][N_RXO
][N_MAP
];
203 static int prev_opcode_id
= RXO_unknown
;
210 #endif /* CYCLE_STATS */
213 #ifdef CYCLE_ACCURATE
215 static int new_rt
= -1;
217 /* Number of cycles to add if an insn spans an 8-byte boundary. */
218 static int branch_alignment_penalty
= 0;
222 static int running_benchmark
= 1;
224 #define tprintf if (trace && running_benchmark) printf
226 jmp_buf decode_jmp_buf
;
227 unsigned int rx_cycles
= 0;
229 #ifdef CYCLE_ACCURATE
230 /* If nonzero, memory was read at some point and cycle latency might
232 static int memory_source
= 0;
233 /* If nonzero, memory was written and extra cycles might be
235 static int memory_dest
= 0;
238 cycles (int throughput
)
240 tprintf("%d cycles\n", throughput
);
241 regs
.cycle_count
+= throughput
;
/* Number of execution (E) cycles the op uses.  For memory sources, we
   include the load micro-op stall as two extra E cycles.
   Fix: parenthesize the macro argument so an expression argument with
   lower precedence than '+' cannot mis-associate.  */
#define E(c) cycles (memory_source ? (c) + 2 : (c))
#define E1 cycles (1)
#define E2 cycles (2)
#define EBIT cycles (memory_source ? 2 : 1)
251 /* Check to see if a read latency must be applied for a given register. */
255 tprintf("register %d load stall\n", r); \
256 regs.cycle_count ++; \
257 STATS(register_stalls ++); \
264 tprintf ("Rt now %d\n", r); \
268 #else /* !CYCLE_ACCURATE */
278 #endif /* else CYCLE_ACCURATE */
/* Bytes transferred for each operand size code.
   NOTE(review): index order assumed to follow the RX_* size
   enumeration in opcode/rx.h (any, byte, ubyte, sbyte, word, uword,
   sword, 3-byte, long) — confirm.  The extracted span was missing
   the closing brace, restored here.  */
static int size2bytes[] = {
  4, 1, 1, 1, 2, 2, 2, 3, 4
};
/* Abort the simulator, reporting the source location (basename only)
   that requested it.  Use the rx_abort() wrapper so the location is
   captured automatically.
   NOTE(review): the final abort() call and braces were missing from
   the extracted span and have been reconstructed.  */
#define rx_abort() _rx_abort(__FILE__, __LINE__)

void
_rx_abort (const char *file, int line)
{
  if (strrchr (file, '/'))
    file = strrchr (file, '/') + 1;
  fprintf(stderr, "abort at %s:%d\n", file, line);
  abort ();
}
298 static unsigned char *get_byte_base
;
299 static SI get_byte_page
;
301 /* This gets called a *lot* so optimize it. */
303 rx_get_byte (void *vdata
)
305 RX_Data
*rx_data
= (RX_Data
*)vdata
;
306 SI tpc
= rx_data
->dpc
;
308 /* See load.c for an explanation of this. */
312 if (((tpc
^ get_byte_page
) & NONPAGE_MASK
) || enable_counting
)
314 get_byte_page
= tpc
& NONPAGE_MASK
;
315 get_byte_base
= rx_mem_ptr (get_byte_page
, MPA_READING
) - get_byte_page
;
319 return get_byte_base
[tpc
];
323 get_op (RX_Opcode_Decoded
*rd
, int i
)
325 RX_Opcode_Operand
*o
= rd
->op
+ i
;
330 case RX_Operand_None
:
333 case RX_Operand_Immediate
: /* #addend */
336 case RX_Operand_Register
: /* Rn */
338 rv
= get_reg (o
->reg
);
341 case RX_Operand_Predec
: /* [-Rn] */
342 put_reg (o
->reg
, get_reg (o
->reg
) - size2bytes
[o
->size
]);
344 case RX_Operand_Postinc
: /* [Rn+] */
345 case RX_Operand_Indirect
: /* [Rn + addend] */
346 #ifdef CYCLE_ACCURATE
349 if (regs
.m2m
== M2M_BOTH
)
351 tprintf("src memory stall\n");
362 addr
= get_reg (o
->reg
) + o
->addend
;
368 case RX_Byte
: /* undefined extension */
371 rv
= mem_get_qi (addr
);
374 case RX_Word
: /* undefined extension */
377 rv
= mem_get_hi (addr
);
381 rv
= mem_get_psi (addr
);
385 rv
= mem_get_si (addr
);
389 if (o
->type
== RX_Operand_Postinc
)
390 put_reg (o
->reg
, get_reg (o
->reg
) + size2bytes
[o
->size
]);
394 case RX_Operand_Condition
: /* eq, gtu, etc */
395 return condition_true (o
->reg
);
397 case RX_Operand_Flag
: /* [UIOSZC] */
398 return (regs
.r_psw
& (1 << o
->reg
)) ? 1 : 0;
401 /* if we've gotten here, we need to clip/extend the value according
408 case RX_Byte
: /* undefined extension */
409 rv
|= 0xdeadbe00; /* keep them honest */
417 rv
= sign_ext (rv
, 8);
420 case RX_Word
: /* undefined extension */
421 rv
|= 0xdead0000; /* keep them honest */
429 rv
= sign_ext (rv
, 16);
443 put_op (RX_Opcode_Decoded
*rd
, int i
, int v
)
445 RX_Opcode_Operand
*o
= rd
->op
+ i
;
451 if (o
->type
!= RX_Operand_Register
)
455 case RX_Byte
: /* undefined extension */
456 v
|= 0xdeadbe00; /* keep them honest */
467 case RX_Word
: /* undefined extension */
468 v
|= 0xdead0000; /* keep them honest */
476 v
= sign_ext (v
, 16);
489 case RX_Operand_None
:
490 /* Opcodes like TST and CMP use this. */
493 case RX_Operand_Immediate
: /* #addend */
494 case RX_Operand_Condition
: /* eq, gtu, etc */
497 case RX_Operand_Register
: /* Rn */
502 case RX_Operand_Predec
: /* [-Rn] */
503 put_reg (o
->reg
, get_reg (o
->reg
) - size2bytes
[o
->size
]);
505 case RX_Operand_Postinc
: /* [Rn+] */
506 case RX_Operand_Indirect
: /* [Rn + addend] */
508 #ifdef CYCLE_ACCURATE
509 if (regs
.m2m
== M2M_BOTH
)
511 tprintf("dst memory stall\n");
521 addr
= get_reg (o
->reg
) + o
->addend
;
527 case RX_Byte
: /* undefined extension */
530 mem_put_qi (addr
, v
);
533 case RX_Word
: /* undefined extension */
536 mem_put_hi (addr
, v
);
540 mem_put_psi (addr
, v
);
544 mem_put_si (addr
, v
);
548 if (o
->type
== RX_Operand_Postinc
)
549 put_reg (o
->reg
, get_reg (o
->reg
) + size2bytes
[o
->size
]);
553 case RX_Operand_Flag
: /* [UIOSZC] */
555 regs
.r_psw
|= (1 << o
->reg
);
557 regs
.r_psw
&= ~(1 << o
->reg
);
/* Operand accessors: index 0 is the destination, 1 the source, 2 the
   second source.  */
#define PD(x) put_op (&opcode, 0, x)
#define PS(x) put_op (&opcode, 1, x)
#define PS2(x) put_op (&opcode, 2, x)
#define GD() get_op (&opcode, 0)
#define GS() get_op (&opcode, 1)
#define GS2() get_op (&opcode, 2)
/* Operand sizes in bytes.  Fix: SSZ() and S2SZ() previously read
   op[0]'s size (a copy-paste of DSZ()); they must read op[1] and
   op[2] respectively or any insn with a differently-sized source
   would use the destination's size.  */
#define DSZ() size2bytes[opcode.op[0].size]
#define SSZ() size2bytes[opcode.op[1].size]
#define S2SZ() size2bytes[opcode.op[2].size]

/* "Universal" sources: for two-operand forms (no op[2]) the
   destination doubles as the first source.  */
#define US1() ((opcode.op[2].type == RX_Operand_None) ? GD() : GS())
#define US2() ((opcode.op[2].type == RX_Operand_None) ? GS() : GS2())
579 int rsp
= get_reg (sp
);
582 mem_put_si (rsp
, val
);
585 /* Just like the above, but tag the memory as "pushed pc" so if anyone
586 tries to write to it, it will cause an error. */
590 int rsp
= get_reg (sp
);
593 mem_put_si (rsp
, val
);
594 mem_set_content_range (rsp
, rsp
+3, MC_PUSHED_PC
);
601 int rsp
= get_reg (sp
);
602 rv
= mem_get_si (rsp
);
612 int rsp
= get_reg (sp
);
613 if (mem_get_content_type (rsp
) != MC_PUSHED_PC
)
614 execution_error (SIM_ERR_CORRUPT_STACK
, rsp
);
615 rv
= mem_get_si (rsp
);
616 mem_set_content_range (rsp
, rsp
+3, MC_UNINIT
);
622 #define MATH_OP(vop,c) \
626 ll = (unsigned long long) uma vop (unsigned long long) umb vop c; \
627 tprintf ("0x%x " #vop " 0x%x " #vop " 0x%x = 0x%llx\n", uma, umb, c, ll); \
628 ma = sign_ext (uma, DSZ() * 8); \
629 mb = sign_ext (umb, DSZ() * 8); \
630 sll = (long long) ma vop (long long) mb vop c; \
631 tprintf ("%d " #vop " %d " #vop " %d = %lld\n", ma, mb, c, sll); \
632 set_oszc (sll, DSZ(), (long long) ll > ((1 vop 1) ? (long long) b2mask[DSZ()] : (long long) -1)); \
637 #define LOGIC_OP(vop) \
642 tprintf("0x%x " #vop " 0x%x = 0x%x\n", ma, mb, v); \
648 #define SHIFT_OP(val, type, count, OP, carry_mask) \
653 tprintf("%lld " #OP " %d\n", val, count); \
654 for (i = 0; i < count; i ++) \
656 c = val & carry_mask; \
660 set_oszc (val, 4, c); \
686 fop_fadd (fp_t s1
, fp_t s2
, fp_t
*d
)
688 *d
= rxfp_add (s1
, s2
);
693 fop_fmul (fp_t s1
, fp_t s2
, fp_t
*d
)
695 *d
= rxfp_mul (s1
, s2
);
700 fop_fdiv (fp_t s1
, fp_t s2
, fp_t
*d
)
702 *d
= rxfp_div (s1
, s2
);
707 fop_fsub (fp_t s1
, fp_t s2
, fp_t
*d
)
709 *d
= rxfp_sub (s1
, s2
);
713 #define FPPENDING() (regs.r_fpsw & (FPSWBITS_CE | (FPSWBITS_FMASK & (regs.r_fpsw << FPSW_EFSH))))
714 #define FPCLEAR() regs.r_fpsw &= FPSWBITS_CLEAR
717 return do_fp_exception (opcode_pc)
719 #define FLOAT_OP(func) \
726 do_store = fop_##func (fa, fb, &fc); \
727 tprintf("%g " #func " %g = %g %08x\n", int2float(fa), int2float(fb), int2float(fc), fc); \
732 if ((fc & 0x80000000UL) != 0) \
734 if ((fc & 0x7fffffffUL) == 0) \
736 set_flags (FLAGBIT_S | FLAGBIT_Z, mb); \
739 #define carry (FLAG_C ? 1 : 0)
745 } exception_info
[] = {
746 { 0xFFFFFFD0UL
, "priviledged opcode", SIGILL
},
747 { 0xFFFFFFD4UL
, "access violation", SIGSEGV
},
748 { 0xFFFFFFDCUL
, "undefined opcode", SIGILL
},
749 { 0xFFFFFFE4UL
, "floating point", SIGFPE
}
751 #define EX_PRIVILEDGED 0
753 #define EX_UNDEFINED 2
754 #define EX_FLOATING 3
755 #define EXCEPTION(n) \
756 return generate_exception (n, opcode_pc)
758 #define PRIVILEDGED() \
760 EXCEPTION (EX_PRIVILEDGED)
763 generate_exception (unsigned long type
, SI opcode_pc
)
765 SI old_psw
, old_pc
, new_pc
;
767 new_pc
= mem_get_si (exception_info
[type
].vaddr
);
768 /* 0x00020000 is the value used to initialise the known
769 exception vectors (see rx.ld), but it is a reserved
770 area of memory so do not try to access it, and if the
771 value has not been changed by the program then the
772 vector has not been installed. */
773 if (new_pc
== 0 || new_pc
== 0x00020000)
776 return RX_MAKE_STOPPED (exception_info
[type
].signal
);
778 fprintf(stderr
, "Unhandled %s exception at pc = %#lx\n",
779 exception_info
[type
].str
, (unsigned long) opcode_pc
);
780 if (type
== EX_FLOATING
)
782 int mask
= FPPENDING ();
783 fprintf (stderr
, "Pending FP exceptions:");
784 if (mask
& FPSWBITS_FV
)
785 fprintf(stderr
, " Invalid");
786 if (mask
& FPSWBITS_FO
)
787 fprintf(stderr
, " Overflow");
788 if (mask
& FPSWBITS_FZ
)
789 fprintf(stderr
, " Division-by-zero");
790 if (mask
& FPSWBITS_FU
)
791 fprintf(stderr
, " Underflow");
792 if (mask
& FPSWBITS_FX
)
793 fprintf(stderr
, " Inexact");
794 if (mask
& FPSWBITS_CE
)
795 fprintf(stderr
, " Unimplemented");
796 fprintf(stderr
, "\n");
798 return RX_MAKE_EXITED (1);
801 tprintf ("Triggering %s exception\n", exception_info
[type
].str
);
803 old_psw
= regs
.r_psw
;
804 regs
.r_psw
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
809 return RX_MAKE_STEPPED ();
813 generate_access_exception (void)
817 rv
= generate_exception (EX_ACCESS
, regs
.r_pc
);
819 longjmp (decode_jmp_buf
, rv
);
823 do_fp_exception (unsigned long opcode_pc
)
826 EXCEPTION (EX_FLOATING
);
827 return RX_MAKE_STEPPED ();
831 op_is_memory (RX_Opcode_Decoded
*rd
, int i
)
833 switch (rd
->op
[i
].type
)
835 case RX_Operand_Predec
:
836 case RX_Operand_Postinc
:
837 case RX_Operand_Indirect
:
843 #define OM(i) op_is_memory (&opcode, i)
848 unsigned int uma
=0, umb
=0;
851 unsigned long long ll
;
853 unsigned long opcode_pc
;
855 RX_Opcode_Decoded opcode
;
858 unsigned long long prev_cycle_count
;
860 #ifdef CYCLE_ACCURATE
864 if ((rv
= setjmp (decode_jmp_buf
)))
868 prev_cycle_count
= regs
.cycle_count
;
871 #ifdef CYCLE_ACCURATE
878 rx_data
.dpc
= opcode_pc
= regs
.r_pc
;
879 memset (&opcode
, 0, sizeof(opcode
));
880 opcode_size
= rx_decode_opcode (opcode_pc
, &opcode
, rx_get_byte
, &rx_data
);
882 #ifdef CYCLE_ACCURATE
883 if (branch_alignment_penalty
)
885 if ((regs
.r_pc
^ (regs
.r_pc
+ opcode_size
- 1)) & ~7)
887 tprintf("1 cycle branch alignment penalty\n");
888 cycles (branch_alignment_penalty
);
890 branch_alignment_stalls
++;
893 branch_alignment_penalty
= 0;
897 regs
.r_pc
+= opcode_size
;
899 rx_flagmask
= opcode
.flags_s
;
900 rx_flagand
= ~(int)opcode
.flags_0
;
901 rx_flagor
= opcode
.flags_1
;
907 tprintf("|%lld| = ", sll
);
910 tprintf("%lld\n", sll
);
931 if (opcode
.op
[0].type
== RX_Operand_Register
)
943 if (opcode
.op
[0].type
== RX_Operand_Register
)
958 if (opcode
.op
[0].type
== RX_Operand_Register
)
970 #ifdef CYCLE_ACCURATE
971 SI old_pc
= regs
.r_pc
;
975 #ifdef CYCLE_ACCURATE
976 delta
= regs
.r_pc
- old_pc
;
977 if (delta
>= 0 && delta
< 16
980 tprintf("near forward branch bonus\n");
986 branch_alignment_penalty
= 1;
990 /* This is just for statistics */
991 if (opcode
.op
[1].reg
== 14)
992 opcode
.op
[1].type
= RX_Operand_None
;
996 #ifdef CYCLE_ACCURATE
1007 #ifdef CYCLE_ACCURATE
1008 /* Note: specs say 3, chip says 2. */
1009 if (delta
>= 0 && delta
< 16
1012 tprintf("near forward branch bonus\n");
1018 branch_alignment_penalty
= 1;
1025 #ifdef CYCLE_ACCURATE
1033 int old_psw
= regs
.r_psw
;
1035 return RX_MAKE_HIT_BREAK ();
1036 if (regs
.r_intb
== 0)
1038 tprintf("BREAK hit, no vector table.\n");
1039 return RX_MAKE_EXITED(1);
1041 regs
.r_psw
&= ~(FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
1044 regs
.r_pc
= mem_get_si (regs
.r_intb
);
1052 if (opcode
.op
[0].type
== RX_Operand_Register
)
1064 if (opcode
.op
[1].type
== RX_Operand_Register
)
1068 umb
= ma
& (1 << mb
);
1069 set_zc (! umb
, umb
);
1074 v
= 1 << opcode
.op
[0].reg
;
1083 case RXO_div
: /* d = d / s */
1086 tprintf("%d / %d = ", mb
, ma
);
1087 if (ma
== 0 || (ma
== -1 && (unsigned int) mb
== 0x80000000))
1090 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1096 set_flags (FLAGBIT_O
, 0);
1099 /* Note: spec says 3 to 22 cycles, we are pessimistic. */
1103 case RXO_divu
: /* d = d / s */
1106 tprintf("%u / %u = ", umb
, uma
);
1110 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1116 set_flags (FLAGBIT_O
, 0);
1119 /* Note: spec says 2 to 20 cycles, we are pessimistic. */
1126 tprintf("%d / %d = ", mb
, ma
);
1127 if (ma
== 0 || (ma
== -1 && (unsigned int) mb
== 0x80000000))
1130 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1136 tprintf("%d, rem %d\n", v
, mb
);
1137 set_flags (FLAGBIT_O
, 0);
1139 opcode
.op
[0].reg
++;
1142 /* Note: spec says 3 to 22 cycles, we are pessimistic. */
1149 tprintf("%u / %u = ", umb
, uma
);
1153 set_flags (FLAGBIT_O
, FLAGBIT_O
);
1159 tprintf("%u, rem %u\n", v
, umb
);
1160 set_flags (FLAGBIT_O
, 0);
1162 opcode
.op
[0].reg
++;
1165 /* Note: spec says 2 to 20 cycles, we are pessimistic. */
1172 sll
= (long long)ma
* (long long)mb
;
1173 tprintf("%d * %d = %lld\n", ma
, mb
, sll
);
1175 opcode
.op
[0].reg
++;
1183 ll
= (long long)uma
* (long long)umb
;
1184 tprintf("%#x * %#x = %#llx\n", uma
, umb
, ll
);
1186 opcode
.op
[0].reg
++;
1217 regs
.r_psw
= regs
.r_bpsw
;
1218 regs
.r_pc
= regs
.r_bpc
;
1219 #ifdef CYCLE_ACCURATE
1220 regs
.fast_return
= 0;
1233 mb
= rxfp_ftoi (ma
, FPRM_ZERO
);
1236 tprintf("(int) %g = %d\n", int2float(ma
), mb
);
1245 return rx_syscall (regs
.r
[5]);
1249 int old_psw
= regs
.r_psw
;
1250 regs
.r_psw
&= ~(FLAGBIT_I
| FLAGBIT_U
| FLAGBIT_PM
);
1253 regs
.r_pc
= mem_get_si (regs
.r_intb
+ 4 * v
);
1261 mb
= rxfp_itof (ma
, regs
.r_fpsw
);
1263 tprintf("(float) %d = %x\n", ma
, mb
);
1272 #ifdef CYCLE_ACCURATE
1277 #ifdef CYCLE_ACCURATE
1278 regs
.link_register
= regs
.r_pc
;
1280 pushpc (get_reg (pc
));
1281 if (opcode
.id
== RXO_jsrrel
)
1283 #ifdef CYCLE_ACCURATE
1284 delta
= v
- regs
.r_pc
;
1287 #ifdef CYCLE_ACCURATE
1288 /* Note: docs say 3, chip says 2 */
1289 if (delta
>= 0 && delta
< 16)
1291 tprintf ("near forward jsr bonus\n");
1296 branch_alignment_penalty
= 1;
1299 regs
.fast_return
= 1;
1305 ll
= (long long)(signed short)(GS() >> 16) * (long long)(signed short)(GS2 () >> 16);
1307 put_reg64 (acc64
, ll
+ regs
.r_acc
);
1312 ll
= (long long)(signed short)(GS()) * (long long)(signed short)(GS2 ());
1314 put_reg64 (acc64
, ll
+ regs
.r_acc
);
1327 if (opcode
.op
[0].type
== RX_Operand_Register
1328 && opcode
.op
[1].type
== RX_Operand_Register
1329 && opcode
.op
[0].reg
== opcode
.op
[1].reg
)
1330 opcode
.id
= RXO_nop3
;
1347 if (opcode
.op
[0].type
== RX_Operand_Register
1348 && opcode
.op
[0].reg
== 16 /* PSW */)
1350 /* Special case, LDC and POPC can't ever modify PM. */
1351 int pm
= regs
.r_psw
& FLAGBIT_PM
;
1356 v
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1362 /* various things can't be changed in user mode. */
1363 if (opcode
.op
[0].type
== RX_Operand_Register
)
1364 if (opcode
.op
[0].reg
== 32)
1366 v
&= ~ (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1367 v
|= regs
.r_psw
& (FLAGBIT_I
| FLAGBIT_U
| FLAGBITS_IPL
);
1369 if (opcode
.op
[0].reg
== 34 /* ISP */
1370 || opcode
.op
[0].reg
== 37 /* BPSW */
1371 || opcode
.op
[0].reg
== 39 /* INTB */
1372 || opcode
.op
[0].reg
== 38 /* VCT */)
1373 /* These are ignored. */
1383 #ifdef CYCLE_ACCURATE
1384 if ((opcode
.op
[0].type
== RX_Operand_Predec
1385 && opcode
.op
[1].type
== RX_Operand_Register
)
1386 || (opcode
.op
[0].type
== RX_Operand_Postinc
1387 && opcode
.op
[1].type
== RX_Operand_Register
))
1389 /* Special case: push reg doesn't cause a memory stall. */
1391 tprintf("push special case\n");
1397 if (opcode
.op
[0].type
== RX_Operand_Register
1398 && opcode
.op
[1].type
== RX_Operand_Register
1399 && opcode
.op
[0].reg
== opcode
.op
[1].reg
)
1400 opcode
.id
= RXO_nop2
;
1405 /* We cheat to save on code duplication. */
1406 regs
.r_temp
= (get_reg (opcode
.op
[1].reg
) * size2bytes
[opcode
.size
]
1407 + get_reg (opcode
.op
[2].reg
));
1408 opcode
.op
[1].reg
= r_temp_idx
;
1409 opcode
.op
[1].type
= RX_Operand_Indirect
;
1410 opcode
.op
[1].addend
= 0;
1416 /* We cheat to save on code duplication. */
1417 regs
.r_temp
= (get_reg (opcode
.op
[1].reg
) * size2bytes
[opcode
.size
]
1418 + get_reg (opcode
.op
[2].reg
));
1419 opcode
.op
[1].reg
= r_temp_idx
;
1420 opcode
.op
[1].type
= RX_Operand_Indirect
;
1421 opcode
.op
[1].addend
= 0;
1428 ll
= (unsigned long long) US1() * (unsigned long long) v
;
1435 ll
= (long long)(signed short)(GS() >> 16) * (long long)(signed short)(v
>> 16);
1437 put_reg64 (acc64
, ll
);
1443 ll
= (long long)(signed short)(GS()) * (long long)(signed short)(v
);
1445 put_reg64 (acc64
, ll
);
1450 PD (get_reg (acchi
));
1455 PD (get_reg (acclo
));
1460 PD (get_reg (accmi
));
1465 put_reg (acchi
, GS ());
1470 put_reg (acclo
, GS ());
1475 regs
.r_psw
&= ~ FLAGBITS_IPL
;
1476 regs
.r_psw
|= (GS () << FLAGSHIFT_IPL
) & FLAGBITS_IPL
;
1489 /* POPM cannot pop R0 (sp). */
1490 if (opcode
.op
[1].reg
== 0 || opcode
.op
[2].reg
== 0)
1491 EXCEPTION (EX_UNDEFINED
);
1492 if (opcode
.op
[1].reg
>= opcode
.op
[2].reg
)
1494 regs
.r_pc
= opcode_pc
;
1495 return RX_MAKE_STOPPED (SIGILL
);
1497 for (v
= opcode
.op
[1].reg
; v
<= opcode
.op
[2].reg
; v
++)
1501 put_reg (v
, pop ());
1506 /* PUSHM cannot push R0 (sp). */
1507 if (opcode
.op
[1].reg
== 0 || opcode
.op
[2].reg
== 0)
1508 EXCEPTION (EX_UNDEFINED
);
1509 if (opcode
.op
[1].reg
>= opcode
.op
[2].reg
)
1511 regs
.r_pc
= opcode_pc
;
1512 return RX_MAKE_STOPPED (SIGILL
);
1514 for (v
= opcode
.op
[2].reg
; v
>= opcode
.op
[1].reg
; v
--)
1519 cycles (opcode
.op
[2].reg
- opcode
.op
[1].reg
+ 1);
1523 ll
= get_reg64 (acc64
) << GS ();
1524 ll
+= 0x80000000ULL
;
1525 if ((signed long long)ll
> (signed long long)0x00007fff00000000ULL
)
1526 ll
= 0x00007fff00000000ULL
;
1527 else if ((signed long long)ll
< (signed long long)0xffff800000000000ULL
)
1528 ll
= 0xffff800000000000ULL
;
1530 ll
&= 0xffffffff00000000ULL
;
1531 put_reg64 (acc64
, ll
);
1537 regs
.r_pc
= poppc ();
1538 regs
.r_psw
= poppc ();
1540 regs
.r_psw
|= FLAGBIT_U
;
1541 #ifdef CYCLE_ACCURATE
1542 regs
.fast_return
= 0;
1549 umb
= (((uma
>> 24) & 0xff)
1550 | ((uma
>> 8) & 0xff00)
1551 | ((uma
<< 8) & 0xff0000)
1552 | ((uma
<< 24) & 0xff000000UL
));
1559 umb
= (((uma
>> 8) & 0x00ff00ff)
1560 | ((uma
<< 8) & 0xff00ff00UL
));
1568 #ifdef CYCLE_ACCURATE
1572 while (regs
.r
[3] != 0)
1576 switch (opcode
.size
)
1579 ma
= mem_get_si (regs
.r
[1]);
1580 mb
= mem_get_si (regs
.r
[2]);
1585 ma
= sign_ext (mem_get_hi (regs
.r
[1]), 16);
1586 mb
= sign_ext (mem_get_hi (regs
.r
[2]), 16);
1591 ma
= sign_ext (mem_get_qi (regs
.r
[1]), 8);
1592 mb
= sign_ext (mem_get_qi (regs
.r
[2]), 8);
1599 /* We do the multiply as a signed value. */
1600 sll
= (long long)ma
* (long long)mb
;
1601 tprintf(" %016llx = %d * %d\n", sll
, ma
, mb
);
1602 /* but we do the sum as unsigned, while sign extending the operands. */
1603 tmp
= regs
.r
[4] + (sll
& 0xffffffffUL
);
1604 regs
.r
[4] = tmp
& 0xffffffffUL
;
1607 tmp
+= regs
.r
[5] + (sll
& 0xffffffffUL
);
1608 regs
.r
[5] = tmp
& 0xffffffffUL
;
1611 tmp
+= regs
.r
[6] + (sll
& 0xffffffffUL
);
1612 regs
.r
[6] = tmp
& 0xffffffffUL
;
1613 tprintf("%08lx\033[36m%08lx\033[0m%08lx\n",
1614 (unsigned long) regs
.r
[6],
1615 (unsigned long) regs
.r
[5],
1616 (unsigned long) regs
.r
[4]);
1620 if (regs
.r
[6] & 0x00008000)
1621 regs
.r
[6] |= 0xffff0000UL
;
1623 regs
.r
[6] &= 0x0000ffff;
1624 ma
= (regs
.r
[6] & 0x80000000UL
) ? FLAGBIT_S
: 0;
1625 if (regs
.r
[6] != 0 && regs
.r
[6] != 0xffffffffUL
)
1626 set_flags (FLAGBIT_O
|FLAGBIT_S
, ma
| FLAGBIT_O
);
1628 set_flags (FLAGBIT_O
|FLAGBIT_S
, ma
);
1629 #ifdef CYCLE_ACCURATE
1630 switch (opcode
.size
)
1633 cycles (6 + 4 * tx
);
1636 cycles (6 + 5 * (tx
/ 2) + 4 * (tx
% 2));
1639 cycles (6 + 7 * (tx
/ 4) + 4 * (tx
% 4));
1649 ma
= v
& 0x80000000UL
;
1661 uma
|= (carry
? 0x80000000UL
: 0);
1662 set_szc (uma
, 4, mb
);
1672 uma
= (uma
<< mb
) | (uma
>> (32-mb
));
1675 set_szc (uma
, 4, mb
);
1685 uma
= (uma
>> mb
) | (uma
<< (32-mb
));
1686 mb
= uma
& 0x80000000;
1688 set_szc (uma
, 4, mb
);
1696 mb
= rxfp_ftoi (ma
, regs
.r_fpsw
);
1699 tprintf("(int) %g = %d\n", int2float(ma
), mb
);
1706 #ifdef CYCLE_ACCURATE
1709 regs
.r_pc
= poppc ();
1710 #ifdef CYCLE_ACCURATE
1711 /* Note: specs say 5, chip says 3. */
1712 if (regs
.fast_return
&& regs
.link_register
== regs
.r_pc
)
1717 tprintf("fast return bonus\n");
1721 regs
.fast_return
= 0;
1722 branch_alignment_penalty
= 1;
1728 if (opcode
.op
[2].type
== RX_Operand_Register
)
1731 /* RTSD cannot pop R0 (sp). */
1732 put_reg (0, get_reg (0) + GS() - (opcode
.op
[0].reg
-opcode
.op
[2].reg
+1)*4);
1733 if (opcode
.op
[2].reg
== 0)
1734 EXCEPTION (EX_UNDEFINED
);
1735 #ifdef CYCLE_ACCURATE
1736 tx
= opcode
.op
[0].reg
- opcode
.op
[2].reg
+ 1;
1738 for (i
= opcode
.op
[2].reg
; i
<= opcode
.op
[0].reg
; i
++)
1741 put_reg (i
, pop ());
1746 #ifdef CYCLE_ACCURATE
1749 put_reg (0, get_reg (0) + GS());
1751 put_reg (pc
, poppc());
1752 #ifdef CYCLE_ACCURATE
1753 if (regs
.fast_return
&& regs
.link_register
== regs
.r_pc
)
1755 tprintf("fast return bonus\n");
1759 cycles (tx
< 3 ? 3 : tx
+ 1);
1763 cycles (tx
< 5 ? 5 : tx
+ 1);
1765 regs
.fast_return
= 0;
1766 branch_alignment_penalty
= 1;
1771 if (FLAG_O
&& FLAG_S
)
1773 else if (FLAG_O
&& ! FLAG_S
)
1779 MATH_OP (-, ! carry
);
1791 #ifdef CYCLE_ACCURATE
1794 while (regs
.r
[3] != 0)
1796 uma
= mem_get_qi (regs
.r
[1] ++);
1797 umb
= mem_get_qi (regs
.r
[2] ++);
1799 if (uma
!= umb
|| uma
== 0)
1805 set_zc (0, ((int)uma
- (int)umb
) >= 0);
1806 cycles (2 + 4 * (tx
/ 4) + 4 * (tx
% 4));
1810 v
= 1 << opcode
.op
[0].reg
;
1821 #ifdef CYCLE_ACCURATE
1826 uma
= mem_get_qi (regs
.r
[2] --);
1827 mem_put_qi (regs
.r
[1]--, uma
);
1830 #ifdef CYCLE_ACCURATE
1832 cycles (6 + 3 * (tx
/ 4) + 3 * (tx
% 4));
1834 cycles (2 + 3 * (tx
% 4));
1840 #ifdef CYCLE_ACCURATE
1845 uma
= mem_get_qi (regs
.r
[2] ++);
1846 mem_put_qi (regs
.r
[1]++, uma
);
1849 cycles (2 + 3 * (int)(tx
/ 4) + 3 * (tx
% 4));
1853 while (regs
.r
[3] != 0)
1855 uma
= mem_get_qi (regs
.r
[2] ++);
1856 mem_put_qi (regs
.r
[1]++, uma
);
1863 case RXO_shar
: /* d = ma >> mb */
1864 SHIFT_OP (sll
, int, mb
, >>=, 1);
1868 case RXO_shll
: /* d = ma << mb */
1869 SHIFT_OP (ll
, int, mb
, <<=, 0x80000000UL
);
1873 case RXO_shlr
: /* d = ma >> mb */
1874 SHIFT_OP (ll
, unsigned int, mb
, >>=, 1);
1880 #ifdef CYCLE_ACCURATE
1883 switch (opcode
.size
)
1886 while (regs
.r
[3] != 0)
1888 mem_put_si (regs
.r
[1], regs
.r
[2]);
1895 while (regs
.r
[3] != 0)
1897 mem_put_hi (regs
.r
[1], regs
.r
[2]);
1901 cycles (2 + (int)(tx
/ 2) + tx
% 2);
1904 while (regs
.r
[3] != 0)
1906 mem_put_qi (regs
.r
[1], regs
.r
[2]);
1910 cycles (2 + (int)(tx
/ 4) + tx
% 4);
1925 regs
.r_psw
|= FLAGBIT_I
;
1926 return RX_MAKE_STOPPED(0);
1934 #ifdef CYCLE_ACCURATE
1942 switch (opcode
.size
)
1946 while (regs
.r
[3] != 0)
1949 umb
= mem_get_si (get_reg (1));
1954 cycles (3 + 3 * tx
);
1957 uma
= get_reg (2) & 0xffff;
1958 while (regs
.r
[3] != 0)
1961 umb
= mem_get_hi (get_reg (1));
1966 cycles (3 + 3 * (tx
/ 2) + 3 * (tx
% 2));
1969 uma
= get_reg (2) & 0xff;
1970 while (regs
.r
[3] != 0)
1973 umb
= mem_get_qi (regs
.r
[1]);
1978 cycles (3 + 3 * (tx
/ 4) + 3 * (tx
% 4));
1986 set_zc (0, ((int)uma
- (int)umb
) >= 0);
1991 #ifdef CYCLE_ACCURATE
1996 switch (opcode
.size
)
2000 while (regs
.r
[3] != 0)
2003 umb
= mem_get_si (get_reg (1));
2008 cycles (3 + 3 * tx
);
2011 uma
= get_reg (2) & 0xffff;
2012 while (regs
.r
[3] != 0)
2015 umb
= mem_get_hi (get_reg (1));
2020 cycles (3 + 3 * (tx
/ 2) + 3 * (tx
% 2));
2023 uma
= get_reg (2) & 0xff;
2024 while (regs
.r
[3] != 0)
2027 umb
= mem_get_qi (regs
.r
[1]);
2032 cycles (3 + 3 * (tx
/ 4) + 3 * (tx
% 4));
2040 set_zc (0, ((int)uma
- (int)umb
) >= 0);
2045 regs
.r_psw
|= FLAGBIT_I
;
2046 return RX_MAKE_STOPPED(0);
2049 #ifdef CYCLE_ACCURATE
2052 v
= GS (); /* This is the memory operand, if any. */
2053 PS (GD ()); /* and this may change the address register. */
2056 #ifdef CYCLE_ACCURATE
2057 /* all M cycles happen during xchg's cycles. */
2068 EXCEPTION (EX_UNDEFINED
);
2071 #ifdef CYCLE_ACCURATE
2074 regs
.m2m
|= M2M_SRC
;
2076 regs
.m2m
|= M2M_DST
;
2083 if (prev_cycle_count
== regs
.cycle_count
)
2085 printf("Cycle count not updated! id %s\n", id_names
[opcode
.id
]);
2091 if (running_benchmark
)
2093 int omap
= op_lookup (opcode
.op
[0].type
, opcode
.op
[1].type
, opcode
.op
[2].type
);
2096 cycles_per_id
[opcode
.id
][omap
] += regs
.cycle_count
- prev_cycle_count
;
2097 times_per_id
[opcode
.id
][omap
] ++;
2099 times_per_pair
[prev_opcode_id
][po0
][opcode
.id
][omap
] ++;
2101 prev_opcode_id
= opcode
.id
;
2106 return RX_MAKE_STEPPED ();
2111 reset_pipeline_stats (void)
2113 memset (cycles_per_id
, 0, sizeof(cycles_per_id
));
2114 memset (times_per_id
, 0, sizeof(times_per_id
));
2116 register_stalls
= 0;
2118 branch_alignment_stalls
= 0;
2120 memset (times_per_pair
, 0, sizeof(times_per_pair
));
2121 running_benchmark
= 1;
2123 benchmark_start_cycle
= regs
.cycle_count
;
2127 halt_pipeline_stats (void)
2129 running_benchmark
= 0;
2130 benchmark_end_cycle
= regs
.cycle_count
;
2135 pipeline_stats (void)
2142 #ifdef CYCLE_ACCURATE
2145 printf ("cycles: %llu\n", regs
.cycle_count
);
2149 printf ("cycles: %13s\n", comma (regs
.cycle_count
));
2153 if (benchmark_start_cycle
)
2154 printf ("bmark: %13s\n", comma (benchmark_end_cycle
- benchmark_start_cycle
));
2157 for (i
= 0; i
< N_RXO
; i
++)
2158 for (o1
= 0; o1
< N_MAP
; o1
++)
2159 if (times_per_id
[i
][o1
])
2160 printf("%13s %13s %7.2f %s %s\n",
2161 comma (cycles_per_id
[i
][o1
]),
2162 comma (times_per_id
[i
][o1
]),
2163 (double)cycles_per_id
[i
][o1
] / times_per_id
[i
][o1
],
2164 op_cache_string(o1
),
2168 for (p
= 0; p
< N_RXO
; p
++)
2169 for (p1
= 0; p1
< N_MAP
; p1
++)
2170 for (i
= 0; i
< N_RXO
; i
++)
2171 for (o1
= 0; o1
< N_MAP
; o1
++)
2172 if (times_per_pair
[p
][p1
][i
][o1
])
2174 printf("%13s %s %-9s -> %s %s\n",
2175 comma (times_per_pair
[p
][p1
][i
][o1
]),
2176 op_cache_string(p1
),
2178 op_cache_string(o1
),
2183 printf("%13s memory stalls\n", comma (memory_stalls
));
2184 printf("%13s register stalls\n", comma (register_stalls
));
2185 printf("%13s branches taken (non-return)\n", comma (branch_stalls
));
2186 printf("%13s branch alignment stalls\n", comma (branch_alignment_stalls
));
2187 printf("%13s fast returns\n", comma (fast_returns
));