/* Simulation code for the MIPS MDMX ASE.
   Copyright (C) 2002-2021 Free Software Foundation, Inc.
   Contributed by Ed Satterthwaite and Chris Demetriou, of Broadcom
   Corporation (SiByte).

   This file is part of GDB, the GNU debugger.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
21 /* This must come before any other includes. */
/* Within mdmx.c we refer to the sim_cpu directly.  SD expands in terms
   of the CPU macro, which must be defined wherever SD is used.  */
#define SD (CPU_STATE(CPU))

/* XXX FIXME: temporary hack while the impact of making unpredictable()
   a "normal" (non-igen) function is evaluated.  The expansion refers to
   the identifiers `cpu' and `cia', so both must be in scope at the point
   of use.  */
#define Unpredictable() unpredictable_action (cpu, cia)
/* MDMX Representations

   An 8-bit packed byte element (OB) is always unsigned.
   The 24-bit accumulators are signed and are represented as 32-bit
   signed values, which are reduced to 24-bit signed values prior to
   Round and Clamp operations.

   A 16-bit packed halfword element (QH) is always signed.
   The 48-bit accumulators are signed and are represented as 64-bit
   signed values, which are reduced to 48-bit signed values prior to
   Round and Clamp operations.

   The code below assumes a 2's-complement representation of signed
   quantities.  Care is required to clear extended sign bits when
   repacking fields.

   The code (and the code for arithmetic shifts in mips.igen) also makes
   the (not guaranteed portable) assumption that right shifts of signed
   quantities in C do sign extension.  */
57 typedef unsigned64 unsigned48
;
58 #define MASK48 (UNSIGNED64 (0xffffffffffff))
60 typedef unsigned32 unsigned24
;
61 #define MASK24 (UNSIGNED32 (0xffffff))
64 mdmx_ob
, /* OB (octal byte) */
65 mdmx_qh
/* QH (quad half-word) */
69 sel_elem
, /* element select */
70 sel_vect
, /* vector select */
71 sel_imm
/* immediate select */
/* Saturation limits for the packed element types.  QH_MIN is spelled
   (-0x7FFF - 1) so that the constant is already in range before the
   cast; converting 0x8000 to a signed 16-bit type is
   implementation-defined in C.  */
#define OB_MAX  ((unsigned8)0xFF)
#define QH_MIN  ((signed16)(-0x7FFF - 1))
#define QH_MAX  ((signed16)0x7FFF)

/* Saturate X to the OB (upper bound only) or QH (both bounds) range.
   NOTE: X is evaluated more than once, so it must be free of side
   effects.  */
#define OB_CLAMP(x)  ((unsigned8)((x) > OB_MAX ? OB_MAX : (x)))
#define QH_CLAMP(x)  ((signed16)((x) < QH_MIN ? QH_MIN : \
                                 ((x) > QH_MAX ? QH_MAX : (x))))

/* Decode the format/select field.
   MX_FMT: bit 0 clear selects mdmx_ob, bit 0 set selects mdmx_qh.
   MX_VT:  bit 4 clear selects sel_elem; bits 4:3 == 10 select sel_vect;
           any other value selects sel_imm.  */
#define MX_FMT(fmtsel) (((fmtsel) & 0x1) == 0 ? mdmx_ob : mdmx_qh)
#define MX_VT(fmtsel)  (((fmtsel) & 0x10) == 0 ?    sel_elem : \
                       (((fmtsel) & 0x18) == 0x10 ? sel_vect : sel_imm))

/* Extract one element of vector V.  The shift amount is derived from
   FMTSEL: 16 * bits 3:2 for QH, 8 * bits 3:1 for OB.  */
#define QH_ELEM(v,fmtsel) \
        ((signed16)(((v) >> (((fmtsel) & 0xC) << 2)) & 0xFFFF))
#define OB_ELEM(v,fmtsel) \
        ((unsigned8)(((v) >> (((fmtsel) & 0xE) << 2)) & 0xFF))
92 typedef signed16 (*QH_FUNC
)(signed16
, signed16
);
93 typedef unsigned8 (*OB_FUNC
)(unsigned8
, unsigned8
);
95 /* vectorized logical operators */
98 AndQH(signed16 ts
, signed16 tt
)
100 return (signed16
)((unsigned16
)ts
& (unsigned16
)tt
);
104 AndOB(unsigned8 ts
, unsigned8 tt
)
110 NorQH(signed16 ts
, signed16 tt
)
112 return (signed16
)(((unsigned16
)ts
| (unsigned16
)tt
) ^ 0xFFFF);
116 NorOB(unsigned8 ts
, unsigned8 tt
)
118 return (ts
| tt
) ^ 0xFF;
122 OrQH(signed16 ts
, signed16 tt
)
124 return (signed16
)((unsigned16
)ts
| (unsigned16
)tt
);
128 OrOB(unsigned8 ts
, unsigned8 tt
)
134 XorQH(signed16 ts
, signed16 tt
)
136 return (signed16
)((unsigned16
)ts
^ (unsigned16
)tt
);
140 XorOB(unsigned8 ts
, unsigned8 tt
)
146 SLLQH(signed16 ts
, signed16 tt
)
148 unsigned32 s
= (unsigned32
)tt
& 0xF;
149 return (signed16
)(((unsigned32
)ts
<< s
) & 0xFFFF);
153 SLLOB(unsigned8 ts
, unsigned8 tt
)
155 unsigned32 s
= tt
& 0x7;
156 return (ts
<< s
) & 0xFF;
160 SRLQH(signed16 ts
, signed16 tt
)
162 unsigned32 s
= (unsigned32
)tt
& 0xF;
163 return (signed16
)((unsigned16
)ts
>> s
);
167 SRLOB(unsigned8 ts
, unsigned8 tt
)
169 unsigned32 s
= tt
& 0x7;
174 /* Vectorized arithmetic operators. */
177 AddQH(signed16 ts
, signed16 tt
)
179 signed32 t
= (signed32
)ts
+ (signed32
)tt
;
184 AddOB(unsigned8 ts
, unsigned8 tt
)
186 unsigned32 t
= (unsigned32
)ts
+ (unsigned32
)tt
;
191 SubQH(signed16 ts
, signed16 tt
)
193 signed32 t
= (signed32
)ts
- (signed32
)tt
;
198 SubOB(unsigned8 ts
, unsigned8 tt
)
201 t
= (signed32
)ts
- (signed32
)tt
;
208 MinQH(signed16 ts
, signed16 tt
)
210 return (ts
< tt
? ts
: tt
);
214 MinOB(unsigned8 ts
, unsigned8 tt
)
216 return (ts
< tt
? ts
: tt
);
220 MaxQH(signed16 ts
, signed16 tt
)
222 return (ts
> tt
? ts
: tt
);
226 MaxOB(unsigned8 ts
, unsigned8 tt
)
228 return (ts
> tt
? ts
: tt
);
232 MulQH(signed16 ts
, signed16 tt
)
234 signed32 t
= (signed32
)ts
* (signed32
)tt
;
239 MulOB(unsigned8 ts
, unsigned8 tt
)
241 unsigned32 t
= (unsigned32
)ts
* (unsigned32
)tt
;
245 /* "msgn" and "sra" are defined only for QH format. */
248 MsgnQH(signed16 ts
, signed16 tt
)
252 t
= (tt
== QH_MIN
? QH_MAX
: -tt
);
261 SRAQH(signed16 ts
, signed16 tt
)
263 unsigned32 s
= (unsigned32
)tt
& 0xF;
264 return (signed16
)((signed32
)ts
>> s
);
268 /* "pabsdiff" and "pavg" are defined only for OB format. */
271 AbsDiffOB(unsigned8 ts
, unsigned8 tt
)
273 return (ts
>= tt
? ts
- tt
: tt
- ts
);
277 AvgOB(unsigned8 ts
, unsigned8 tt
)
279 return ((unsigned32
)ts
+ (unsigned32
)tt
+ 1) >> 1;
283 /* Dispatch tables for operations that update a CPR. */
285 static const QH_FUNC qh_func
[] = {
286 AndQH
, NorQH
, OrQH
, XorQH
, SLLQH
, SRLQH
,
287 AddQH
, SubQH
, MinQH
, MaxQH
,
288 MulQH
, MsgnQH
, SRAQH
, NULL
, NULL
291 static const OB_FUNC ob_func
[] = {
292 AndOB
, NorOB
, OrOB
, XorOB
, SLLOB
, SRLOB
,
293 AddOB
, SubOB
, MinOB
, MaxOB
,
294 MulOB
, NULL
, NULL
, AbsDiffOB
, AvgOB
297 /* Auxiliary functions for CPR updates. */
299 /* Vector mapping for QH format. */
301 qh_vector_op(unsigned64 v1
, unsigned64 v2
, QH_FUNC func
)
303 unsigned64 result
= 0;
307 for (i
= 0; i
< 64; i
+= 16)
309 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
310 h2
= (signed16
)(v2
& 0xFFFF); v2
>>= 16;
312 result
|= ((unsigned64
)((unsigned16
)h
) << i
);
318 qh_map_op(unsigned64 v1
, signed16 h2
, QH_FUNC func
)
320 unsigned64 result
= 0;
324 for (i
= 0; i
< 64; i
+= 16)
326 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
328 result
|= ((unsigned64
)((unsigned16
)h
) << i
);
334 /* Vector operations for OB format. */
337 ob_vector_op(unsigned64 v1
, unsigned64 v2
, OB_FUNC func
)
339 unsigned64 result
= 0;
343 for (i
= 0; i
< 64; i
+= 8)
345 b1
= v1
& 0xFF; v1
>>= 8;
346 b2
= v2
& 0xFF; v2
>>= 8;
348 result
|= ((unsigned64
)b
<< i
);
354 ob_map_op(unsigned64 v1
, unsigned8 b2
, OB_FUNC func
)
356 unsigned64 result
= 0;
360 for (i
= 0; i
< 64; i
+= 8)
362 b1
= v1
& 0xFF; v1
>>= 8;
364 result
|= ((unsigned64
)b
<< i
);
370 /* Primary entry for operations that update CPRs. */
372 mdmx_cpr_op(sim_cpu
*cpu
,
380 unsigned64 result
= 0;
382 switch (MX_FMT (fmtsel
))
385 switch (MX_VT (fmtsel
))
388 op2
= ValueFPR(vt
, fmt_mdmx
);
389 result
= qh_map_op(op1
, QH_ELEM(op2
, fmtsel
), qh_func
[op
]);
392 result
= qh_vector_op(op1
, ValueFPR(vt
, fmt_mdmx
), qh_func
[op
]);
395 result
= qh_map_op(op1
, vt
, qh_func
[op
]);
400 switch (MX_VT (fmtsel
))
403 op2
= ValueFPR(vt
, fmt_mdmx
);
404 result
= ob_map_op(op1
, OB_ELEM(op2
, fmtsel
), ob_func
[op
]);
407 result
= ob_vector_op(op1
, ValueFPR(vt
, fmt_mdmx
), ob_func
[op
]);
410 result
= ob_map_op(op1
, vt
, ob_func
[op
]);
422 /* Operations that update CCs */
425 qh_vector_test(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int cond
)
431 for (i
= 0; i
< 4; i
++)
433 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
434 h2
= (signed16
)(v2
& 0xFFFF); v2
>>= 16;
435 boolean
= ((cond
& MX_C_EQ
) && (h1
== h2
)) ||
436 ((cond
& MX_C_LT
) && (h1
< h2
));
442 qh_map_test(sim_cpu
*cpu
, unsigned64 v1
, signed16 h2
, int cond
)
448 for (i
= 0; i
< 4; i
++)
450 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
451 boolean
= ((cond
& MX_C_EQ
) && (h1
== h2
)) ||
452 ((cond
& MX_C_LT
) && (h1
< h2
));
458 ob_vector_test(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int cond
)
464 for (i
= 0; i
< 8; i
++)
466 b1
= v1
& 0xFF; v1
>>= 8;
467 b2
= v2
& 0xFF; v2
>>= 8;
468 boolean
= ((cond
& MX_C_EQ
) && (b1
== b2
)) ||
469 ((cond
& MX_C_LT
) && (b1
< b2
));
475 ob_map_test(sim_cpu
*cpu
, unsigned64 v1
, unsigned8 b2
, int cond
)
481 for (i
= 0; i
< 8; i
++)
483 b1
= (unsigned8
)(v1
& 0xFF); v1
>>= 8;
484 boolean
= ((cond
& MX_C_EQ
) && (b1
== b2
)) ||
485 ((cond
& MX_C_LT
) && (b1
< b2
));
492 mdmx_cc_op(sim_cpu
*cpu
,
501 switch (MX_FMT (fmtsel
))
504 switch (MX_VT (fmtsel
))
507 op2
= ValueFPR(vt
, fmt_mdmx
);
508 qh_map_test(cpu
, v1
, QH_ELEM(op2
, fmtsel
), cond
);
511 qh_vector_test(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), cond
);
514 qh_map_test(cpu
, v1
, vt
, cond
);
519 switch (MX_VT (fmtsel
))
522 op2
= ValueFPR(vt
, fmt_mdmx
);
523 ob_map_test(cpu
, v1
, OB_ELEM(op2
, fmtsel
), cond
);
526 ob_vector_test(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), cond
);
529 ob_map_test(cpu
, v1
, vt
, cond
);
539 /* Pick operations. */
542 qh_vector_pick(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int tf
)
544 unsigned64 result
= 0;
549 for (i
= 0; i
< 4; i
++)
551 h
= ((GETFCC(i
) == tf
) ? (v1
& 0xFFFF) : (v2
& 0xFFFF));
552 v1
>>= 16; v2
>>= 16;
553 result
|= ((unsigned64
)h
<< s
);
560 qh_map_pick(sim_cpu
*cpu
, unsigned64 v1
, signed16 h2
, int tf
)
562 unsigned64 result
= 0;
567 for (i
= 0; i
< 4; i
++)
569 h
= (GETFCC(i
) == tf
) ? (v1
& 0xFFFF) : (unsigned16
)h2
;
571 result
|= ((unsigned64
)h
<< s
);
578 ob_vector_pick(sim_cpu
*cpu
, unsigned64 v1
, unsigned64 v2
, int tf
)
580 unsigned64 result
= 0;
585 for (i
= 0; i
< 8; i
++)
587 b
= (GETFCC(i
) == tf
) ? (v1
& 0xFF) : (v2
& 0xFF);
589 result
|= ((unsigned64
)b
<< s
);
596 ob_map_pick(sim_cpu
*cpu
, unsigned64 v1
, unsigned8 b2
, int tf
)
598 unsigned64 result
= 0;
603 for (i
= 0; i
< 8; i
++)
605 b
= (GETFCC(i
) == tf
) ? (v1
& 0xFF) : b2
;
607 result
|= ((unsigned64
)b
<< s
);
615 mdmx_pick_op(sim_cpu
*cpu
,
622 unsigned64 result
= 0;
625 switch (MX_FMT (fmtsel
))
628 switch (MX_VT (fmtsel
))
631 op2
= ValueFPR(vt
, fmt_mdmx
);
632 result
= qh_map_pick(cpu
, v1
, QH_ELEM(op2
, fmtsel
), tf
);
635 result
= qh_vector_pick(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), tf
);
638 result
= qh_map_pick(cpu
, v1
, vt
, tf
);
643 switch (MX_VT (fmtsel
))
646 op2
= ValueFPR(vt
, fmt_mdmx
);
647 result
= ob_map_pick(cpu
, v1
, OB_ELEM(op2
, fmtsel
), tf
);
650 result
= ob_vector_pick(cpu
, v1
, ValueFPR(vt
, fmt_mdmx
), tf
);
653 result
= ob_map_pick(cpu
, v1
, vt
, tf
);
666 typedef void (*QH_ACC
)(signed48
*a
, signed16 ts
, signed16 tt
);
669 AccAddAQH(signed48
*a
, signed16 ts
, signed16 tt
)
671 *a
+= (signed48
)ts
+ (signed48
)tt
;
675 AccAddLQH(signed48
*a
, signed16 ts
, signed16 tt
)
677 *a
= (signed48
)ts
+ (signed48
)tt
;
681 AccMulAQH(signed48
*a
, signed16 ts
, signed16 tt
)
683 *a
+= (signed48
)ts
* (signed48
)tt
;
687 AccMulLQH(signed48
*a
, signed16 ts
, signed16 tt
)
689 *a
= (signed48
)ts
* (signed48
)tt
;
693 SubMulAQH(signed48
*a
, signed16 ts
, signed16 tt
)
695 *a
-= (signed48
)ts
* (signed48
)tt
;
699 SubMulLQH(signed48
*a
, signed16 ts
, signed16 tt
)
701 *a
= -((signed48
)ts
* (signed48
)tt
);
705 AccSubAQH(signed48
*a
, signed16 ts
, signed16 tt
)
707 *a
+= (signed48
)ts
- (signed48
)tt
;
711 AccSubLQH(signed48
*a
, signed16 ts
, signed16 tt
)
713 *a
= (signed48
)ts
- (signed48
)tt
;
717 typedef void (*OB_ACC
)(signed24
*acc
, unsigned8 ts
, unsigned8 tt
);
720 AccAddAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
722 *a
+= (signed24
)ts
+ (signed24
)tt
;
726 AccAddLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
728 *a
= (signed24
)ts
+ (signed24
)tt
;
732 AccMulAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
734 *a
+= (signed24
)ts
* (signed24
)tt
;
738 AccMulLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
740 *a
= (signed24
)ts
* (signed24
)tt
;
744 SubMulAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
746 *a
-= (signed24
)ts
* (signed24
)tt
;
750 SubMulLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
752 *a
= -((signed24
)ts
* (signed24
)tt
);
756 AccSubAOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
758 *a
+= (signed24
)ts
- (signed24
)tt
;
762 AccSubLOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
764 *a
= (signed24
)ts
- (signed24
)tt
;
768 AccAbsDiffOB(signed24
*a
, unsigned8 ts
, unsigned8 tt
)
770 unsigned8 t
= (ts
>= tt
? ts
- tt
: tt
- ts
);
/* Dispatch tables for operations that update the accumulator.  */
777 static const QH_ACC qh_acc
[] = {
778 AccAddAQH
, AccAddLQH
, AccMulAQH
, AccMulLQH
,
779 SubMulAQH
, SubMulLQH
, AccSubAQH
, AccSubLQH
,
783 static const OB_ACC ob_acc
[] = {
784 AccAddAOB
, AccAddLOB
, AccMulAOB
, AccMulLOB
,
785 SubMulAOB
, SubMulLOB
, AccSubAOB
, AccSubLOB
,
791 qh_vector_acc(signed48 a
[], unsigned64 v1
, unsigned64 v2
, QH_ACC acc
)
796 for (i
= 0; i
< 4; i
++)
798 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
799 h2
= (signed16
)(v2
& 0xFFFF); v2
>>= 16;
800 (*acc
)(&a
[i
], h1
, h2
);
805 qh_map_acc(signed48 a
[], unsigned64 v1
, signed16 h2
, QH_ACC acc
)
810 for (i
= 0; i
< 4; i
++)
812 h1
= (signed16
)(v1
& 0xFFFF); v1
>>= 16;
813 (*acc
)(&a
[i
], h1
, h2
);
818 ob_vector_acc(signed24 a
[], unsigned64 v1
, unsigned64 v2
, OB_ACC acc
)
823 for (i
= 0; i
< 8; i
++)
825 b1
= v1
& 0xFF; v1
>>= 8;
826 b2
= v2
& 0xFF; v2
>>= 8;
827 (*acc
)(&a
[i
], b1
, b2
);
832 ob_map_acc(signed24 a
[], unsigned64 v1
, unsigned8 b2
, OB_ACC acc
)
837 for (i
= 0; i
< 8; i
++)
839 b1
= v1
& 0xFF; v1
>>= 8;
840 (*acc
)(&a
[i
], b1
, b2
);
845 /* Primary entry for operations that accumulate */
847 mdmx_acc_op(sim_cpu
*cpu
,
856 switch (MX_FMT (fmtsel
))
859 switch (MX_VT (fmtsel
))
862 op2
= ValueFPR(vt
, fmt_mdmx
);
863 qh_map_acc(ACC
.qh
, op1
, QH_ELEM(op2
, fmtsel
), qh_acc
[op
]);
866 qh_vector_acc(ACC
.qh
, op1
, ValueFPR(vt
, fmt_mdmx
), qh_acc
[op
]);
869 qh_map_acc(ACC
.qh
, op1
, vt
, qh_acc
[op
]);
874 switch (MX_VT (fmtsel
))
877 op2
= ValueFPR(vt
, fmt_mdmx
);
878 ob_map_acc(ACC
.ob
, op1
, OB_ELEM(op2
, fmtsel
), ob_acc
[op
]);
881 ob_vector_acc(ACC
.ob
, op1
, ValueFPR(vt
, fmt_mdmx
), ob_acc
[op
]);
884 ob_map_acc(ACC
.ob
, op1
, vt
, ob_acc
[op
]);
894 /* Reading and writing accumulator (no conversion). */
897 mdmx_rac_op(sim_cpu
*cpu
,
906 shift
= op
; /* L = 00, M = 01, H = 10. */
912 shift
<<= 4; /* 16 bits per element. */
913 for (i
= 3; i
>= 0; --i
)
916 result
|= ((ACC
.qh
[i
] >> shift
) & 0xFFFF);
920 shift
<<= 3; /* 8 bits per element. */
921 for (i
= 7; i
>= 0; --i
)
924 result
|= ((ACC
.ob
[i
] >> shift
) & 0xFF);
934 mdmx_wacl(sim_cpu
*cpu
,
945 for (i
= 0; i
< 4; i
++)
947 signed32 s
= (signed16
)(vs
& 0xFFFF);
948 ACC
.qh
[i
] = ((signed48
)s
<< 16) | (vt
& 0xFFFF);
949 vs
>>= 16; vt
>>= 16;
953 for (i
= 0; i
< 8; i
++)
955 signed16 s
= (signed8
)(vs
& 0xFF);
956 ACC
.ob
[i
] = ((signed24
)s
<< 8) | (vt
& 0xFF);
966 mdmx_wach(sim_cpu
*cpu
,
976 for (i
= 0; i
< 4; i
++)
978 signed32 s
= (signed16
)(vs
& 0xFFFF);
979 ACC
.qh
[i
] &= ~((signed48
)0xFFFF << 32);
980 ACC
.qh
[i
] |= ((signed48
)s
<< 32);
985 for (i
= 0; i
< 8; i
++)
987 ACC
.ob
[i
] &= ~((signed24
)0xFF << 16);
988 ACC
.ob
[i
] |= ((signed24
)(vs
& 0xFF) << 16);
998 /* Reading and writing accumulator (rounding conversions).
999 Enumerating function guarantees s >= 0 for QH ops. */
1001 typedef signed16 (*QH_ROUND
)(signed48 a
, signed16 s
);
1003 #define QH_BIT(n) ((unsigned48)1 << (n))
1004 #define QH_ONES(n) (((unsigned48)1 << (n))-1)
1007 RNASQH(signed48 a
, signed16 s
)
1010 signed16 result
= 0;
1017 if ((a
& QH_BIT(47)) == 0)
1019 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1026 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1028 if (s
> 1 && ((unsigned48
)a
& QH_ONES(s
-1)) != 0)
1034 result
= (signed16
)t
;
1040 RNAUQH(signed48 a
, signed16 s
)
1048 result
= ((unsigned48
)a
& MASK48
) >> 47;
1051 t
= ((unsigned48
)a
& MASK48
) >> s
;
1052 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1056 result
= (signed16
)t
;
1062 RNESQH(signed48 a
, signed16 s
)
1065 signed16 result
= 0;
1072 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1074 if (s
== 1 || (a
& QH_ONES(s
-1)) == 0)
1079 if ((a
& QH_BIT(47)) == 0)
1089 result
= (signed16
)t
;
1095 RNEUQH(signed48 a
, signed16 s
)
1103 result
= ((unsigned48
)a
> QH_BIT(47) ? 1 : 0);
1106 t
= ((unsigned48
)a
& MASK48
) >> s
;
1107 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1109 if (s
> 1 && (a
& QH_ONES(s
-1)) != 0)
1116 result
= (signed16
)t
;
1122 RZSQH(signed48 a
, signed16 s
)
1125 signed16 result
= 0;
1132 if ((a
& QH_BIT(47)) == 0)
1142 result
= (signed16
)t
;
1148 RZUQH(signed48 a
, signed16 s
)
1151 signed16 result
= 0;
1156 result
= ((unsigned48
)a
> QH_BIT(47) ? 1 : 0);
1159 t
= ((unsigned48
)a
& MASK48
) >> s
;
1162 result
= (signed16
)t
;
1168 typedef unsigned8 (*OB_ROUND
)(signed24 a
, unsigned8 s
);
1170 #define OB_BIT(n) ((unsigned24)1 << (n))
1171 #define OB_ONES(n) (((unsigned24)1 << (n))-1)
1174 RNAUOB(signed24 a
, unsigned8 s
)
1182 result
= ((unsigned24
)a
& MASK24
) >> 23;
1185 t
= ((unsigned24
)a
& MASK24
) >> s
;
1186 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1188 result
= OB_CLAMP(t
);
1194 RNEUOB(signed24 a
, unsigned8 s
)
1202 result
= (((unsigned24
)a
& MASK24
) > OB_BIT(23) ? 1 : 0);
1205 t
= ((unsigned24
)a
& MASK24
) >> s
;
1206 if (s
> 0 && ((a
>> (s
-1)) & 1) == 1)
1208 if (s
> 1 && (a
& OB_ONES(s
-1)) != 0)
1213 result
= OB_CLAMP(t
);
1219 RZUOB(signed24 a
, unsigned8 s
)
1228 t
= ((unsigned24
)a
& MASK24
) >> s
;
1229 result
= OB_CLAMP(t
);
1235 static const QH_ROUND qh_round
[] = {
1236 RNASQH
, RNAUQH
, RNESQH
, RNEUQH
, RZSQH
, RZUQH
1239 static const OB_ROUND ob_round
[] = {
1240 NULL
, RNAUOB
, NULL
, RNEUOB
, NULL
, RZUOB
1245 qh_vector_round(sim_cpu
*cpu
, address_word cia
, unsigned64 v2
, QH_ROUND round
)
1247 unsigned64 result
= 0;
1252 for (i
= 0; i
< 4; i
++)
1254 h2
= (signed16
)(v2
& 0xFFFF);
1256 h
= (*round
)(ACC
.qh
[i
], h2
);
1259 UnpredictableResult ();
1263 result
|= ((unsigned64
)((unsigned16
)h
) << s
);
1270 qh_map_round(sim_cpu
*cpu
, address_word cia
, signed16 h2
, QH_ROUND round
)
1272 unsigned64 result
= 0;
1277 for (i
= 0; i
< 4; i
++)
1280 h
= (*round
)(ACC
.qh
[i
], h2
);
1283 UnpredictableResult ();
1286 result
|= ((unsigned64
)((unsigned16
)h
) << s
);
1293 ob_vector_round(sim_cpu
*cpu
, address_word cia
, unsigned64 v2
, OB_ROUND round
)
1295 unsigned64 result
= 0;
1300 for (i
= 0; i
< 8; i
++)
1302 b2
= v2
& 0xFF; v2
>>= 8;
1303 b
= (*round
)(ACC
.ob
[i
], b2
);
1304 result
|= ((unsigned64
)b
<< s
);
1311 ob_map_round(sim_cpu
*cpu
, address_word cia
, unsigned8 b2
, OB_ROUND round
)
1313 unsigned64 result
= 0;
1318 for (i
= 0; i
< 8; i
++)
1320 b
= (*round
)(ACC
.ob
[i
], b2
);
1321 result
|= ((unsigned64
)b
<< s
);
1329 mdmx_round_op(sim_cpu
*cpu
,
1336 unsigned64 result
= 0;
1338 switch (MX_FMT (fmtsel
))
1341 switch (MX_VT (fmtsel
))
1344 op2
= ValueFPR(vt
, fmt_mdmx
);
1345 result
= qh_map_round(cpu
, cia
, QH_ELEM(op2
, fmtsel
), qh_round
[rm
]);
1348 op2
= ValueFPR(vt
, fmt_mdmx
);
1349 result
= qh_vector_round(cpu
, cia
, op2
, qh_round
[rm
]);
1352 result
= qh_map_round(cpu
, cia
, vt
, qh_round
[rm
]);
1357 switch (MX_VT (fmtsel
))
1360 op2
= ValueFPR(vt
, fmt_mdmx
);
1361 result
= ob_map_round(cpu
, cia
, OB_ELEM(op2
, fmtsel
), ob_round
[rm
]);
1364 op2
= ValueFPR(vt
, fmt_mdmx
);
1365 result
= ob_vector_round(cpu
, cia
, op2
, ob_round
[rm
]);
1368 result
= ob_map_round(cpu
, cia
, vt
, ob_round
[rm
]);
1380 /* Shuffle operation. */
1383 enum {vs
, ss
, vt
} source
;
1387 static const sh_map ob_shuffle
[][8] = {
1388 /* MDMX 2.0 encodings (3-4, 6-7). */
1389 /* vr5400 encoding (5), otherwise. */
1391 {{vt
,4}, {vs
,4}, {vt
,5}, {vs
,5}, {vt
,6}, {vs
,6}, {vt
,7}, {vs
,7}}, /* RSVD */
1392 {{vt
,0}, {vs
,0}, {vt
,1}, {vs
,1}, {vt
,2}, {vs
,2}, {vt
,3}, {vs
,3}}, /* RSVD */
1393 {{vs
,0}, {ss
,0}, {vs
,1}, {ss
,1}, {vs
,2}, {ss
,2}, {vs
,3}, {ss
,3}}, /* upsl */
1394 {{vt
,1}, {vt
,3}, {vt
,5}, {vt
,7}, {vs
,1}, {vs
,3}, {vs
,5}, {vs
,7}}, /* pach */
1395 {{vt
,0}, {vt
,2}, {vt
,4}, {vt
,6}, {vs
,0}, {vs
,2}, {vs
,4}, {vs
,6}}, /* pacl */
1396 {{vt
,4}, {vs
,4}, {vt
,5}, {vs
,5}, {vt
,6}, {vs
,6}, {vt
,7}, {vs
,7}}, /* mixh */
1397 {{vt
,0}, {vs
,0}, {vt
,1}, {vs
,1}, {vt
,2}, {vs
,2}, {vt
,3}, {vs
,3}} /* mixl */
1400 static const sh_map qh_shuffle
[][4] = {
1401 {{vt
,2}, {vs
,2}, {vt
,3}, {vs
,3}}, /* mixh */
1402 {{vt
,0}, {vs
,0}, {vt
,1}, {vs
,1}}, /* mixl */
1403 {{vt
,1}, {vt
,3}, {vs
,1}, {vs
,3}}, /* pach */
1405 {{vt
,1}, {vs
,0}, {vt
,3}, {vs
,2}}, /* bfla */
1407 {{vt
,2}, {vt
,3}, {vs
,2}, {vs
,3}}, /* repa */
1408 {{vt
,0}, {vt
,1}, {vs
,0}, {vs
,1}} /* repb */
1413 mdmx_shuffle(sim_cpu
*cpu
,
1419 unsigned64 result
= 0;
1423 if ((shop
& 0x3) == 0x1) /* QH format. */
1427 for (i
= 0; i
< 4; i
++)
1431 switch (qh_shuffle
[op
][i
].source
)
1443 result
|= (((v
>> 16*qh_shuffle
[op
][i
].index
) & 0xFFFF) << s
);
1447 else if ((shop
& 0x1) == 0x0) /* OB format. */
1451 for (i
= 0; i
< 8; i
++)
1454 unsigned int ishift
= 8*ob_shuffle
[op
][i
].index
;
1456 switch (ob_shuffle
[op
][i
].source
)
1459 b
= (op1
>> ishift
) & 0xFF;
1462 b
= ((op1
>> ishift
) & 0x80) ? 0xFF : 0;
1465 b
= (op2
>> ishift
) & 0xFF;
1471 result
|= ((unsigned64
)b
<< s
);