PowerPC64 .branch_lt address
[binutils-gdb.git] / sim / mips / mdmx.c
1 /* Simulation code for the MIPS MDMX ASE.
2 Copyright (C) 2002-2022 Free Software Foundation, Inc.
3 Contributed by Ed Satterthwaite and Chris Demetriou, of Broadcom
4 Corporation (SiByte).
5
6 This file is part of GDB, the GNU debugger.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>. */
20
21 /* This must come before any other includes. */
22 #include "defs.h"
23
24 #include <stdio.h>
25
26 #include "sim-main.h"
27
/* Within mdmx.c we refer to the sim_cpu directly.  Any function that
   uses CPU or SD (directly or through another macro) must therefore
   have a variable named `cpu' in scope.  */
#define CPU cpu
#define SD (CPU_STATE(CPU))

/* XXX FIXME: temporary hack while the impact of making unpredictable()
   a "normal" (non-igen) function is evaluated.
   The replacement expands to a call that names both `cpu' and `cia',
   so both must be visible wherever Unpredictable () is used.  */
#undef Unpredictable
#define Unpredictable() unpredictable_action (cpu, cia)
36
37 /* MDMX Representations
38
39 An 8-bit packed byte element (OB) is always unsigned.
40 The 24-bit accumulators are signed and are represented as 32-bit
41 signed values, which are reduced to 24-bit signed values prior to
42 Round and Clamp operations.
43
44 A 16-bit packed halfword element (QH) is always signed.
45 The 48-bit accumulators are signed and are represented as 64-bit
46 signed values, which are reduced to 48-bit signed values prior to
47 Round and Clamp operations.
48
49 The code below assumes a 2's-complement representation of signed
50 quantities. Care is required to clear extended sign bits when
51 repacking fields.
52
53 The code (and the code for arithmetic shifts in mips.igen) also makes
54 the (not guaranteed portable) assumption that right shifts of signed
55 quantities in C do sign extension. */
56
/* Unsigned view of a 48-bit QH accumulator, held in 64 bits; MASK48
   keeps only the low 48 bits.  */
typedef uint64_t unsigned48;
#define MASK48 (UNSIGNED64 (0xffffffffffff))

/* Unsigned view of a 24-bit OB accumulator, held in 32 bits; MASK24
   keeps only the low 24 bits.  */
typedef uint32_t unsigned24;
#define MASK24 (UNSIGNED32 (0xffffff))

/* Element format of a packed 64-bit operand.  */
typedef enum {
mdmx_ob, /* OB: eight unsigned 8-bit elements */
mdmx_qh /* QH: four signed 16-bit elements */
} MX_fmt;

/* How the second operand of an instruction is obtained.  */
typedef enum {
sel_elem, /* element select: one element of the register, used for every lane */
sel_vect, /* vector select: the register, lane by lane */
sel_imm /* immediate select: the vt field itself is the value */
} VT_select;

/* Saturation limits.  OB elements are unsigned, so only an upper
   limit exists.  QH_MIN relies on the two's-complement conversion this
   file assumes throughout.  */
#define OB_MAX ((uint8_t)0xFF)
#define QH_MIN ((int16_t)0x8000)
#define QH_MAX ((int16_t)0x7FFF)

/* Saturate X to the element range.  OB_CLAMP only limits from above.
   Both macros evaluate X more than once, so pass a side-effect-free
   expression.  */
#define OB_CLAMP(x) ((uint8_t)((x) > OB_MAX ? OB_MAX : (x)))
#define QH_CLAMP(x) ((int16_t)((x) < QH_MIN ? QH_MIN : \
((x) > QH_MAX ? QH_MAX : (x))))

/* Decode a format/select value.
   MX_FMT: bit 0 clear selects OB, set selects QH.
   MX_VT:  bit 4 clear selects an element; bits 4:3 == 10 select the
           whole vector; bits 4:3 == 11 select an immediate.  */
#define MX_FMT(fmtsel) (((fmtsel) & 0x1) == 0 ? mdmx_ob : mdmx_qh)
#define MX_VT(fmtsel) (((fmtsel) & 0x10) == 0 ? sel_elem : \
(((fmtsel) & 0x18) == 0x10 ? sel_vect : sel_imm))

/* Extract the element of V named by FMTSEL.
   QH_ELEM: bits 3:2 of FMTSEL give the halfword index (shift of
            0, 16, 32 or 48).
   OB_ELEM: bits 3:1 of FMTSEL give the byte index (shift of
            0, 8, ..., 56).  */
#define QH_ELEM(v,fmtsel) \
((int16_t)(((v) >> (((fmtsel) & 0xC) << 2)) & 0xFFFF))
#define OB_ELEM(v,fmtsel) \
((uint8_t)(((v) >> (((fmtsel) & 0xE) << 2)) & 0xFF))


/* Per-element binary operators: (ts, tt) -> result element.  */
typedef int16_t (*QH_FUNC)(int16_t, int16_t);
typedef uint8_t (*OB_FUNC)(uint8_t, uint8_t);
94
95 /* vectorized logical operators */
96
/* Bitwise AND of two QH elements, computed on their unsigned bit
   patterns.  */
static int16_t
AndQH(int16_t ts, int16_t tt)
{
  const uint16_t lhs = (uint16_t)ts;
  const uint16_t rhs = (uint16_t)tt;

  return (int16_t)(lhs & rhs);
}
102
/* Bitwise AND of two OB elements.  */
static uint8_t
AndOB(uint8_t ts, uint8_t tt)
{
  uint8_t common = ts;

  common &= tt;
  return common;
}
108
/* Bitwise NOR of two QH elements: every bit that is clear in both
   operands is set in the result.  */
static int16_t
NorQH(int16_t ts, int16_t tt)
{
  const uint16_t either = (uint16_t)ts | (uint16_t)tt;

  return (int16_t)(uint16_t)~either;
}
114
/* Bitwise NOR of two OB elements.  */
static uint8_t
NorOB(uint8_t ts, uint8_t tt)
{
  const uint8_t either = ts | tt;

  return (uint8_t)~either;
}
120
/* Bitwise OR of two QH elements, computed on their unsigned bit
   patterns.  */
static int16_t
OrQH(int16_t ts, int16_t tt)
{
  uint16_t merged = (uint16_t)ts;

  merged |= (uint16_t)tt;
  return (int16_t)merged;
}
126
/* Bitwise OR of two OB elements.  */
static uint8_t
OrOB(uint8_t ts, uint8_t tt)
{
  uint8_t merged = ts;

  merged |= tt;
  return merged;
}
132
/* Bitwise exclusive OR of two QH elements, computed on their unsigned
   bit patterns.  */
static int16_t
XorQH(int16_t ts, int16_t tt)
{
  uint16_t toggled = (uint16_t)ts;

  toggled ^= (uint16_t)tt;
  return (int16_t)toggled;
}
138
/* Bitwise exclusive OR of two OB elements.  */
static uint8_t
XorOB(uint8_t ts, uint8_t tt)
{
  uint8_t toggled = ts;

  toggled ^= tt;
  return toggled;
}
144
/* Shift a QH element left.  Only the low four bits of TT are used as
   the shift count; bits shifted out of the halfword are discarded.  */
static int16_t
SLLQH(int16_t ts, int16_t tt)
{
  const unsigned int count = (unsigned int)tt & 0xFu;
  uint32_t wide = (uint32_t)ts;

  wide <<= count;
  return (int16_t)(uint16_t)wide;
}
151
/* Shift an OB element left.  Only the low three bits of TT are used as
   the shift count; bits shifted out of the byte are discarded.  */
static uint8_t
SLLOB(uint8_t ts, uint8_t tt)
{
  const unsigned int count = tt & 0x7u;
  uint32_t wide = ts;

  wide <<= count;
  return (uint8_t)wide;
}
158
/* Logical (zero-filling) right shift of a QH element.  Only the low
   four bits of TT are used as the shift count.  */
static int16_t
SRLQH(int16_t ts, int16_t tt)
{
  const unsigned int count = (unsigned int)tt & 0xFu;
  const uint16_t pattern = (uint16_t)ts;

  return (int16_t)(pattern >> count);
}
165
/* Logical right shift of an OB element.  Only the low three bits of TT
   are used as the shift count.  */
static uint8_t
SRLOB(uint8_t ts, uint8_t tt)
{
  const unsigned int count = tt & 0x7u;

  return (uint8_t)(ts >> count);
}
172
173
174 /* Vectorized arithmetic operators. */
175
/* Saturating signed add of two QH elements.  The sum is formed in 32
   bits and then clamped to [QH_MIN, QH_MAX].  */
static int16_t
AddQH(int16_t ts, int16_t tt)
{
  int32_t sum = (int32_t)ts;

  sum += (int32_t)tt;
  return QH_CLAMP(sum);
}
182
/* Saturating unsigned add of two OB elements.  The sum is formed in 32
   bits and then clamped to OB_MAX.  */
static uint8_t
AddOB(uint8_t ts, uint8_t tt)
{
  uint32_t sum = (uint32_t)ts;

  sum += (uint32_t)tt;
  return OB_CLAMP(sum);
}
189
/* Saturating signed subtract, TS - TT, of two QH elements.  The
   difference is formed in 32 bits and clamped to [QH_MIN, QH_MAX].  */
static int16_t
SubQH(int16_t ts, int16_t tt)
{
  int32_t diff = (int32_t)ts;

  diff -= (int32_t)tt;
  return QH_CLAMP(diff);
}
196
/* Saturating unsigned subtract, TS - TT, of two OB elements.  A
   difference that would be negative yields 0.  */
static uint8_t
SubOB(uint8_t ts, uint8_t tt)
{
  if (ts <= tt)
    return 0;
  return (uint8_t)(ts - tt);
}
206
/* Signed minimum of two QH elements.  */
static int16_t
MinQH(int16_t ts, int16_t tt)
{
  if (ts < tt)
    return ts;
  return tt;
}
212
/* Unsigned minimum of two OB elements.  */
static uint8_t
MinOB(uint8_t ts, uint8_t tt)
{
  if (ts < tt)
    return ts;
  return tt;
}
218
/* Signed maximum of two QH elements.  */
static int16_t
MaxQH(int16_t ts, int16_t tt)
{
  if (ts > tt)
    return ts;
  return tt;
}
224
/* Unsigned maximum of two OB elements.  */
static uint8_t
MaxOB(uint8_t ts, uint8_t tt)
{
  if (ts > tt)
    return ts;
  return tt;
}
230
/* Saturating signed multiply of two QH elements.  The product is
   formed in 32 bits and clamped to [QH_MIN, QH_MAX].  */
static int16_t
MulQH(int16_t ts, int16_t tt)
{
  int32_t product = (int32_t)ts;

  product *= (int32_t)tt;
  return QH_CLAMP(product);
}
237
/* Saturating unsigned multiply of two OB elements.  The product is
   formed in 32 bits and clamped to OB_MAX.  */
static uint8_t
MulOB(uint8_t ts, uint8_t tt)
{
  uint32_t product = (uint32_t)ts;

  product *= (uint32_t)tt;
  return OB_CLAMP(product);
}
244
245 /* "msgn" and "sra" are defined only for QH format. */
246
247 static int16_t
248 MsgnQH(int16_t ts, int16_t tt)
249 {
250 int16_t t;
251 if (ts < 0)
252 t = (tt == QH_MIN ? QH_MAX : -tt);
253 else if (ts == 0)
254 t = 0;
255 else
256 t = tt;
257 return t;
258 }
259
/* Arithmetic right shift of a QH element.  Only the low four bits of
   TT are used as the shift count.  Relies on `>>' of a negative value
   sign-extending, as the rest of this file does.  */
static int16_t
SRAQH(int16_t ts, int16_t tt)
{
  const unsigned int count = (unsigned int)tt & 0xFu;
  int32_t wide = (int32_t)ts;

  wide >>= count;
  return (int16_t)wide;
}
266
267
268 /* "pabsdiff" and "pavg" are defined only for OB format. */
269
/* Absolute difference |TS - TT| of two OB elements.  */
static uint8_t
AbsDiffOB(uint8_t ts, uint8_t tt)
{
  if (ts < tt)
    return (uint8_t)(tt - ts);
  return (uint8_t)(ts - tt);
}
275
/* Average of two OB elements, rounding halves upward.  The sum is
   formed in 32 bits so it cannot overflow.  */
static uint8_t
AvgOB(uint8_t ts, uint8_t tt)
{
  uint32_t sum = (uint32_t)ts;

  sum += (uint32_t)tt;
  sum += 1;
  return (uint8_t)(sum >> 1);
}
281
282
/* Dispatch tables for operations that update a CPR.

   qh_func is indexed by the `op' argument of mdmx_cpr_op.  The order
   of the entries must therefore match the op numbering used by the
   callers, which is defined outside this file.  The two trailing NULL
   slots line up with the OB-only operations (pabsdiff, pavg) in
   ob_func and must never be called.  */

static const QH_FUNC qh_func[] = {
AndQH, NorQH, OrQH, XorQH, SLLQH, SRLQH,
AddQH, SubQH, MinQH, MaxQH,
MulQH, MsgnQH, SRAQH, NULL, NULL
};
290
/* OB counterpart of qh_func, indexed by the same `op' value in
   mdmx_cpr_op.  The two NULL slots line up with the QH-only operations
   (msgn, sra) and must never be called.  */
static const OB_FUNC ob_func[] = {
AndOB, NorOB, OrOB, XorOB, SLLOB, SRLOB,
AddOB, SubOB, MinOB, MaxOB,
MulOB, NULL, NULL, AbsDiffOB, AvgOB
};
296
297 /* Auxiliary functions for CPR updates. */
298
299 /* Vector mapping for QH format. */
300 static uint64_t
301 qh_vector_op(uint64_t v1, uint64_t v2, QH_FUNC func)
302 {
303 uint64_t result = 0;
304 int i;
305 int16_t h, h1, h2;
306
307 for (i = 0; i < 64; i += 16)
308 {
309 h1 = (int16_t)(v1 & 0xFFFF); v1 >>= 16;
310 h2 = (int16_t)(v2 & 0xFFFF); v2 >>= 16;
311 h = (*func)(h1, h2);
312 result |= ((uint64_t)((uint16_t)h) << i);
313 }
314 return result;
315 }
316
317 static uint64_t
318 qh_map_op(uint64_t v1, int16_t h2, QH_FUNC func)
319 {
320 uint64_t result = 0;
321 int i;
322 int16_t h, h1;
323
324 for (i = 0; i < 64; i += 16)
325 {
326 h1 = (int16_t)(v1 & 0xFFFF); v1 >>= 16;
327 h = (*func)(h1, h2);
328 result |= ((uint64_t)((uint16_t)h) << i);
329 }
330 return result;
331 }
332
333
334 /* Vector operations for OB format. */
335
336 static uint64_t
337 ob_vector_op(uint64_t v1, uint64_t v2, OB_FUNC func)
338 {
339 uint64_t result = 0;
340 int i;
341 uint8_t b, b1, b2;
342
343 for (i = 0; i < 64; i += 8)
344 {
345 b1 = v1 & 0xFF; v1 >>= 8;
346 b2 = v2 & 0xFF; v2 >>= 8;
347 b = (*func)(b1, b2);
348 result |= ((uint64_t)b << i);
349 }
350 return result;
351 }
352
353 static uint64_t
354 ob_map_op(uint64_t v1, uint8_t b2, OB_FUNC func)
355 {
356 uint64_t result = 0;
357 int i;
358 uint8_t b, b1;
359
360 for (i = 0; i < 64; i += 8)
361 {
362 b1 = v1 & 0xFF; v1 >>= 8;
363 b = (*func)(b1, b2);
364 result |= ((uint64_t)b << i);
365 }
366 return result;
367 }
368
369
370 /* Primary entry for operations that update CPRs. */
371 uint64_t
372 mdmx_cpr_op(sim_cpu *cpu,
373 address_word cia,
374 int op,
375 uint64_t op1,
376 int vt,
377 MX_fmtsel fmtsel)
378 {
379 uint64_t op2;
380 uint64_t result = 0;
381
382 switch (MX_FMT (fmtsel))
383 {
384 case mdmx_qh:
385 switch (MX_VT (fmtsel))
386 {
387 case sel_elem:
388 op2 = ValueFPR(vt, fmt_mdmx);
389 result = qh_map_op(op1, QH_ELEM(op2, fmtsel), qh_func[op]);
390 break;
391 case sel_vect:
392 result = qh_vector_op(op1, ValueFPR(vt, fmt_mdmx), qh_func[op]);
393 break;
394 case sel_imm:
395 result = qh_map_op(op1, vt, qh_func[op]);
396 break;
397 }
398 break;
399 case mdmx_ob:
400 switch (MX_VT (fmtsel))
401 {
402 case sel_elem:
403 op2 = ValueFPR(vt, fmt_mdmx);
404 result = ob_map_op(op1, OB_ELEM(op2, fmtsel), ob_func[op]);
405 break;
406 case sel_vect:
407 result = ob_vector_op(op1, ValueFPR(vt, fmt_mdmx), ob_func[op]);
408 break;
409 case sel_imm:
410 result = ob_map_op(op1, vt, ob_func[op]);
411 break;
412 }
413 break;
414 default:
415 Unpredictable ();
416 }
417
418 return result;
419 }
420
421
422 /* Operations that update CCs */
423
424 static void
425 qh_vector_test(sim_cpu *cpu, uint64_t v1, uint64_t v2, int cond)
426 {
427 int i;
428 int16_t h1, h2;
429 int boolean;
430
431 for (i = 0; i < 4; i++)
432 {
433 h1 = (int16_t)(v1 & 0xFFFF); v1 >>= 16;
434 h2 = (int16_t)(v2 & 0xFFFF); v2 >>= 16;
435 boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
436 ((cond & MX_C_LT) && (h1 < h2));
437 SETFCC(i, boolean);
438 }
439 }
440
441 static void
442 qh_map_test(sim_cpu *cpu, uint64_t v1, int16_t h2, int cond)
443 {
444 int i;
445 int16_t h1;
446 int boolean;
447
448 for (i = 0; i < 4; i++)
449 {
450 h1 = (int16_t)(v1 & 0xFFFF); v1 >>= 16;
451 boolean = ((cond & MX_C_EQ) && (h1 == h2)) ||
452 ((cond & MX_C_LT) && (h1 < h2));
453 SETFCC(i, boolean);
454 }
455 }
456
457 static void
458 ob_vector_test(sim_cpu *cpu, uint64_t v1, uint64_t v2, int cond)
459 {
460 int i;
461 uint8_t b1, b2;
462 int boolean;
463
464 for (i = 0; i < 8; i++)
465 {
466 b1 = v1 & 0xFF; v1 >>= 8;
467 b2 = v2 & 0xFF; v2 >>= 8;
468 boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
469 ((cond & MX_C_LT) && (b1 < b2));
470 SETFCC(i, boolean);
471 }
472 }
473
474 static void
475 ob_map_test(sim_cpu *cpu, uint64_t v1, uint8_t b2, int cond)
476 {
477 int i;
478 uint8_t b1;
479 int boolean;
480
481 for (i = 0; i < 8; i++)
482 {
483 b1 = (uint8_t)(v1 & 0xFF); v1 >>= 8;
484 boolean = ((cond & MX_C_EQ) && (b1 == b2)) ||
485 ((cond & MX_C_LT) && (b1 < b2));
486 SETFCC(i, boolean);
487 }
488 }
489
490
491 void
492 mdmx_cc_op(sim_cpu *cpu,
493 address_word cia,
494 int cond,
495 uint64_t v1,
496 int vt,
497 MX_fmtsel fmtsel)
498 {
499 uint64_t op2;
500
501 switch (MX_FMT (fmtsel))
502 {
503 case mdmx_qh:
504 switch (MX_VT (fmtsel))
505 {
506 case sel_elem:
507 op2 = ValueFPR(vt, fmt_mdmx);
508 qh_map_test(cpu, v1, QH_ELEM(op2, fmtsel), cond);
509 break;
510 case sel_vect:
511 qh_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
512 break;
513 case sel_imm:
514 qh_map_test(cpu, v1, vt, cond);
515 break;
516 }
517 break;
518 case mdmx_ob:
519 switch (MX_VT (fmtsel))
520 {
521 case sel_elem:
522 op2 = ValueFPR(vt, fmt_mdmx);
523 ob_map_test(cpu, v1, OB_ELEM(op2, fmtsel), cond);
524 break;
525 case sel_vect:
526 ob_vector_test(cpu, v1, ValueFPR(vt, fmt_mdmx), cond);
527 break;
528 case sel_imm:
529 ob_map_test(cpu, v1, vt, cond);
530 break;
531 }
532 break;
533 default:
534 Unpredictable ();
535 }
536 }
537
538
539 /* Pick operations. */
540
541 static uint64_t
542 qh_vector_pick(sim_cpu *cpu, uint64_t v1, uint64_t v2, int tf)
543 {
544 uint64_t result = 0;
545 int i, s;
546 uint16_t h;
547
548 s = 0;
549 for (i = 0; i < 4; i++)
550 {
551 h = ((GETFCC(i) == tf) ? (v1 & 0xFFFF) : (v2 & 0xFFFF));
552 v1 >>= 16; v2 >>= 16;
553 result |= ((uint64_t)h << s);
554 s += 16;
555 }
556 return result;
557 }
558
559 static uint64_t
560 qh_map_pick(sim_cpu *cpu, uint64_t v1, int16_t h2, int tf)
561 {
562 uint64_t result = 0;
563 int i, s;
564 uint16_t h;
565
566 s = 0;
567 for (i = 0; i < 4; i++)
568 {
569 h = (GETFCC(i) == tf) ? (v1 & 0xFFFF) : (uint16_t)h2;
570 v1 >>= 16;
571 result |= ((uint64_t)h << s);
572 s += 16;
573 }
574 return result;
575 }
576
577 static uint64_t
578 ob_vector_pick(sim_cpu *cpu, uint64_t v1, uint64_t v2, int tf)
579 {
580 uint64_t result = 0;
581 int i, s;
582 uint8_t b;
583
584 s = 0;
585 for (i = 0; i < 8; i++)
586 {
587 b = (GETFCC(i) == tf) ? (v1 & 0xFF) : (v2 & 0xFF);
588 v1 >>= 8; v2 >>= 8;
589 result |= ((uint64_t)b << s);
590 s += 8;
591 }
592 return result;
593 }
594
595 static uint64_t
596 ob_map_pick(sim_cpu *cpu, uint64_t v1, uint8_t b2, int tf)
597 {
598 uint64_t result = 0;
599 int i, s;
600 uint8_t b;
601
602 s = 0;
603 for (i = 0; i < 8; i++)
604 {
605 b = (GETFCC(i) == tf) ? (v1 & 0xFF) : b2;
606 v1 >>= 8;
607 result |= ((uint64_t)b << s);
608 s += 8;
609 }
610 return result;
611 }
612
613
614 uint64_t
615 mdmx_pick_op(sim_cpu *cpu,
616 address_word cia,
617 int tf,
618 uint64_t v1,
619 int vt,
620 MX_fmtsel fmtsel)
621 {
622 uint64_t result = 0;
623 uint64_t op2;
624
625 switch (MX_FMT (fmtsel))
626 {
627 case mdmx_qh:
628 switch (MX_VT (fmtsel))
629 {
630 case sel_elem:
631 op2 = ValueFPR(vt, fmt_mdmx);
632 result = qh_map_pick(cpu, v1, QH_ELEM(op2, fmtsel), tf);
633 break;
634 case sel_vect:
635 result = qh_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
636 break;
637 case sel_imm:
638 result = qh_map_pick(cpu, v1, vt, tf);
639 break;
640 }
641 break;
642 case mdmx_ob:
643 switch (MX_VT (fmtsel))
644 {
645 case sel_elem:
646 op2 = ValueFPR(vt, fmt_mdmx);
647 result = ob_map_pick(cpu, v1, OB_ELEM(op2, fmtsel), tf);
648 break;
649 case sel_vect:
650 result = ob_vector_pick(cpu, v1, ValueFPR(vt, fmt_mdmx), tf);
651 break;
652 case sel_imm:
653 result = ob_map_pick(cpu, v1, vt, tf);
654 break;
655 }
656 break;
657 default:
658 Unpredictable ();
659 }
660 return result;
661 }
662
663
664 /* Accumulators. */
665
/* Per-lane QH accumulator update: combines the elements TS and TT and
   stores into, or folds into, the accumulator lane *A.  */
typedef void (*QH_ACC)(signed48 *a, int16_t ts, int16_t tt);
667
668 static void
669 AccAddAQH(signed48 *a, int16_t ts, int16_t tt)
670 {
671 *a += (signed48)ts + (signed48)tt;
672 }
673
674 static void
675 AccAddLQH(signed48 *a, int16_t ts, int16_t tt)
676 {
677 *a = (signed48)ts + (signed48)tt;
678 }
679
680 static void
681 AccMulAQH(signed48 *a, int16_t ts, int16_t tt)
682 {
683 *a += (signed48)ts * (signed48)tt;
684 }
685
686 static void
687 AccMulLQH(signed48 *a, int16_t ts, int16_t tt)
688 {
689 *a = (signed48)ts * (signed48)tt;
690 }
691
692 static void
693 SubMulAQH(signed48 *a, int16_t ts, int16_t tt)
694 {
695 *a -= (signed48)ts * (signed48)tt;
696 }
697
698 static void
699 SubMulLQH(signed48 *a, int16_t ts, int16_t tt)
700 {
701 *a = -((signed48)ts * (signed48)tt);
702 }
703
704 static void
705 AccSubAQH(signed48 *a, int16_t ts, int16_t tt)
706 {
707 *a += (signed48)ts - (signed48)tt;
708 }
709
710 static void
711 AccSubLQH(signed48 *a, int16_t ts, int16_t tt)
712 {
713 *a = (signed48)ts - (signed48)tt;
714 }
715
716
/* Per-lane OB accumulator update: combines the elements TS and TT and
   stores into, or folds into, the accumulator lane *ACC.  */
typedef void (*OB_ACC)(signed24 *acc, uint8_t ts, uint8_t tt);
718
719 static void
720 AccAddAOB(signed24 *a, uint8_t ts, uint8_t tt)
721 {
722 *a += (signed24)ts + (signed24)tt;
723 }
724
725 static void
726 AccAddLOB(signed24 *a, uint8_t ts, uint8_t tt)
727 {
728 *a = (signed24)ts + (signed24)tt;
729 }
730
731 static void
732 AccMulAOB(signed24 *a, uint8_t ts, uint8_t tt)
733 {
734 *a += (signed24)ts * (signed24)tt;
735 }
736
737 static void
738 AccMulLOB(signed24 *a, uint8_t ts, uint8_t tt)
739 {
740 *a = (signed24)ts * (signed24)tt;
741 }
742
743 static void
744 SubMulAOB(signed24 *a, uint8_t ts, uint8_t tt)
745 {
746 *a -= (signed24)ts * (signed24)tt;
747 }
748
749 static void
750 SubMulLOB(signed24 *a, uint8_t ts, uint8_t tt)
751 {
752 *a = -((signed24)ts * (signed24)tt);
753 }
754
755 static void
756 AccSubAOB(signed24 *a, uint8_t ts, uint8_t tt)
757 {
758 *a += (signed24)ts - (signed24)tt;
759 }
760
761 static void
762 AccSubLOB(signed24 *a, uint8_t ts, uint8_t tt)
763 {
764 *a = (signed24)ts - (signed24)tt;
765 }
766
767 static void
768 AccAbsDiffOB(signed24 *a, uint8_t ts, uint8_t tt)
769 {
770 uint8_t t = (ts >= tt ? ts - tt : tt - ts);
771 *a += (signed24)t;
772 }
773
774
/* Dispatch tables for operations that update the accumulator.

   qh_acc is indexed by the `op' argument of mdmx_acc_op, so the entry
   order must match the op numbering used by the callers, which is
   defined outside this file.  The trailing NULL slot lines up with
   AccAbsDiffOB in ob_acc, which has no QH counterpart, and must never
   be called.  */

static const QH_ACC qh_acc[] = {
AccAddAQH, AccAddLQH, AccMulAQH, AccMulLQH,
SubMulAQH, SubMulLQH, AccSubAQH, AccSubLQH,
NULL
};
782
/* OB counterpart of qh_acc, indexed by the same `op' value in
   mdmx_acc_op.  Every slot is populated.  */
static const OB_ACC ob_acc[] = {
AccAddAOB, AccAddLOB, AccMulAOB, AccMulLOB,
SubMulAOB, SubMulLOB, AccSubAOB, AccSubLOB,
AccAbsDiffOB
};
788
789
790 static void
791 qh_vector_acc(signed48 a[], uint64_t v1, uint64_t v2, QH_ACC acc)
792 {
793 int i;
794 int16_t h1, h2;
795
796 for (i = 0; i < 4; i++)
797 {
798 h1 = (int16_t)(v1 & 0xFFFF); v1 >>= 16;
799 h2 = (int16_t)(v2 & 0xFFFF); v2 >>= 16;
800 (*acc)(&a[i], h1, h2);
801 }
802 }
803
804 static void
805 qh_map_acc(signed48 a[], uint64_t v1, int16_t h2, QH_ACC acc)
806 {
807 int i;
808 int16_t h1;
809
810 for (i = 0; i < 4; i++)
811 {
812 h1 = (int16_t)(v1 & 0xFFFF); v1 >>= 16;
813 (*acc)(&a[i], h1, h2);
814 }
815 }
816
817 static void
818 ob_vector_acc(signed24 a[], uint64_t v1, uint64_t v2, OB_ACC acc)
819 {
820 int i;
821 uint8_t b1, b2;
822
823 for (i = 0; i < 8; i++)
824 {
825 b1 = v1 & 0xFF; v1 >>= 8;
826 b2 = v2 & 0xFF; v2 >>= 8;
827 (*acc)(&a[i], b1, b2);
828 }
829 }
830
831 static void
832 ob_map_acc(signed24 a[], uint64_t v1, uint8_t b2, OB_ACC acc)
833 {
834 int i;
835 uint8_t b1;
836
837 for (i = 0; i < 8; i++)
838 {
839 b1 = v1 & 0xFF; v1 >>= 8;
840 (*acc)(&a[i], b1, b2);
841 }
842 }
843
844
845 /* Primary entry for operations that accumulate */
846 void
847 mdmx_acc_op(sim_cpu *cpu,
848 address_word cia,
849 int op,
850 uint64_t op1,
851 int vt,
852 MX_fmtsel fmtsel)
853 {
854 uint64_t op2;
855
856 switch (MX_FMT (fmtsel))
857 {
858 case mdmx_qh:
859 switch (MX_VT (fmtsel))
860 {
861 case sel_elem:
862 op2 = ValueFPR(vt, fmt_mdmx);
863 qh_map_acc(ACC.qh, op1, QH_ELEM(op2, fmtsel), qh_acc[op]);
864 break;
865 case sel_vect:
866 qh_vector_acc(ACC.qh, op1, ValueFPR(vt, fmt_mdmx), qh_acc[op]);
867 break;
868 case sel_imm:
869 qh_map_acc(ACC.qh, op1, vt, qh_acc[op]);
870 break;
871 }
872 break;
873 case mdmx_ob:
874 switch (MX_VT (fmtsel))
875 {
876 case sel_elem:
877 op2 = ValueFPR(vt, fmt_mdmx);
878 ob_map_acc(ACC.ob, op1, OB_ELEM(op2, fmtsel), ob_acc[op]);
879 break;
880 case sel_vect:
881 ob_vector_acc(ACC.ob, op1, ValueFPR(vt, fmt_mdmx), ob_acc[op]);
882 break;
883 case sel_imm:
884 ob_map_acc(ACC.ob, op1, vt, ob_acc[op]);
885 break;
886 }
887 break;
888 default:
889 Unpredictable ();
890 }
891 }
892
893
894 /* Reading and writing accumulator (no conversion). */
895
896 uint64_t
897 mdmx_rac_op(sim_cpu *cpu,
898 address_word cia,
899 int op,
900 int fmt)
901 {
902 uint64_t result;
903 unsigned int shift;
904 int i;
905
906 shift = op; /* L = 00, M = 01, H = 10. */
907 result = 0;
908
909 switch (fmt)
910 {
911 case MX_FMT_QH:
912 shift <<= 4; /* 16 bits per element. */
913 for (i = 3; i >= 0; --i)
914 {
915 result <<= 16;
916 result |= ((ACC.qh[i] >> shift) & 0xFFFF);
917 }
918 break;
919 case MX_FMT_OB:
920 shift <<= 3; /* 8 bits per element. */
921 for (i = 7; i >= 0; --i)
922 {
923 result <<= 8;
924 result |= ((ACC.ob[i] >> shift) & 0xFF);
925 }
926 break;
927 default:
928 Unpredictable ();
929 }
930 return result;
931 }
932
933 void
934 mdmx_wacl(sim_cpu *cpu,
935 address_word cia,
936 int fmt,
937 uint64_t vs,
938 uint64_t vt)
939 {
940 int i;
941
942 switch (fmt)
943 {
944 case MX_FMT_QH:
945 for (i = 0; i < 4; i++)
946 {
947 int32_t s = (int16_t)(vs & 0xFFFF);
948 ACC.qh[i] = ((signed48)s << 16) | (vt & 0xFFFF);
949 vs >>= 16; vt >>= 16;
950 }
951 break;
952 case MX_FMT_OB:
953 for (i = 0; i < 8; i++)
954 {
955 int16_t s = (int8_t)(vs & 0xFF);
956 ACC.ob[i] = ((signed24)s << 8) | (vt & 0xFF);
957 vs >>= 8; vt >>= 8;
958 }
959 break;
960 default:
961 Unpredictable ();
962 }
963 }
964
965 void
966 mdmx_wach(sim_cpu *cpu,
967 address_word cia,
968 int fmt,
969 uint64_t vs)
970 {
971 int i;
972
973 switch (fmt)
974 {
975 case MX_FMT_QH:
976 for (i = 0; i < 4; i++)
977 {
978 int32_t s = (int16_t)(vs & 0xFFFF);
979 ACC.qh[i] &= ~((signed48)0xFFFF << 32);
980 ACC.qh[i] |= ((signed48)s << 32);
981 vs >>= 16;
982 }
983 break;
984 case MX_FMT_OB:
985 for (i = 0; i < 8; i++)
986 {
987 ACC.ob[i] &= ~((signed24)0xFF << 16);
988 ACC.ob[i] |= ((signed24)(vs & 0xFF) << 16);
989 vs >>= 8;
990 }
991 break;
992 default:
993 Unpredictable ();
994 }
995 }
996
997
998 /* Reading and writing accumulator (rounding conversions).
999 Enumerating function guarantees s >= 0 for QH ops. */
1000
/* Convert one QH accumulator lane A to a halfword after shifting it
   right by S, using a particular rounding mode and saturation.  */
typedef int16_t (*QH_ROUND)(signed48 a, int16_t s);

/* QH_BIT(n): a value with only bit N set.
   QH_ONES(n): a value with the N low-order bits set.  */
#define QH_BIT(n) ((unsigned48)1 << (n))
#define QH_ONES(n) (((unsigned48)1 << (n))-1)
1005
1006 static int16_t
1007 RNASQH(signed48 a, int16_t s)
1008 {
1009 signed48 t;
1010 int16_t result = 0;
1011
1012 if (s > 48)
1013 result = 0;
1014 else
1015 {
1016 t = (a >> s);
1017 if ((a & QH_BIT(47)) == 0)
1018 {
1019 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1020 t++;
1021 if (t > QH_MAX)
1022 t = QH_MAX;
1023 }
1024 else
1025 {
1026 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1027 {
1028 if (s > 1 && ((unsigned48)a & QH_ONES(s-1)) != 0)
1029 t++;
1030 }
1031 if (t < QH_MIN)
1032 t = QH_MIN;
1033 }
1034 result = (int16_t)t;
1035 }
1036 return result;
1037 }
1038
1039 static int16_t
1040 RNAUQH(signed48 a, int16_t s)
1041 {
1042 unsigned48 t;
1043 int16_t result;
1044
1045 if (s > 48)
1046 result = 0;
1047 else if (s == 48)
1048 result = ((unsigned48)a & MASK48) >> 47;
1049 else
1050 {
1051 t = ((unsigned48)a & MASK48) >> s;
1052 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1053 t++;
1054 if (t > 0xFFFF)
1055 t = 0xFFFF;
1056 result = (int16_t)t;
1057 }
1058 return result;
1059 }
1060
1061 static int16_t
1062 RNESQH(signed48 a, int16_t s)
1063 {
1064 signed48 t;
1065 int16_t result = 0;
1066
1067 if (s > 47)
1068 result = 0;
1069 else
1070 {
1071 t = (a >> s);
1072 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1073 {
1074 if (s == 1 || (a & QH_ONES(s-1)) == 0)
1075 t += t & 1;
1076 else
1077 t += 1;
1078 }
1079 if ((a & QH_BIT(47)) == 0)
1080 {
1081 if (t > QH_MAX)
1082 t = QH_MAX;
1083 }
1084 else
1085 {
1086 if (t < QH_MIN)
1087 t = QH_MIN;
1088 }
1089 result = (int16_t)t;
1090 }
1091 return result;
1092 }
1093
1094 static int16_t
1095 RNEUQH(signed48 a, int16_t s)
1096 {
1097 unsigned48 t;
1098 int16_t result;
1099
1100 if (s > 48)
1101 result = 0;
1102 else if (s == 48)
1103 result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1104 else
1105 {
1106 t = ((unsigned48)a & MASK48) >> s;
1107 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1108 {
1109 if (s > 1 && (a & QH_ONES(s-1)) != 0)
1110 t++;
1111 else
1112 t += t & 1;
1113 }
1114 if (t > 0xFFFF)
1115 t = 0xFFFF;
1116 result = (int16_t)t;
1117 }
1118 return result;
1119 }
1120
1121 static int16_t
1122 RZSQH(signed48 a, int16_t s)
1123 {
1124 signed48 t;
1125 int16_t result = 0;
1126
1127 if (s > 47)
1128 result = 0;
1129 else
1130 {
1131 t = (a >> s);
1132 if ((a & QH_BIT(47)) == 0)
1133 {
1134 if (t > QH_MAX)
1135 t = QH_MAX;
1136 }
1137 else
1138 {
1139 if (t < QH_MIN)
1140 t = QH_MIN;
1141 }
1142 result = (int16_t)t;
1143 }
1144 return result;
1145 }
1146
1147 static int16_t
1148 RZUQH(signed48 a, int16_t s)
1149 {
1150 unsigned48 t;
1151 int16_t result = 0;
1152
1153 if (s > 48)
1154 result = 0;
1155 else if (s == 48)
1156 result = ((unsigned48)a > QH_BIT(47) ? 1 : 0);
1157 else
1158 {
1159 t = ((unsigned48)a & MASK48) >> s;
1160 if (t > 0xFFFF)
1161 t = 0xFFFF;
1162 result = (int16_t)t;
1163 }
1164 return result;
1165 }
1166
1167
/* Convert one OB accumulator lane A to a byte after shifting it right
   by S, using a particular rounding mode and saturation.  */
typedef uint8_t (*OB_ROUND)(signed24 a, uint8_t s);

/* OB_BIT(n): a value with only bit N set.
   OB_ONES(n): a value with the N low-order bits set.  */
#define OB_BIT(n) ((unsigned24)1 << (n))
#define OB_ONES(n) (((unsigned24)1 << (n))-1)
1172
1173 static uint8_t
1174 RNAUOB(signed24 a, uint8_t s)
1175 {
1176 uint8_t result;
1177 unsigned24 t;
1178
1179 if (s > 24)
1180 result = 0;
1181 else if (s == 24)
1182 result = ((unsigned24)a & MASK24) >> 23;
1183 else
1184 {
1185 t = ((unsigned24)a & MASK24) >> s;
1186 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1187 t ++;
1188 result = OB_CLAMP(t);
1189 }
1190 return result;
1191 }
1192
1193 static uint8_t
1194 RNEUOB(signed24 a, uint8_t s)
1195 {
1196 uint8_t result;
1197 unsigned24 t;
1198
1199 if (s > 24)
1200 result = 0;
1201 else if (s == 24)
1202 result = (((unsigned24)a & MASK24) > OB_BIT(23) ? 1 : 0);
1203 else
1204 {
1205 t = ((unsigned24)a & MASK24) >> s;
1206 if (s > 0 && ((a >> (s-1)) & 1) == 1)
1207 {
1208 if (s > 1 && (a & OB_ONES(s-1)) != 0)
1209 t++;
1210 else
1211 t += t & 1;
1212 }
1213 result = OB_CLAMP(t);
1214 }
1215 return result;
1216 }
1217
1218 static uint8_t
1219 RZUOB(signed24 a, uint8_t s)
1220 {
1221 uint8_t result;
1222 unsigned24 t;
1223
1224 if (s >= 24)
1225 result = 0;
1226 else
1227 {
1228 t = ((unsigned24)a & MASK24) >> s;
1229 result = OB_CLAMP(t);
1230 }
1231 return result;
1232 }
1233
1234
/* QH rounding routines, indexed by the `rm' argument of mdmx_round_op.
   Even slots hold the signed variants and odd slots the unsigned ones;
   the numbering itself is defined by the callers, outside this
   file.  */
static const QH_ROUND qh_round[] = {
RNASQH, RNAUQH, RNESQH, RNEUQH, RZSQH, RZUQH
};
1238
/* OB rounding routines, indexed by the same `rm' value as qh_round.
   OB elements are always unsigned, so the slots that hold signed
   variants in qh_round are NULL here and must never be called.  */
static const OB_ROUND ob_round[] = {
NULL, RNAUOB, NULL, RNEUOB, NULL, RZUOB
};
1242
1243
1244 static uint64_t
1245 qh_vector_round(sim_cpu *cpu, address_word cia, uint64_t v2, QH_ROUND round)
1246 {
1247 uint64_t result = 0;
1248 int i, s;
1249 int16_t h, h2;
1250
1251 s = 0;
1252 for (i = 0; i < 4; i++)
1253 {
1254 h2 = (int16_t)(v2 & 0xFFFF);
1255 if (h2 >= 0)
1256 h = (*round)(ACC.qh[i], h2);
1257 else
1258 {
1259 UnpredictableResult ();
1260 h = 0xdead;
1261 }
1262 v2 >>= 16;
1263 result |= ((uint64_t)((uint16_t)h) << s);
1264 s += 16;
1265 }
1266 return result;
1267 }
1268
1269 static uint64_t
1270 qh_map_round(sim_cpu *cpu, address_word cia, int16_t h2, QH_ROUND round)
1271 {
1272 uint64_t result = 0;
1273 int i, s;
1274 int16_t h;
1275
1276 s = 0;
1277 for (i = 0; i < 4; i++)
1278 {
1279 if (h2 >= 0)
1280 h = (*round)(ACC.qh[i], h2);
1281 else
1282 {
1283 UnpredictableResult ();
1284 h = 0xdead;
1285 }
1286 result |= ((uint64_t)((uint16_t)h) << s);
1287 s += 16;
1288 }
1289 return result;
1290 }
1291
1292 static uint64_t
1293 ob_vector_round(sim_cpu *cpu, address_word cia, uint64_t v2, OB_ROUND round)
1294 {
1295 uint64_t result = 0;
1296 int i, s;
1297 uint8_t b, b2;
1298
1299 s = 0;
1300 for (i = 0; i < 8; i++)
1301 {
1302 b2 = v2 & 0xFF; v2 >>= 8;
1303 b = (*round)(ACC.ob[i], b2);
1304 result |= ((uint64_t)b << s);
1305 s += 8;
1306 }
1307 return result;
1308 }
1309
1310 static uint64_t
1311 ob_map_round(sim_cpu *cpu, address_word cia, uint8_t b2, OB_ROUND round)
1312 {
1313 uint64_t result = 0;
1314 int i, s;
1315 uint8_t b;
1316
1317 s = 0;
1318 for (i = 0; i < 8; i++)
1319 {
1320 b = (*round)(ACC.ob[i], b2);
1321 result |= ((uint64_t)b << s);
1322 s += 8;
1323 }
1324 return result;
1325 }
1326
1327
1328 uint64_t
1329 mdmx_round_op(sim_cpu *cpu,
1330 address_word cia,
1331 int rm,
1332 int vt,
1333 MX_fmtsel fmtsel)
1334 {
1335 uint64_t op2;
1336 uint64_t result = 0;
1337
1338 switch (MX_FMT (fmtsel))
1339 {
1340 case mdmx_qh:
1341 switch (MX_VT (fmtsel))
1342 {
1343 case sel_elem:
1344 op2 = ValueFPR(vt, fmt_mdmx);
1345 result = qh_map_round(cpu, cia, QH_ELEM(op2, fmtsel), qh_round[rm]);
1346 break;
1347 case sel_vect:
1348 op2 = ValueFPR(vt, fmt_mdmx);
1349 result = qh_vector_round(cpu, cia, op2, qh_round[rm]);
1350 break;
1351 case sel_imm:
1352 result = qh_map_round(cpu, cia, vt, qh_round[rm]);
1353 break;
1354 }
1355 break;
1356 case mdmx_ob:
1357 switch (MX_VT (fmtsel))
1358 {
1359 case sel_elem:
1360 op2 = ValueFPR(vt, fmt_mdmx);
1361 result = ob_map_round(cpu, cia, OB_ELEM(op2, fmtsel), ob_round[rm]);
1362 break;
1363 case sel_vect:
1364 op2 = ValueFPR(vt, fmt_mdmx);
1365 result = ob_vector_round(cpu, cia, op2, ob_round[rm]);
1366 break;
1367 case sel_imm:
1368 result = ob_map_round(cpu, cia, vt, ob_round[rm]);
1369 break;
1370 }
1371 break;
1372 default:
1373 Unpredictable ();
1374 }
1375
1376 return result;
1377 }
1378
1379
1380 /* Shuffle operation. */
1381
/* One output element of a shuffle: which input it is taken from and
   which element of that input.  `vs' and `vt' copy an element of the
   first and second operand of mdmx_shuffle respectively; `ss' produces
   an all-ones or all-zeros element from the top bit of the indexed
   first-operand element.  `vs' is enumerator 0, so a zero-initialized
   entry means "element 0 of the first operand".  */
typedef struct {
enum {vs, ss, vt} source;
unsigned int index;
} sh_map;
1386
/* OB shuffle patterns.  mdmx_shuffle indexes the rows with shop >> 1;
   column N describes output byte N.  Rows written as `{ }' are
   zero-initialized, so every entry in them reads as {vs,0}: a reserved
   row is not rejected by mdmx_shuffle, it replicates byte 0 of the
   first operand.  */
static const sh_map ob_shuffle[][8] = {
/* MDMX 2.0 encodings (3-4, 6-7). */
/* vr5400 encoding (5), otherwise. */
{ }, /* RSVD */
{{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* RSVD */
{{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* RSVD */
{{vs,0}, {ss,0}, {vs,1}, {ss,1}, {vs,2}, {ss,2}, {vs,3}, {ss,3}}, /* upsl */
{{vt,1}, {vt,3}, {vt,5}, {vt,7}, {vs,1}, {vs,3}, {vs,5}, {vs,7}}, /* pach */
{{vt,0}, {vt,2}, {vt,4}, {vt,6}, {vs,0}, {vs,2}, {vs,4}, {vs,6}}, /* pacl */
{{vt,4}, {vs,4}, {vt,5}, {vs,5}, {vt,6}, {vs,6}, {vt,7}, {vs,7}}, /* mixh */
{{vt,0}, {vs,0}, {vt,1}, {vs,1}, {vt,2}, {vs,2}, {vt,3}, {vs,3}} /* mixl */
};
1399
/* QH shuffle patterns.  mdmx_shuffle indexes the rows with shop >> 2;
   column N describes output halfword N.  Rows written as `{ }' are
   zero-initialized, so every entry in them reads as {vs,0}.  No row
   uses the `ss' source; mdmx_shuffle treats it as unpredictable for
   QH.  */
static const sh_map qh_shuffle[][4] = {
{{vt,2}, {vs,2}, {vt,3}, {vs,3}}, /* mixh */
{{vt,0}, {vs,0}, {vt,1}, {vs,1}}, /* mixl */
{{vt,1}, {vt,3}, {vs,1}, {vs,3}}, /* pach */
{ }, /* RSVD */
{{vt,1}, {vs,0}, {vt,3}, {vs,2}}, /* bfla */
{ }, /* RSVD */
{{vt,2}, {vt,3}, {vs,2}, {vs,3}}, /* repa */
{{vt,0}, {vt,1}, {vs,0}, {vs,1}} /* repb */
};
1410
1411
1412 uint64_t
1413 mdmx_shuffle(sim_cpu *cpu,
1414 address_word cia,
1415 int shop,
1416 uint64_t op1,
1417 uint64_t op2)
1418 {
1419 uint64_t result = 0;
1420 int i, s;
1421 int op;
1422
1423 if ((shop & 0x3) == 0x1) /* QH format. */
1424 {
1425 op = shop >> 2;
1426 s = 0;
1427 for (i = 0; i < 4; i++)
1428 {
1429 uint64_t v;
1430
1431 switch (qh_shuffle[op][i].source)
1432 {
1433 case vs:
1434 v = op1;
1435 break;
1436 case vt:
1437 v = op2;
1438 break;
1439 default:
1440 Unpredictable ();
1441 v = 0;
1442 }
1443 result |= (((v >> 16*qh_shuffle[op][i].index) & 0xFFFF) << s);
1444 s += 16;
1445 }
1446 }
1447 else if ((shop & 0x1) == 0x0) /* OB format. */
1448 {
1449 op = shop >> 1;
1450 s = 0;
1451 for (i = 0; i < 8; i++)
1452 {
1453 uint8_t b;
1454 unsigned int ishift = 8*ob_shuffle[op][i].index;
1455
1456 switch (ob_shuffle[op][i].source)
1457 {
1458 case vs:
1459 b = (op1 >> ishift) & 0xFF;
1460 break;
1461 case ss:
1462 b = ((op1 >> ishift) & 0x80) ? 0xFF : 0;
1463 break;
1464 case vt:
1465 b = (op2 >> ishift) & 0xFF;
1466 break;
1467 default:
1468 Unpredictable ();
1469 b = 0;
1470 }
1471 result |= ((uint64_t)b << s);
1472 s += 8;
1473 }
1474 }
1475 else
1476 Unpredictable ();
1477
1478 return result;
1479 }