gallium: added comment/annotation support to PPC rtasm
[mesa.git] / src / gallium / auxiliary / rtasm / rtasm_ppc.c
1 /**************************************************************************
2 *
3 * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved.
4 * Copyright (C) 2009 VMware, Inc. All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
20 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 **************************************************************************/
24
25 /**
26 * PPC code generation.
27 * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf
28 * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf
29 *
30 * Other PPC refs:
31 * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
32 * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html
33 * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
34 *
35 * \author Brian Paul
36 */
37
38
39 #include <stdio.h>
40 #include "util/u_memory.h"
41 #include "pipe/p_debug.h"
42 #include "rtasm_execmem.h"
43 #include "rtasm_ppc.h"
44
45
46 void
47 ppc_init_func(struct ppc_function *p)
48 {
49 uint i;
50
51 memset(p, 0, sizeof(*p));
52
53 p->num_inst = 0;
54 p->max_inst = 100; /* first guess at buffer size */
55 p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
56 p->reg_used = 0x0;
57 p->fp_used = 0x0;
58 p->vec_used = 0x0;
59
60 p->print = FALSE;
61 p->indent = 0;
62
63 /* only allow using gp registers 3..12 for now */
64 for (i = 0; i < 3; i++)
65 ppc_reserve_register(p, i);
66 for (i = 12; i < PPC_NUM_REGS; i++)
67 ppc_reserve_register(p, i);
68 }
69
70
71 void
72 ppc_release_func(struct ppc_function *p)
73 {
74 assert(p->num_inst <= p->max_inst);
75 if (p->store != NULL) {
76 rtasm_exec_free(p->store);
77 }
78 p->store = NULL;
79 }
80
81
82 uint
83 ppc_num_instructions(const struct ppc_function *p)
84 {
85 return p->num_inst;
86 }
87
88
89 void (*ppc_get_func(struct ppc_function *p))(void)
90 {
91 #if 0
92 DUMP_END();
93 if (DISASSEM && p->store)
94 debug_printf("disassemble %p %p\n", p->store, p->csr);
95
96 if (p->store == p->error_overflow)
97 return (void (*)(void)) NULL;
98 else
99 #endif
100 return (void (*)(void)) p->store;
101 }
102
103
104 void
105 ppc_dump_func(const struct ppc_function *p)
106 {
107 uint i;
108 for (i = 0; i < p->num_inst; i++) {
109 debug_printf("%3u: 0x%08x\n", i, p->store[i]);
110 }
111 }
112
113
114 void
115 ppc_print_code(struct ppc_function *p, boolean enable)
116 {
117 p->print = enable;
118 }
119
120
121 void
122 ppc_indent(struct ppc_function *p, int spaces)
123 {
124 p->indent += spaces;
125 }
126
127
128 static void
129 indent(const struct ppc_function *p)
130 {
131 int i;
132 for (i = 0; i < p->indent; i++) {
133 putchar(' ');
134 }
135 }
136
137
138 void
139 ppc_comment(struct ppc_function *p, int rel_indent, const char *s)
140 {
141 if (p->print) {
142 p->indent += rel_indent;
143 indent(p);
144 p->indent -= rel_indent;
145 printf("# %s\n", s);
146 }
147 }
148
149
150 /**
151 * Mark a register as being unavailable.
152 */
153 int
154 ppc_reserve_register(struct ppc_function *p, int reg)
155 {
156 assert(reg < PPC_NUM_REGS);
157 p->reg_used |= (1 << reg);
158 return reg;
159 }
160
161
162 /**
163 * Allocate a general purpose register.
164 * \return register index or -1 if none left.
165 */
166 int
167 ppc_allocate_register(struct ppc_function *p)
168 {
169 unsigned i;
170 for (i = 0; i < PPC_NUM_REGS; i++) {
171 const uint64_t mask = 1 << i;
172 if ((p->reg_used & mask) == 0) {
173 p->reg_used |= mask;
174 return i;
175 }
176 }
177 printf("OUT OF PPC registers!\n");
178 return -1;
179 }
180
181
182 /**
183 * Mark the given general purpose register as "unallocated".
184 */
185 void
186 ppc_release_register(struct ppc_function *p, int reg)
187 {
188 assert(reg < PPC_NUM_REGS);
189 assert(p->reg_used & (1 << reg));
190 p->reg_used &= ~(1 << reg);
191 }
192
193
194 /**
195 * Allocate a floating point register.
196 * \return register index or -1 if none left.
197 */
198 int
199 ppc_allocate_fp_register(struct ppc_function *p)
200 {
201 unsigned i;
202 for (i = 0; i < PPC_NUM_FP_REGS; i++) {
203 const uint64_t mask = 1 << i;
204 if ((p->fp_used & mask) == 0) {
205 p->fp_used |= mask;
206 return i;
207 }
208 }
209 printf("OUT OF PPC FP registers!\n");
210 return -1;
211 }
212
213
214 /**
215 * Mark the given floating point register as "unallocated".
216 */
217 void
218 ppc_release_fp_register(struct ppc_function *p, int reg)
219 {
220 assert(reg < PPC_NUM_FP_REGS);
221 assert(p->fp_used & (1 << reg));
222 p->fp_used &= ~(1 << reg);
223 }
224
225
226 /**
227 * Allocate a vector register.
228 * \return register index or -1 if none left.
229 */
230 int
231 ppc_allocate_vec_register(struct ppc_function *p)
232 {
233 unsigned i;
234 for (i = 0; i < PPC_NUM_VEC_REGS; i++) {
235 const uint64_t mask = 1 << i;
236 if ((p->vec_used & mask) == 0) {
237 p->vec_used |= mask;
238 return i;
239 }
240 }
241 printf("OUT OF PPC VEC registers!\n");
242 return -1;
243 }
244
245
246 /**
247 * Mark the given vector register as "unallocated".
248 */
249 void
250 ppc_release_vec_register(struct ppc_function *p, int reg)
251 {
252 assert(reg < PPC_NUM_VEC_REGS);
253 assert(p->vec_used & (1 << reg));
254 p->vec_used &= ~(1 << reg);
255 }
256
257
258 /**
259 * Append instruction to instruction buffer. Grow buffer if out of room.
260 */
261 static void
262 emit_instruction(struct ppc_function *p, uint32_t inst_bits)
263 {
264 if (!p->store)
265 return; /* out of memory, drop the instruction */
266
267 if (p->num_inst == p->max_inst) {
268 /* allocate larger buffer */
269 uint32_t *newbuf;
270 p->max_inst *= 2; /* 2x larger */
271 newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE);
272 if (newbuf) {
273 memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE);
274 }
275 rtasm_exec_free(p->store);
276 p->store = newbuf;
277 if (!p->store) {
278 /* out of memory */
279 p->num_inst = 0;
280 return;
281 }
282 }
283
284 p->store[p->num_inst++] = inst_bits;
285 }
286
287
288 union vx_inst {
289 uint32_t bits;
290 struct {
291 unsigned op:6;
292 unsigned vD:5;
293 unsigned vA:5;
294 unsigned vB:5;
295 unsigned op2:11;
296 } inst;
297 };
298
299 static INLINE void
300 emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
301 const char *format, boolean transpose)
302 {
303 union vx_inst inst;
304 inst.inst.op = 4;
305 inst.inst.vD = vD;
306 inst.inst.vA = vA;
307 inst.inst.vB = vB;
308 inst.inst.op2 = op2;
309 emit_instruction(p, inst.bits);
310 if (p->print) {
311 indent(p);
312 if (transpose)
313 printf(format, vD, vB, vA);
314 else
315 printf(format, vD, vA, vB);
316 }
317 }
318
319
320 union vxr_inst {
321 uint32_t bits;
322 struct {
323 unsigned op:6;
324 unsigned vD:5;
325 unsigned vA:5;
326 unsigned vB:5;
327 unsigned rC:1;
328 unsigned op2:10;
329 } inst;
330 };
331
332 static INLINE void
333 emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB,
334 const char *format)
335 {
336 union vxr_inst inst;
337 inst.inst.op = 4;
338 inst.inst.vD = vD;
339 inst.inst.vA = vA;
340 inst.inst.vB = vB;
341 inst.inst.rC = 0;
342 inst.inst.op2 = op2;
343 emit_instruction(p, inst.bits);
344 if (p->print) {
345 indent(p);
346 printf(format, vD, vA, vB);
347 }
348 }
349
350
351 union va_inst {
352 uint32_t bits;
353 struct {
354 unsigned op:6;
355 unsigned vD:5;
356 unsigned vA:5;
357 unsigned vB:5;
358 unsigned vC:5;
359 unsigned op2:6;
360 } inst;
361 };
362
363 static INLINE void
364 emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC,
365 const char *format)
366 {
367 union va_inst inst;
368 inst.inst.op = 4;
369 inst.inst.vD = vD;
370 inst.inst.vA = vA;
371 inst.inst.vB = vB;
372 inst.inst.vC = vC;
373 inst.inst.op2 = op2;
374 emit_instruction(p, inst.bits);
375 if (p->print) {
376 indent(p);
377 printf(format, vD, vA, vB, vC);
378 }
379 }
380
381
382 union i_inst {
383 uint32_t bits;
384 struct {
385 unsigned op:6;
386 unsigned li:24;
387 unsigned aa:1;
388 unsigned lk:1;
389 } inst;
390 };
391
392 static INLINE void
393 emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk)
394 {
395 union i_inst inst;
396 inst.inst.op = op;
397 inst.inst.li = li;
398 inst.inst.aa = aa;
399 inst.inst.lk = lk;
400 emit_instruction(p, inst.bits);
401 }
402
403
404 union xl_inst {
405 uint32_t bits;
406 struct {
407 unsigned op:6;
408 unsigned bo:5;
409 unsigned bi:5;
410 unsigned unused:3;
411 unsigned bh:2;
412 unsigned op2:10;
413 unsigned lk:1;
414 } inst;
415 };
416
417 static INLINE void
418 emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh,
419 uint op2, uint lk)
420 {
421 union xl_inst inst;
422 inst.inst.op = op;
423 inst.inst.bo = bo;
424 inst.inst.bi = bi;
425 inst.inst.unused = 0x0;
426 inst.inst.bh = bh;
427 inst.inst.op2 = op2;
428 inst.inst.lk = lk;
429 emit_instruction(p, inst.bits);
430 }
431
432 static INLINE void
433 dump_xl(const char *name, uint inst)
434 {
435 union xl_inst i;
436
437 i.bits = inst;
438 debug_printf("%s = 0x%08x\n", name, inst);
439 debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op);
440 debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo);
441 debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi);
442 debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused);
443 debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh);
444 debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2);
445 debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk);
446 }
447
448
449 union x_inst {
450 uint32_t bits;
451 struct {
452 unsigned op:6;
453 unsigned vrs:5;
454 unsigned ra:5;
455 unsigned rb:5;
456 unsigned op2:10;
457 unsigned unused:1;
458 } inst;
459 };
460
461 static INLINE void
462 emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2,
463 const char *format)
464 {
465 union x_inst inst;
466 inst.inst.op = op;
467 inst.inst.vrs = vrs;
468 inst.inst.ra = ra;
469 inst.inst.rb = rb;
470 inst.inst.op2 = op2;
471 inst.inst.unused = 0x0;
472 emit_instruction(p, inst.bits);
473 if (p->print) {
474 indent(p);
475 printf(format, vrs, ra, rb);
476 }
477 }
478
479
480 union d_inst {
481 uint32_t bits;
482 struct {
483 unsigned op:6;
484 unsigned rt:5;
485 unsigned ra:5;
486 unsigned si:16;
487 } inst;
488 };
489
490 static INLINE void
491 emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si,
492 const char *format, boolean transpose)
493 {
494 union d_inst inst;
495 assert(si >= -32768);
496 assert(si <= 32767);
497 inst.inst.op = op;
498 inst.inst.rt = rt;
499 inst.inst.ra = ra;
500 inst.inst.si = (unsigned) (si & 0xffff);
501 emit_instruction(p, inst.bits);
502 if (p->print) {
503 indent(p);
504 if (transpose)
505 printf(format, rt, si, ra);
506 else
507 printf(format, rt, ra, si);
508 }
509 }
510
511
512 union a_inst {
513 uint32_t bits;
514 struct {
515 unsigned op:6;
516 unsigned frt:5;
517 unsigned fra:5;
518 unsigned frb:5;
519 unsigned unused:5;
520 unsigned op2:5;
521 unsigned rc:1;
522 } inst;
523 };
524
525 static INLINE void
526 emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2,
527 uint rc, const char *format)
528 {
529 union a_inst inst;
530 inst.inst.op = op;
531 inst.inst.frt = frt;
532 inst.inst.fra = fra;
533 inst.inst.frb = frb;
534 inst.inst.unused = 0x0;
535 inst.inst.op2 = op2;
536 inst.inst.rc = rc;
537 emit_instruction(p, inst.bits);
538 if (p->print) {
539 indent(p);
540 printf(format, frt, fra, frb);
541 }
542 }
543
544
545 union xo_inst {
546 uint32_t bits;
547 struct {
548 unsigned op:6;
549 unsigned rt:5;
550 unsigned ra:5;
551 unsigned rb:5;
552 unsigned oe:1;
553 unsigned op2:9;
554 unsigned rc:1;
555 } inst;
556 };
557
558 static INLINE void
559 emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe,
560 uint op2, uint rc, const char *format)
561 {
562 union xo_inst inst;
563 inst.inst.op = op;
564 inst.inst.rt = rt;
565 inst.inst.ra = ra;
566 inst.inst.rb = rb;
567 inst.inst.oe = oe;
568 inst.inst.op2 = op2;
569 inst.inst.rc = rc;
570 emit_instruction(p, inst.bits);
571 if (p->print) {
572 indent(p);
573 printf(format, rt, ra, rb);
574 }
575 }
576
577
578
579
580
581 /**
582 ** float vector arithmetic
583 **/
584
585 /** vector float add */
586 void
587 ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB)
588 {
589 emit_vx(p, 10, vD, vA, vB, "vaddfp\t%u, v%u, v%u\n", FALSE);
590 }
591
592 /** vector float substract */
593 void
594 ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB)
595 {
596 emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE);
597 }
598
599 /** vector float min */
600 void
601 ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB)
602 {
603 emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE);
604 }
605
606 /** vector float max */
607 void
608 ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB)
609 {
610 emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE);
611 }
612
613 /** vector float mult add: vD = vA * vB + vC */
614 void
615 ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
616 {
617 /* note arg order */
618 emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n");
619 }
620
621 /** vector float negative mult subtract: vD = vA - vB * vC */
622 void
623 ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
624 {
625 /* note arg order */
626 emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n");
627 }
628
629 /** vector float compare greater than */
630 void
631 ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
632 {
633 emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u");
634 }
635
636 /** vector float compare greater than or equal to */
637 void
638 ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB)
639 {
640 emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u");
641 }
642
643 /** vector float compare equal */
644 void
645 ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB)
646 {
647 emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u");
648 }
649
650 /** vector float 2^x */
651 void
652 ppc_vexptefp(struct ppc_function *p, uint vD, uint vB)
653 {
654 emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE);
655 }
656
657 /** vector float log2(x) */
658 void
659 ppc_vlogefp(struct ppc_function *p, uint vD, uint vB)
660 {
661 emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE);
662 }
663
664 /** vector float reciprocol */
665 void
666 ppc_vrefp(struct ppc_function *p, uint vD, uint vB)
667 {
668 emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE);
669 }
670
671 /** vector float reciprocol sqrt estimate */
672 void
673 ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB)
674 {
675 emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE);
676 }
677
678 /** vector float round to negative infinity */
679 void
680 ppc_vrfim(struct ppc_function *p, uint vD, uint vB)
681 {
682 emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE);
683 }
684
685 /** vector float round to positive infinity */
686 void
687 ppc_vrfip(struct ppc_function *p, uint vD, uint vB)
688 {
689 emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE);
690 }
691
692 /** vector float round to nearest int */
693 void
694 ppc_vrfin(struct ppc_function *p, uint vD, uint vB)
695 {
696 emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE);
697 }
698
699 /** vector float round to int toward zero */
700 void
701 ppc_vrfiz(struct ppc_function *p, uint vD, uint vB)
702 {
703 emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE);
704 }
705
706 /** vector store: store vR at mem[rA+rB] */
707 void
708 ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB)
709 {
710 emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n");
711 }
712
713 /** vector load: vR = mem[rA+rB] */
714 void
715 ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB)
716 {
717 emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n");
718 }
719
720 /** load vector element word: vR = mem_word[ra+rb] */
721 void
722 ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB)
723 {
724 emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n");
725 }
726
727
728
729
730 /**
731 ** vector bitwise operations
732 **/
733
734 /** vector and */
735 void
736 ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB)
737 {
738 emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE);
739 }
740
741 /** vector and complement */
742 void
743 ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB)
744 {
745 emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE);
746 }
747
748 /** vector or */
749 void
750 ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB)
751 {
752 emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE);
753 }
754
755 /** vector nor */
756 void
757 ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB)
758 {
759 emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE);
760 }
761
762 /** vector xor */
763 void
764 ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB)
765 {
766 emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE);
767 }
768
769 /** Pseudo-instruction: vector move */
770 void
771 ppc_vmove(struct ppc_function *p, uint vD, uint vA)
772 {
773 boolean print = p->print;
774 p->print = FALSE;
775 ppc_vor(p, vD, vA, vA);
776 if (print) {
777 indent(p);
778 printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA);
779 }
780 p->print = print;
781 }
782
783 /** Set vector register to {0,0,0,0} */
784 void
785 ppc_vzero(struct ppc_function *p, uint vr)
786 {
787 boolean print = p->print;
788 p->print = FALSE;
789 ppc_vxor(p, vr, vr, vr);
790 if (print) {
791 indent(p);
792 printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr);
793 }
794 p->print = print;
795 }
796
797
798
799
800 /**
801 ** Vector shuffle / select / splat / etc
802 **/
803
804 /** vector permute */
805 void
806 ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
807 {
808 emit_va(p, 43, vD, vA, vB, vC, "vperm\tr%u, r%u, r%u, r%u");
809 }
810
811 /** vector select */
812 void
813 ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC)
814 {
815 emit_va(p, 42, vD, vA, vB, vC, "vsel\tr%u, r%u, r%u, r%u");
816 }
817
818 /** vector splat byte */
819 void
820 ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm)
821 {
822 emit_vx(p, 42, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE);
823 }
824
825 /** vector splat half word */
826 void
827 ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm)
828 {
829 emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE);
830 }
831
832 /** vector splat word */
833 void
834 ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm)
835 {
836 emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE);
837 }
838
839 /** vector splat signed immediate word */
840 void
841 ppc_vspltisw(struct ppc_function *p, uint vD, int imm)
842 {
843 assert(imm >= -16);
844 assert(imm < 15);
845 emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE);
846 }
847
848 /** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
849 void
850 ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB)
851 {
852 emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE);
853 }
854
855
856
857
858 /**
859 ** integer arithmetic
860 **/
861
862 /** rt = ra + imm */
863 void
864 ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm)
865 {
866 emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE);
867 }
868
869 /** rt = ra + (imm << 16) */
870 void
871 ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm)
872 {
873 emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE);
874 }
875
876 /** rt = ra + rb */
877 void
878 ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb)
879 {
880 emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n");
881 }
882
883 /** rt = ra AND ra */
884 void
885 ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb)
886 {
887 emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n"); /* note argument order */
888 }
889
890 /** rt = ra AND imm */
891 void
892 ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm)
893 {
894 /* note argument order */
895 emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE);
896 }
897
898 /** rt = ra OR ra */
899 void
900 ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb)
901 {
902 emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n"); /* note argument order */
903 }
904
905 /** rt = ra OR imm */
906 void
907 ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm)
908 {
909 /* note argument order */
910 emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE);
911 }
912
913 /** rt = ra XOR ra */
914 void
915 ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb)
916 {
917 emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n"); /* note argument order */
918 }
919
920 /** rt = ra XOR imm */
921 void
922 ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm)
923 {
924 /* note argument order */
925 emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE);
926 }
927
928 /** pseudo instruction: move: rt = ra */
929 void
930 ppc_mr(struct ppc_function *p, uint rt, uint ra)
931 {
932 ppc_or(p, rt, ra, ra);
933 }
934
935 /** pseudo instruction: load immediate: rt = imm */
936 void
937 ppc_li(struct ppc_function *p, uint rt, int imm)
938 {
939 boolean print = p->print;
940 p->print = FALSE;
941 ppc_addi(p, rt, 0, imm);
942 if (print) {
943 indent(p);
944 printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm);
945 }
946 p->print = print;
947 }
948
949 /** rt = imm << 16 */
950 void
951 ppc_lis(struct ppc_function *p, uint rt, int imm)
952 {
953 ppc_addis(p, rt, 0, imm);
954 }
955
956 /** rt = imm */
957 void
958 ppc_load_int(struct ppc_function *p, uint rt, int imm)
959 {
960 ppc_lis(p, rt, (imm >> 16)); /* rt = imm >> 16 */
961 ppc_ori(p, rt, rt, (imm & 0xffff)); /* rt = rt | (imm & 0xffff) */
962 }
963
964
965
966
967 /**
968 ** integer load/store
969 **/
970
971 /** store rs at memory[(ra)+d],
972 * then update ra = (ra)+d
973 */
974 void
975 ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d)
976 {
977 emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE);
978 }
979
980 /** store rs at memory[(ra)+d] */
981 void
982 ppc_stw(struct ppc_function *p, uint rs, uint ra, int d)
983 {
984 emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE);
985 }
986
987 /** Load rt = mem[(ra)+d]; then zero set high 32 bits to zero. */
988 void
989 ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d)
990 {
991 emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE);
992 }
993
994
995
996 /**
997 ** Float (non-vector) arithmetic
998 **/
999
1000 /** add: frt = fra + frb */
1001 void
1002 ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb)
1003 {
1004 emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n");
1005 }
1006
1007 /** sub: frt = fra - frb */
1008 void
1009 ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb)
1010 {
1011 emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n");
1012 }
1013
1014 /** convert to int: rt = (int) ra */
1015 void
1016 ppc_fctiwz(struct ppc_function *p, uint rt, uint fra)
1017 {
1018 emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n");
1019 }
1020
1021 /** store frs at mem[(ra)+offset] */
1022 void
1023 ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset)
1024 {
1025 emit_d(p, 52, frs, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE);
1026 }
1027
1028 /** store frs at mem[(ra)+(rb)] */
1029 void
1030 ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb)
1031 {
1032 emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tr%u, r%u, r%u\n");
1033 }
1034
1035 /** load frt = mem[(ra)+offset] */
1036 void
1037 ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset)
1038 {
1039 emit_d(p, 48, frt, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE);
1040 }
1041
1042
1043
1044
1045
1046 /**
1047 ** branch instructions
1048 **/
1049
1050 /** BLR: Branch to link register (p. 35) */
1051 void
1052 ppc_blr(struct ppc_function *p)
1053 {
1054 emit_i(p, 18, 0, 0, 1);
1055 if (p->print) {
1056 indent(p);
1057 printf("blr\n");
1058 }
1059 }
1060
1061 /** Branch Conditional to Link Register (p. 36) */
1062 void
1063 ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg)
1064 {
1065 emit_xl(p, 19, condOp, condReg, branchHint, 16, 0);
1066 if (p->print) {
1067 indent(p);
1068 printf("bclr\t%u %u %u\n", condOp, branchHint, condReg);
1069 }
1070 }
1071
1072 /** Pseudo instruction: return from subroutine */
1073 void
1074 ppc_return(struct ppc_function *p)
1075 {
1076 ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0);
1077 }