e9015ec2eb8af48cc193dca4ee9e7c154b41f85f
1 /**************************************************************************
3 * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 **************************************************************************/
25 * PPC code generation.
26 * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf
27 * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf
30 * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2
31 * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html
32 * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf
39 #include "util/u_memory.h"
40 #include "pipe/p_debug.h"
41 #include "rtasm_execmem.h"
42 #include "rtasm_ppc.h"
46 ppc_init_func(struct ppc_function
*p
)
51 p
->max_inst
= 100; /* first guess at buffer size */
52 p
->store
= rtasm_exec_malloc(p
->max_inst
* PPC_INST_SIZE
);
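57 /* only allow using gp registers 3..11 for now: the loops below reserve 0..2 and 12..PPC_NUM_REGS-1. NOTE(review): if r12 was meant to be usable, the second loop should start at 13. */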
58 for (i
= 0; i
< 3; i
++)
59 ppc_reserve_register(p
, i
);
60 for (i
= 12; i
< PPC_NUM_REGS
; i
++)
61 ppc_reserve_register(p
, i
);
66 ppc_release_func(struct ppc_function
*p
)
68 assert(p
->num_inst
<= p
->max_inst
);
69 if (p
->store
!= NULL
) {
70 rtasm_exec_free(p
->store
);
77 ppc_num_instructions(const struct ppc_function
*p
)
83 void (*ppc_get_func(struct ppc_function
*p
))(void)
87 if (DISASSEM
&& p
->store
)
88 debug_printf("disassemble %p %p\n", p
->store
, p
->csr
);
90 if (p
->store
== p
->error_overflow
)
91 return (void (*)(void)) NULL
;
94 return (void (*)(void)) p
->store
;
99 ppc_dump_func(const struct ppc_function
*p
)
102 for (i
= 0; i
< p
->num_inst
; i
++) {
103 debug_printf("%3u: 0x%08x\n", i
, p
->store
[i
]);
109 * Mark a register as being unavailable.
112 ppc_reserve_register(struct ppc_function
*p
, int reg
)
114 assert(reg
< PPC_NUM_REGS
);
115 p
->reg_used
|= (1 << reg
);
121 * Allocate a general purpose register.
122 * \return register index or -1 if none left.
125 ppc_allocate_register(struct ppc_function
*p
)
128 for (i
= 0; i
< PPC_NUM_REGS
; i
++) {
129 const uint64_t mask
= 1 << i
;
130 if ((p
->reg_used
& mask
) == 0) {
140 * Mark the given general purpose register as "unallocated".
143 ppc_release_register(struct ppc_function
*p
, int reg
)
145 assert(reg
< PPC_NUM_REGS
);
146 assert(p
->reg_used
& (1 << reg
));
147 p
->reg_used
&= ~(1 << reg
);
152 * Allocate a floating point register.
153 * \return register index or -1 if none left.
156 ppc_allocate_fp_register(struct ppc_function
*p
)
159 for (i
= 0; i
< PPC_NUM_FP_REGS
; i
++) {
160 const uint64_t mask
= 1 << i
;
161 if ((p
->fp_used
& mask
) == 0) {
171 * Mark the given floating point register as "unallocated".
174 ppc_release_fp_register(struct ppc_function
*p
, int reg
)
176 assert(reg
< PPC_NUM_FP_REGS
);
177 assert(p
->fp_used
& (1 << reg
));
178 p
->fp_used
&= ~(1 << reg
);
183 * Allocate a vector register.
184 * \return register index or -1 if none left.
187 ppc_allocate_vec_register(struct ppc_function
*p
)
190 for (i
= 0; i
< PPC_NUM_VEC_REGS
; i
++) {
191 const uint64_t mask
= 1 << i
;
192 if ((p
->vec_used
& mask
) == 0) {
202 * Mark the given vector register as "unallocated".
205 ppc_release_vec_register(struct ppc_function
*p
, int reg
)
207 assert(reg
< PPC_NUM_VEC_REGS
);
208 assert(p
->vec_used
& (1 << reg
));
209 p
->vec_used
&= ~(1 << reg
);
214 * Append instruction to instruction buffer. Grow buffer if out of room.
217 emit_instruction(struct ppc_function
*p
, uint32_t inst_bits
)
220 return; /* out of memory, drop the instruction */
222 if (p
->num_inst
== p
->max_inst
) {
223 /* allocate larger buffer */
225 p
->max_inst
*= 2; /* 2x larger */
226 newbuf
= rtasm_exec_malloc(p
->max_inst
* PPC_INST_SIZE
);
228 memcpy(newbuf
, p
->store
, p
->num_inst
* PPC_INST_SIZE
);
230 rtasm_exec_free(p
->store
);
239 p
->store
[p
->num_inst
++] = inst_bits
;
255 emit_vx(struct ppc_function
*p
, uint op2
, uint vD
, uint vA
, uint vB
)
263 emit_instruction(p
, inst
.bits
);
280 emit_vxr(struct ppc_function
*p
, uint op2
, uint vD
, uint vA
, uint vB
)
289 emit_instruction(p
, inst
.bits
);
306 emit_va(struct ppc_function
*p
, uint op2
, uint vD
, uint vA
, uint vB
, uint vC
)
315 emit_instruction(p
, inst
.bits
);
330 emit_i(struct ppc_function
*p
, uint op
, uint li
, uint aa
, uint lk
)
337 emit_instruction(p
, inst
.bits
);
355 emit_xl(struct ppc_function
*p
, uint op
, uint bo
, uint bi
, uint bh
,
362 inst
.inst
.unused
= 0x0;
366 emit_instruction(p
, inst
.bits
);
370 dump_xl(const char *name
, uint inst
)
375 debug_printf("%s = 0x%08x\n", name
, inst
);
376 debug_printf(" op: %d 0x%x\n", i
.inst
.op
, i
.inst
.op
);
377 debug_printf(" bo: %d 0x%x\n", i
.inst
.bo
, i
.inst
.bo
);
378 debug_printf(" bi: %d 0x%x\n", i
.inst
.bi
, i
.inst
.bi
);
379 debug_printf(" unused: %d 0x%x\n", i
.inst
.unused
, i
.inst
.unused
);
380 debug_printf(" bh: %d 0x%x\n", i
.inst
.bh
, i
.inst
.bh
);
381 debug_printf(" op2: %d 0x%x\n", i
.inst
.op2
, i
.inst
.op2
);
382 debug_printf(" lk: %d 0x%x\n", i
.inst
.lk
, i
.inst
.lk
);
399 emit_x(struct ppc_function
*p
, uint op
, uint vrs
, uint ra
, uint rb
, uint op2
)
407 inst
.inst
.unused
= 0x0;
408 emit_instruction(p
, inst
.bits
);
423 emit_d(struct ppc_function
*p
, uint op
, uint rt
, uint ra
, int si
)
426 assert(si
>= -32768);
431 inst
.inst
.si
= (unsigned) (si
& 0xffff);
432 emit_instruction(p
, inst
.bits
);
450 emit_a(struct ppc_function
*p
, uint op
, uint frt
, uint fra
, uint frb
, uint op2
,
458 inst
.inst
.unused
= 0x0;
461 emit_instruction(p
, inst
.bits
);
479 emit_xo(struct ppc_function
*p
, uint op
, uint rt
, uint ra
, uint rb
, uint oe
,
490 emit_instruction(p
, inst
.bits
);
498 ** float vector arithmetic
501 /** vector float add */
503 ppc_vaddfp(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
505 emit_vx(p
, 10, vD
, vA
, vB
);
508 /** vector float subtract */
510 ppc_vsubfp(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
512 emit_vx(p
, 74, vD
, vA
, vB
);
515 /** vector float min */
517 ppc_vminfp(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
519 emit_vx(p
, 1098, vD
, vA
, vB
);
522 /** vector float max */
524 ppc_vmaxfp(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
526 emit_vx(p
, 1034, vD
, vA
, vB
);
529 /** vector float mult add: vD = vA * vB + vC */
531 ppc_vmaddfp(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
, uint vC
)
533 emit_va(p
, 46, vD
, vA
, vC
, vB
); /* note arg order */
536 /** vector float negative mult subtract: vD = vA - vB * vC */
538 ppc_vnmsubfp(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
, uint vC
)
540 emit_va(p
, 47, vD
, vB
, vA
, vC
); /* note arg order */
543 /** vector float compare greater than */
545 ppc_vcmpgtfpx(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
547 emit_vxr(p
, 710, vD
, vA
, vB
);
550 /** vector float compare greater than or equal to */
552 ppc_vcmpgefpx(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
554 emit_vxr(p
, 454, vD
, vA
, vB
);
557 /** vector float compare equal */
559 ppc_vcmpeqfpx(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
561 emit_vxr(p
, 198, vD
, vA
, vB
);
564 /** vector float 2^x */
566 ppc_vexptefp(struct ppc_function
*p
, uint vD
, uint vB
)
568 emit_vx(p
, 394, vD
, 0, vB
);
571 /** vector float log2(x) */
573 ppc_vlogefp(struct ppc_function
*p
, uint vD
, uint vB
)
575 emit_vx(p
, 458, vD
, 0, vB
);
578 /** vector float reciprocal estimate */
580 ppc_vrefp(struct ppc_function
*p
, uint vD
, uint vB
)
582 emit_vx(p
, 266, vD
, 0, vB
);
585 /** vector float reciprocal sqrt estimate */
587 ppc_vrsqrtefp(struct ppc_function
*p
, uint vD
, uint vB
)
589 emit_vx(p
, 330, vD
, 0, vB
);
592 /** vector float round to negative infinity */
594 ppc_vrfim(struct ppc_function
*p
, uint vD
, uint vB
)
596 emit_vx(p
, 714, vD
, 0, vB
);
599 /** vector float round to positive infinity */
601 ppc_vrfip(struct ppc_function
*p
, uint vD
, uint vB
)
603 emit_vx(p
, 650, vD
, 0, vB
);
606 /** vector float round to nearest int */
608 ppc_vrfin(struct ppc_function
*p
, uint vD
, uint vB
)
610 emit_vx(p
, 522, vD
, 0, vB
);
613 /** vector float round to int toward zero */
615 ppc_vrfiz(struct ppc_function
*p
, uint vD
, uint vB
)
617 emit_vx(p
, 586, vD
, 0, vB
);
620 /** vector store: store vR at mem[vA+vB] */
622 ppc_stvx(struct ppc_function
*p
, uint vR
, uint vA
, uint vB
)
624 emit_x(p
, 31, vR
, vA
, vB
, 231);
627 /** vector load: vR = mem[vA+vB] */
629 ppc_lvx(struct ppc_function
*p
, uint vR
, uint vA
, uint vB
)
631 emit_x(p
, 31, vR
, vA
, vB
, 103);
634 /** load vector element word: vR = mem_word[ra+rb] */
636 ppc_lvewx(struct ppc_function
*p
, uint vr
, uint ra
, uint rb
)
638 emit_x(p
, 31, vr
, ra
, rb
, 71);
645 ** vector bitwise operations
650 ppc_vand(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
652 emit_vx(p
, 1028, vD
, vA
, vB
);
655 /** vector and complement */
657 ppc_vandc(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
659 emit_vx(p
, 1092, vD
, vA
, vB
);
664 ppc_vor(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
666 emit_vx(p
, 1156, vD
, vA
, vB
);
671 ppc_vnor(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
673 emit_vx(p
, 1284, vD
, vA
, vB
);
678 ppc_vxor(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
680 emit_vx(p
, 1220, vD
, vA
, vB
);
683 /** Pseudo-instruction: vector move */
685 ppc_vmove(struct ppc_function
*p
, uint vD
, uint vA
)
687 ppc_vor(p
, vD
, vA
, vA
);
690 /** Set vector register to {0,0,0,0} */
692 ppc_vzero(struct ppc_function
*p
, uint vr
)
694 ppc_vxor(p
, vr
, vr
, vr
);
701 ** Vector shuffle / select / splat / etc
704 /** vector permute */
706 ppc_vperm(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
, uint vC
)
708 emit_va(p
, 43, vD
, vA
, vB
, vC
);
713 ppc_vsel(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
, uint vC
)
715 emit_va(p
, 42, vD
, vA
, vB
, vC
);
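718 /** vector splat byte. NOTE(review): the call below passes opcode 42, the same value ppc_vsel uses with emit_va; the Power ISA lists VX extended opcode 524 for vspltb -- confirm. */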
720 ppc_vspltb(struct ppc_function
*p
, uint vD
, uint vB
, uint imm
)
722 emit_vx(p
, 42, vD
, imm
, vB
);
725 /** vector splat half word */
727 ppc_vsplthw(struct ppc_function
*p
, uint vD
, uint vB
, uint imm
)
729 emit_vx(p
, 588, vD
, imm
, vB
);
732 /** vector splat word */
734 ppc_vspltw(struct ppc_function
*p
, uint vD
, uint vB
, uint imm
)
736 emit_vx(p
, 652, vD
, imm
, vB
);
739 /** vector splat signed immediate word */
741 ppc_vspltisw(struct ppc_function
*p
, uint vD
, int imm
)
745 emit_vx(p
, 908, vD
, imm
, 0);
748 /** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */
750 ppc_vslw(struct ppc_function
*p
, uint vD
, uint vA
, uint vB
)
752 emit_vx(p
, 388, vD
, vA
, vB
);
759 ** integer arithmetic
764 ppc_addi(struct ppc_function
*p
, uint rt
, uint ra
, int imm
)
766 emit_d(p
, 14, rt
, ra
, imm
);
769 /** rt = ra + (imm << 16) */
771 ppc_addis(struct ppc_function
*p
, uint rt
, uint ra
, int imm
)
773 emit_d(p
, 15, rt
, ra
, imm
);
778 ppc_add(struct ppc_function
*p
, uint rt
, uint ra
, uint rb
)
780 emit_xo(p
, 31, rt
, ra
, rb
, 0, 266, 0);
783 /** rt = ra AND rb */
785 ppc_and(struct ppc_function
*p
, uint rt
, uint ra
, uint rb
)
787 emit_x(p
, 31, ra
, rt
, rb
, 28); /* note argument order */
790 /** rt = ra AND imm */
792 ppc_andi(struct ppc_function
*p
, uint rt
, uint ra
, int imm
)
794 emit_d(p
, 28, ra
, rt
, imm
); /* note argument order */
799 ppc_or(struct ppc_function
*p
, uint rt
, uint ra
, uint rb
)
801 emit_x(p
, 31, ra
, rt
, rb
, 444); /* note argument order */
804 /** rt = ra OR imm */
806 ppc_ori(struct ppc_function
*p
, uint rt
, uint ra
, int imm
)
808 emit_d(p
, 24, ra
, rt
, imm
); /* note argument order */
811 /** rt = ra XOR rb */
813 ppc_xor(struct ppc_function
*p
, uint rt
, uint ra
, uint rb
)
815 emit_x(p
, 31, ra
, rt
, rb
, 316); /* note argument order */
818 /** rt = ra XOR imm */
820 ppc_xori(struct ppc_function
*p
, uint rt
, uint ra
, int imm
)
822 emit_d(p
, 26, ra
, rt
, imm
); /* note argument order */
825 /** pseudo instruction: move: rt = ra */
827 ppc_mr(struct ppc_function
*p
, uint rt
, uint ra
)
829 ppc_or(p
, rt
, ra
, ra
);
832 /** pseudo instruction: load immediate: rt = imm */
834 ppc_li(struct ppc_function
*p
, uint rt
, int imm
)
836 ppc_addi(p
, rt
, 0, imm
);
839 /** rt = imm << 16 */
841 ppc_lis(struct ppc_function
*p
, uint rt
, int imm
)
843 ppc_addis(p
, rt
, 0, imm
);
848 ppc_load_int(struct ppc_function
*p
, uint rt
, int imm
)
850 ppc_lis(p
, rt
, (imm
>> 16)); /* rt = imm >> 16 */
851 ppc_ori(p
, rt
, rt
, (imm
& 0xffff)); /* rt = rt | (imm & 0xffff) */
858 ** integer load/store
861 /** store rs at memory[(ra)+d],
862 * then update ra = (ra)+d
865 ppc_stwu(struct ppc_function
*p
, uint rs
, uint ra
, int d
)
867 emit_d(p
, 37, rs
, ra
, d
);
870 /** store rs at memory[(ra)+d] */
872 ppc_stw(struct ppc_function
*p
, uint rs
, uint ra
, int d
)
874 emit_d(p
, 36, rs
, ra
, d
);
877 /** Load rt = mem[(ra)+d]; then set the high 32 bits to zero. */
879 ppc_lwz(struct ppc_function
*p
, uint rt
, uint ra
, int d
)
881 emit_d(p
, 32, rt
, ra
, d
);
887 ** Float (non-vector) arithmetic
890 /** add: frt = fra + frb */
892 ppc_fadd(struct ppc_function
*p
, uint frt
, uint fra
, uint frb
)
894 emit_a(p
, 63, frt
, fra
, frb
, 21, 0);
897 /** sub: frt = fra - frb */
899 ppc_fsub(struct ppc_function
*p
, uint frt
, uint fra
, uint frb
)
901 emit_a(p
, 63, frt
, fra
, frb
, 20, 0);
904 /** convert to int: rt = (int) fra */
906 ppc_fctiwz(struct ppc_function
*p
, uint rt
, uint fra
)
908 emit_x(p
, 63, rt
, 0, fra
, 15);
911 /** store frs at mem[(ra)+offset] */
913 ppc_stfs(struct ppc_function
*p
, uint frs
, uint ra
, int offset
)
915 emit_d(p
, 52, frs
, ra
, offset
);
918 /** store frs at mem[(ra)+(rb)] */
920 ppc_stfiwx(struct ppc_function
*p
, uint frs
, uint ra
, uint rb
)
922 emit_x(p
, 31, frs
, ra
, rb
, 983);
925 /** load frt = mem[(ra)+offset] */
927 ppc_lfs(struct ppc_function
*p
, uint frt
, uint ra
, int offset
)
929 emit_d(p
, 48, frt
, ra
, offset
);
937 ** branch instructions
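940 /** BLR: Branch to link register (p. 35). NOTE(review): the call below emits an I-form opcode-18 branch with lk=1 rather than the XL-form bclr that ppc_return() uses -- confirm this encodes the intended instruction. */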
942 ppc_blr(struct ppc_function
*p
)
944 emit_i(p
, 18, 0, 0, 1);
947 /** Branch Conditional to Link Register (p. 36) */
949 ppc_bclr(struct ppc_function
*p
, uint condOp
, uint branchHint
, uint condReg
)
951 emit_xl(p
, 19, condOp
, condReg
, branchHint
, 16, 0);
954 /** Pseudo instruction: return from subroutine */
956 ppc_return(struct ppc_function
*p
)
958 ppc_bclr(p
, BRANCH_COND_ALWAYS
, BRANCH_HINT_SUB_RETURN
, 0);