2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "nvc0_program.h"
/* Fixup kinds: nvc0_relocate_program adds code_base to the entry's data for
 * CODE_RELOC entries and data_base for DATA_RELOC entries. */
26 #define NVC0_FIXUP_CODE_RELOC 0
27 #define NVC0_FIXUP_DATA_RELOC 1
/* Applies the fixups stored in prog->relocs to prog->code.
 * For each of the prog->num_relocs entries, a value is built from the
 * entry's data plus code_base or data_base (selected by the fixup kind),
 * shifted by the entry's shift, and merged into the 32-bit code word at
 * index ofst / 4 under the entry's mask.
 * NOTE(review): the remaining parameters and the switch header are not
 * visible in this view. */
38 nvc0_relocate_program(struct nvc0_program
*prog
,
42 struct nvc0_fixup
*f
= (struct nvc0_fixup
*)prog
->relocs
;
45 for (i
= 0; i
< prog
->num_relocs
; ++i
) {
49 case NVC0_FIXUP_CODE_RELOC
: data
= code_base
+ f
[i
].data
; break;
50 case NVC0_FIXUP_DATA_RELOC
: data
= data_base
+ f
[i
].data
; break;
/* a negative shift means "shift right by its magnitude" */
55 data
= (f
[i
].shift
< 0) ? (data
>> -f
[i
].shift
) : (data
<< f
[i
].shift
);
/* clear the masked bits of the code word, then insert the relocated value */
57 prog
->code
[f
[i
].ofst
/ 4] &= ~f
[i
].mask
;
58 prog
->code
[f
[i
].ofst
/ 4] |= data
& f
[i
].mask
;
/* Records a fixup entry at index pc->num_relocs of pc->reloc_entries.
 * The entry's ofst is the byte offset of code word (pc->emit_pos + w).
 * NOTE(review): ty, data, m and s are presumably stored into the entry and
 * num_relocs incremented by statements not visible here — confirm. */
63 create_fixup(struct nv_pc
*pc
, uint8_t ty
,
64 int w
, uint32_t data
, uint32_t m
, int s
)
68 const unsigned size
= sizeof(struct nvc0_fixup
);
69 const unsigned n
= pc
->num_relocs
;
/* grows the array from n to n + 8 entries.
 * NOTE(review): the REALLOC result is used below without a visible NULL
 * check, and any condition guarding this growth is not visible here. */
72 pc
->reloc_entries
= REALLOC(pc
->reloc_entries
, n
* size
, (n
+ 8) * size
);
74 f
= (struct nvc0_fixup
*)pc
->reloc_entries
;
/* emit_pos and w are multiplied by 4 to form a byte offset */
76 f
[n
].ofst
= (pc
->emit_pos
+ w
) * 4;
/* Returns the register size of the value referenced by source s of nvi. */
86 SSIZE(struct nv_instruction
*nvi
, int s
)
88 return nvi
->src
[s
]->value
->reg
.size
;
/* Returns the register size of destination d of nvi. */
92 DSIZE(struct nv_instruction
*nvi
, int d
)
94 return nvi
->def
[d
]->reg
.size
;
/* Returns a pointer to the register of the value that ref's value is joined
 * to (ref->value->join->reg).
 * NOTE(review): SID compares this result against NULL, so a NULL ref is
 * presumably handled on lines not visible here — confirm. */
97 static INLINE
struct nv_reg
*
98 SREG(struct nv_ref
*ref
)
102 return &ref
->value
->join
->reg
;
/* Returns a pointer to the register of the value that val is joined to
 * (val->join->reg).
 * NOTE(review): DID compares this result against NULL, so a NULL val is
 * presumably handled on lines not visible here — confirm. */
105 static INLINE
struct nv_reg
*
106 DREG(struct nv_value
*val
)
110 return &val
->join
->reg
;
/* Returns the register file of the value referenced by source s of nvi. */
114 SFILE(struct nv_instruction
*nvi
, int s
)
116 return nvi
->src
[s
]->value
->reg
.file
;
/* Returns the register file of destination d of nvi.
 * The index d is honoured (previously def[0] was always read and d was
 * ignored); callers passing 0 see no change. */
120 DFILE(struct nv_instruction
*nvi
, int d
)
122 return nvi
->def
[d]->reg
.file
;
/* ORs a source register id into the instruction being emitted.
 * The id is SREG(ref)->id, or 63 when SREG(ref) is NULL; it is placed in
 * word pos / 32 of pc->emit at bit offset pos % 32. */
126 SID(struct nv_pc
*pc
, struct nv_ref
*ref
, int pos
)
128 pc
->emit
[pos
/ 32] |= (SREG(ref
) ? SREG(ref
)->id
: 63) << (pos
% 32);
/* ORs a destination register id into the instruction being emitted.
 * The id is DREG(val)->id, or 63 when DREG(val) is NULL; it is placed in
 * word pos / 32 of pc->emit at bit offset pos % 32. */
132 DID(struct nv_pc
*pc
, struct nv_value
*val
, int pos
)
134 pc
->emit
[pos
/ 32] |= (DREG(val
) ? DREG(val
)->id
: 63) << (pos
% 32);
/* Returns the 32-bit immediate stored in ref's value.
 * Asserts that the value lives in NV_FILE_IMM. */
137 static INLINE
uint32_t
138 get_immd_u32(struct nv_ref
*ref
) /* XXX: dependent on [0]:2 */
140 assert(ref
->value
->reg
.file
== NV_FILE_IMM
);
141 return ref
->value
->reg
.imm
.u32
;
/* ORs u32 into the instruction words: its low 6 bits go to bits 26..31 of
 * emit[0], the remaining bits (u32 >> 6) go to emit[1] starting at bit 0. */
145 set_immd_u32_l(struct nv_pc
*pc
, uint32_t u32
)
147 pc
->emit
[0] |= (u32
& 0x3f) << 26;
148 pc
->emit
[1] |= u32
>> 6;
/* Encodes immediate u32 depending on the low 4 bits of emit[0]:
 *  - 0x2: the whole value is written through set_immd_u32_l;
 *  - 0x3: bits 0xc000 of emit[1] must be clear and are then set, and u32
 *    must have no bits in 0xfff00000;
 *  - last path: bits 0xc000 of emit[1] must be clear and are then set, the
 *    low 12 bits of u32 must be zero, and u32 >> 12 is written.
 * NOTE(review): the else/brace structure joining these paths is not visible
 * in this view. */
152 set_immd_u32(struct nv_pc
*pc
, uint32_t u32
)
154 if ((pc
->emit
[0] & 0xf) == 0x2) {
155 set_immd_u32_l(pc
, u32
);
157 if ((pc
->emit
[0] & 0xf) == 0x3) {
158 assert(!(pc
->emit
[1] & 0xc000));
159 pc
->emit
[1] |= 0xc000;
160 assert(!(u32
& 0xfff00000));
161 set_immd_u32_l(pc
, u32
);
163 assert(!(pc
->emit
[1] & 0xc000));
164 pc
->emit
[1] |= 0xc000;
165 assert(!(u32
& 0xfff));
166 set_immd_u32_l(pc
, u32
>> 12);
/* Encodes the immediate held by source s of i: reads it with get_immd_u32
 * and writes it with set_immd_u32. */
171 set_immd(struct nv_pc
*pc
, struct nv_instruction
*i
, int s
)
173 set_immd_u32(pc
, get_immd_u32(i
->src
[s
]));
/* Sums the register sizes of def[0] and of the following non-NULL defs
 * (indices 1..3, stopping at the first NULL), then ORs (total / 4 - 1)
 * into emit[0] shifted left by 5. */
177 DVS(struct nv_pc
*pc
, struct nv_instruction
*i
)
179 uint s
= i
->def
[0]->reg
.size
;
181 for (n
= 1; n
< 4 && i
->def
[n
]; ++n
)
182 s
+= i
->def
[n
]->reg
.size
;
183 pc
->emit
[0] |= ((s
/ 4) - 1) << 5;
/* ORs (size of SREG(src) / 4 - 1) into emit[0] shifted left by 5. */
187 SVS(struct nv_pc
*pc
, struct nv_ref
*src
)
189 pc
->emit
[0] |= (SREG(src
)->size
/ 4 - 1) << 5;
/* Encodes the instruction's predicate.
 * When i->predicate >= 0, the id of source i->src[i->predicate] is placed
 * at bit 6 and 0x2000 may be set to negate it.
 * NOTE(review): the condition guarding the negate bit is not visible here,
 * and the 0x1c00 store presumably belongs to the unpredicated (else) path —
 * confirm. */
193 set_pred(struct nv_pc
*pc
, struct nv_instruction
*i
)
195 if (i
->predicate
>= 0) {
196 SID(pc
, i
->src
[i
->predicate
], 6);
198 pc
->emit
[0] |= 0x2000; /* negate */
200 pc
->emit
[0] |= 0x1c00;
/* Encodes the 16-bit address of src's value: address bits 0..5 go to bits
 * 26..31 of emit[0], address bits 6..15 go to bits 0..9 of emit[1]. */
205 set_address_16(struct nv_pc
*pc
, struct nv_ref
*src
)
207 pc
->emit
[0] |= (src
->value
->reg
.address
& 0x003f) << 26;
208 pc
->emit
[1] |= (src
->value
->reg
.address
& 0xffc0) >> 6;
/* Returns the register file of source s of i relative to NV_FILE_MEM_C(0),
 * i.e. SFILE(i, s) - NV_FILE_MEM_C(0). */
211 static INLINE
unsigned
212 const_space_index(struct nv_instruction
*i
, int s
)
214 return SFILE(i
, s
) - NV_FILE_MEM_C(0);
/* Emits a flow instruction: emit[0] = 0x000001e7 and op in the top byte of
 * emit[1]. The position i->target->emit_pos is recorded as two code fixups
 * (one per instruction word) and also encoded directly: its low 6 bits at
 * bit 26 of emit[0], the next 17 bits in emit[1].
 * NOTE(review): the conditions selecting these paths are not visible here. */
218 emit_flow(struct nv_pc
*pc
, struct nv_instruction
*i
, uint8_t op
)
220 pc
->emit
[0] = 0x000001e7;
221 pc
->emit
[1] = op
<< 24;
226 uint32_t pos
= i
->target
->emit_pos
;
/* NOTE(review): create_fixup declares its last two parameters as
 * (uint32_t m, int s), but the values passed here look like (shift, mask) —
 * confirm the argument order against create_fixup's body. */
228 create_fixup(pc
, NVC0_FIXUP_CODE_RELOC
, 0, pos
, 26, 0xfc000000);
229 create_fixup(pc
, NVC0_FIXUP_CODE_RELOC
, 1, pos
, -6, 0x0001ffff);
231 pc
->emit
[0] |= (pos
& 0x3f) << 26;
232 pc
->emit
[1] |= (pos
>> 6) & 0x1ffff;
236 /* doesn't work for vfetch, export, ld, st, mov ... */
/* Encodes def[0] at bit 14 and up to three sources (stopping at the first
 * NULL source):
 *  - sources in NV_FILE_MEM_C(0)..NV_FILE_MEM_C(15) require bits 0xc000 of
 *    emit[1] to be clear, then set 0x4000 plus (const space index << 10) in
 *    emit[1] and encode a 16-bit address;
 *  - NV_FILE_GPR sources are placed at bit 20 (s == 0), 26 (s == 1) or
 *    49 (s == 2);
 *  - NV_FILE_IMM sources require bits 0xc000 of emit[1] to be clear and
 *    either s == 1 or opcode NV_OP_MOV.
 * NOTE(review): the else structure between these cases and the immediate
 * store itself are not visible in this view. */
238 emit_form_0(struct nv_pc
*pc
, struct nv_instruction
*i
)
244 DID(pc
, i
->def
[0], 14);
246 for (s
= 0; s
< 3 && i
->src
[s
]; ++s
) {
247 if (SFILE(i
, s
) >= NV_FILE_MEM_C(0) &&
248 SFILE(i
, s
) <= NV_FILE_MEM_C(15)) {
249 assert(!(pc
->emit
[1] & 0xc000));
251 pc
->emit
[1] |= 0x4000 | (const_space_index(i
, s
) << 10);
252 set_address_16(pc
, i
->src
[s
]);
254 if (SFILE(i
, s
) == NV_FILE_GPR
) {
255 SID(pc
, i
->src
[s
], s
? ((s
== 2) ? 49 : 26) : 20);
257 if (SFILE(i
, s
) == NV_FILE_IMM
) {
258 assert(!(pc
->emit
[1] & 0xc000));
259 assert(s
== 1 || i
->opcode
== NV_OP_MOV
);
/* Encodes def[0] at bit 14 and at most one source (src[0], if non-NULL):
 *  - a source in NV_FILE_MEM_C(0)..NV_FILE_MEM_C(15) requires bits 0xc000
 *    of emit[1] to be clear, then sets 0x4000 plus (const space index << 10)
 *    in emit[1] and encodes a 16-bit address;
 *  - an NV_FILE_GPR source is placed at bit 26;
 *  - an NV_FILE_IMM source requires bits 0xc000 of emit[1] to be clear.
 * NOTE(review): the else structure between these cases and the immediate
 * store itself are not visible in this view. */
266 emit_form_1(struct nv_pc
*pc
, struct nv_instruction
*i
)
272 DID(pc
, i
->def
[0], 14);
274 for (s
= 0; s
< 1 && i
->src
[s
]; ++s
) {
275 if (SFILE(i
, s
) >= NV_FILE_MEM_C(0) &&
276 SFILE(i
, s
) <= NV_FILE_MEM_C(15)) {
277 assert(!(pc
->emit
[1] & 0xc000));
279 pc
->emit
[1] |= 0x4000 | (const_space_index(i
, s
) << 10);
280 set_address_16(pc
, i
->src
[s
]);
282 if (SFILE(i
, s
) == NV_FILE_GPR
) {
283 SID(pc
, i
->src
[s
], 26);
285 if (SFILE(i
, s
) == NV_FILE_IMM
) {
286 assert(!(pc
->emit
[1] & 0xc000));
/* NOTE(review): with the loop bound s < 1, `s == 1` can never hold here, so
 * this only passes for NV_OP_MOV — confirm intent. */
287 assert(s
== 1 || i
->opcode
== NV_OP_MOV
);
/* Sets source-modifier bits in emit[0] for the first two sources:
 * src[0] ABS -> bit 7, src[0] NEG -> bit 9,
 * src[1] ABS -> bit 6, src[1] NEG -> bit 8. */
294 emit_neg_abs_1_2(struct nv_pc
*pc
, struct nv_instruction
*i
)
296 if (i
->src
[0]->mod
& NV_MOD_ABS
)
297 pc
->emit
[0] |= 1 << 7;
298 if (i
->src
[0]->mod
& NV_MOD_NEG
)
299 pc
->emit
[0] |= 1 << 9;
300 if (i
->src
[1]->mod
& NV_MOD_ABS
)
301 pc
->emit
[0] |= 1 << 6;
302 if (i
->src
[1]->mod
& NV_MOD_NEG
)
303 pc
->emit
[0] |= 1 << 8;
/* Starts the instruction words at 0x00000000 / 0x50000000 and applies the
 * source modifiers through emit_neg_abs_1_2.
 * NOTE(review): bit 17 of emit[1] is set under a condition not visible
 * here, and operand encoding is presumably done on hidden lines — confirm. */
307 emit_add_f32(struct nv_pc
*pc
, struct nv_instruction
*i
)
309 pc
->emit
[0] = 0x00000000;
310 pc
->emit
[1] = 0x50000000;
314 emit_neg_abs_1_2(pc
, i
);
317 pc
->emit
[1] |= 1 << 17;
/* Starts the instruction words at 0x00000000 / 0x58000000.
 * Bit 25 of emit[1] is set when exactly one of src[0] / src[1] carries
 * NV_MOD_NEG (the XOR of the two modifier sets has the NEG bit).
 * NOTE(review): bit 5 of emit[0] is set under a condition not visible here. */
321 emit_mul_f32(struct nv_pc
*pc
, struct nv_instruction
*i
)
323 pc
->emit
[0] = 0x00000000;
324 pc
->emit
[1] = 0x58000000;
328 if ((i
->src
[0]->mod
^ i
->src
[1]->mod
) & NV_MOD_NEG
)
329 pc
->emit
[1] |= 1 << 25;
332 pc
->emit
[0] |= 1 << 5;
/* Starts the instruction words at 0x00000000 / 0x30000000.
 * Bit 9 of emit[0] is set when exactly one of src[0] / src[1] carries
 * NV_MOD_NEG; bit 8 is set when src[2] carries NV_MOD_NEG.
 * NOTE(review): bit 5 of emit[0] is set under a condition not visible here. */
336 emit_mad_f32(struct nv_pc
*pc
, struct nv_instruction
*i
)
338 pc
->emit
[0] = 0x00000000;
339 pc
->emit
[1] = 0x30000000;
343 if ((i
->src
[0]->mod
^ i
->src
[1]->mod
) & NV_MOD_NEG
)
344 pc
->emit
[0] |= 1 << 9;
346 if (i
->src
[2]->mod
& NV_MOD_NEG
)
347 pc
->emit
[0] |= 1 << 8;
350 pc
->emit
[0] |= 1 << 5;
/* Starts the instruction words at 0x00000000 / 0x08000000.
 * When the base opcode is NV_OP_MAX, 0x001e0000 is ORed into emit[1];
 * the 0x000e0000 store presumably belongs to the other (else) path — confirm.
 * Source modifiers are applied through emit_neg_abs_1_2.
 * NOTE(review): the final emit[0] |= 3 | (1 << 5) is guarded by logic not
 * visible here. */
354 emit_minmax(struct nv_pc
*pc
, struct nv_instruction
*i
)
356 pc
->emit
[0] = 0x00000000;
357 pc
->emit
[1] = 0x08000000;
359 if (NV_BASEOP(i
->opcode
) == NV_OP_MAX
)
360 pc
->emit
[1] |= 0x001e0000;
362 pc
->emit
[1] |= 0x000e0000; /* predicate ? */
366 emit_neg_abs_1_2(pc
, i
);
375 pc
->emit
[0] |= 3 | (1 << 5);
/* Starts the instruction words at 0x00000086 / 0x80000000.
 * NV_OP_TXB adds 0x04000000 and NV_OP_TXL adds 0x06000000 to emit[1].
 * def[0] is encoded at bit 14 and src[0] at bit 20; emit[1] receives
 * tex_mask << 14, (tex_argc - 1) << 20, ext.tex.t in the low bits and
 * ext.tex.s << 8 (asserted to be below 16).
 * NOTE(review): the conditions guarding the 63 << 26 and 1 << 9 stores are
 * not visible here. */
385 emit_tex(struct nv_pc
*pc
, struct nv_instruction
*i
)
387 pc
->emit
[0] = 0x00000086;
388 pc
->emit
[1] = 0x80000000;
390 if (i
->opcode
== NV_OP_TXB
) pc
->emit
[1] |= 0x04000000;
392 if (i
->opcode
== NV_OP_TXL
) pc
->emit
[1] |= 0x06000000;
397 pc
->emit
[0] |= 63 << 26; /* explicit derivatives */
399 DID(pc
, i
->def
[0], 14);
400 SID(pc
, i
->src
[0], 20);
402 pc
->emit
[1] |= i
->tex_mask
<< 14;
403 pc
->emit
[1] |= (i
->tex_argc
- 1) << 20;
405 assert(i
->ext
.tex
.s
< 16);
407 pc
->emit
[1] |= i
->ext
.tex
.t
;
408 pc
->emit
[1] |= i
->ext
.tex
.s
<< 8;
411 pc
->emit
[0] |= 1 << 9;
414 /* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */
/* Starts the instruction words at 0x00000000 / 0xc8000000, encodes def[0]
 * at bit 14 and src[0] at bit 20, and places op at bit 26 of emit[0].
 * A NEG modifier on src[0] sets bit 9 and an ABS modifier sets bit 7.
 * NOTE(review): the assert that src[0] has no modifiers presumably applies
 * only on a path selected by logic not visible here — confirm. */
416 emit_flop(struct nv_pc
*pc
, struct nv_instruction
*i
, ubyte op
)
418 pc
->emit
[0] = 0x00000000;
419 pc
->emit
[1] = 0xc8000000;
423 DID(pc
, i
->def
[0], 14);
424 SID(pc
, i
->src
[0], 20);
426 pc
->emit
[0] |= op
<< 26;
429 if (i
->src
[0]->mod
& NV_MOD_NEG
) pc
->emit
[0] |= 1 << 9;
430 if (i
->src
[0]->mod
& NV_MOD_ABS
) pc
->emit
[0] |= 1 << 7;
432 assert(!i
->src
[0]->mod
);
/* Starts the instruction words at 0x00000000 / 0x48000000, asserts that
 * sources 0 and 1 are both in NV_FILE_GPR, encodes def[0] at bit 14,
 * ORs i->lanes << 6 into emit[0] and i->quadop into emit[1]. */
437 emit_quadop(struct nv_pc
*pc
, struct nv_instruction
*i
)
439 pc
->emit
[0] = 0x00000000;
440 pc
->emit
[1] = 0x48000000;
444 assert(SFILE(i
, 0) == NV_FILE_GPR
&& SFILE(i
, 1) == NV_FILE_GPR
);
446 DID(pc
, i
->def
[0], 14);
/* NOTE(review): src[0] is encoded at both bit 20 and bit 26 although the
 * assert above also checks source 1 — confirm whether bit 26 should take
 * src[1]. */
447 SID(pc
, i
->src
[0], 20);
448 SID(pc
, i
->src
[0], 26);
450 pc
->emit
[0] |= i
->lanes
<< 6; /* l0, l1, l2, l3, dx, dy */
451 pc
->emit
[1] |= i
->quadop
;
/* NOTE(review): only the signature is visible in this view; presumably this
 * emits the x-derivative operation for i — confirm against the body. */
455 emit_ddx(struct nv_pc
*pc
, struct nv_instruction
*i
)
/* NOTE(review): only the signature is visible in this view; presumably this
 * emits the y-derivative operation for i — confirm against the body. */
463 emit_ddy(struct nv_pc
*pc
, struct nv_instruction
*i
)
470 /* preparation op (preex2, presin / convert to fixed point) */
/* Starts the instruction words at 0x00000000 / 0x60000000.
 * A NEG modifier on src[0] sets bit 8 of emit[0] and an ABS modifier sets
 * bit 6.
 * NOTE(review): the statement controlled by the NV_OP_PREEX2 test is not
 * visible here. */
472 emit_preop(struct nv_pc
*pc
, struct nv_instruction
*i
)
474 pc
->emit
[0] = 0x00000000;
475 pc
->emit
[1] = 0x60000000;
477 if (i
->opcode
== NV_OP_PREEX2
)
482 if (i
->src
[0]->mod
& NV_MOD_NEG
) pc
->emit
[0] |= 1 << 8;
483 if (i
->src
[0]->mod
& NV_MOD_ABS
) pc
->emit
[0] |= 1 << 6;
/* Starts emit[0] at 0x00000003; one path additionally sets 0x20 before
 * falling through, and emit[1] becomes 0x58000000 or 0x60000000.
 * NOTE(review): the switch/case labels selecting these paths are not
 * visible in this view. */
487 emit_shift(struct nv_pc
*pc
, struct nv_instruction
*i
)
489 pc
->emit
[0] = 0x00000003;
493 pc
->emit
[0] |= 0x20; /* fall through */
495 pc
->emit
[1] = 0x58000000;
499 pc
->emit
[1] = 0x60000000;
/* Selects the base instruction words: 0x00000002 / 0x38000000 when source 1
 * is an immediate; the 0x00000003 / 0x68000000 pair presumably belongs to
 * the other (else) path — confirm.
 * NOTE(review): the remainder of the body is not visible in this view. */
507 emit_bitop(struct nv_pc
*pc
, struct nv_instruction
*i
)
509 if (SFILE(i
, 1) == NV_FILE_IMM
) {
510 pc
->emit
[0] = 0x00000002;
511 pc
->emit
[1] = 0x38000000;
513 pc
->emit
[0] = 0x00000003;
514 pc
->emit
[1] = 0x68000000;
/* Starts emit[0] at 0 and picks emit[1] per opcode: NV_OP_SET_F32_AND ->
 * 0x18000000, NV_OP_SET_F32_OR -> 0x18200000, NV_OP_SET_F32_XOR ->
 * 0x18400000; other (hidden) cases use 0x100e0000 or 0x180e0000, some after
 * setting 0x20 in emit[0] and falling through.
 * When destination 0 is in NV_FILE_PRED, 0x1c000 is ORed into emit[0] and
 * 0x08000000 is added to emit[1]. i->set_cond is placed at bit 23 of
 * emit[1], and source modifiers are applied through emit_neg_abs_1_2.
 * NOTE(review): the switch header and several case labels are not visible
 * in this view. */
533 emit_set(struct nv_pc
*pc
, struct nv_instruction
*i
)
535 pc
->emit
[0] = 0x00000000;
539 pc
->emit
[0] |= 0x20; /* fall through */
542 pc
->emit
[1] = 0x100e0000;
544 case NV_OP_SET_F32_AND
:
545 pc
->emit
[1] = 0x18000000;
547 case NV_OP_SET_F32_OR
:
548 pc
->emit
[1] = 0x18200000;
550 case NV_OP_SET_F32_XOR
:
551 pc
->emit
[1] = 0x18400000;
554 pc
->emit
[0] |= 0x20; /* fall through */
557 pc
->emit
[1] = 0x180e0000;
561 if (DFILE(i
, 0) == NV_FILE_PRED
) {
562 pc
->emit
[0] |= 0x1c000;
563 pc
->emit
[1] += 0x08000000;
566 pc
->emit
[1] |= i
->set_cond
<< 23;
570 emit_neg_abs_1_2(pc
, i
); /* maybe assert that U/S32 don't use mods */
/* Starts the instruction words at 0x00000004 / 0x20000000 and sets bit 20
 * of emit[1] when i->cc is non-zero or src[2] carries NV_MOD_NOT.
 * NOTE(review): operand encoding is presumably done on lines not visible
 * here — confirm. */
574 emit_selp(struct nv_pc
*pc
, struct nv_instruction
*i
)
576 pc
->emit
[0] = 0x00000004;
577 pc
->emit
[1] = 0x20000000;
581 if (i
->cc
|| (i
->src
[2]->mod
& NV_MOD_NOT
))
582 pc
->emit
[1] |= 1 << 20;
/* Starts emit[0] at 0; one path sets 0x20 before falling through, and
 * emit[1] becomes 0x30000000 or 0x38000000. i->set_cond is then placed at
 * bit 23 of emit[1].
 * NOTE(review): the switch/case labels selecting these paths are not
 * visible in this view. */
586 emit_slct(struct nv_pc
*pc
, struct nv_instruction
*i
)
588 pc
->emit
[0] = 0x00000000;
592 pc
->emit
[0] |= 0x20; /* fall through */
595 pc
->emit
[1] = 0x30000000;
599 pc
->emit
[1] = 0x38000000;
605 pc
->emit
[1] |= i
->set_cond
<< 23;
/* Emits a conversion. The words start at 0x00000004 / 0x10000000.
 * For opcodes other than NV_OP_CVT, both i->ext.cvt.d and i->ext.cvt.s are
 * overwritten with NV_OPTYPE(i->opcode) (note: this modifies i).
 * Nested switches on the destination type and then the source type choose
 * emit[1]; an S32 source sets 0x200 in emit[0] and falls into the U32 case.
 * NV_OP_FLOOR / NV_OP_CEIL / NV_OP_TRUNC add 0x00020000 / 0x00040000 /
 * 0x00060000 to emit[1]. ABS (opcode or src[0] modifier) sets bit 6 and NEG
 * sets bit 8 of emit[0]. log2 of the destination and source register sizes
 * are placed at bits 20 and 23 of emit[0].
 * NOTE(review): several case labels and the statement controlled by the
 * saturate test are not visible in this view. */
609 emit_cvt(struct nv_pc
*pc
, struct nv_instruction
*i
)
611 pc
->emit
[0] = 0x00000004;
612 pc
->emit
[1] = 0x10000000;
614 if (i
->opcode
!= NV_OP_CVT
)
615 i
->ext
.cvt
.d
= i
->ext
.cvt
.s
= NV_OPTYPE(i
->opcode
);
617 switch (i
->ext
.cvt
.d
) {
619 switch (i
->ext
.cvt
.s
) {
620 case NV_TYPE_F32
: pc
->emit
[1] = 0x10000000; break;
/* no break: the S32 case continues into the U32 case */
621 case NV_TYPE_S32
: pc
->emit
[0] |= 0x200;
622 case NV_TYPE_U32
: pc
->emit
[1] = 0x18000000; break;
625 case NV_TYPE_S32
: pc
->emit
[0] |= 0x80;
627 switch (i
->ext
.cvt
.s
) {
628 case NV_TYPE_F32
: pc
->emit
[1] = 0x14000000; break;
/* no break: the S32 case continues into the U32 case */
629 case NV_TYPE_S32
: pc
->emit
[0] |= 0x200;
630 case NV_TYPE_U32
: pc
->emit
[1] = 0x1c000000; break;
634 assert(!"cvt: unknown type");
638 if (i
->opcode
== NV_OP_FLOOR
)
639 pc
->emit
[1] |= 0x00020000;
641 if (i
->opcode
== NV_OP_CEIL
)
642 pc
->emit
[1] |= 0x00040000;
644 if (i
->opcode
== NV_OP_TRUNC
)
645 pc
->emit
[1] |= 0x00060000;
647 if (i
->saturate
|| i
->opcode
== NV_OP_SAT
)
650 if (NV_BASEOP(i
->opcode
) == NV_OP_ABS
|| i
->src
[0]->mod
& NV_MOD_ABS
)
651 pc
->emit
[0] |= 1 << 6;
652 if (NV_BASEOP(i
->opcode
) == NV_OP_NEG
|| i
->src
[0]->mod
& NV_MOD_NEG
)
653 pc
->emit
[0] |= 1 << 8;
655 pc
->emit
[0] |= util_logbase2(DREG(i
->def
[0])->size
) << 20;
656 pc
->emit
[0] |= util_logbase2(SREG(i
->src
[0])->size
) << 23;
/* Starts the instruction words at 0x00000000 / 0xc07e0000 and encodes
 * def[0] at bit 14. The source indexed by i->indirect is encoded at bit 20.
 * For NV_OP_PINTERP, 0x040 is set in emit[0] and src[1] is encoded at
 * bit 26. The low 16 bits of src[0]'s address are ORed into emit[1].
 * NOTE(review): the conditions guarding the indirect source and the 0x100 /
 * 0x080 stores are not visible in this view. */
662 emit_interp(struct nv_pc
*pc
, struct nv_instruction
*i
)
664 pc
->emit
[0] = 0x00000000;
665 pc
->emit
[1] = 0xc07e0000;
667 DID(pc
, i
->def
[0], 14);
672 SID(pc
, i
->src
[i
->indirect
], 20);
676 if (i
->opcode
== NV_OP_PINTERP
) {
677 pc
->emit
[0] |= 0x040;
678 SID(pc
, i
->src
[1], 26);
683 pc
->emit
[1] |= i
->src
[0]->value
->reg
.address
& 0xffff;
686 pc
->emit
[0] |= 0x100;
689 pc
->emit
[0] |= 0x080;
/* Starts emit[0] at 0x03f00006 and emit[1] at 0x06000000 ORed with the
 * address of src[0]'s value. def[0] is encoded at bit 14. At bit 26 the
 * indirect source is encoded when i->indirect >= 0; otherwise NULL is
 * passed to SID, which encodes id 63.
 * NOTE(review): the condition guarding the 0x100 store is not visible here. */
693 emit_vfetch(struct nv_pc
*pc
, struct nv_instruction
*i
)
695 pc
->emit
[0] = 0x03f00006;
696 pc
->emit
[1] = 0x06000000 | i
->src
[0]->value
->reg
.address
;
698 pc
->emit
[0] |= 0x100;
703 DID(pc
, i
->def
[0], 14);
705 SID(pc
, (i
->indirect
>= 0) ? i
->src
[i
->indirect
] : NULL
, 26);
/* Starts the instruction words at 0x00000006 / 0x0a000000. Asserts that
 * source 0 is in NV_FILE_MEM_V and source 1 in NV_FILE_GPR. src[1] is
 * encoded at bit 26 and the low 12 bits of src[0]'s address are ORed into
 * emit[1]. At bit 20 the indirect source is encoded when i->indirect >= 0;
 * otherwise NULL is passed to SID, which encodes id 63.
 * NOTE(review): the condition guarding the 0x100 store is not visible here. */
709 emit_export(struct nv_pc
*pc
, struct nv_instruction
*i
)
711 pc
->emit
[0] = 0x00000006;
712 pc
->emit
[1] = 0x0a000000;
714 pc
->emit
[0] |= 0x100;
718 assert(SFILE(i
, 0) == NV_FILE_MEM_V
);
719 assert(SFILE(i
, 1) == NV_FILE_GPR
);
721 SID(pc
, i
->src
[1], 26); /* register source */
724 pc
->emit
[1] |= i
->src
[0]->value
->reg
.address
& 0xfff;
726 SID(pc
, (i
->indirect
>= 0) ? i
->src
[i
->indirect
] : NULL
, 20);
/* Selects the base instruction words by the file of source 0:
 *  - NV_FILE_IMM:  0x000001e2 / 0x18000000;
 *  - NV_FILE_PRED: 0x1c000004 / 0x080e0000;
 *  - remaining path: 0x00000004 | (i->lanes << 5) / 0x28000000.
 * NOTE(review): the statement controlled by the NV_OP_MOV test, the else
 * structure and the operand encoding are not visible in this view. */
730 emit_mov(struct nv_pc
*pc
, struct nv_instruction
*i
)
732 if (i
->opcode
== NV_OP_MOV
)
735 if (SFILE(i
, 0) == NV_FILE_IMM
) {
736 pc
->emit
[0] = 0x000001e2;
737 pc
->emit
[1] = 0x18000000;
739 if (SFILE(i
, 0) == NV_FILE_PRED
) {
740 pc
->emit
[0] = 0x1c000004;
741 pc
->emit
[1] = 0x080e0000;
743 pc
->emit
[0] = 0x00000004 | (i
->lanes
<< 5);
744 pc
->emit
[1] = 0x28000000;
/* Encodes the access size of a load/store into emit[0], switching on the
 * register size of source 0: 4 -> 0x80, 8 -> 0xa0, 16 -> 0xc0. Asserts that
 * source 0 is a memory file and reports other sizes through NOUVEAU_ERR.
 * NOTE(review): the cases that test NV_TYPE_ISSGD(i->ext.cvt.s) and what
 * they store are not visible in this view. */
751 emit_ldst_size(struct nv_pc
*pc
, struct nv_instruction
*i
)
753 assert(NV_IS_MEMORY_FILE(SFILE(i
, 0)));
755 switch (SSIZE(i
, 0)) {
757 if (NV_TYPE_ISSGD(i
->ext
.cvt
.s
))
762 if (NV_TYPE_ISSGD(i
->ext
.cvt
.s
))
765 case 4: pc
->emit
[0] |= 0x80; break;
766 case 8: pc
->emit
[0] |= 0xa0; break;
767 case 16: pc
->emit
[0] |= 0xc0; break;
769 NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i
, 0));
/* Emits a load from a constant buffer: emit[0] = 0x00000006 and emit[1] =
 * 0x14000000 ORed with (const space index of source 0) << 10. Then encodes
 * the access size, the 16-bit address of src[0], the indirect source at
 * bit 20 (NULL, i.e. id 63 via SID, when i->indirect < 0) and def[0] at
 * bit 14. */
775 emit_ld_const(struct nv_pc
*pc
, struct nv_instruction
*i
)
777 pc
->emit
[0] = 0x00000006;
778 pc
->emit
[1] = 0x14000000 | (const_space_index(i
, 0) << 10);
780 emit_ldst_size(pc
, i
);
783 set_address_16(pc
, i
->src
[0]);
785 SID(pc
, (i
->indirect
>= 0) ? i
->src
[i
->indirect
] : NULL
, 20);
786 DID(pc
, i
->def
[0], 14);
/* Emits a load. Sources in NV_FILE_MEM_C(0)..NV_FILE_MEM_C(15) are handled
 * by emit_ld_const; the NOUVEAU_ERR call reports the source file as not
 * handled yet (presumably on the else path — confirm). */
790 emit_ld(struct nv_pc
*pc
, struct nv_instruction
*i
)
792 if (SFILE(i
, 0) >= NV_FILE_MEM_C(0) &&
793 SFILE(i
, 0) <= NV_FILE_MEM_C(15)) {
794 emit_ld_const(pc
, i
);
796 NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i
, 0));
/* Store emission: the visible code only reports "not handled yet" through
 * NOUVEAU_ERR. */
802 emit_st(struct nv_pc
*pc
, struct nv_instruction
*i
)
804 NOUVEAU_ERR("emit_st: not handled yet\n");
/* Entry point that encodes instruction i into pc->emit.
 * The visible code prints the instruction, tests pc->is_fragprog, emits flow
 * instructions with op codes 0x40, 0x50, 0x60, 0x80, 0x90 and 0x98, writes
 * the word pair 0x00003c00 / 0x00000000 on one path, and reports unhandled
 * opcodes through NOUVEAU_ERR.
 * NOTE(review): the opcode switch and most of its cases are not visible in
 * this view, so the mapping from opcode to emitter cannot be documented
 * from here. */
809 nvc0_emit_instruction(struct nv_pc
*pc
, struct nv_instruction
*i
)
811 debug_printf("EMIT: "); nvc0_print_instruction(i
);
818 if (!pc
->is_fragprog
)
910 emit_flow(pc
, i
, 0x40);
913 emit_flow(pc
, i
, 0x50);
916 emit_flow(pc
, i
, 0x60);
919 emit_flow(pc
, i
, 0x80);
922 emit_flow(pc
, i
, 0x90);
925 emit_flow(pc
, i
, 0x98);
929 pc
->emit
[0] = 0x00003c00;
930 pc
->emit
[1] = 0x00000000;
941 NOUVEAU_ERR("unhandled NV_OP: %d\n", i
->opcode
);