2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "nvc0_program.h"
26 #define NVC0_FIXUP_CODE_RELOC 0
27 #define NVC0_FIXUP_DATA_RELOC 1
38 nvc0_relocate_program(struct nvc0_program
*prog
,
42 struct nvc0_fixup
*f
= (struct nvc0_fixup
*)prog
->relocs
;
45 for (i
= 0; i
< prog
->num_relocs
; ++i
) {
49 case NVC0_FIXUP_CODE_RELOC
: data
= code_base
+ f
[i
].data
; break;
50 case NVC0_FIXUP_DATA_RELOC
: data
= data_base
+ f
[i
].data
; break;
55 data
= (f
[i
].shift
< 0) ? (data
>> -f
[i
].shift
) : (data
<< f
[i
].shift
);
57 prog
->code
[f
[i
].ofst
/ 4] &= ~f
[i
].mask
;
58 prog
->code
[f
[i
].ofst
/ 4] |= data
& f
[i
].mask
;
63 create_fixup(struct nv_pc
*pc
, uint8_t ty
,
64 int w
, uint32_t data
, uint32_t m
, int s
)
68 const unsigned size
= sizeof(struct nvc0_fixup
);
69 const unsigned n
= pc
->num_relocs
;
72 pc
->reloc_entries
= REALLOC(pc
->reloc_entries
, n
* size
, (n
+ 8) * size
);
74 f
= (struct nvc0_fixup
*)pc
->reloc_entries
;
76 f
[n
].ofst
= (pc
->emit_pos
+ w
) * 4;
86 SSIZE(struct nv_instruction
*nvi
, int s
)
88 return nvi
->src
[s
]->value
->reg
.size
;
92 DSIZE(struct nv_instruction
*nvi
, int d
)
94 return nvi
->def
[d
]->reg
.size
;
/*
 * Return a pointer to the register description of the value that a source
 * reference resolves to, going through the value's `join` link.
 *
 * NOTE(review): two body lines between the opening brace and the return
 * are not visible in this listing. SID() tests the result of SREG() for
 * NULL and some callers pass a NULL reference, so the missing lines
 * presumably return NULL when ref is NULL - confirm against the real file.
 */
97 static INLINE
struct nv_reg
*
98 SREG(struct nv_ref
*ref
)
102 return &ref
->value
->join
->reg
;
/*
 * Return a pointer to the register description of a value, going through
 * the value's `join` link.
 *
 * NOTE(review): two body lines between the opening brace and the return
 * are not visible in this listing. DID() tests the result of DREG() for
 * NULL, so the missing lines presumably return NULL when val is NULL -
 * confirm against the real file.
 */
105 static INLINE
struct nv_reg
*
106 DREG(struct nv_value
*val
)
110 return &val
->join
->reg
;
114 SFILE(struct nv_instruction
*nvi
, int s
)
116 return nvi
->src
[s
]->value
->reg
.file
;
120 DFILE(struct nv_instruction
*nvi
, int d
)
122 return nvi
->def
[0]->reg
.file
;
126 SID(struct nv_pc
*pc
, struct nv_ref
*ref
, int pos
)
128 pc
->emit
[pos
/ 32] |= (SREG(ref
) ? SREG(ref
)->id
: 63) << (pos
% 32);
132 DID(struct nv_pc
*pc
, struct nv_value
*val
, int pos
)
134 pc
->emit
[pos
/ 32] |= (DREG(val
) ? DREG(val
)->id
: 63) << (pos
% 32);
137 static INLINE
uint32_t
138 get_immd_u32(struct nv_ref
*ref
) /* XXX: dependent on [0]:2 */
140 assert(ref
->value
->reg
.file
== NV_FILE_IMM
);
141 return ref
->value
->reg
.imm
.u32
;
145 set_immd_u32_l(struct nv_pc
*pc
, uint32_t u32
)
147 pc
->emit
[0] |= (u32
& 0x3f) << 26;
148 pc
->emit
[1] |= u32
>> 6;
152 set_immd_u32(struct nv_pc
*pc
, uint32_t u32
)
154 if ((pc
->emit
[0] & 0xf) == 0x2) {
155 set_immd_u32_l(pc
, u32
);
157 assert(!(pc
->emit
[1] & 0xc000));
158 pc
->emit
[1] |= 0xc000;
160 assert(!(u32
& 0xfff));
161 set_immd_u32_l(pc
, u32
>> 12);
166 set_immd(struct nv_pc
*pc
, struct nv_instruction
*i
, int s
)
168 set_immd_u32(pc
, get_immd_u32(i
->src
[s
]));
172 DVS(struct nv_pc
*pc
, struct nv_instruction
*i
)
174 uint s
= i
->def
[0]->reg
.size
;
176 for (n
= 1; n
< 4 && i
->def
[n
]; ++n
)
177 s
+= i
->def
[n
]->reg
.size
;
178 pc
->emit
[0] |= ((s
/ 4) - 1) << 5;
182 SVS(struct nv_pc
*pc
, struct nv_ref
*src
)
184 pc
->emit
[0] |= (SREG(src
)->size
/ 4 - 1) << 5;
188 set_pred(struct nv_pc
*pc
, struct nv_instruction
*i
)
190 if (i
->predicate
>= 0) {
191 SID(pc
, i
->src
[i
->predicate
], 6);
193 pc
->emit
[0] |= 0x2000; /* negate */
195 pc
->emit
[0] |= 0x1c00;
200 set_address_16(struct nv_pc
*pc
, struct nv_ref
*src
)
202 pc
->emit
[0] |= (src
->value
->reg
.address
& 0x003f) << 26;
203 pc
->emit
[1] |= (src
->value
->reg
.address
& 0xffc0) >> 6;
206 static INLINE
unsigned
207 const_space_index(struct nv_instruction
*i
, int s
)
209 return SFILE(i
, s
) - NV_FILE_MEM_C(0);
/*
 * Emit a flow-control instruction: emit[0] is set to 0x000001e7 and op is
 * placed in the top byte of emit[1]. The target's emit_pos is then
 * registered for relocation via create_fixup() and also OR-ed directly
 * into the instruction (low 6 bits at emit[0] bit 26, next 17 bits in
 * emit[1]).
 *
 * Several lines of this function are not visible in this listing (the
 * original numbering skips 217-220, 222, 225), so the conditions guarding
 * the target handling cannot be confirmed from here.
 */
213 emit_flow(struct nv_pc
*pc
, struct nv_instruction
*i
, uint8_t op
)
215 pc
->emit
[0] = 0x000001e7;
216 pc
->emit
[1] = op
<< 24;
221 uint32_t pos
= i
->target
->emit_pos
;
/*
 * NOTE(review): create_fixup() is declared in this file as
 * (pc, ty, w, data, uint32_t m, int s). Both calls below pass what look
 * like the shift (26 / -6) in the `m` slot and the mask (0xfc000000 /
 * 0x0001ffff) in the `s` slot, i.e. the last two arguments appear swapped
 * relative to the declared order. Verify against the part of
 * create_fixup() that stores them; if it stores m as mask and s as shift,
 * these calls are wrong.
 */
223 create_fixup(pc
, NVC0_FIXUP_CODE_RELOC
, 0, pos
, 26, 0xfc000000);
224 create_fixup(pc
, NVC0_FIXUP_CODE_RELOC
, 1, pos
, -6, 0x0001ffff);
226 pc
->emit
[0] |= (pos
& 0x3f) << 26;
227 pc
->emit
[1] |= (pos
>> 6) & 0x1ffff;
231 /* doesn't work for vfetch, export, ld, st, mov ... */
233 emit_form_0(struct nv_pc
*pc
, struct nv_instruction
*i
)
239 DID(pc
, i
->def
[0], 14);
241 for (s
= 0; s
< 3 && i
->src
[s
]; ++s
) {
242 if (SFILE(i
, s
) >= NV_FILE_MEM_C(0) &&
243 SFILE(i
, s
) <= NV_FILE_MEM_C(15)) {
244 assert(!(pc
->emit
[1] & 0xc000));
246 pc
->emit
[1] |= 0x4000 | (const_space_index(i
, s
) << 10);
247 set_address_16(pc
, i
->src
[s
]);
249 if (SFILE(i
, s
) == NV_FILE_GPR
) {
250 SID(pc
, i
->src
[s
], s
? ((s
== 2) ? 49 : 26) : 20);
252 if (SFILE(i
, s
) == NV_FILE_IMM
) {
253 assert(!(pc
->emit
[1] & 0xc000));
254 assert(s
== 1 || i
->opcode
== NV_OP_MOV
);
261 emit_form_1(struct nv_pc
*pc
, struct nv_instruction
*i
)
267 DID(pc
, i
->def
[0], 14);
269 for (s
= 0; s
< 1 && i
->src
[s
]; ++s
) {
270 if (SFILE(i
, s
) >= NV_FILE_MEM_C(0) &&
271 SFILE(i
, s
) <= NV_FILE_MEM_C(15)) {
272 assert(!(pc
->emit
[1] & 0xc000));
274 pc
->emit
[1] |= 0x4000 | (const_space_index(i
, s
) << 10);
275 set_address_16(pc
, i
->src
[s
]);
277 if (SFILE(i
, s
) == NV_FILE_GPR
) {
278 SID(pc
, i
->src
[s
], 26);
280 if (SFILE(i
, s
) == NV_FILE_IMM
) {
281 assert(!(pc
->emit
[1] & 0xc000));
282 assert(s
== 1 || i
->opcode
== NV_OP_MOV
);
289 emit_neg_abs_1_2(struct nv_pc
*pc
, struct nv_instruction
*i
)
291 if (i
->src
[0]->mod
& NV_MOD_ABS
)
292 pc
->emit
[0] |= 1 << 7;
293 if (i
->src
[0]->mod
& NV_MOD_NEG
)
294 pc
->emit
[0] |= 1 << 9;
295 if (i
->src
[1]->mod
& NV_MOD_ABS
)
296 pc
->emit
[0] |= 1 << 6;
297 if (i
->src
[1]->mod
& NV_MOD_NEG
)
298 pc
->emit
[0] |= 1 << 8;
302 emit_add_f32(struct nv_pc
*pc
, struct nv_instruction
*i
)
304 pc
->emit
[0] = 0x00000000;
305 pc
->emit
[1] = 0x50000000;
309 emit_neg_abs_1_2(pc
, i
);
312 pc
->emit
[1] |= 1 << 17;
316 emit_mul_f32(struct nv_pc
*pc
, struct nv_instruction
*i
)
318 pc
->emit
[0] = 0x00000000;
319 pc
->emit
[1] = 0x58000000;
323 if ((i
->src
[0]->mod
^ i
->src
[1]->mod
) & NV_MOD_NEG
)
324 pc
->emit
[1] |= 1 << 25;
327 pc
->emit
[0] |= 1 << 5;
331 emit_mad_f32(struct nv_pc
*pc
, struct nv_instruction
*i
)
333 pc
->emit
[0] = 0x00000000;
334 pc
->emit
[1] = 0x30000000;
338 if ((i
->src
[0]->mod
^ i
->src
[1]->mod
) & NV_MOD_NEG
)
339 pc
->emit
[0] |= 1 << 9;
341 if (i
->src
[2]->mod
& NV_MOD_NEG
)
342 pc
->emit
[0] |= 1 << 8;
345 pc
->emit
[0] |= 1 << 5;
349 emit_minmax(struct nv_pc
*pc
, struct nv_instruction
*i
)
351 pc
->emit
[0] = 0x00000000;
352 pc
->emit
[1] = 0x08000000;
354 if (NV_BASEOP(i
->opcode
) == NV_OP_MAX
)
355 pc
->emit
[1] |= 0x001e0000;
357 pc
->emit
[1] |= 0x000e0000; /* predicate ? */
361 emit_neg_abs_1_2(pc
, i
);
370 pc
->emit
[0] |= 3 | (1 << 5);
380 emit_tex(struct nv_pc
*pc
, struct nv_instruction
*i
)
382 pc
->emit
[0] = 0x00000086;
383 pc
->emit
[1] = 0x80000000;
385 if (i
->opcode
== NV_OP_TXB
) pc
->emit
[1] |= 0x04000000;
387 if (i
->opcode
== NV_OP_TXL
) pc
->emit
[1] |= 0x06000000;
392 pc
->emit
[0] |= 63 << 26; /* explicit derivatives */
394 DID(pc
, i
->def
[0], 14);
395 SID(pc
, i
->src
[0], 20);
397 pc
->emit
[1] |= i
->tex_mask
<< 14;
398 pc
->emit
[1] |= (i
->tex_argc
- 1) << 20;
400 assert(i
->ext
.tex
.s
< 16);
402 pc
->emit
[1] |= i
->ext
.tex
.t
;
403 pc
->emit
[1] |= i
->ext
.tex
.s
<< 8;
406 pc
->emit
[0] |= 1 << 9;
409 /* 0: cos, 1: sin, 2: ex2, 3: lg2, 4: rcp, 5: rsqrt */
411 emit_flop(struct nv_pc
*pc
, struct nv_instruction
*i
, ubyte op
)
413 pc
->emit
[0] = 0x00000000;
414 pc
->emit
[1] = 0xc8000000;
418 DID(pc
, i
->def
[0], 14);
419 SID(pc
, i
->src
[0], 20);
421 pc
->emit
[0] |= op
<< 26;
424 if (i
->src
[0]->mod
& NV_MOD_NEG
) pc
->emit
[0] |= 1 << 9;
425 if (i
->src
[0]->mod
& NV_MOD_ABS
) pc
->emit
[0] |= 1 << 7;
427 assert(!i
->src
[0]->mod
);
/*
 * Emit a quad operation: opcode bits 0x48000000 in emit[1], destination
 * id at bit 14, source ids at bits 20 and 26, i->lanes at emit[0] bit 6
 * and i->quadop OR-ed into emit[1]. Both sources are asserted to be GPRs.
 *
 * Some lines of this function are not visible in this listing (the
 * original numbering skips 436-438), so any additional setup performed
 * there cannot be confirmed from here.
 */
432 emit_quadop(struct nv_pc
*pc
, struct nv_instruction
*i
)
434 pc
->emit
[0] = 0x00000000;
435 pc
->emit
[1] = 0x48000000;
439 assert(SFILE(i
, 0) == NV_FILE_GPR
&& SFILE(i
, 1) == NV_FILE_GPR
);
441 DID(pc
, i
->def
[0], 14);
442 SID(pc
, i
->src
[0], 20);
/*
 * NOTE(review): src[0] is encoded a second time at bit 26, while the
 * assert above requires src[1] to be a GPR and src[1] is otherwise never
 * encoded. This looks like it should be i->src[1] - confirm against the
 * hardware encoding before changing.
 */
443 SID(pc
, i
->src
[0], 26);
445 pc
->emit
[0] |= i
->lanes
<< 6; /* l0, l1, l2, l3, dx, dy */
446 pc
->emit
[1] |= i
->quadop
;
450 emit_ddx(struct nv_pc
*pc
, struct nv_instruction
*i
)
458 emit_ddy(struct nv_pc
*pc
, struct nv_instruction
*i
)
465 /* preparation op (preex2, presin / convert to fixed point) */
467 emit_preop(struct nv_pc
*pc
, struct nv_instruction
*i
)
469 pc
->emit
[0] = 0x00000000;
470 pc
->emit
[1] = 0x60000000;
472 if (i
->opcode
== NV_OP_PREEX2
)
477 if (i
->src
[0]->mod
& NV_MOD_NEG
) pc
->emit
[0] |= 1 << 8;
478 if (i
->src
[0]->mod
& NV_MOD_ABS
) pc
->emit
[0] |= 1 << 6;
482 emit_shift(struct nv_pc
*pc
, struct nv_instruction
*i
)
484 pc
->emit
[0] = 0x00000003;
488 pc
->emit
[0] |= 0x20; /* fall through */
490 pc
->emit
[1] = 0x58000000;
494 pc
->emit
[1] = 0x60000000;
502 emit_bitop(struct nv_pc
*pc
, struct nv_instruction
*i
)
504 if (SFILE(i
, 1) == NV_FILE_IMM
) {
505 pc
->emit
[0] = 0x00000002;
506 pc
->emit
[1] = 0x38000000;
508 pc
->emit
[0] = 0x00000003;
509 pc
->emit
[1] = 0x68000000;
528 emit_set(struct nv_pc
*pc
, struct nv_instruction
*i
)
530 pc
->emit
[0] = 0x00000000;
534 pc
->emit
[0] |= 0x20; /* fall through */
537 pc
->emit
[1] = 0x100e0000;
539 case NV_OP_SET_F32_AND
:
540 pc
->emit
[1] = 0x18000000;
542 case NV_OP_SET_F32_OR
:
543 pc
->emit
[1] = 0x18200000;
545 case NV_OP_SET_F32_XOR
:
546 pc
->emit
[1] = 0x18400000;
549 pc
->emit
[0] |= 0x20; /* fall through */
552 pc
->emit
[1] = 0x180e0000;
556 if (DFILE(i
, 0) == NV_FILE_PRED
) {
557 pc
->emit
[0] |= 0x1c000;
558 pc
->emit
[1] += 0x08000000;
561 pc
->emit
[1] |= i
->set_cond
<< 23;
565 emit_neg_abs_1_2(pc
, i
); /* maybe assert that U/S32 don't use mods */
569 emit_selp(struct nv_pc
*pc
, struct nv_instruction
*i
)
571 pc
->emit
[0] = 0x00000004;
572 pc
->emit
[1] = 0x20000000;
576 if (i
->cc
|| (i
->src
[2]->mod
& NV_MOD_NOT
))
577 pc
->emit
[1] |= 1 << 20;
581 emit_slct(struct nv_pc
*pc
, struct nv_instruction
*i
)
583 pc
->emit
[0] = 0x00000000;
587 pc
->emit
[0] |= 0x20; /* fall through */
590 pc
->emit
[1] = 0x30000000;
594 pc
->emit
[1] = 0x38000000;
600 pc
->emit
[1] |= i
->set_cond
<< 23;
604 emit_cvt(struct nv_pc
*pc
, struct nv_instruction
*i
)
606 pc
->emit
[0] = 0x00000004;
607 pc
->emit
[1] = 0x10000000;
609 if (i
->opcode
!= NV_OP_CVT
)
610 i
->ext
.cvt
.d
= i
->ext
.cvt
.s
= NV_OPTYPE(i
->opcode
);
612 switch (i
->ext
.cvt
.d
) {
614 switch (i
->ext
.cvt
.s
) {
615 case NV_TYPE_F32
: pc
->emit
[1] = 0x10000000; break;
616 case NV_TYPE_S32
: pc
->emit
[0] |= 0x200;
617 case NV_TYPE_U32
: pc
->emit
[1] = 0x18000000; break;
620 case NV_TYPE_S32
: pc
->emit
[0] |= 0x80;
622 switch (i
->ext
.cvt
.s
) {
623 case NV_TYPE_F32
: pc
->emit
[1] = 0x14000000; break;
624 case NV_TYPE_S32
: pc
->emit
[0] |= 0x200;
625 case NV_TYPE_U32
: pc
->emit
[1] = 0x1c000000; break;
629 assert(!"cvt: unknown type");
633 if (i
->opcode
== NV_OP_FLOOR
)
634 pc
->emit
[1] |= 0x00020000;
636 if (i
->opcode
== NV_OP_CEIL
)
637 pc
->emit
[1] |= 0x00040000;
639 if (i
->opcode
== NV_OP_TRUNC
)
640 pc
->emit
[1] |= 0x00060000;
642 if (i
->saturate
|| i
->opcode
== NV_OP_SAT
)
645 if (NV_BASEOP(i
->opcode
) == NV_OP_ABS
|| i
->src
[0]->mod
& NV_MOD_ABS
)
646 pc
->emit
[0] |= 1 << 6;
647 if (NV_BASEOP(i
->opcode
) == NV_OP_NEG
|| i
->src
[0]->mod
& NV_MOD_NEG
)
648 pc
->emit
[0] |= 1 << 8;
650 pc
->emit
[0] |= util_logbase2(DREG(i
->def
[0])->size
) << 20;
651 pc
->emit
[0] |= util_logbase2(SREG(i
->src
[0])->size
) << 23;
657 emit_interp(struct nv_pc
*pc
, struct nv_instruction
*i
)
659 pc
->emit
[0] = 0x00000000;
660 pc
->emit
[1] = 0xc07e0000;
662 DID(pc
, i
->def
[0], 14);
667 SID(pc
, i
->src
[i
->indirect
], 20);
671 if (i
->opcode
== NV_OP_PINTERP
) {
672 pc
->emit
[0] |= 0x040;
673 SID(pc
, i
->src
[1], 26);
678 pc
->emit
[1] |= i
->src
[0]->value
->reg
.address
& 0xffff;
681 pc
->emit
[0] |= 0x100;
684 pc
->emit
[0] |= 0x080;
688 emit_vfetch(struct nv_pc
*pc
, struct nv_instruction
*i
)
690 pc
->emit
[0] = 0x03f00006;
691 pc
->emit
[1] = 0x06000000 | i
->src
[0]->value
->reg
.address
;
693 pc
->emit
[0] |= 0x100;
698 DID(pc
, i
->def
[0], 14);
700 SID(pc
, (i
->indirect
>= 0) ? i
->src
[i
->indirect
] : NULL
, 26);
704 emit_export(struct nv_pc
*pc
, struct nv_instruction
*i
)
706 pc
->emit
[0] = 0x00000006;
707 pc
->emit
[1] = 0x0a000000;
709 pc
->emit
[0] |= 0x100;
713 assert(SFILE(i
, 0) == NV_FILE_MEM_V
);
714 assert(SFILE(i
, 1) == NV_FILE_GPR
);
716 SID(pc
, i
->src
[1], 26); /* register source */
719 pc
->emit
[1] |= i
->src
[0]->value
->reg
.address
& 0xfff;
721 SID(pc
, (i
->indirect
>= 0) ? i
->src
[i
->indirect
] : NULL
, 20);
725 emit_mov(struct nv_pc
*pc
, struct nv_instruction
*i
)
727 if (i
->opcode
== NV_OP_MOV
)
730 if (SFILE(i
, 0) == NV_FILE_IMM
) {
731 pc
->emit
[0] = 0x000001e2;
732 pc
->emit
[1] = 0x18000000;
734 if (SFILE(i
, 0) == NV_FILE_PRED
) {
735 pc
->emit
[0] = 0x1c000004;
736 pc
->emit
[1] = 0x080e0000;
738 pc
->emit
[0] = 0x00000004 | (i
->lanes
<< 5);
739 pc
->emit
[1] = 0x28000000;
746 emit_ldst_size(struct nv_pc
*pc
, struct nv_instruction
*i
)
748 assert(NV_IS_MEMORY_FILE(SFILE(i
, 0)));
750 switch (SSIZE(i
, 0)) {
752 if (NV_TYPE_ISSGD(i
->ext
.cvt
.s
))
757 if (NV_TYPE_ISSGD(i
->ext
.cvt
.s
))
760 case 4: pc
->emit
[0] |= 0x80; break;
761 case 8: pc
->emit
[0] |= 0xa0; break;
762 case 16: pc
->emit
[0] |= 0xc0; break;
764 NOUVEAU_ERR("invalid load/store size %u\n", SSIZE(i
, 0));
770 emit_ld_const(struct nv_pc
*pc
, struct nv_instruction
*i
)
772 pc
->emit
[0] = 0x00000006;
773 pc
->emit
[1] = 0x14000000 | (const_space_index(i
, 0) << 10);
775 emit_ldst_size(pc
, i
);
778 set_address_16(pc
, i
->src
[0]);
780 SID(pc
, (i
->indirect
>= 0) ? i
->src
[i
->indirect
] : NULL
, 20);
781 DID(pc
, i
->def
[0], 14);
785 emit_ld(struct nv_pc
*pc
, struct nv_instruction
*i
)
787 if (SFILE(i
, 0) >= NV_FILE_MEM_C(0) &&
788 SFILE(i
, 0) <= NV_FILE_MEM_C(15)) {
789 emit_ld_const(pc
, i
);
791 NOUVEAU_ERR("emit_ld(%u): not handled yet\n", SFILE(i
, 0));
797 emit_st(struct nv_pc
*pc
, struct nv_instruction
*i
)
799 NOUVEAU_ERR("emit_st: not handled yet\n");
804 nvc0_emit_instruction(struct nv_pc
*pc
, struct nv_instruction
*i
)
806 debug_printf("EMIT: "); nvc0_print_instruction(i
);
813 if (!pc
->is_fragprog
)
905 emit_flow(pc
, i
, 0x40);
908 emit_flow(pc
, i
, 0x50);
911 emit_flow(pc
, i
, 0x60);
914 emit_flow(pc
, i
, 0x80);
917 emit_flow(pc
, i
, 0x90);
920 emit_flow(pc
, i
, 0x98);
924 pc
->emit
[0] = 0x00003c00;
925 pc
->emit
[1] = 0x00000000;
936 NOUVEAU_ERR("unhandled NV_OP: %d\n", i
->opcode
);