1 #include "pipe/p_context.h"
2 #include "pipe/p_defines.h"
3 #include "pipe/p_state.h"
4 #include "pipe/p_inlines.h"
6 #include "pipe/p_shader_tokens.h"
7 #include "tgsi/util/tgsi_parse.h"
8 #include "tgsi/util/tgsi_util.h"
10 #include "nv50_context.h"
11 #include "nv50_state.h"
13 #define NV50_SU_MAX_TEMP 64
/*
 * Members of the shader-translation context (fragment).
 *
 * NOTE(review): the enclosing struct header and any sibling members are not
 * visible in this listing.  The functions further down reach these members
 * through a `struct nv50_pc *pc`, so they presumably belong to struct
 * nv50_pc -- TODO confirm against the complete source.
 */
/* Program being translated; emit() appends instruction words to p->insns. */
31 struct nv50_program
*p
;
/* One slot per hardware temp; alloc_reg() treats a NULL slot as free. */
34 struct nv50_reg
*r_temp
[NV50_SU_MAX_TEMP
];
/* Looked up as [Index * 4 + c] for TGSI_FILE_TEMPORARY registers. */
37 struct nv50_reg
*temp
;
/* Looked up as [Index * 4 + c] for TGSI_FILE_INPUT registers. */
39 struct nv50_reg
*attr
;
/* Looked up as [Index * 4 + c] for TGSI_FILE_OUTPUT registers. */
41 struct nv50_reg
*result
;
/* Looked up as [Index * 4 + c] for TGSI_FILE_CONSTANT registers. */
43 struct nv50_reg
*param
;
/* Looked up as [Index * 4 + c] for TGSI_FILE_IMMEDIATE registers. */
45 struct nv50_reg
*immd
;
/*
 * alloc_reg - hardware-temp bookkeeping for a register.
 *
 * pc:  translation context; pc->r_temp[] and pc->p->cfg.high_temp are used.
 * reg: register being considered; its type and hw fields are read.
 *
 * Visible logic: tests whether `reg` is a P_TEMP, keeps
 * pc->p->cfg.high_temp at least reg->hw + 1, and scans pc->r_temp[] for the
 * first empty slot i, raising high_temp to at least i + 1 for that slot.
 *
 * NOTE(review): this listing is incomplete.  The braces, the returns, what
 * happens for non-P_TEMP registers, the condition guarding the first
 * high_temp update and whatever is stored into the free slot are not shown.
 * Presumably the free slot is bound to `reg` -- confirm against the complete
 * source before relying on this description.
 */
51 alloc_reg(struct nv50_pc
*pc
, struct nv50_reg
*reg
)
55 if (reg
->type
!= P_TEMP
)
59 /*XXX: do this here too to catch FP temp-as-attr usage..
60 * not clean, but works */
61 if (pc
->p
->cfg
.high_temp
< (reg
->hw
+ 1))
62 pc
->p
->cfg
.high_temp
= reg
->hw
+ 1;
/* Search the slot table for the first unused hardware temp. */
66 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
67 if (!(pc
->r_temp
[i
])) {
/* high_temp is kept one above the highest slot index seen here. */
70 if (pc
->p
->cfg
.high_temp
< (i
+ 1))
71 pc
->p
->cfg
.high_temp
= i
+ 1;
/*
 * alloc_temp - obtain a temporary register descriptor.
 *
 * pc:  translation context.
 * dst: optional candidate; the visible test checks for a non-NULL P_TEMP
 *      register whose hw is still -1.  Callers in this file also pass NULL.
 *
 * Returns a struct nv50_reg pointer.  Inside a loop over NV50_SU_MAX_TEMP
 * slots a fresh descriptor is created with CALLOC_STRUCT.
 *
 * NOTE(review): the slot test, the initialisation of the new descriptor,
 * the return statements and the behaviour when no slot is free are not
 * visible in this listing; presumably `dst` itself is returned when the
 * first test passes -- TODO confirm.  No NULL check of the CALLOC_STRUCT
 * result is visible either.
 */
79 static struct nv50_reg
*
80 alloc_temp(struct nv50_pc
*pc
, struct nv50_reg
*dst
)
85 if (dst
&& dst
->type
== P_TEMP
&& dst
->hw
== -1)
88 for (i
= 0; i
< NV50_SU_MAX_TEMP
; i
++) {
90 r
= CALLOC_STRUCT(nv50_reg
);
/*
 * free_temp - release a temporary register.
 *
 * pc: translation context owning the r_temp[] slot table.
 * r:  register to release; r->index and r->hw are read.
 *
 * When r->index is -1 the descriptor stored in slot r->hw is freed and the
 * slot is reset to NULL so that it reads as unused again.
 *
 * NOTE(review): registers with index == -1 are presumably the ones created
 * by alloc_temp(); whether anything happens for other index values is not
 * visible in this listing.
 */
104 free_temp(struct nv50_pc
*pc
, struct nv50_reg
*r
)
106 if (r
->index
== -1) {
107 FREE(pc
->r_temp
[r
->hw
]);
108 pc
->r_temp
[r
->hw
] = NULL
;
/*
 * tgsi_dst - map one component of a TGSI destination register to its
 * nv50_reg descriptor.
 *
 * pc:  translation context holding the per-file register arrays.
 * c:   component number; four consecutive entries are kept per register,
 *      hence the `Index * 4 + c` addressing.
 * dst: TGSI destination operand.
 *
 * TGSI_FILE_TEMPORARY resolves into pc->temp and TGSI_FILE_OUTPUT into
 * pc->result.
 *
 * NOTE(review): the handling of any other register file and the final
 * return value are not visible in this listing.
 */
112 static struct nv50_reg
*
113 tgsi_dst(struct nv50_pc
*pc
, int c
, const struct tgsi_full_dst_register
*dst
)
115 switch (dst
->DstRegister
.File
) {
116 case TGSI_FILE_TEMPORARY
:
117 return &pc
->temp
[dst
->DstRegister
.Index
* 4 + c
];
118 case TGSI_FILE_OUTPUT
:
119 return &pc
->result
[dst
->DstRegister
.Index
* 4 + c
];
/*
 * tgsi_src - map one component of a TGSI source register to its nv50_reg
 * descriptor, honouring the operand's swizzle.
 *
 * pc:  translation context holding the per-file register arrays.
 * c:   requested component (0..3); it is replaced by the matching
 *      SwizzleX/Y/Z/W selector before the lookup.
 * src: TGSI source operand.
 *
 * INPUT, TEMPORARY, CONSTANT and IMMEDIATE files resolve into pc->attr,
 * pc->temp, pc->param and pc->immd respectively, each addressed as
 * `Index * 4 + c`.
 *
 * NOTE(review): the header of the swizzle switch (presumably `switch (c)`),
 * the handling of other register files and the fallback return are not
 * visible in this listing.
 */
129 static struct nv50_reg
*
130 tgsi_src(struct nv50_pc
*pc
, int c
, const struct tgsi_full_src_register
*src
)
132 /* Handle swizzling */
134 case 0: c
= src
->SrcRegister
.SwizzleX
; break;
135 case 1: c
= src
->SrcRegister
.SwizzleY
; break;
136 case 2: c
= src
->SrcRegister
.SwizzleZ
; break;
137 case 3: c
= src
->SrcRegister
.SwizzleW
; break;
/* From here on `c` is the swizzled component. */
142 switch (src
->SrcRegister
.File
) {
143 case TGSI_FILE_INPUT
:
144 return &pc
->attr
[src
->SrcRegister
.Index
* 4 + c
];
145 case TGSI_FILE_TEMPORARY
:
146 return &pc
->temp
[src
->SrcRegister
.Index
* 4 + c
];
147 case TGSI_FILE_CONSTANT
:
148 return &pc
->param
[src
->SrcRegister
.Index
* 4 + c
];
149 case TGSI_FILE_IMMEDIATE
:
150 return &pc
->immd
[src
->SrcRegister
.Index
* 4 + c
];
/*
 * emit - append an encoded instruction to the program's instruction array.
 *
 * pc:   translation context; the target program is pc->p.
 * inst: encoded instruction words to copy.
 *
 * Two variants are visible: one grows p->insns to p->insns_nr words and
 * copies two words to its tail, the other copies a single word.
 *
 * NOTE(review): the condition choosing between the two variants and the
 * statements that bump p->insns_nr are not visible in this listing;
 * presumably the two-word path serves long instructions -- TODO confirm.
 *
 * NOTE(review): the result of realloc() is stored straight back into
 * p->insns.  If realloc() fails the old array is leaked and the following
 * memcpy() writes through a NULL-based pointer; no check is visible.
 */
159 emit(struct nv50_pc
*pc
, unsigned *inst
)
161 struct nv50_program
*p
= pc
->p
;
/* Two-word variant: copy inst[0..1] into the last two slots. */
165 p
->insns
= realloc(p
->insns
, sizeof(unsigned) * p
->insns_nr
);
166 memcpy(p
->insns
+ (p
->insns_nr
- 2), inst
, sizeof(unsigned)*2);
/* One-word variant: copy inst[0] into the last slot. */
169 p
->insns
= realloc(p
->insns
, sizeof(unsigned) * p
->insns_nr
);
170 memcpy(p
->insns
+ (p
->insns_nr
- 1), inst
, sizeof(unsigned));
/*
 * Forward declaration of set_long(); its definition appears further down
 * in this file, after the predicate helpers.
 */
174 static INLINE
void set_long(struct nv50_pc
*, unsigned *);
/*
 * is_long - predicate on an encoded instruction.
 *
 * inst: encoded instruction words.
 *
 * NOTE(review): only the signature survives in this listing; the body and
 * the return type are not visible.  is_immd() and emit_mov() use the result
 * as a truth value.
 */
177 is_long(unsigned *inst
)
/*
 * is_immd - predicate on an encoded instruction.
 *
 * inst: encoded instruction words; inst[1] is inspected.
 *
 * The visible test holds when is_long(inst) is true and the two lowest
 * bits of inst[1] are both set.
 *
 * NOTE(review): the statements returning the result are not visible in
 * this listing.
 */
185 is_immd(unsigned *inst
)
187 if (is_long(inst
) && (inst
[1] & 3) == 3)
/*
 * set_pred - write the `pred` and `idx` fields into inst[1].
 *
 * pc:   translation context (not used by the visible lines).
 * pred: value placed at bits 7..11 of inst[1].
 * idx:  value placed at bits 12..13 of inst[1].
 * inst: encoded instruction words, modified in place.
 *
 * Both fields are cleared first and then OR-ed in.  The arguments are not
 * masked, so callers must keep them within the field widths.
 *
 * NOTE(review): at least one statement ahead of the mask is not visible
 * in this listing.
 */
193 set_pred(struct nv50_pc
*pc
, unsigned pred
, unsigned idx
, unsigned *inst
)
196 inst
[1] &= ~((0x1f << 7) | (0x3 << 12));
197 inst
[1] |= (pred
<< 7) | (idx
<< 12);
/*
 * set_pred_wr - write the `idx` and `on` fields into inst[1].
 *
 * pc:   translation context (not used by the visible lines).
 * on:   value placed at bit 6 of inst[1].
 * idx:  value placed at bits 4..5 of inst[1].
 * inst: encoded instruction words, modified in place.
 *
 * Both fields are cleared first and then OR-ed in.  The arguments are not
 * masked, so callers must keep them within the field widths.
 *
 * NOTE(review): at least one statement ahead of the mask is not visible
 * in this listing.
 */
201 set_pred_wr(struct nv50_pc
*pc
, unsigned on
, unsigned idx
, unsigned *inst
)
204 inst
[1] &= ~((0x3 << 4) | (1 << 6));
205 inst
[1] |= (idx
<< 4) | (on
<< 6);
/*
 * set_long - prepare an instruction for its long form.
 *
 * pc:   translation context, passed through to the helpers.
 * inst: encoded instruction words, modified in place.
 *
 * The visible lines load the predicate fields with (pred = 0xf, idx = 0)
 * and the predicate-write fields with (on = 0, idx = 0).
 *
 * NOTE(review): the statements ahead of these two calls (presumably an
 * early-out for already-long instructions and the bit that marks the long
 * form) are not visible in this listing -- TODO confirm.
 */
209 set_long(struct nv50_pc
*pc
, unsigned *inst
)
215 set_pred(pc
, 0xf, 0, inst
);
216 set_pred_wr(pc
, 0, 0, inst
);
/*
 * set_dst - encode the destination register into an instruction.
 *
 * pc:   translation context.
 * dst:  destination register; its type and hw fields are read.
 * inst: encoded instruction words, modified in place.
 *
 * For a P_RESULT destination bit 3 of inst[1] is set.  The hardware index
 * dst->hw is OR-ed into inst[0] starting at bit 2.
 *
 * NOTE(review): other statements of this function (anything between the
 * visible lines) are missing from this listing.
 */
220 set_dst(struct nv50_pc
*pc
, struct nv50_reg
*dst
, unsigned *inst
)
222 if (dst
->type
== P_RESULT
) {
224 inst
[1] |= 0x00000008;
228 inst
[0] |= (dst
->hw
<< 2);
/*
 * set_immd - embed an immediate value into an instruction.
 *
 * pc:   translation context; the value is read from pc->immd_buf.
 * imm:  immediate register; imm->hw indexes pc->immd_buf.
 * inst: encoded instruction words, modified in place.
 *
 * The float is converted with fui() into `val`.  Both predicate field
 * groups are written as zero, the two lowest bits of inst[1] are set, the
 * low 6 bits of `val` go into inst[0] from bit 16 and the remaining bits
 * go into inst[1] from bit 2.
 *
 * NOTE(review): at least one statement between the declaration and the
 * predicate calls is not visible in this listing.
 */
232 set_immd(struct nv50_pc
*pc
, struct nv50_reg
*imm
, unsigned *inst
)
234 unsigned val
= fui(pc
->immd_buf
[imm
->hw
]); /* XXX */
237 /*XXX: can't be predicated - bits overlap.. catch cases where both
238 * are required and avoid them. */
239 set_pred(pc
, 0, 0, inst
);
240 set_pred_wr(pc
, 0, 0, inst
);
242 inst
[1] |= 0x00000002 | 0x00000001;
243 inst
[0] |= (val
& 0x3f) << 16;
244 inst
[1] |= (val
>> 6) << 2;
/*
 * emit_interp - build an interpolation instruction.
 *
 * pc:            translation context.
 * dst:           destination register, encoded through set_dst().
 * src:           source register; src->hw is placed at bit 16 of inst[0].
 * iv:            register whose hw index is placed at bit 9 of inst[0].
 * noperspective: boolean selector; its use is not visible in this listing.
 *
 * The instruction starts from zero with 0x80000000 OR-ed into inst[0]; a
 * further visible statement sets bit 25 of inst[0].
 *
 * NOTE(review): the conditions guarding the iv, src and bit-25 statements
 * (presumably involving `noperspective`) and the final hand-off of the
 * instruction are not visible -- TODO confirm against the complete source.
 */
248 emit_interp(struct nv50_pc
*pc
, struct nv50_reg
*dst
,
249 struct nv50_reg
*src
, struct nv50_reg
*iv
, boolean noperspective
)
251 unsigned inst
[2] = { 0, 0 };
253 inst
[0] |= 0x80000000;
254 set_dst(pc
, dst
, inst
);
256 inst
[0] |= (iv
->hw
<< 9);
258 inst
[0] |= (src
->hw
<< 16);
260 inst
[0] |= (1 << 25);
/*
 * emit_mov - build a register move instruction.
 *
 * pc:  translation context.
 * dst: destination register, encoded through set_dst().
 * src: source register; P_IMMD, P_CONST and P_ATTR sources get dedicated
 *      handling in the visible lines.
 *
 * The instruction starts from zero with 0x10000000 OR-ed into inst[0].  An
 * immediate source written to a non-P_RESULT destination is embedded with
 * set_immd().  A further visible branch handles immediate or constant
 * sources, selecting NV50_CB_PMISC or NV50_CB_PVP at bit 22 of inst[1] and
 * placing src->hw at bit 9 of inst[0].  Long, non-immediate encodings get
 * the 32-bit flag and the mov sub-operation bits in inst[1].
 *
 * NOTE(review): this listing is incomplete.  Two of the original block
 * comments below have lost their closing line, and the else branches,
 * braces and the final hand-off of the instruction are not visible.  No
 * inline notes are added below for that reason.
 */
266 emit_mov(struct nv50_pc
*pc
, struct nv50_reg
*dst
, struct nv50_reg
*src
)
268 unsigned inst
[2] = { 0, 0 };
271 inst
[0] |= 0x10000000;
273 set_dst(pc
, dst
, inst
);
275 if (dst
->type
!= P_RESULT
&& src
->type
== P_IMMD
) {
276 set_immd(pc
, src
, inst
);
277 /*XXX: 32-bit, but steals part of "half" reg space - need to
278 * catch and handle this case if/when we do half-regs
280 inst
[0] |= 0x00008000;
282 if (src
->type
== P_IMMD
|| src
->type
== P_CONST
) {
284 if (src
->type
== P_IMMD
)
285 inst
[1] |= (NV50_CB_PMISC
<< 22);
287 inst
[1] |= (NV50_CB_PVP
<< 22);
288 inst
[0] |= (src
->hw
<< 9);
289 inst
[1] |= 0x20000000; /* src0 const? */
291 if (src
->type
== P_ATTR
) {
293 inst
[1] |= 0x00200000;
297 inst
[0] |= (src
->hw
<< 9);
300 /* We really should support "half" instructions here at some point,
301 * but I don't feel confident enough about them yet.
304 if (is_long(inst
) && !is_immd(inst
)) {
305 inst
[1] |= 0x04000000; /* 32-bit */
306 inst
[1] |= 0x0003c000; /* "subsubop" 0xf == mov */
/*
 * nv50_program_tx_insn - translate one TGSI instruction token.
 *
 * pc:  translation context.
 * tok: parsed token; its FullInstruction member is used.
 *
 * Resolves the four components of the first destination operand and of
 * every source operand into nv50_reg pointers, then dispatches on the
 * opcode.  TGSI_OPCODE_MOV emits one emit_mov() per component and
 * TGSI_OPCODE_END is recognised; an "invalid opcode" error is logged for
 * what is presumably the default case.
 *
 * NOTE(review): the return statements, any use of `mask` inside the
 * component loops and the body of the END case are not visible in this
 * listing.  The caller stores this function's result in `ret`.
 */
313 nv50_program_tx_insn(struct nv50_pc
*pc
, const union tgsi_full_token
*tok
)
315 const struct tgsi_full_instruction
*inst
= &tok
->FullInstruction
;
316 struct nv50_reg
*dst
[4], *src
[3][4];
320 NOUVEAU_ERR("insn %p\n", tok
);
322 mask
= inst
->FullDstRegisters
[0].DstRegister
.WriteMask
;
/* Resolve each destination component. */
324 for (c
= 0; c
< 4; c
++) {
326 dst
[c
] = tgsi_dst(pc
, c
, &inst
->FullDstRegisters
[0]);
/* Resolve each component of every source operand (swizzle applied). */
331 for (i
= 0; i
< inst
->Instruction
.NumSrcRegs
; i
++) {
332 for (c
= 0; c
< 4; c
++)
333 src
[i
][c
] = tgsi_src(pc
, c
, &inst
->FullSrcRegisters
[i
]);
336 switch (inst
->Instruction
.Opcode
) {
337 case TGSI_OPCODE_MOV
:
338 for (c
= 0; c
< 4; c
++)
339 emit_mov(pc
, dst
[c
], src
[0][c
]);
341 case TGSI_OPCODE_END
:
344 NOUVEAU_ERR("invalid opcode %d\n", inst
->Instruction
.Opcode
);
/*
 * nv50_program_tx_prep - first pass over the TGSI tokens.
 *
 * pc: translation context to populate.
 *
 * Walks every token of pc->p->pipe.tokens.  Immediate tokens append four
 * floats to pc->immd_buf; declaration tokens raise pc->temp_nr,
 * pc->result_nr, pc->attr_nr or pc->param_nr to `Last + 1` of the declared
 * range.  Afterwards the temp, attr, result, param and immd descriptor
 * arrays are allocated with four entries per register and filled in.
 *
 * NOTE(review): this listing is incomplete (braces, else branches, breaks,
 * local declarations, the increments of immd_nr and the return statements
 * are missing).
 *
 * NOTE(review): realloc() is assigned straight back to pc->immd_buf and
 * none of the calloc() results is checked in the visible lines.
 */
352 nv50_program_tx_prep(struct nv50_pc
*pc
)
354 struct tgsi_parse_context p
;
358 tgsi_parse_init(&p
, pc
->p
->pipe
.tokens
);
359 while (!tgsi_parse_end_of_tokens(&p
)) {
360 const union tgsi_full_token
*tok
= &p
.FullToken
;
362 tgsi_parse_token(&p
);
363 switch (tok
->Token
.Type
) {
/* Immediates: store the four float components at the tail of immd_buf. */
364 case TGSI_TOKEN_TYPE_IMMEDIATE
:
366 const struct tgsi_full_immediate
*imm
=
367 &p
.FullToken
.FullImmediate
;
370 pc
->immd_buf
= realloc(pc
->immd_buf
, 4 * pc
->immd_nr
*
372 pc
->immd_buf
[4 * (pc
->immd_nr
- 1) + 0] =
373 imm
->u
.ImmediateFloat32
[0].Float
;
374 pc
->immd_buf
[4 * (pc
->immd_nr
- 1) + 1] =
375 imm
->u
.ImmediateFloat32
[1].Float
;
376 pc
->immd_buf
[4 * (pc
->immd_nr
- 1) + 2] =
377 imm
->u
.ImmediateFloat32
[2].Float
;
378 pc
->immd_buf
[4 * (pc
->immd_nr
- 1) + 3] =
379 imm
->u
.ImmediateFloat32
[3].Float
;
/* Declarations: track the highest declared index per register file. */
382 case TGSI_TOKEN_TYPE_DECLARATION
:
384 const struct tgsi_full_declaration
*d
;
387 d
= &p
.FullToken
.FullDeclaration
;
388 last
= d
->u
.DeclarationRange
.Last
;
390 switch (d
->Declaration
.File
) {
391 case TGSI_FILE_TEMPORARY
:
392 if (pc
->temp_nr
< (last
+ 1))
393 pc
->temp_nr
= last
+ 1;
395 case TGSI_FILE_OUTPUT
:
396 if (pc
->result_nr
< (last
+ 1))
397 pc
->result_nr
= last
+ 1;
399 case TGSI_FILE_INPUT
:
400 if (pc
->attr_nr
< (last
+ 1))
401 pc
->attr_nr
= last
+ 1;
403 case TGSI_FILE_CONSTANT
:
404 if (pc
->param_nr
< (last
+ 1))
405 pc
->param_nr
= last
+ 1;
408 NOUVEAU_ERR("bad decl file %d\n",
409 d
->Declaration
.File
);
414 case TGSI_TOKEN_TYPE_INSTRUCTION
:
/* Temporaries: no hardware slot yet (hw = -1), index = TGSI register. */
421 NOUVEAU_ERR("%d temps\n", pc
->temp_nr
);
423 pc
->temp
= calloc(pc
->temp_nr
* 4, sizeof(struct nv50_reg
));
427 for (i
= 0; i
< pc
->temp_nr
; i
++) {
428 for (c
= 0; c
< 4; c
++) {
429 pc
->temp
[i
*4+c
].type
= P_TEMP
;
430 pc
->temp
[i
*4+c
].hw
= -1;
431 pc
->temp
[i
*4+c
].index
= i
;
/*
 * Attributes: fragment programs copy type/hw/index from a freshly
 * allocated temp; the other visible assignments use P_ATTR with running
 * hardware ids from `aid` (presumably the non-fragment branch).
 */
436 NOUVEAU_ERR("%d attrib regs\n", pc
->attr_nr
);
438 struct nv50_reg
*iv
= NULL
, *tmp
= NULL
;
441 pc
->attr
= calloc(pc
->attr_nr
* 4, sizeof(struct nv50_reg
));
445 if (pc
->p
->type
== NV50_PROG_FRAGMENT
) {
446 iv
= alloc_temp(pc
, NULL
);
450 for (i
= 0; i
< pc
->attr_nr
; i
++) {
451 struct nv50_reg
*a
= &pc
->attr
[i
*4];
453 for (c
= 0; c
< 4; c
++) {
454 if (pc
->p
->type
== NV50_PROG_FRAGMENT
) {
455 struct nv50_reg
*at
=
456 alloc_temp(pc
, NULL
);
457 pc
->attr
[i
*4+c
].type
= at
->type
;
458 pc
->attr
[i
*4+c
].hw
= at
->hw
;
459 pc
->attr
[i
*4+c
].index
= at
->index
;
461 pc
->p
->cfg
.vp
.attr
[aid
/32] |=
463 pc
->attr
[i
*4+c
].type
= P_ATTR
;
464 pc
->attr
[i
*4+c
].hw
= aid
++;
465 pc
->attr
[i
*4+c
].index
= i
;
/*
 * Interpolation set-up for fragment inputs: iv is interpolated first,
 * `tmp` is derived from it by an instruction starting from 0x90000000,
 * then all four components are interpolated against `tmp`.
 */
469 if (pc
->p
->type
!= NV50_PROG_FRAGMENT
)
472 emit_interp(pc
, iv
, iv
, iv
, FALSE
);
473 tmp
= alloc_temp(pc
, NULL
);
475 unsigned inst
[2] = { 0, 0 };
476 inst
[0] = 0x90000000;
477 inst
[0] |= (tmp
->hw
<< 2);
480 emit_interp(pc
, &a
[0], &a
[0], tmp
, TRUE
);
481 emit_interp(pc
, &a
[1], &a
[1], tmp
, TRUE
);
482 emit_interp(pc
, &a
[2], &a
[2], tmp
, TRUE
);
483 emit_interp(pc
, &a
[3], &a
[3], tmp
, TRUE
);
/* Results: P_TEMP for fragment programs, P_RESULT in the other visible
 * assignment; hardware ids come from the running counter `rid`. */
491 NOUVEAU_ERR("%d result regs\n", pc
->result_nr
);
495 pc
->result
= calloc(pc
->result_nr
* 4, sizeof(struct nv50_reg
));
499 for (i
= 0; i
< pc
->result_nr
; i
++) {
500 for (c
= 0; c
< 4; c
++) {
501 if (pc
->p
->type
== NV50_PROG_FRAGMENT
)
502 pc
->result
[i
*4+c
].type
= P_TEMP
;
504 pc
->result
[i
*4+c
].type
= P_RESULT
;
505 pc
->result
[i
*4+c
].hw
= rid
++;
506 pc
->result
[i
*4+c
].index
= i
;
/* Constants: P_CONST descriptors numbered from `rid`. */
511 NOUVEAU_ERR("%d param regs\n", pc
->param_nr
);
515 pc
->param
= calloc(pc
->param_nr
* 4, sizeof(struct nv50_reg
));
519 for (i
= 0; i
< pc
->param_nr
; i
++) {
520 for (c
= 0; c
< 4; c
++) {
521 pc
->param
[i
*4+c
].type
= P_CONST
;
522 pc
->param
[i
*4+c
].hw
= rid
++;
523 pc
->param
[i
*4+c
].index
= i
;
/* Immediates: P_IMMD descriptors numbered from `rid`. */
531 pc
->immd
= calloc(pc
->immd_nr
* 4, sizeof(struct nv50_reg
));
535 for (i
= 0; i
< pc
->immd_nr
; i
++) {
536 for (c
= 0; c
< 4; c
++) {
537 pc
->immd
[i
*4+c
].type
= P_IMMD
;
538 pc
->immd
[i
*4+c
].hw
= rid
++;
539 pc
->immd
[i
*4+c
].index
= i
;
/*
 * nv50_program_tx - translate a TGSI program.
 *
 * p: program to translate.  nv50_program_validate() compares the result
 *    of this function against FALSE.
 *
 * Allocates a translation context, starts cfg.high_temp at 4, runs
 * nv50_program_tx_prep(), then parses the tokens again and passes every
 * instruction token to nv50_program_tx_insn().  Finally the immediate
 * buffer is handed to the program: p->immd takes pc->immd_buf and
 * p->immd_nr becomes four times pc->immd_nr.
 *
 * NOTE(review): the assignment of pc->p, every check of `ret`, the error
 * paths, the release of `pc` and the return statements are not visible in
 * this listing.  No NULL check of CALLOC_STRUCT is visible either.
 */
551 nv50_program_tx(struct nv50_program
*p
)
553 struct tgsi_parse_context parse
;
557 pc
= CALLOC_STRUCT(nv50_pc
);
561 pc
->p
->cfg
.high_temp
= 4;
563 ret
= nv50_program_tx_prep(pc
);
/* Second pass: translate the instruction tokens one by one. */
567 tgsi_parse_init(&parse
, pc
->p
->pipe
.tokens
);
568 while (!tgsi_parse_end_of_tokens(&parse
)) {
569 const union tgsi_full_token
*tok
= &parse
.FullToken
;
571 tgsi_parse_token(&parse
);
573 switch (tok
->Token
.Type
) {
574 case TGSI_TOKEN_TYPE_INSTRUCTION
:
575 ret
= nv50_program_tx_insn(pc
, tok
);
/* immd_nr counts vec4 immediates in pc but single floats in p. */
584 p
->immd_nr
= pc
->immd_nr
* 4;
585 p
->immd
= pc
->immd_buf
;
588 tgsi_parse_free(&parse
);
/*
 * nv50_program_validate - produce the instruction words for a program.
 *
 * nv50: driver context (not used by the visible lines).
 * p:    program to translate; p->insns and p->translated are written.
 *
 * Fragment programs get a fixed sequence of eight instruction words.
 * Vertex programs go through nv50_program_tx(); afterwards bit 0 of the
 * last instruction word is set and every word is logged.  Any other
 * processor type logs "invalid TGSI processor".  On the visible success
 * path p->translated becomes TRUE.
 *
 * NOTE(review): the assignment of p->insns_nr for the fragment case, the
 * handling of an nv50_program_tx() failure and the return statements are
 * not visible in this listing.  The malloc() result is not checked in the
 * visible lines.
 */
595 nv50_program_validate(struct nv50_context
*nv50
, struct nv50_program
*p
)
598 struct tgsi_parse_context pc
;
600 tgsi_parse_init(&pc
, p
->pipe
.tokens
);
/* Fragment programs: hard-coded instruction words. */
602 if (pc
.FullHeader
.Processor
.Processor
== TGSI_PROCESSOR_FRAGMENT
) {
604 p
->insns
= malloc(p
->insns_nr
* sizeof(unsigned));
605 p
->insns
[0] = 0x80000000;
606 p
->insns
[1] = 0x9000000c;
607 p
->insns
[2] = 0x82010600;
608 p
->insns
[3] = 0x82020604;
609 p
->insns
[4] = 0x80030609;
610 p
->insns
[5] = 0x00020780;
611 p
->insns
[6] = 0x8004060d;
612 p
->insns
[7] = 0x00020781;
/* Vertex programs: real translation. */
614 if (pc
.FullHeader
.Processor
.Processor
== TGSI_PROCESSOR_VERTEX
) {
618 if (nv50_program_tx(p
) == FALSE
)
620 /* *not* sufficient, it's fine if last inst is long and
621 * NOT immd - otherwise it's fucked fucked fucked */
622 p
->insns
[p
->insns_nr
- 1] |= 0x00000001;
624 for (i
= 0; i
< p
->insns_nr
; i
++)
625 NOUVEAU_ERR("%d 0x%08x\n", i
, p
->insns
[i
]);
628 NOUVEAU_ERR("invalid TGSI processor\n");
629 tgsi_parse_free(&pc
);
633 tgsi_parse_free(&pc
);
636 p
->translated
= TRUE
;
/*
 * nv50_program_validate_data - upload the program's immediate values.
 *
 * nv50: driver context.
 * p:    program whose p->immd[0 .. p->immd_nr - 1] floats are uploaded.
 *
 * For each immediate a two-word packet is sent with method 0x0f00: the
 * first word combines NV50_CB_PMISC (shifted by 16) with the index
 * (shifted by 8), the second is the float's bit pattern from fui().
 *
 * NOTE(review): the declarations of `i` and `tesla` are not visible in
 * this listing.
 */
640 nv50_program_validate_data(struct nv50_context
*nv50
, struct nv50_program
*p
)
644 for (i
= 0; i
< p
->immd_nr
; i
++) {
645 BEGIN_RING(tesla
, 0x0f00, 2);
646 OUT_RING ((NV50_CB_PMISC
<< 16) | (i
<< 8));
647 OUT_RING (fui(p
->immd
[i
]));
/*
 * nv50_program_validate_code - copy the instruction words into a buffer.
 *
 * nv50: driver context; its winsys provides the buffer operations.
 * p:    program; p->buffer is (re)assigned and p->insns is the source.
 *
 * Creates a buffer of p->insns_nr * 4 bytes (the 0x100 argument is
 * presumably the alignment -- TODO confirm), maps it for CPU writes,
 * copies the instruction words and unmaps it.
 *
 * NOTE(review): any guard around buffer creation and any check of the
 * buffer_create/buffer_map results are not visible in this listing.
 */
652 nv50_program_validate_code(struct nv50_context
*nv50
, struct nv50_program
*p
)
654 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
658 p
->buffer
= ws
->buffer_create(ws
, 0x100, 0, p
->insns_nr
* 4);
659 map
= ws
->buffer_map(ws
, p
->buffer
, PIPE_BUFFER_USAGE_CPU_WRITE
);
660 memcpy(map
, p
->insns
, p
->insns_nr
* 4);
661 ws
->buffer_unmap(ws
, p
->buffer
);
/*
 * nv50_vertprog_validate - make the bound vertex program current.
 *
 * nv50: driver context; nv50->vertprog is the program used.
 *
 * Translates the program on first use, uploads its immediates and code,
 * then builds and emits a state object that sets the program address
 * (high and low relocations against p->buffer), the two attribute words
 * p->cfg.vp.attr[0..1] via method 0x1650, p->cfg.high_temp as part of
 * method 0x16ac, and a start offset of 0 via method 0x140c.
 *
 * NOTE(review): the creation of `so`, the first data word of method
 * 0x16ac, the handling of a failed translation and any release of `so`
 * are not visible in this listing.
 */
665 nv50_vertprog_validate(struct nv50_context
*nv50
)
667 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
668 struct nv50_program
*p
= nv50
->vertprog
;
669 struct nouveau_stateobj
*so
;
/* Translate lazily, only the first time the program is used. */
671 if (!p
->translated
) {
672 nv50_program_validate(nv50
, p
);
677 nv50_program_validate_data(nv50
, p
);
678 nv50_program_validate_code(nv50
, p
);
681 so_method(so
, tesla
, NV50TCL_VP_ADDRESS_HIGH
, 2);
682 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
683 NOUVEAU_BO_HIGH
, 0, 0);
684 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
685 NOUVEAU_BO_LOW
, 0, 0);
686 so_method(so
, tesla
, 0x1650, 2);
687 so_data (so
, p
->cfg
.vp
.attr
[0]);
688 so_data (so
, p
->cfg
.vp
.attr
[1]);
689 so_method(so
, tesla
, 0x16ac, 2);
691 so_data (so
, p
->cfg
.high_temp
);
692 so_method(so
, tesla
, 0x140c, 1);
693 so_data (so
, 0); /* program start offset */
694 so_emit(nv50
->screen
->nvws
, so
);
/*
 * nv50_fragprog_validate - make the bound fragment program current.
 *
 * nv50: driver context; nv50->fragprog is the program used.
 *
 * Translates the program on first use, uploads its immediates and code,
 * then builds and emits a state object that sets the program address
 * (high and low relocations against p->buffer), p->cfg.high_temp via
 * method 0x198c, and a start offset of 0 via method 0x1414.
 *
 * NOTE(review): the creation of `so`, the handling of a failed
 * translation and any release of `so` are not visible in this listing.
 * `ws` is not used by any visible line.
 */
699 nv50_fragprog_validate(struct nv50_context
*nv50
)
701 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
702 struct nouveau_grobj
*tesla
= nv50
->screen
->tesla
;
703 struct nv50_program
*p
= nv50
->fragprog
;
704 struct nouveau_stateobj
*so
;
/* Translate lazily, only the first time the program is used. */
707 if (!p
->translated
) {
708 nv50_program_validate(nv50
, p
);
713 nv50_program_validate_data(nv50
, p
);
714 nv50_program_validate_code(nv50
, p
);
717 so_method(so
, tesla
, NV50TCL_FP_ADDRESS_HIGH
, 2);
718 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
719 NOUVEAU_BO_HIGH
, 0, 0);
720 so_reloc (so
, p
->buffer
, 0, NOUVEAU_BO_VRAM
| NOUVEAU_BO_RD
|
721 NOUVEAU_BO_LOW
, 0, 0);
722 so_method(so
, tesla
, 0x198c, 1);
723 so_data (so
, p
->cfg
.high_temp
);
724 so_method(so
, tesla
, 0x1414, 1);
725 so_data (so
, 0); /* program start offset */
726 so_emit(nv50
->screen
->nvws
, so
);
/*
 * nv50_program_destroy - release resources held by a program.
 *
 * nv50: driver context; its winsys is used for the buffer reference.
 * p:    program being torn down.
 *
 * The visible lines drop the program's reference on p->buffer by
 * re-pointing it at NULL through pipe_buffer_reference().
 *
 * NOTE(review): the rest of the body (presumably the release of p->insns
 * and the reset of the translated flag) is not visible in this listing,
 * which may also end before the function does -- TODO confirm.
 */
731 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
733 struct pipe_winsys
*ws
= nv50
->pipe
.winsys
;
742 pipe_buffer_reference(ws
, &p
->buffer
, NULL
);