2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 /* #define NV50_PROGRAM_DEBUG */
25 #include "nv50_program.h"
27 #include "nv50_context.h"
29 #include "pipe/p_shader_tokens.h"
30 #include "tgsi/tgsi_parse.h"
31 #include "tgsi/tgsi_util.h"
32 #include "tgsi/tgsi_dump.h"
/**
 * Count the set bits in the low nibble of a value.
 *
 * Only bits 0..3 are examined: the argument is masked with 0xf before the
 * table lookup, so any 32-bit value is a valid input.
 *
 * @param val  value whose four least-significant bits are counted
 * @return     number of set bits among bits 0..3 (0 to 4)
 */
static INLINE unsigned
bitcount4(const uint32_t val)
{
   /* Population count of every 4-bit pattern, indexed by the pattern. */
   static const unsigned cnt[16]
   = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };

   return cnt[val & 0xf];
}
/*
 * Returns a component bit mask (bit n = component n) describing which
 * components of a source operand the instruction reads.  The starting
 * point is the write mask of Dst[0]; the switch on the opcode then
 * adjusts it per instruction.  `c` is the source operand index (callers
 * in this file pass the source slot number).
 *
 * NOTE(review): several case labels and return statements of this switch
 * are not present in this listing, so only the visible cases are
 * described by the comments below.
 */
43 nv50_tgsi_src_mask(const struct tgsi_full_instruction
*inst
, int c
)
45 unsigned mask
= inst
->Dst
[0].Register
.WriteMask
;
47 switch (inst
->Instruction
.Opcode
) {
/* Result: keep bit 3 of the write mask, and request component 0 if any of
 * bits 0..2 are written. */
50 return (mask
& 0x8) | ((mask
& 0x7) ? 0x1 : 0x0);
55 case TGSI_OPCODE_KIL
: /* WriteMask ignored */
/* The set of candidate components depends on which operand is queried:
 * 0x6 for operand 0, 0xa for any other operand. */
58 return mask
& (c
? 0xa : 0x6);
77 const struct tgsi_instruction_texture
*tex
;
79 assert(inst
->Instruction
.Texture
);
/* Every texture opcode other than TEX and TXD also reads component 3. */
83 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_TEX
&&
84 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXD
)
85 mask
|= 0x8; /* bias, lod or proj */
87 switch (tex
->Texture
) {
91 case TGSI_TEXTURE_SHADOW1D
:
102 case TGSI_OPCODE_XPD
:
/* Each written component pulls in the two *other* components among
 * bits 0..2 of the source. */
105 if (mask
& 1) x
|= 0x6;
106 if (mask
& 2) x
|= 0x5;
107 if (mask
& 4) x
|= 0x3;
118 nv50_indirect_inputs(struct nv50_translation_info
*ti
, int id
)
122 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; ++i
)
123 for (c
= 0; c
< 4; ++c
)
124 ti
->input_access
[i
][c
] = id
;
126 ti
->indirect_inputs
= TRUE
;
130 nv50_indirect_outputs(struct nv50_translation_info
*ti
, int id
)
134 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
)
135 for (c
= 0; c
< 4; ++c
)
136 ti
->output_access
[i
][c
] = id
;
138 ti
->indirect_outputs
= TRUE
;
/*
 * Per-instruction scan step.  `id` is the number the caller assigned to
 * this instruction (nv50_prog_scan passes the pre-incremented instruction
 * counter, so ids start at 1).
 *
 * What the visible code records in `ti`:
 *  - BGNSUB: position and id of a new subroutine entry.
 *  - Destination in the output file: output_access[index][component] = id
 *    for the written components, and the vertex program's edge flag source
 *    when a MOV copies an input to the edge flag output.
 *  - Sources in the input file: input_access[index][swizzled component]
 *    = id for every component the instruction reads.
 *
 * NOTE(review): some guard/continue lines of this function are not present
 * in this listing; the comments describe only what is visible.
 */
142 prog_inst(struct nv50_translation_info
*ti
,
143 const struct tgsi_full_instruction
*inst
, int id
)
145 const struct tgsi_dst_register
*dst
;
146 const struct tgsi_src_register
*src
;
150 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_BGNSUB
) {
/* pos is id - 1, i.e. the instruction number converted to a 0-based value */
151 ti
->subr
[ti
->subr_nr
].pos
= id
- 1;
152 ti
->subr
[ti
->subr_nr
].id
= ti
->subr_nr
+ 1; /* id 0 is main program */
156 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
157 dst
= &inst
->Dst
[0].Register
;
159 for (c
= 0; c
< 4; ++c
) {
161 nv50_indirect_outputs(ti
, id
);
162 if (!(dst
->WriteMask
& (1 << c
)))
164 ti
->output_access
[dst
->Index
][c
] = id
;
/* MOV from an input register into the edge flag output: remember which
 * input index supplies the edge flag. */
167 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
168 inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
&&
169 dst
->Index
== ti
->edgeflag_out
)
170 ti
->p
->vp
.edgeflag
= inst
->Src
[0].Register
.Index
;
173 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
174 src
= &inst
->Src
[s
].Register
;
175 if (src
->File
!= TGSI_FILE_INPUT
)
177 mask
= nv50_tgsi_src_mask(inst
, s
);
/* Indirectly addressed input: mark all inputs as accessed. */
179 if (inst
->Src
[s
].Register
.Indirect
)
180 nv50_indirect_inputs(ti
, id
);
182 for (c
= 0; c
< 4; ++c
) {
183 if (!(mask
& (1 << c
)))
/* k is the source component selected by the swizzle for component c */
185 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
186 if (k
<= TGSI_SWIZZLE_W
)
187 ti
->input_access
[src
->Index
][k
] = id
;
/* Probably should introduce something like struct tgsi_function_declaration
 * instead of trying to guess inputs/outputs.
 */
/*
 * Updates the argument/return-value bitsets of a subroutine from one of
 * its instructions.  Registers are tracked per component as bit
 * (Index % 32) of word [Index / 32][component].
 *
 *  - A temporary that is read in a component which has not been written
 *    earlier in the subroutine (its retv bit is clear) is recorded in argv.
 *  - Every temporary component written by Dst[0] is recorded in retv.
 *
 * Indirectly addressed sources are not supported here (asserted).
 *
 * NOTE(review): some lines of this function (e.g. the statement following
 * the file check) are not present in this listing.
 */
196 prog_subroutine_inst(struct nv50_subroutine
*subr
,
197 const struct tgsi_full_instruction
*inst
)
199 const struct tgsi_dst_register
*dst
;
200 const struct tgsi_src_register
*src
;
204 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
205 src
= &inst
->Src
[s
].Register
;
206 if (src
->File
!= TGSI_FILE_TEMPORARY
)
208 mask
= nv50_tgsi_src_mask(inst
, s
);
210 assert(!inst
->Src
[s
].Register
.Indirect
);
212 for (c
= 0; c
< 4; ++c
) {
213 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
/* NOTE(review): this compares with `<`, so a swizzle selecting W is never
 * recorded in argv, whereas prog_inst() uses `k <= TGSI_SWIZZLE_W` for the
 * equivalent check -- confirm whether excluding W is intended. */
215 if ((mask
& (1 << c
)) && k
< TGSI_SWIZZLE_W
)
216 if (!(subr
->retv
[src
->Index
/ 32][k
] & (1 << (src
->Index
% 32))))
217 subr
->argv
[src
->Index
/ 32][k
] |= 1 << (src
->Index
% 32);
221 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
222 dst
= &inst
->Dst
[0].Register
;
224 for (c
= 0; c
< 4; ++c
)
225 if (dst
->WriteMask
& (1 << c
))
226 subr
->retv
[dst
->Index
/ 32][c
] |= 1 << (dst
->Index
% 32);
231 prog_immediate(struct nv50_translation_info
*ti
,
232 const struct tgsi_full_immediate
*imm
)
235 unsigned n
= ti
->immd32_nr
++;
237 assert(ti
->immd32_nr
<= ti
->scan
.immediate_count
);
239 for (c
= 0; c
< 4; ++c
)
240 ti
->immd32
[n
* 4 + c
] = imm
->u
[c
].Uint
;
242 ti
->immd32_ty
[n
] = imm
->Immediate
.DataType
;
/*
 * Translates the interpolation settings of a TGSI declaration into
 * NV50_INTERP_* flags:
 *  - TGSI_INTERPOLATE_CONSTANT yields NV50_INTERP_FLAT;
 *  - the fall-through case (neither CONSTANT nor PERSPECTIVE) yields
 *    NV50_INTERP_LINEAR;
 *  - NV50_INTERP_CENTROID is OR'ed in when the declaration requests
 *    centroid sampling.
 *
 * NOTE(review): the assignment taken for TGSI_INTERPOLATE_PERSPECTIVE and
 * the final return are not present in this listing.
 */
245 static INLINE
unsigned
246 translate_interpolate(const struct tgsi_full_declaration
*decl
)
250 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_CONSTANT
)
251 mode
= NV50_INTERP_FLAT
;
253 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
)
256 mode
= NV50_INTERP_LINEAR
;
258 if (decl
->Declaration
.Centroid
)
259 mode
|= NV50_INTERP_CENTROID
;
/*
 * Processes one TGSI declaration token and records what it declares in
 * the translation state / program:
 *  - INPUT: interpolation mode for each register in the declared range,
 *    semantic name/index of each input, and for COLOR inputs of fragment
 *    programs the first register index in vp.bfc[semantic index].
 *  - OUTPUT: semantic name/index of each output, plus the register index
 *    of back-face colour (vp.bfc), point size (vp.psiz) and edge flag
 *    (ti->edgeflag_out) outputs.
 *  - SYSTEM_VALUE: dispatches on the semantic name (case bodies are not
 *    visible in this listing).
 *  - CONSTANT: grows parm_size to cover the highest declared register,
 *    counting 16 bytes per register.
 *
 * sn/si default to 0 when the declaration carries no semantic.
 *
 * NOTE(review): break statements and several case bodies are not present
 * in this listing.
 */
265 prog_decl(struct nv50_translation_info
*ti
,
266 const struct tgsi_full_declaration
*decl
)
268 unsigned i
, first
, last
, sn
= 0, si
= 0;
270 first
= decl
->Range
.First
;
271 last
= decl
->Range
.Last
;
273 if (decl
->Declaration
.Semantic
) {
274 sn
= decl
->Semantic
.Name
;
275 si
= decl
->Semantic
.Index
;
278 switch (decl
->Declaration
.File
) {
279 case TGSI_FILE_INPUT
:
280 for (i
= first
; i
<= last
; ++i
)
281 ti
->interp_mode
[i
] = translate_interpolate(decl
);
283 if (!decl
->Declaration
.Semantic
)
286 for (i
= first
; i
<= last
; ++i
) {
287 ti
->p
->in
[i
].sn
= sn
;
288 ti
->p
->in
[i
].si
= si
;
292 case TGSI_SEMANTIC_FACE
:
294 case TGSI_SEMANTIC_COLOR
:
295 if (ti
->p
->type
== PIPE_SHADER_FRAGMENT
)
296 ti
->p
->vp
.bfc
[si
] = first
;
300 case TGSI_FILE_OUTPUT
:
301 if (!decl
->Declaration
.Semantic
)
304 for (i
= first
; i
<= last
; ++i
) {
305 ti
->p
->out
[i
].sn
= sn
;
306 ti
->p
->out
[i
].si
= si
;
310 case TGSI_SEMANTIC_BCOLOR
:
311 ti
->p
->vp
.bfc
[si
] = first
;
313 case TGSI_SEMANTIC_PSIZE
:
314 ti
->p
->vp
.psiz
= first
;
316 case TGSI_SEMANTIC_EDGEFLAG
:
317 ti
->edgeflag_out
= first
;
323 case TGSI_FILE_SYSTEM_VALUE
:
324 switch (decl
->Semantic
.Name
) {
325 case TGSI_SEMANTIC_FACE
:
327 case TGSI_SEMANTIC_INSTANCEID
:
329 case TGSI_SEMANTIC_PRIMID
:
332 case TGSI_SEMANTIC_PRIMIDIN:
334 case TGSI_SEMANTIC_VERTEXID:
341 case TGSI_FILE_CONSTANT
:
/* (last + 1) registers of 16 bytes each */
342 ti
->p
->parm_size
= MAX2(ti
->p
->parm_size
, (last
+ 1) * 16);
344 case TGSI_FILE_ADDRESS
:
345 case TGSI_FILE_SAMPLER
:
346 case TGSI_FILE_TEMPORARY
:
/*
 * Vertex program setup: selects the register files used for inputs
 * (NV_FILE_MEM_S) and outputs (NV_FILE_OUT) and assigns consecutive
 * hardware slots to input and output components based on the access
 * tables filled during the scan.
 *
 *  - Inputs: in[i].hw is the first slot of input i; assigned components
 *    get input_map[i][c] and a bit in the vp.attrs bitfield
 *    (bit index 4 * i + c).
 *  - Outputs: out[i].hw is the first slot of output i; assigned components
 *    get output_map[i][c], advance p->max_out and set a bit in out[i].mask.
 *  - Point size: if vp.psiz holds an output register index (< 0x40) it is
 *    replaced by that output's hardware slot.
 *
 * NOTE(review): the statements following the `if (!...access[i][c])`
 * checks (presumably `continue`) and the return are not present in this
 * listing.
 */
355 nv50_vertprog_prepare(struct nv50_translation_info
*ti
)
357 struct nv50_program
*p
= ti
->p
;
359 unsigned num_inputs
= 0;
361 ti
->input_file
= NV_FILE_MEM_S
;
362 ti
->output_file
= NV_FILE_OUT
;
364 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
366 p
->in
[i
].hw
= num_inputs
;
368 for (c
= 0; c
< 4; ++c
) {
369 if (!ti
->input_access
[i
][c
])
371 ti
->input_map
[i
][c
] = num_inputs
++;
372 p
->vp
.attrs
[(4 * i
+ c
) / 32] |= 1 << ((i
* 4 + c
) % 32);
376 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
378 p
->out
[i
].hw
= p
->max_out
;
380 for (c
= 0; c
< 4; ++c
) {
381 if (!ti
->output_access
[i
][c
])
383 ti
->output_map
[i
][c
] = p
->max_out
++;
384 p
->out
[i
].mask
|= 1 << c
;
388 if (p
->vp
.psiz
< 0x40)
389 p
->vp
.psiz
= p
->out
[p
->vp
.psiz
].hw
;
/*
 * Fragment program setup.  From the visible code, in order:
 *  - Sets FP control flags: depth export (flags[1] = 0x11 and EXPORTS_Z)
 *    when the shader writes Z, USES_KIL when it uses kill.
 *  - Selects NV_FILE_MEM_V for inputs and NV_FILE_GPR for outputs.
 *  - Counts the non-flat inputs (m) so that flat inputs can be placed
 *    after them, then walks the inputs: POSITION components are mapped
 *    separately and flagged in bits 24..27 of fp.interp, FACE is mapped to
 *    255, every other input gets a new index j (flat ones after non-flat
 *    ones); COLOR inputs record j in vp.bfc[], and in[j].linear reflects
 *    the LINEAR interpolation flag.
 *  - Forces bit (8 << 24) in fp.interp if it is not already set.
 *  - Assigns interpolant slots (nintp) to the accessed components of each
 *    input, builds in[i].mask and accumulates the colour component count
 *    in fp.colors.
 *  - Stores the non-flat and total interpolant counts in fp.interp.
 *  - Sets MULTIPLE_RESULTS when there are more outputs than one colour
 *    (plus depth, if written).
 *  - Assigns four consecutive result slots to each output that is not the
 *    POSITION output; the output indexed by `depr` (when valid) gets a
 *    single slot for component 2 after all others.
 *
 * NOTE(review): many lines of this function (declarations of i/j/c,
 * initialisation of nintp/nvary/depr, filling of sn[]/si[], continue
 * statements, the return) are not present in this listing.
 * NOTE(review): sn[] and si[] hold 16 entries while the loops run to
 * p->in_nr -- confirm in_nr cannot exceed 16.
 */
395 nv50_fragprog_prepare(struct nv50_translation_info
*ti
)
397 struct nv50_program
*p
= ti
->p
;
399 unsigned nvary
, nintp
, depr
;
400 unsigned n
= 0, m
= 0, skip
= 0;
401 ubyte sn
[16], si
[16];
405 if (ti
->scan
.writes_z
) {
406 p
->fp
.flags
[1] = 0x11;
407 p
->fp
.flags
[0] |= NV50TCL_FP_CONTROL_EXPORTS_Z
;
410 if (ti
->scan
.uses_kill
)
411 p
->fp
.flags
[0] |= NV50TCL_FP_CONTROL_USES_KIL
;
415 ti
->input_file
= NV_FILE_MEM_V
;
416 ti
->output_file
= NV_FILE_GPR
;
418 /* count non-flat inputs, save semantic info */
419 for (i
= 0; i
< p
->in_nr
; ++i
) {
420 m
+= (ti
->interp_mode
[i
] & NV50_INTERP_FLAT
) ? 0 : 1;
425 /* reorder p->in[] so that non-flat inputs are first and
426 * kick out special inputs that don't use VP/GP_RESULT_MAP
429 for (i
= 0; i
< p
->in_nr
; ++i
) {
430 if (sn
[i
] == TGSI_SEMANTIC_POSITION
) {
431 for (c
= 0; c
< 4; ++c
) {
432 ti
->input_map
[i
][c
] = nintp
;
433 if (ti
->input_access
[i
][c
]) {
434 p
->fp
.interp
|= 1 << (24 + c
);
441 if (sn
[i
] == TGSI_SEMANTIC_FACE
) {
442 ti
->input_map
[i
][0] = 255;
447 j
= (ti
->interp_mode
[i
] & NV50_INTERP_FLAT
) ? m
++ : n
++;
449 if (sn
[i
] == TGSI_SEMANTIC_COLOR
)
450 p
->vp
.bfc
[si
[i
]] = j
;
452 p
->in
[j
].linear
= (ti
->interp_mode
[i
] & NV50_INTERP_LINEAR
) ? 1 : 0;
460 if (!(p
->fp
.interp
& (8 << 24))) {
461 p
->fp
.interp
|= (8 << 24);
465 p
->fp
.colors
= (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */
467 for (i
= 0; i
< p
->in_nr
; ++i
) {
471 for (c
= 0; c
< 4; ++c
) {
472 if (!ti
->input_access
[j
][c
])
474 p
->in
[i
].mask
|= 1 << c
;
475 ti
->input_map
[j
][c
] = nintp
++;
477 /* count color inputs */
478 if (i
== p
->vp
.bfc
[0] || i
== p
->vp
.bfc
[1])
479 p
->fp
.colors
+= bitcount4(p
->in
[i
].mask
) << 16;
481 nintp
-= bitcount4(p
->fp
.interp
>> 24); /* subtract position inputs */
484 nvary
-= p
->in
[n
].hw
;
486 p
->fp
.interp
|= nvary
<< NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT
;
487 p
->fp
.interp
|= nintp
<< NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT
;
491 if (p
->out_nr
> (1 + (ti
->scan
.writes_z
? 1 : 0)))
492 p
->fp
.flags
[0] |= NV50TCL_FP_CONTROL_MULTIPLE_RESULTS
;
495 for (i
= 0; i
< p
->out_nr
; ++i
) {
497 if (p
->out
[i
].sn
== TGSI_SEMANTIC_POSITION
) {
501 p
->out
[i
].hw
= p
->max_out
;
502 p
->out
[i
].mask
= 0xf;
504 for (c
= 0; c
< 4; ++c
)
505 ti
->output_map
[i
][c
] = p
->max_out
++;
507 if (depr
< p
->out_nr
) {
508 p
->out
[depr
].mask
= 0x4;
509 p
->out
[depr
].hw
= ti
->output_map
[depr
][2] = p
->max_out
++;
/*
 * Geometry program setup: selects NV_FILE_MEM_S as the input register
 * file and NV_FILE_OUT as the output register file.
 *
 * NOTE(review): the remainder of this function (including its return
 * value) is not present in this listing.
 */
516 nv50_geomprog_prepare(struct nv50_translation_info
*ti
)
518 ti
->input_file
= NV_FILE_MEM_S
;
519 ti
->output_file
= NV_FILE_OUT
;
/*
 * First pass over the TGSI token stream of ti->p.
 *
 * Steps visible in this listing:
 *  1. Initialises vp.edgeflag to 0x40 and runs tgsi_scan_shader() to fill
 *     ti->scan (optionally dumping the tokens when NV50_PROGRAM_DEBUG is
 *     defined).
 *  2. Allocates per-shader arrays sized from the scan results: one entry
 *     per BGNSUB, 16 bytes plus one type byte per immediate, and a copy
 *     slot per instruction.
 *  3. Parses every token: immediates go to prog_immediate(), declarations
 *     to prog_decl(); instructions are copied into ti->insns[] and handed
 *     to prog_inst() with a 1-based instruction number.
 *  4. Walks each subroutine up to ENDSUB with prog_subroutine_inst().
 *  5. Derives in_nr/out_nr from the highest declared register and calls
 *     the prepare function matching the shader type; its result is kept
 *     in `ret`.
 *
 * NOTE(review): no NULL checks of the allocations are visible here --
 * confirm against the full source.
 */
526 nv50_prog_scan(struct nv50_translation_info
*ti
)
528 struct nv50_program
*p
= ti
->p
;
529 struct tgsi_parse_context parse
;
532 p
->vp
.edgeflag
= 0x40;
538 tgsi_scan_shader(p
->pipe
.tokens
, &ti
->scan
);
540 #ifdef NV50_PROGRAM_DEBUG
541 tgsi_dump(p
->pipe
.tokens
, 0);
545 CALLOC(ti
->scan
.opcode_count
[TGSI_OPCODE_BGNSUB
], sizeof(ti
->subr
[0]));
547 ti
->immd32
= (uint32_t *)MALLOC(ti
->scan
.immediate_count
* 16);
548 ti
->immd32_ty
= (ubyte
*)MALLOC(ti
->scan
.immediate_count
* sizeof(ubyte
));
550 ti
->insns
= MALLOC(ti
->scan
.num_instructions
* sizeof(ti
->insns
[0]));
552 tgsi_parse_init(&parse
, p
->pipe
.tokens
);
553 while (!tgsi_parse_end_of_tokens(&parse
)) {
554 tgsi_parse_token(&parse
);
556 switch (parse
.FullToken
.Token
.Type
) {
557 case TGSI_TOKEN_TYPE_IMMEDIATE
:
558 prog_immediate(ti
, &parse
.FullToken
.FullImmediate
);
560 case TGSI_TOKEN_TYPE_DECLARATION
:
561 prog_decl(ti
, &parse
.FullToken
.FullDeclaration
);
563 case TGSI_TOKEN_TYPE_INSTRUCTION
:
/* Store the instruction at the current count, then pass the incremented
 * count as the instruction's id. */
564 ti
->insns
[ti
->inst_nr
] = parse
.FullToken
.FullInstruction
;
565 prog_inst(ti
, &parse
.FullToken
.FullInstruction
, ++ti
->inst_nr
);
570 /* Scan to determine which registers are inputs/outputs of a subroutine. */
571 for (i
= 0; i
< ti
->subr_nr
; ++i
) {
/* NOTE(review): the walk starts at subr[i].id, but prog_inst() stores the
 * instruction index of BGNSUB in subr[].pos and a sequence number
 * (subr_nr + 1) in subr[].id -- confirm that .id is really the intended
 * start index into insns[]. */
572 int pc
= ti
->subr
[i
].id
;
573 while (ti
->insns
[pc
].Instruction
.Opcode
!= TGSI_OPCODE_ENDSUB
)
574 prog_subroutine_inst(&ti
->subr
[i
], &ti
->insns
[pc
++]);
577 p
->in_nr
= ti
->scan
.file_max
[TGSI_FILE_INPUT
] + 1;
578 p
->out_nr
= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
] + 1;
581 case PIPE_SHADER_VERTEX
:
582 ret
= nv50_vertprog_prepare(ti
);
584 case PIPE_SHADER_FRAGMENT
:
585 ret
= nv50_fragprog_prepare(ti
);
587 case PIPE_SHADER_GEOMETRY
:
588 ret
= nv50_geomprog_prepare(ti
);
591 assert(!"unsupported program type");
/*
 * Translates program `p`: allocates a zeroed nv50_translation_info, runs
 * the scan pass (nv50_prog_scan) and then code generation
 * (nv50_generate_code).  An error message is logged for a failing scan
 * ("unsupported shader program") or failing code generation ("error
 * during shader translation").
 *
 * Returns TRUE when `ret` is zero, FALSE otherwise.
 *
 * edgeflag_out is preset to PIPE_MAX_SHADER_OUTPUTS before the scan;
 * prog_decl() overwrites it when an EDGEFLAG output is declared.
 *
 * NOTE(review): the error checks, the assignment of ti->p and the cleanup
 * of `ti` are not present in this listing.
 */
601 nv50_program_tx(struct nv50_program
*p
)
603 struct nv50_translation_info
*ti
;
606 ti
= CALLOC_STRUCT(nv50_translation_info
);
609 ti
->edgeflag_out
= PIPE_MAX_SHADER_OUTPUTS
;
611 ret
= nv50_prog_scan(ti
);
613 NOUVEAU_ERR("unsupported shader program\n");
617 ret
= nv50_generate_code(ti
);
619 NOUVEAU_ERR("error during shader translation\n");
633 return ret
? FALSE
: TRUE
;
/*
 * Releases the resources held by a translated program: drops the
 * references held in p->bo and p->so (both calls pass NULL as the new
 * reference) and marks the program as not translated, so it is no longer
 * considered valid.
 *
 * NOTE(review): some lines between the so_ref() call and the final
 * assignment are not present in this listing.
 */
637 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
639 nouveau_bo_ref(NULL
, &p
->bo
);
641 so_ref(NULL
, &p
->so
);
646 p
->translated
= FALSE
;