/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 #include "nv50_program.h"
25 #include "nv50_context.h"
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_util.h"
30 #include "tgsi/tgsi_dump.h"
32 #include "codegen/nv50_ir_driver.h"
34 static INLINE
unsigned
35 bitcount4(const uint32_t val
)
37 static const unsigned cnt
[16]
38 = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };
39 return cnt
[val
& 0xf];
/* Compute which components of a source operand an instruction reads.
 *
 * inst: the TGSI instruction being examined.
 * c:    selects between two masks in one arm (c ? 0xa : 0x6); callers in
 *       this file pass their source-register loop index here.
 *
 * The mask starts out as the write mask of destination 0 and is then
 * adjusted per opcode.  The arms still visible here are:
 *  - KIL, whose write mask is ignored;
 *  - a texture arm that asserts the instruction carries texture info, adds
 *    component 0x8 (bias, lod or proj) unless the opcode is TEX or TXD, and
 *    then switches on the texture target;
 *  - XPD, where written component x/y/z requests source components
 *    0x6/0x5/0x3 respectively.
 *
 * NOTE(review): this listing has the original line numbers fused into the
 * text and is missing braces, most case labels and several return
 * statements; restore it from the pristine file before changing the logic.
 */
43 nv50_tgsi_src_mask(const struct tgsi_full_instruction
*inst
, int c
)
45 unsigned mask
= inst
->Dst
[0].Register
.WriteMask
;
47 switch (inst
->Instruction
.Opcode
) {
50 return (mask
& 0x8) | ((mask
& 0x7) ? 0x1 : 0x0);
55 case TGSI_OPCODE_KIL
: /* WriteMask ignored */
58 return mask
& (c
? 0xa : 0x6);
77 const struct tgsi_instruction_texture
*tex
;
79 assert(inst
->Instruction
.Texture
);
83 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_TEX
&&
84 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXD
)
85 mask
|= 0x8; /* bias, lod or proj */
87 switch (tex
->Texture
) {
91 case TGSI_TEXTURE_SHADOW1D
:
102 case TGSI_OPCODE_XPD
:
105 if (mask
& 1) x
|= 0x6;
106 if (mask
& 2) x
|= 0x5;
107 if (mask
& 4) x
|= 0x3;
118 nv50_indirect_inputs(struct nv50_translation_info
*ti
, int id
)
122 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; ++i
)
123 for (c
= 0; c
< 4; ++c
)
124 ti
->input_access
[i
][c
] = id
;
126 ti
->indirect_inputs
= TRUE
;
130 nv50_indirect_outputs(struct nv50_translation_info
*ti
, int id
)
134 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
)
135 for (c
= 0; c
< 4; ++c
)
136 ti
->output_access
[i
][c
] = id
;
138 ti
->indirect_outputs
= TRUE
;
142 prog_inst(struct nv50_translation_info
*ti
,
143 const struct tgsi_full_instruction
*inst
, int id
)
145 const struct tgsi_dst_register
*dst
;
146 const struct tgsi_src_register
*src
;
150 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_BGNSUB
) {
151 ti
->subr
[ti
->subr_nr
].pos
= id
- 1;
152 ti
->subr
[ti
->subr_nr
].id
= ti
->subr_nr
+ 1; /* id 0 is main program */
156 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
157 dst
= &inst
->Dst
[0].Register
;
159 for (c
= 0; c
< 4; ++c
) {
161 nv50_indirect_outputs(ti
, id
);
162 if (!(dst
->WriteMask
& (1 << c
)))
164 ti
->output_access
[dst
->Index
][c
] = id
;
167 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
168 inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
&&
169 dst
->Index
== ti
->edgeflag_out
)
170 ti
->p
->vp
.edgeflag
= inst
->Src
[0].Register
.Index
;
172 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
173 if (inst
->Dst
[0].Register
.Indirect
)
174 ti
->store_to_memory
= TRUE
;
177 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
178 src
= &inst
->Src
[s
].Register
;
179 if (src
->File
== TGSI_FILE_TEMPORARY
)
180 if (inst
->Src
[s
].Register
.Indirect
)
181 ti
->store_to_memory
= TRUE
;
182 if (src
->File
!= TGSI_FILE_INPUT
)
184 mask
= nv50_tgsi_src_mask(inst
, s
);
186 if (inst
->Src
[s
].Register
.Indirect
)
187 nv50_indirect_inputs(ti
, id
);
189 for (c
= 0; c
< 4; ++c
) {
190 if (!(mask
& (1 << c
)))
192 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
193 if (k
<= TGSI_SWIZZLE_W
)
194 ti
->input_access
[src
->Index
][k
] = id
;
/* Probably should introduce something like struct tgsi_function_declaration
 * instead of trying to guess inputs/outputs.
 */
203 prog_subroutine_inst(struct nv50_subroutine
*subr
,
204 const struct tgsi_full_instruction
*inst
)
206 const struct tgsi_dst_register
*dst
;
207 const struct tgsi_src_register
*src
;
211 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
212 src
= &inst
->Src
[s
].Register
;
213 if (src
->File
!= TGSI_FILE_TEMPORARY
)
215 mask
= nv50_tgsi_src_mask(inst
, s
);
217 assert(!inst
->Src
[s
].Register
.Indirect
);
219 for (c
= 0; c
< 4; ++c
) {
220 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
222 if ((mask
& (1 << c
)) && k
< TGSI_SWIZZLE_W
)
223 if (!(subr
->retv
[src
->Index
/ 32][k
] & (1 << (src
->Index
% 32))))
224 subr
->argv
[src
->Index
/ 32][k
] |= 1 << (src
->Index
% 32);
228 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
229 dst
= &inst
->Dst
[0].Register
;
231 for (c
= 0; c
< 4; ++c
)
232 if (dst
->WriteMask
& (1 << c
))
233 subr
->retv
[dst
->Index
/ 32][c
] |= 1 << (dst
->Index
% 32);
238 prog_immediate(struct nv50_translation_info
*ti
,
239 const struct tgsi_full_immediate
*imm
)
242 unsigned n
= ti
->immd32_nr
++;
244 assert(ti
->immd32_nr
<= ti
->scan
.immediate_count
);
246 for (c
= 0; c
< 4; ++c
)
247 ti
->immd32
[n
* 4 + c
] = imm
->u
[c
].Uint
;
249 ti
->immd32_ty
[n
] = imm
->Immediate
.DataType
;
252 static INLINE
unsigned
253 translate_interpolate(const struct tgsi_full_declaration
*decl
)
257 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_CONSTANT
)
258 mode
= NV50_INTERP_FLAT
;
260 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
)
263 mode
= NV50_INTERP_LINEAR
;
265 if (decl
->Declaration
.Centroid
)
266 mode
|= NV50_INTERP_CENTROID
;
/* Record the information carried by one TGSI declaration.
 *
 * ti:   translation state being filled in.
 * decl: declaration covering registers Range.First .. Range.Last.
 *
 * The semantic name/index (sn/si) default to 0 and are taken from the
 * declaration only when it has a semantic.  Per register file:
 *  - INPUT: stores the translated interpolation mode for every register in
 *    the range, copies sn/si into p->in[i], and for COLOR in a fragment
 *    program records the first register in p->vp.bfc[si].
 *  - OUTPUT: copies sn/si into p->out[i]; BCOLOR, PSIZE and EDGEFLAG record
 *    the first register in p->vp.bfc[si], p->vp.psiz and ti->edgeflag_out.
 *  - SYSTEM_VALUE: dispatches on the semantic name; INSTANCEID enables the
 *    instance id builtin in p->vp.attrs[2] and maps the register to slot 2.
 *  - CONSTANT: grows p->parm_size to cover (last + 1) * 16 bytes.
 *  - ADDRESS, SAMPLER and TEMPORARY share a label.
 *
 * NOTE(review): this listing has the original line numbers fused into the
 * text and is missing braces, break statements and several case bodies
 * (presumably the `if (!...Semantic)` tests are followed by a break);
 * restore it from the pristine file before changing the logic.
 */
272 prog_decl(struct nv50_translation_info
*ti
,
273 const struct tgsi_full_declaration
*decl
)
275 unsigned i
, first
, last
, sn
= 0, si
= 0;
277 first
= decl
->Range
.First
;
278 last
= decl
->Range
.Last
;
280 if (decl
->Declaration
.Semantic
) {
281 sn
= decl
->Semantic
.Name
;
282 si
= decl
->Semantic
.Index
;
285 switch (decl
->Declaration
.File
) {
286 case TGSI_FILE_INPUT
:
287 for (i
= first
; i
<= last
; ++i
)
288 ti
->interp_mode
[i
] = translate_interpolate(decl
);
290 if (!decl
->Declaration
.Semantic
)
293 for (i
= first
; i
<= last
; ++i
) {
294 ti
->p
->in
[i
].sn
= sn
;
295 ti
->p
->in
[i
].si
= si
;
299 case TGSI_SEMANTIC_FACE
:
301 case TGSI_SEMANTIC_COLOR
:
302 if (ti
->p
->type
== PIPE_SHADER_FRAGMENT
)
303 ti
->p
->vp
.bfc
[si
] = first
;
307 case TGSI_FILE_OUTPUT
:
308 if (!decl
->Declaration
.Semantic
)
311 for (i
= first
; i
<= last
; ++i
) {
312 ti
->p
->out
[i
].sn
= sn
;
313 ti
->p
->out
[i
].si
= si
;
317 case TGSI_SEMANTIC_BCOLOR
:
318 ti
->p
->vp
.bfc
[si
] = first
;
320 case TGSI_SEMANTIC_PSIZE
:
321 ti
->p
->vp
.psiz
= first
;
323 case TGSI_SEMANTIC_EDGEFLAG
:
324 ti
->edgeflag_out
= first
;
330 case TGSI_FILE_SYSTEM_VALUE
:
331 /* For VP/GP inputs, they are put in s[] after the last normal input.
332 * Let sysval_map reflect the order of the sysvals in s[] and fixup later.
334 switch (decl
->Semantic
.Name
) {
335 case TGSI_SEMANTIC_FACE
:
337 case TGSI_SEMANTIC_INSTANCEID
:
338 ti
->p
->vp
.attrs
[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID
;
339 ti
->sysval_map
[first
] = 2;
341 case TGSI_SEMANTIC_PRIMID
:
344 case TGSI_SEMANTIC_PRIMIDIN:
346 case TGSI_SEMANTIC_VERTEXID:
353 case TGSI_FILE_CONSTANT
:
354 ti
->p
->parm_size
= MAX2(ti
->p
->parm_size
, (last
+ 1) * 16);
356 case TGSI_FILE_ADDRESS
:
357 case TGSI_FILE_SAMPLER
:
358 case TGSI_FILE_TEMPORARY
:
/* Assign hardware slots for a vertex program's inputs and outputs.
 *
 * ti: translation state; ti->p is the program being prepared.
 *
 * Inputs are read from NV_FILE_MEM_S and outputs written to NV_FILE_OUT.
 * For each input register up to scan.file_max[TGSI_FILE_INPUT], p->in[i].hw
 * is the running input count; components are tested against input_access
 * and given consecutive input_map slots, with the matching bit set in
 * p->vp.attrs.  Outputs are handled the same way using p->max_out,
 * output_access, output_map and p->out[i].mask.
 *
 * After the outputs, p->vp.clpd is set to the current max_out and max_out
 * grows by p->vp.clpd_nr.  System value slots in sysval_map are then rebased
 * to follow the regular inputs ((slot - 1) + num_inputs), and a point size
 * register index below 0x40 is replaced by that output's hardware slot.
 *
 * NOTE(review): this listing has the original line numbers fused into the
 * text and is missing braces, declarations, case labels and the return
 * (presumably each `if (!...access[i][c])` is followed by a continue);
 * restore it from the pristine file before changing the logic.
 */
367 nv50_vertprog_prepare(struct nv50_translation_info
*ti
)
369 struct nv50_program
*p
= ti
->p
;
371 unsigned num_inputs
= 0;
373 ti
->input_file
= NV_FILE_MEM_S
;
374 ti
->output_file
= NV_FILE_OUT
;
376 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
378 p
->in
[i
].hw
= num_inputs
;
380 for (c
= 0; c
< 4; ++c
) {
381 if (!ti
->input_access
[i
][c
])
383 ti
->input_map
[i
][c
] = num_inputs
++;
384 p
->vp
.attrs
[(4 * i
+ c
) / 32] |= 1 << ((i
* 4 + c
) % 32);
388 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
390 p
->out
[i
].hw
= p
->max_out
;
392 for (c
= 0; c
< 4; ++c
) {
393 if (!ti
->output_access
[i
][c
])
395 ti
->output_map
[i
][c
] = p
->max_out
++;
396 p
->out
[i
].mask
|= 1 << c
;
400 p
->vp
.clpd
= p
->max_out
;
401 p
->max_out
+= p
->vp
.clpd_nr
;
403 for (i
= 0; i
< TGSI_SEMANTIC_COUNT
; ++i
) {
404 switch (ti
->sysval_map
[i
]) {
406 if (!(ti
->p
->vp
.attrs
[2] & NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID
))
407 ti
->sysval_map
[i
] = 1;
408 ti
->sysval_map
[i
] = (ti
->sysval_map
[i
] - 1) + num_inputs
;
415 if (p
->vp
.psiz
< 0x40)
416 p
->vp
.psiz
= p
->out
[p
->vp
.psiz
].hw
;
/* Lay out a fragment program's interpolants and results.
 *
 * ti: translation state; ti->p is the program being prepared.
 *
 * Visible steps, in order:
 *  - set the FP control flags for depth export (scan.writes_z) and for KIL
 *    (scan.uses_kill);
 *  - select NV_FILE_MEM_V for inputs and NV_FILE_GPR for outputs;
 *  - count the non-flat inputs into m, so flat inputs get slots after them;
 *  - walk the inputs: POSITION components map to the current nintp and set
 *    bits 24 + c of p->fp.interp when accessed, FACE maps to 255, and every
 *    other input gets slot j (flat inputs from m, the rest from n), with
 *    COLOR recording j in p->vp.bfc[si[i]] and p->in[j].linear reflecting
 *    NV50_INTERP_LINEAR;
 *  - make sure bit (8 << 24) of p->fp.interp is set;
 *  - build per-input component masks and consecutive input_map slots, and
 *    add the component count of the colour inputs to p->fp.colors;
 *  - subtract the position inputs from nintp and pack nvary and nintp into
 *    p->fp.interp;
 *  - flag multiple results when out_nr exceeds 1 (+1 if depth is written);
 *  - give outputs four consecutive output_map slots each, place the depth
 *    result (index depr, z component only) after them, and raise max_out to
 *    at least 4.
 *
 * NOTE(review): this listing has the original line numbers fused into the
 * text and is missing braces, declarations, assignments (sn[]/si[] are
 * never visibly filled, j and depr never visibly initialised) and the
 * return; restore it from the pristine file before changing the logic.
 */
422 nv50_fragprog_prepare(struct nv50_translation_info
*ti
)
424 struct nv50_program
*p
= ti
->p
;
426 unsigned nvary
, nintp
, depr
;
427 unsigned n
= 0, m
= 0, skip
= 0;
428 ubyte sn
[16], si
[16];
432 if (ti
->scan
.writes_z
) {
433 p
->fp
.flags
[1] = 0x11;
434 p
->fp
.flags
[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z
;
437 if (ti
->scan
.uses_kill
)
438 p
->fp
.flags
[0] |= NV50_3D_FP_CONTROL_USES_KIL
;
442 ti
->input_file
= NV_FILE_MEM_V
;
443 ti
->output_file
= NV_FILE_GPR
;
445 /* count non-flat inputs, save semantic info */
446 for (i
= 0; i
< p
->in_nr
; ++i
) {
447 m
+= (ti
->interp_mode
[i
] & NV50_INTERP_FLAT
) ? 0 : 1;
452 /* reorder p->in[] so that non-flat inputs are first and
453 * kick out special inputs that don't use VP/GP_RESULT_MAP
456 for (i
= 0; i
< p
->in_nr
; ++i
) {
457 if (sn
[i
] == TGSI_SEMANTIC_POSITION
) {
458 for (c
= 0; c
< 4; ++c
) {
459 ti
->input_map
[i
][c
] = nintp
;
460 if (ti
->input_access
[i
][c
]) {
461 p
->fp
.interp
|= 1 << (24 + c
);
468 if (sn
[i
] == TGSI_SEMANTIC_FACE
) {
469 ti
->input_map
[i
][0] = 255;
474 j
= (ti
->interp_mode
[i
] & NV50_INTERP_FLAT
) ? m
++ : n
++;
476 if (sn
[i
] == TGSI_SEMANTIC_COLOR
)
477 p
->vp
.bfc
[si
[i
]] = j
;
479 p
->in
[j
].linear
= (ti
->interp_mode
[i
] & NV50_INTERP_LINEAR
) ? 1 : 0;
487 if (!(p
->fp
.interp
& (8 << 24))) {
488 p
->fp
.interp
|= (8 << 24);
493 p
->fp
.colors
= 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT
;
495 for (i
= 0; i
< p
->in_nr
; ++i
) {
499 for (c
= 0; c
< 4; ++c
) {
500 if (!ti
->input_access
[j
][c
])
502 p
->in
[i
].mask
|= 1 << c
;
503 ti
->input_map
[j
][c
] = nintp
++;
505 /* count color inputs */
506 if (i
== p
->vp
.bfc
[0] || i
== p
->vp
.bfc
[1])
507 p
->fp
.colors
+= bitcount4(p
->in
[i
].mask
) << 16;
509 nintp
-= bitcount4(p
->fp
.interp
>> 24); /* subtract position inputs */
512 nvary
-= p
->in
[n
].hw
;
514 p
->fp
.interp
|= nvary
<< NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT
;
515 p
->fp
.interp
|= nintp
<< NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT
;
519 if (p
->out_nr
> (1 + (ti
->scan
.writes_z
? 1 : 0)))
520 p
->fp
.flags
[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS
;
523 for (i
= 0; i
< p
->out_nr
; ++i
) {
525 if (p
->out
[i
].sn
== TGSI_SEMANTIC_POSITION
) {
529 p
->out
[i
].hw
= p
->max_out
;
530 p
->out
[i
].mask
= 0xf;
532 for (c
= 0; c
< 4; ++c
)
533 ti
->output_map
[i
][c
] = p
->max_out
++;
535 if (depr
< p
->out_nr
) {
536 p
->out
[depr
].mask
= 0x4;
537 p
->out
[depr
].hw
= ti
->output_map
[depr
][2] = p
->max_out
++;
539 /* allowed values are 1, 4, 5, 8, 9, ... */
540 p
->max_out
= MAX2(4, p
->max_out
);
/* Select the register files used by a geometry program: inputs come from
 * NV_FILE_MEM_S and outputs go to NV_FILE_OUT.
 *
 * ti: translation state to update.
 *
 * NOTE(review): this listing has the original line numbers fused into the
 * text and is missing the braces and whatever follows the two assignments
 * (including the return value); restore it from the pristine file.
 */
547 nv50_geomprog_prepare(struct nv50_translation_info
*ti
)
549 ti
->input_file
= NV_FILE_MEM_S
;
550 ti
->output_file
= NV_FILE_OUT
;
/* Scan a program's TGSI tokens and prepare its translation state.
 *
 * ti: translation state; ti->p is the program whose pipe.tokens are read.
 *
 * Visible steps, in order:
 *  - initialise p->vp.edgeflag to 0x40;
 *  - run tgsi_scan_shader() to fill ti->scan (and dump the tokens when
 *    shader debugging is enabled);
 *  - allocate one subroutine record per BGNSUB, 16 bytes plus one type byte
 *    per immediate, and one instruction copy per instruction;
 *  - parse every token: immediates go to prog_immediate(), declarations to
 *    prog_decl(), and instructions are copied into ti->insns[] and handed to
 *    prog_inst() with the incremented instruction count as id;
 *  - walk each subroutine's instructions up to ENDSUB through
 *    prog_subroutine_inst();
 *  - set p->in_nr / p->out_nr to the highest declared index + 1;
 *  - dispatch to the vertex, fragment or geometry prepare function and
 *    assert on any other program type.
 *
 * NOTE(review): the results of CALLOC/MALLOC are not checked in the lines
 * visible here — confirm against the pristine file.
 * NOTE(review): the subroutine walk starts pc at subr[i].id, whereas
 * prog_inst stores the BGNSUB instruction's position in subr[i].pos and a
 * 1-based subroutine ordinal in .id — this looks like it should start from
 * .pos; confirm.  The walk also has no visible bound on pc if ENDSUB is
 * missing.
 * NOTE(review): this listing has the original line numbers fused into the
 * text and is missing braces, declarations, break statements, the #endif
 * and the return; restore it from the pristine file before changing it.
 */
557 nv50_prog_scan(struct nv50_translation_info
*ti
)
559 struct nv50_program
*p
= ti
->p
;
560 struct tgsi_parse_context parse
;
563 p
->vp
.edgeflag
= 0x40;
569 tgsi_scan_shader(p
->pipe
.tokens
, &ti
->scan
);
571 #if NV50_DEBUG & NV50_DEBUG_SHADER
572 tgsi_dump(p
->pipe
.tokens
, 0);
576 CALLOC(ti
->scan
.opcode_count
[TGSI_OPCODE_BGNSUB
], sizeof(ti
->subr
[0]));
578 ti
->immd32
= (uint32_t *)MALLOC(ti
->scan
.immediate_count
* 16);
579 ti
->immd32_ty
= (ubyte
*)MALLOC(ti
->scan
.immediate_count
* sizeof(ubyte
));
581 ti
->insns
= MALLOC(ti
->scan
.num_instructions
* sizeof(ti
->insns
[0]));
583 tgsi_parse_init(&parse
, p
->pipe
.tokens
);
584 while (!tgsi_parse_end_of_tokens(&parse
)) {
585 tgsi_parse_token(&parse
);
587 switch (parse
.FullToken
.Token
.Type
) {
588 case TGSI_TOKEN_TYPE_IMMEDIATE
:
589 prog_immediate(ti
, &parse
.FullToken
.FullImmediate
);
591 case TGSI_TOKEN_TYPE_DECLARATION
:
592 prog_decl(ti
, &parse
.FullToken
.FullDeclaration
);
594 case TGSI_TOKEN_TYPE_INSTRUCTION
:
595 ti
->insns
[ti
->inst_nr
] = parse
.FullToken
.FullInstruction
;
596 prog_inst(ti
, &parse
.FullToken
.FullInstruction
, ++ti
->inst_nr
);
601 /* Scan to determine which registers are inputs/outputs of a subroutine. */
602 for (i
= 0; i
< ti
->subr_nr
; ++i
) {
603 int pc
= ti
->subr
[i
].id
;
604 while (ti
->insns
[pc
].Instruction
.Opcode
!= TGSI_OPCODE_ENDSUB
)
605 prog_subroutine_inst(&ti
->subr
[i
], &ti
->insns
[pc
++]);
608 p
->in_nr
= ti
->scan
.file_max
[TGSI_FILE_INPUT
] + 1;
609 p
->out_nr
= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
] + 1;
612 case PIPE_SHADER_VERTEX
:
613 ret
= nv50_vertprog_prepare(ti
);
615 case PIPE_SHADER_FRAGMENT
:
616 ret
= nv50_fragprog_prepare(ti
);
618 case PIPE_SHADER_GEOMETRY
:
619 ret
= nv50_geomprog_prepare(ti
);
622 assert(!"unsupported program type");
/* Temporary, need a reference to nv50_ir_generate_code in libnv50 or
 * it "gets disappeared" and cannot be used in libnvc0 ...
 */
/* Hand a local nv50_ir_prog_info to nv50_ir_generate_code() and return its
 * result.
 *
 * p: program to translate; not referenced in the lines visible here.
 *
 * NOTE(review): no initialisation of `info` is visible before the call, and
 * this listing has the original line numbers fused into the text and is
 * missing the braces and return type; confirm against the pristine file.
 */
635 nv50_program_translate_new(struct nv50_program
*p
)
637 struct nv50_ir_prog_info info
;
639 return nv50_ir_generate_code(&info
);
/* Translate a program: scan its tokens, then generate code.
 *
 * p: program to translate.
 *
 * A zeroed nv50_translation_info is allocated and its edgeflag_out is set to
 * PIPE_MAX_SHADER_OUTPUTS.  nv50_prog_scan() and nv50_generate_code() are
 * then run in turn, each with its own error message.
 *
 * Returns TRUE when ret is zero, FALSE otherwise.
 *
 * NOTE(review): the result of CALLOC_STRUCT is not visibly checked, and this
 * listing has the original line numbers fused into the text and is missing
 * braces, the error tests/jumps and the cleanup code; restore it from the
 * pristine file before changing the logic.
 */
643 nv50_program_translate(struct nv50_program
*p
)
645 struct nv50_translation_info
*ti
;
648 ti
= CALLOC_STRUCT(nv50_translation_info
);
651 ti
->edgeflag_out
= PIPE_MAX_SHADER_OUTPUTS
;
653 ret
= nv50_prog_scan(ti
);
655 NOUVEAU_ERR("unsupported shader program\n");
659 ret
= nv50_generate_code(ti
);
661 NOUVEAU_ERR("error during shader translation\n");
675 return ret
? FALSE
: TRUE
;
/* Release a program's resources and reset the structure.
 *
 * nv50: owning context; not referenced in the lines visible here.
 * p:    program to reset.
 *
 * The pipe shader state and the program type are copied to locals, the
 * program's heap allocation is released with nouveau_heap_free(), and *p is
 * cleared with memset().
 *
 * NOTE(review): the saved pipe/type copies are presumably written back into
 * *p after the memset — those lines are not visible here.  This listing has
 * the original line numbers fused into the text and is missing braces and
 * several statements; restore it from the pristine file before changing it.
 */
679 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
681 const struct pipe_shader_state pipe
= p
->pipe
;
682 const ubyte type
= p
->type
;
685 nouveau_heap_free(&p
->mem
);
693 memset(p
, 0, sizeof(*p
));