2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "nv50_program.h"
25 #include "nv50_context.h"
27 #include "pipe/p_shader_tokens.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_util.h"
30 #include "tgsi/tgsi_dump.h"
/*
 * bitcount4 - count the set bits in the low nibble of a value.
 *
 * Only bits 0..3 of val are examined; all higher bits are masked off
 * before the table lookup.
 *
 * Returns the number of set bits among bits 0..3, in the range 0..4.
 */
static INLINE unsigned
bitcount4(const uint32_t val)
{
   /* Population count of every 4-bit value, indexed by that value. */
   static const unsigned cnt[16]
      = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4 };

   return cnt[val & 0xf];
}
/*
 * nv50_tgsi_src_mask - derive a component mask for source operand c of inst.
 *
 * Visible here: the mask starts out as the write mask of destination 0 and
 * is then adjusted in a switch on the instruction opcode.  Among the visible
 * pieces: the KIL case is annotated as ignoring the write mask, one case
 * returns a mask that depends on which source operand c is, the texture
 * path asserts Instruction.Texture, ORs in 0x8 for opcodes other than
 * TEX and TXD, and switches on tex->Texture, and the XPD case builds a
 * mask x from the low three destination bits.
 *
 * prog_inst() calls this with the source register index as c.
 *
 * NOTE(review): the embedded source line numbers skip (45, 48, 53, 56, 75 ...),
 * so most case labels and return statements are not part of this listing;
 * the complete opcode-to-mask mapping cannot be confirmed from here.
 */
41 nv50_tgsi_src_mask(const struct tgsi_full_instruction
*inst
, int c
)
43 unsigned mask
= inst
->Dst
[0].Register
.WriteMask
;
45 switch (inst
->Instruction
.Opcode
) {
48 return (mask
& 0x8) | ((mask
& 0x7) ? 0x1 : 0x0);
53 case TGSI_OPCODE_KIL
: /* WriteMask ignored */
56 return mask
& (c
? 0xa : 0x6);
75 const struct tgsi_instruction_texture
*tex
;
77 assert(inst
->Instruction
.Texture
);
81 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_TEX
&&
82 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXD
)
83 mask
|= 0x8; /* bias, lod or proj */
85 switch (tex
->Texture
) {
89 case TGSI_TEXTURE_SHADOW1D
:
100 case TGSI_OPCODE_XPD
:
103 if (mask
& 1) x
|= 0x6;
104 if (mask
& 2) x
|= 0x5;
105 if (mask
& 4) x
|= 0x3;
/*
 * nv50_indirect_inputs - mark every input component as accessed by id.
 *
 * Writes id into all PIPE_MAX_SHADER_INPUTS x 4 entries of
 * ti->input_access and sets ti->indirect_inputs to TRUE.
 *
 * prog_inst() calls this when a source register has its Indirect bit set.
 *
 * NOTE(review): the return type and the declarations of i and c are not
 * visible in this listing.
 */
116 nv50_indirect_inputs(struct nv50_translation_info
*ti
, int id
)
120 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; ++i
)
121 for (c
= 0; c
< 4; ++c
)
122 ti
->input_access
[i
][c
] = id
;
124 ti
->indirect_inputs
= TRUE
;
/*
 * nv50_indirect_outputs - mark every output component as accessed by id.
 *
 * Writes id into all PIPE_MAX_SHADER_OUTPUTS x 4 entries of
 * ti->output_access and sets ti->indirect_outputs to TRUE.
 *
 * prog_inst() calls this when destination 0 has its Indirect bit set.
 *
 * NOTE(review): the return type and the declarations of i and c are not
 * visible in this listing.
 */
128 nv50_indirect_outputs(struct nv50_translation_info
*ti
, int id
)
132 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
)
133 for (c
= 0; c
< 4; ++c
)
134 ti
->output_access
[i
][c
] = id
;
136 ti
->indirect_outputs
= TRUE
;
/*
 * prog_inst - record which shader inputs and outputs an instruction touches.
 *
 * id is the marker that gets stored (nv50_prog_scan() passes ++ti->inst_nr).
 *
 * Destination 0, when it lives in TGSI_FILE_OUTPUT:
 *   - an Indirect destination is handed to nv50_indirect_outputs();
 *   - ti->output_access[dst->Index][c] is set to id, with the write mask
 *     tested per component c;
 *   - a MOV whose source 0 is an input and whose destination index equals
 *     ti->edgeflag_out stores the source index in ti->p->vp.edgeflag.
 *
 * Each source register is tested against TGSI_FILE_INPUT:
 *   - the component mask comes from nv50_tgsi_src_mask(inst, s);
 *   - an Indirect source is handed to nv50_indirect_inputs();
 *   - for a component c tested against the mask, the swizzle result k
 *     (when k <= TGSI_SWIZZLE_W) selects ti->input_access[src->Index][k],
 *     which is set to id.
 *
 * NOTE(review): the statement following each of the write-mask, file and
 * mask tests is missing from this listing (the embedded line numbers skip
 * 154, 167 and 175) - presumably `continue`; confirm against the full file.
 * The declarations of c, s, k and mask are not visible either.
 */
140 prog_inst(struct nv50_translation_info
*ti
,
141 const struct tgsi_full_instruction
*inst
, int id
)
143 const struct tgsi_dst_register
*dst
;
144 const struct tgsi_src_register
*src
;
148 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
149 for (c
= 0; c
< 4; ++c
) {
150 dst
= &inst
->Dst
[0].Register
;
151 if (inst
->Dst
[0].Register
.Indirect
)
152 nv50_indirect_outputs(ti
, id
);
153 if (!(dst
->WriteMask
& (1 << c
)))
155 ti
->output_access
[dst
->Index
][c
] = id
;
158 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
159 inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
&&
160 dst
->Index
== ti
->edgeflag_out
)
161 ti
->p
->vp
.edgeflag
= inst
->Src
[0].Register
.Index
;
164 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
165 src
= &inst
->Src
[s
].Register
;
166 if (src
->File
!= TGSI_FILE_INPUT
)
168 mask
= nv50_tgsi_src_mask(inst
, s
);
170 if (inst
->Src
[s
].Register
.Indirect
)
171 nv50_indirect_inputs(ti
, id
);
173 for (c
= 0; c
< 4; ++c
) {
174 if (!(mask
& (1 << c
)))
176 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
177 if (k
<= TGSI_SWIZZLE_W
)
178 ti
->input_access
[src
->Index
][k
] = id
;
/*
 * prog_immediate - append the four 32-bit words of imm to ti->immd32.
 *
 * ti->immd32_nr is incremented first; the new count n selects the slot,
 * i.e. words (n - 1) * 4 .. (n - 1) * 4 + 3 receive imm->u[0..3].Uint.
 * The immediate is also passed to tgsi_dump_immediate().
 *
 * The array is grown only when n == 1 << (ffs(n) - 1), i.e. when n equals
 * its own lowest set bit (n is a power of two); the requested size is
 * (n * 2) * 16.
 *
 * NOTE(review): the old-size argument given to REALLOC is (n / 2) * 16,
 * which for n >= 4 is smaller than the (n - 1) * 16 already filled in -
 * harmless only if REALLOC ignores its old-size argument; confirm.
 * No check of the REALLOC result is visible in this listing, and the
 * declaration of c is not shown.
 */
184 prog_immediate(struct nv50_translation_info
*ti
,
185 const struct tgsi_full_immediate
*imm
)
188 unsigned n
= ++ti
->immd32_nr
;
190 tgsi_dump_immediate(imm
);
192 if (n
== (1 << (ffs(n
) - 1)))
193 ti
->immd32
= REALLOC(ti
->immd32
, (n
/ 2) * 16, (n
* 2) * 16);
195 for (c
= 0; c
< 4; ++c
)
196 ti
->immd32
[(n
- 1) * 4 + c
] = imm
->u
[c
].Uint
;
/*
 * translate_interpolate - map a declaration's TGSI interpolation settings
 * to NV50_INTERP_* flags.
 *
 * Visible mapping: TGSI_INTERPOLATE_CONSTANT selects NV50_INTERP_FLAT, and
 * NV50_INTERP_CENTROID is OR'ed into the mode when the declaration has
 * Centroid set.  prog_decl() stores the result in ti->interp_mode[].
 *
 * NOTE(review): the lines between the PERSPECTIVE test and the
 * NV50_INTERP_LINEAR assignment are missing from this listing (embedded
 * line numbers skip 208-209), as are the declaration of mode and the
 * return statement; which value the PERSPECTIVE case yields cannot be
 * confirmed from here.
 */
199 static INLINE
unsigned
200 translate_interpolate(const struct tgsi_full_declaration
*decl
)
204 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_CONSTANT
)
205 mode
= NV50_INTERP_FLAT
;
207 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
)
210 mode
= NV50_INTERP_LINEAR
;
212 if (decl
->Declaration
.Centroid
)
213 mode
|= NV50_INTERP_CENTROID
;
/*
 * prog_decl - process one TGSI declaration.
 *
 * Reads the declared register range [first, last] and, when the declaration
 * carries a semantic, its name (sn) and index (si); the declaration is also
 * passed to tgsi_dump_declaration().  Processing then switches on the
 * register file.  Visible per-file work:
 *
 *   TGSI_FILE_INPUT    - ti->interp_mode[i] is set from
 *                        translate_interpolate() for each register in range;
 *                        sn/si are copied into ti->p->in[i]; under the COLOR
 *                        label, fragment programs store first in
 *                        ti->p->vp.bfc[si].
 *   TGSI_FILE_OUTPUT   - sn/si are copied into ti->p->out[i]; the BCOLOR,
 *                        PSIZE and EDGEFLAG labels store first in
 *                        vp.bfc[si], vp.psiz and ti->edgeflag_out.
 *   TGSI_FILE_CONSTANT - ti->p->parm_size is raised to at least
 *                        (last + 1) * 16.
 *   TGSI_FILE_SYSTEM_VALUE, ADDRESS, SAMPLER, TEMPORARY - only the case
 *                        labels are visible.
 *
 * NOTE(review): many lines are missing from this listing (the embedded line
 * numbers skip repeatedly), including the statements after the
 * `!decl->Declaration.Semantic` tests, the inner switch headers and all
 * break statements; which labels fall through cannot be confirmed here.
 */
219 prog_decl(struct nv50_translation_info
*ti
,
220 const struct tgsi_full_declaration
*decl
)
222 unsigned i
, first
, last
, sn
= 0, si
= 0;
224 first
= decl
->Range
.First
;
225 last
= decl
->Range
.Last
;
227 if (decl
->Declaration
.Semantic
) {
228 sn
= decl
->Semantic
.Name
;
229 si
= decl
->Semantic
.Index
;
231 tgsi_dump_declaration(decl
);
233 switch (decl
->Declaration
.File
) {
234 case TGSI_FILE_INPUT
:
235 for (i
= first
; i
<= last
; ++i
)
236 ti
->interp_mode
[i
] = translate_interpolate(decl
);
238 if (!decl
->Declaration
.Semantic
)
241 for (i
= first
; i
<= last
; ++i
) {
242 ti
->p
->in
[i
].sn
= sn
;
243 ti
->p
->in
[i
].si
= si
;
247 case TGSI_SEMANTIC_FACE
:
249 case TGSI_SEMANTIC_COLOR
:
250 if (ti
->p
->type
== PIPE_SHADER_FRAGMENT
)
251 ti
->p
->vp
.bfc
[si
] = first
;
255 case TGSI_FILE_OUTPUT
:
256 if (!decl
->Declaration
.Semantic
)
259 for (i
= first
; i
<= last
; ++i
) {
260 ti
->p
->out
[i
].sn
= sn
;
261 ti
->p
->out
[i
].si
= si
;
265 case TGSI_SEMANTIC_BCOLOR
:
266 ti
->p
->vp
.bfc
[si
] = first
;
268 case TGSI_SEMANTIC_PSIZE
:
269 ti
->p
->vp
.psiz
= first
;
271 case TGSI_SEMANTIC_EDGEFLAG
:
272 ti
->edgeflag_out
= first
;
278 case TGSI_FILE_SYSTEM_VALUE
:
279 switch (decl
->Semantic
.Name
) {
280 case TGSI_SEMANTIC_FACE
:
282 case TGSI_SEMANTIC_INSTANCEID
:
284 case TGSI_SEMANTIC_PRIMID
:
287 case TGSI_SEMANTIC_PRIMIDIN:
289 case TGSI_SEMANTIC_VERTEXID:
296 case TGSI_FILE_CONSTANT
:
297 ti
->p
->parm_size
= MAX2(ti
->p
->parm_size
, (last
+ 1) * 16);
299 case TGSI_FILE_ADDRESS
:
300 case TGSI_FILE_SAMPLER
:
301 case TGSI_FILE_TEMPORARY
:
/*
 * nv50_vertprog_prepare - lay out vertex program inputs and outputs.
 *
 * Sets ti->input_file to NV_FILE_MEM_S and ti->output_file to NV_FILE_OUT.
 *
 * Inputs 0 .. scan.file_max[TGSI_FILE_INPUT]: p->in[i].hw takes the current
 * value of the running counter num_inputs; a component tested through
 * ti->input_access[i][c] gets the next counter value in ti->input_map[i][c]
 * and its bit (4 * i + c) set in the p->vp.attrs words.
 *
 * Outputs 0 .. scan.file_max[TGSI_FILE_OUTPUT]: the same scheme with
 * p->max_out as the counter, filling ti->output_map[i][c] and the per-output
 * component mask p->out[i].mask.
 *
 * Finally, when p->vp.psiz is below 0x40 it is replaced by the hw value of
 * the output it indexes.
 *
 * nv50_prog_scan() stores this function's result in ret.
 *
 * NOTE(review): the statement following each `!...access[i][c]` test is
 * missing from this listing - presumably it skips unaccessed components;
 * confirm.  Other interior lines, the declarations of i and c and the
 * return statement are not visible either.
 */
310 nv50_vertprog_prepare(struct nv50_translation_info
*ti
)
312 struct nv50_program
*p
= ti
->p
;
314 unsigned num_inputs
= 0;
316 ti
->input_file
= NV_FILE_MEM_S
;
317 ti
->output_file
= NV_FILE_OUT
;
319 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
321 p
->in
[i
].hw
= num_inputs
;
323 for (c
= 0; c
< 4; ++c
) {
324 if (!ti
->input_access
[i
][c
])
326 ti
->input_map
[i
][c
] = num_inputs
++;
327 p
->vp
.attrs
[(4 * i
+ c
) / 32] |= 1 << ((i
* 4 + c
) % 32);
331 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
333 p
->out
[i
].hw
= p
->max_out
;
335 for (c
= 0; c
< 4; ++c
) {
336 if (!ti
->output_access
[i
][c
])
338 ti
->output_map
[i
][c
] = p
->max_out
++;
339 p
->out
[i
].mask
|= 1 << c
;
343 if (p
->vp
.psiz
< 0x40)
344 p
->vp
.psiz
= p
->out
[p
->vp
.psiz
].hw
;
/*
 * nv50_fragprog_prepare - lay out fragment program inputs and outputs and
 * fill in the p->fp control words.
 *
 * Visible steps, in order:
 *   - scan.writes_z sets p->fp.flags[1] to 0x11 and ORs
 *     NV50TCL_FP_CONTROL_EXPORTS_Z into flags[0]; scan.uses_kill ORs in
 *     NV50TCL_FP_CONTROL_USES_KIL.
 *   - ti->input_file becomes NV_FILE_MEM_V, ti->output_file NV_FILE_GPR.
 *   - m is advanced once per input whose interp_mode lacks
 *     NV50_INTERP_FLAT, i.e. it counts the non-flat inputs.
 *   - Inputs are then walked again: POSITION components map to nintp and,
 *     when accessed, set bit 24 + c of p->fp.interp; FACE maps component 0
 *     to 255; other inputs get a new index j (flat ones from m upwards,
 *     non-flat ones from n upwards), COLOR inputs record j in
 *     p->vp.bfc[si[i]], and p->in[j].linear reflects NV50_INTERP_LINEAR.
 *   - Bit 8 << 24 of p->fp.interp is set if it was clear, and p->fp.colors
 *     starts as (1 << 24) | 4.
 *   - A further pass over the inputs sets p->in[i].mask bits, hands out
 *     consecutive nintp slots in ti->input_map, and for inputs equal to
 *     vp.bfc[0] or vp.bfc[1] adds bitcount4(mask) << 16 to p->fp.colors.
 *   - nintp is reduced by the number of position bits, and nvary / nintp
 *     are shifted into p->fp.interp.
 *   - More outputs than 1 (+1 when writes_z) sets
 *     NV50TCL_FP_CONTROL_MULTIPLE_RESULTS; outputs get hw = p->max_out,
 *     mask 0xf and four consecutive output_map slots, and output depr (when
 *     below p->out_nr) gets mask 0x4 and the next p->max_out value.
 *
 * NOTE(review): this listing has large gaps (the embedded line numbers
 * skip), so loop bodies are incomplete, the initial values of nvary, nintp
 * and depr are not visible, and the terminator of the "reorder p->in[]"
 * comment is absent.  Treat the summary above as a description of the
 * visible lines only.
 */
350 nv50_fragprog_prepare(struct nv50_translation_info
*ti
)
352 struct nv50_program
*p
= ti
->p
;
354 unsigned nvary
, nintp
, depr
;
355 unsigned n
= 0, m
= 0, skip
= 0;
356 ubyte sn
[16], si
[16];
360 if (ti
->scan
.writes_z
) {
361 p
->fp
.flags
[1] = 0x11;
362 p
->fp
.flags
[0] |= NV50TCL_FP_CONTROL_EXPORTS_Z
;
365 if (ti
->scan
.uses_kill
)
366 p
->fp
.flags
[0] |= NV50TCL_FP_CONTROL_USES_KIL
;
370 ti
->input_file
= NV_FILE_MEM_V
;
371 ti
->output_file
= NV_FILE_GPR
;
373 /* count non-flat inputs, save semantic info */
374 for (i
= 0; i
< p
->in_nr
; ++i
) {
375 m
+= (ti
->interp_mode
[i
] & NV50_INTERP_FLAT
) ? 0 : 1;
380 /* reorder p->in[] so that non-flat inputs are first and
381 * kick out special inputs that don't use VP/GP_RESULT_MAP
384 for (i
= 0; i
< p
->in_nr
; ++i
) {
385 if (sn
[i
] == TGSI_SEMANTIC_POSITION
) {
386 for (c
= 0; c
< 4; ++c
) {
387 ti
->input_map
[i
][c
] = nintp
;
388 if (ti
->input_access
[i
][c
]) {
389 p
->fp
.interp
|= 1 << (24 + c
);
396 if (sn
[i
] == TGSI_SEMANTIC_FACE
) {
397 ti
->input_map
[i
][0] = 255;
402 j
= (ti
->interp_mode
[i
] & NV50_INTERP_FLAT
) ? m
++ : n
++;
404 if (sn
[i
] == TGSI_SEMANTIC_COLOR
)
405 p
->vp
.bfc
[si
[i
]] = j
;
407 p
->in
[j
].linear
= (ti
->interp_mode
[i
] & NV50_INTERP_LINEAR
) ? 1 : 0;
415 if (!(p
->fp
.interp
& (8 << 24))) {
416 p
->fp
.interp
|= (8 << 24);
420 p
->fp
.colors
= (1 << 24) | 4; /* CLAMP, FFC0_ID = 4 */
422 for (i
= 0; i
< p
->in_nr
; ++i
) {
426 for (c
= 0; c
< 4; ++c
) {
427 if (!ti
->input_access
[j
][c
])
429 p
->in
[i
].mask
|= 1 << c
;
430 ti
->input_map
[j
][c
] = nintp
++;
432 /* count color inputs */
433 if (i
== p
->vp
.bfc
[0] || i
== p
->vp
.bfc
[1])
434 p
->fp
.colors
+= bitcount4(p
->in
[i
].mask
) << 16;
436 nintp
-= bitcount4(p
->fp
.interp
>> 24); /* subtract position inputs */
439 nvary
-= p
->in
[n
].hw
;
441 p
->fp
.interp
|= nvary
<< NV50TCL_FP_INTERPOLANT_CTRL_COUNT_NONFLAT_SHIFT
;
442 p
->fp
.interp
|= nintp
<< NV50TCL_FP_INTERPOLANT_CTRL_COUNT_SHIFT
;
446 if (p
->out_nr
> (1 + (ti
->scan
.writes_z
? 1 : 0)))
447 p
->fp
.flags
[0] |= NV50TCL_FP_CONTROL_MULTIPLE_RESULTS
;
450 for (i
= 0; i
< p
->out_nr
; ++i
) {
452 if (p
->out
[i
].sn
== TGSI_SEMANTIC_POSITION
) {
456 p
->out
[i
].hw
= p
->max_out
;
457 p
->out
[i
].mask
= 0xf;
459 for (c
= 0; c
< 4; ++c
)
460 ti
->output_map
[i
][c
] = p
->max_out
++;
462 if (depr
< p
->out_nr
) {
463 p
->out
[depr
].mask
= 0x4;
464 p
->out
[depr
].hw
= p
->max_out
++;
/*
 * nv50_geomprog_prepare - select the register files for a geometry program.
 *
 * Sets ti->input_file to NV_FILE_MEM_S and ti->output_file to NV_FILE_OUT.
 * nv50_prog_scan() stores this function's result in ret.
 *
 * NOTE(review): only these two assignments are visible in this listing;
 * any further statements and the return value are not shown.
 */
471 nv50_geomprog_prepare(struct nv50_translation_info
*ti
)
473 ti
->input_file
= NV_FILE_MEM_S
;
474 ti
->output_file
= NV_FILE_OUT
;
/*
 * nv50_prog_scan - walk the TGSI tokens of ti->p and gather translation info.
 *
 * Steps visible here:
 *   - tgsi_scan_shader() fills ti->scan from p->pipe.tokens.
 *   - The token stream is parsed; immediates go to prog_immediate(),
 *     declarations to prog_decl(), and instructions to prog_inst() with
 *     ++ti->inst_nr as the instruction id.
 *   - p->in_nr and p->out_nr are set to the highest used input / output
 *     register index plus one.
 *   - Depending on the shader type, nv50_vertprog_prepare(),
 *     nv50_fragprog_prepare() or nv50_geomprog_prepare() is run and its
 *     result stored in ret; an assert flags an unsupported program type.
 *
 * NOTE(review): the break statements, the header of the program-type
 * switch, the declaration of ret and the return statement are missing from
 * this listing; nv50_program_tx() uses the result as ret.
 */
481 nv50_prog_scan(struct nv50_translation_info
*ti
)
483 struct nv50_program
*p
= ti
->p
;
484 struct tgsi_parse_context parse
;
492 tgsi_scan_shader(p
->pipe
.tokens
, &ti
->scan
);
494 tgsi_parse_init(&parse
, p
->pipe
.tokens
);
495 while (!tgsi_parse_end_of_tokens(&parse
)) {
496 tgsi_parse_token(&parse
);
498 switch (parse
.FullToken
.Token
.Type
) {
499 case TGSI_TOKEN_TYPE_IMMEDIATE
:
500 prog_immediate(ti
, &parse
.FullToken
.FullImmediate
);
502 case TGSI_TOKEN_TYPE_DECLARATION
:
503 prog_decl(ti
, &parse
.FullToken
.FullDeclaration
);
505 case TGSI_TOKEN_TYPE_INSTRUCTION
:
506 prog_inst(ti
, &parse
.FullToken
.FullInstruction
, ++ti
->inst_nr
);
511 p
->in_nr
= ti
->scan
.file_max
[TGSI_FILE_INPUT
] + 1;
512 p
->out_nr
= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
] + 1;
515 case PIPE_SHADER_VERTEX
:
516 ret
= nv50_vertprog_prepare(ti
);
518 case PIPE_SHADER_FRAGMENT
:
519 ret
= nv50_fragprog_prepare(ti
);
521 case PIPE_SHADER_GEOMETRY
:
522 ret
= nv50_geomprog_prepare(ti
);
525 assert(!"unsupported program type");
/*
 * nv50_program_tx - translate program p.
 *
 * Allocates a nv50_translation_info with CALLOC_STRUCT, initialises
 * ti->edgeflag_out to PIPE_MAX_SHADER_OUTPUTS, runs nv50_prog_scan() and
 * then nv50_generate_code(), reporting "unsupported shader program" or
 * "error during shader translation" through NOUVEAU_ERR.
 *
 * Returns FALSE when ret is non-zero, TRUE otherwise.
 *
 * NOTE(review): the conditions guarding the two error messages, the
 * assignment that links ti to p, any allocation-failure check and the
 * cleanup of ti are not visible in this listing (embedded line numbers
 * skip); confirm against the full file.
 */
535 nv50_program_tx(struct nv50_program
*p
)
537 struct nv50_translation_info
*ti
;
540 ti
= CALLOC_STRUCT(nv50_translation_info
);
543 ti
->edgeflag_out
= PIPE_MAX_SHADER_OUTPUTS
;
545 ret
= nv50_prog_scan(ti
);
547 NOUVEAU_ERR("unsupported shader program\n");
551 ret
= nv50_generate_code(ti
);
553 NOUVEAU_ERR("error during shader translation\n");
561 return ret
? FALSE
: TRUE
;
/*
 * nv50_program_destroy - release the resources held by program p.
 *
 * Visible here: the references held in p->bo and p->so are passed to
 * nouveau_bo_ref() and so_ref() with a NULL replacement, and
 * p->translated is reset to FALSE.
 *
 * NOTE(review): the lines between the so_ref() call and the final
 * assignment are missing from this listing (embedded line numbers skip
 * 570-573), and nv50 is not referenced in any visible line.
 */
565 nv50_program_destroy(struct nv50_context
*nv50
, struct nv50_program
*p
)
567 nouveau_bo_ref(NULL
, &p
->bo
);
569 so_ref(NULL
, &p
->so
);
574 p
->translated
= FALSE
;