2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "pipe/p_shader_tokens.h"
24 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_util.h"
30 #include "tgsi/tgsi_dump.h"
32 #include "nvc0_context.h"
/*
 * Derive the mask of source components that operand number c of inst reads,
 * starting from the write mask of the first destination register.
 * NOTE(review): the embedded numbering skips lines, so most case labels and
 * return statements of both switches are not visible in this listing.
 */
36 nvc0_tgsi_src_mask(const struct tgsi_full_instruction
*inst
, int c
)
38 unsigned mask
= inst
->Dst
[0].Register
.WriteMask
;
40 switch (inst
->Instruction
.Opcode
) {
/* bit 3 (w) is passed through; writing any of x, y, z requires source x */
43 return (mask
& 0x8) | ((mask
& 0x7) ? 0x1 : 0x0);
48 case TGSI_OPCODE_KIL
: /* WriteMask ignored */
/* operand 0 is limited to y and z (0x6), later operands to y and w (0xa) */
51 return mask
& (c
? 0xa : 0x6);
70 const struct tgsi_instruction_texture
*tex
;
72 assert(inst
->Instruction
.Texture
);
/* opcodes other than TEX and TXD additionally get bit 3 (w) set */
76 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_TEX
&&
77 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXD
)
78 mask
|= 0x8; /* bias, lod or proj */
80 switch (tex
->Texture
) {
84 case TGSI_TEXTURE_SHADOW1D
:
/* each written component among x, y, z adds the two other ones to x */
98 if (mask
& 1) x
|= 0x6;
99 if (mask
& 2) x
|= 0x5;
100 if (mask
& 4) x
|= 0x3;
111 nvc0_indirect_inputs(struct nvc0_translation_info
*ti
, int id
)
115 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; ++i
)
116 for (c
= 0; c
< 4; ++c
)
117 ti
->input_access
[i
][c
] = id
;
119 ti
->indirect_inputs
= TRUE
;
123 nvc0_indirect_outputs(struct nvc0_translation_info
*ti
, int id
)
127 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
)
128 for (c
= 0; c
< 4; ++c
)
129 ti
->output_access
[i
][c
] = id
;
131 ti
->indirect_outputs
= TRUE
;
/*
 * Map a TGSI system-value semantic (name sn, index si) to a location.
 * The visible labels cover VERTEXID, PRIMID, LAYER_INDEX, VIEWPORT_INDEX,
 * INSTANCEID, FACE and INVOCATIONID.
 * NOTE(review): the value returned for each label is not visible in this
 * listing.
 */
134 static INLINE
unsigned
135 nvc0_system_value_location(unsigned sn
, unsigned si
)
137 /* NOTE: locations 0xfxx indicate special regs */
140 case TGSI_SEMANTIC_VERTEXID:
143 case TGSI_SEMANTIC_PRIMID
:
146 case TGSI_SEMANTIC_LAYER_INDEX:
148 case TGSI_SEMANTIC_VIEWPORT_INDEX:
151 case TGSI_SEMANTIC_INSTANCEID
:
153 case TGSI_SEMANTIC_FACE
:
156 case TGSI_SEMANTIC_INVOCATIONID:
/*
 * Map a varying's TGSI semantic (name sn, index si) to its base location.
 * Visible results: COLOR -> 0x280 + si * 16, BCOLOR -> 0x2a0 + si * 16,
 * GENERIC -> 0x80 + si * 16, CLIP_DISTANCE -> 0x2c0 + si * 4.
 * NOTE(review): the results of the remaining labels are not visible in this
 * listing.
 */
165 static INLINE
unsigned
166 nvc0_varying_location(unsigned sn
, unsigned si
)
169 case TGSI_SEMANTIC_POSITION
:
171 case TGSI_SEMANTIC_COLOR
:
/* indexed semantics advance by 16 per semantic index */
172 return 0x280 + (si
* 16); /* are these hard-wired ? */
173 case TGSI_SEMANTIC_BCOLOR
:
174 return 0x2a0 + (si
* 16);
175 case TGSI_SEMANTIC_FOG
:
177 case TGSI_SEMANTIC_PSIZE
:
180 case TGSI_SEMANTIC_PNTC:
183 case TGSI_SEMANTIC_GENERIC
:
185 return 0x80 + (si
* 16);
186 case TGSI_SEMANTIC_NORMAL
:
188 case TGSI_SEMANTIC_PRIMID
:
190 case TGSI_SEMANTIC_FACE
:
193 case TGSI_SEMANTIC_CLIP_DISTANCE:
/* clip distances advance by 4 per semantic index */
194 return 0x2c0 + (si * 4);
202 static INLINE
unsigned
203 nvc0_interp_mode(const struct tgsi_full_declaration
*decl
)
207 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_CONSTANT
)
208 mode
= NVC0_INTERP_FLAT
;
210 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
)
211 mode
= NVC0_INTERP_PERSPECTIVE
;
213 mode
= NVC0_INTERP_LINEAR
;
215 if (decl
->Declaration
.Centroid
)
216 mode
|= NVC0_INTERP_CENTROID
;
222 prog_immediate(struct nvc0_translation_info
*ti
,
223 const struct tgsi_full_immediate
*imm
)
226 unsigned n
= ti
->immd32_nr
++;
228 assert(ti
->immd32_nr
<= ti
->scan
.immediate_count
);
230 for (c
= 0; c
< 4; ++c
)
231 ti
->immd32
[n
* 4 + c
] = imm
->u
[c
].Uint
;
233 ti
->immd32_ty
[n
] = imm
->Immediate
.DataType
;
/*
 * Process one TGSI declaration and record, for each declared register, its
 * location in ti: input_loc (plus interp_mode for fragment programs),
 * output_loc, or sysval_loc. Files that need no bookkeeping are listed
 * explicitly; anything else is reported through NOUVEAU_ERR.
 * NOTE(review): the embedded numbering skips lines, so some statements of
 * the loops below are not visible in this listing.
 */
237 prog_decl(struct nvc0_translation_info
*ti
,
238 const struct tgsi_full_declaration
*decl
)
/* semantic used when the declaration does not carry one */
241 unsigned sn
= TGSI_SEMANTIC_GENERIC
;
243 const unsigned first
= decl
->Range
.First
;
244 const unsigned last
= decl
->Range
.Last
;
246 if (decl
->Declaration
.Semantic
) {
247 sn
= decl
->Semantic
.Name
;
248 si
= decl
->Semantic
.Index
;
251 switch (decl
->Declaration
.File
) {
252 case TGSI_FILE_INPUT
:
253 for (i
= first
; i
<= last
; ++i
) {
/* vertex program inputs are always placed as GENERIC */
254 if (ti
->prog
->type
== PIPE_SHADER_VERTEX
) {
255 sn
= TGSI_SEMANTIC_GENERIC
;
/* components are spaced 4 apart from the semantic's base location */
258 for (c
= 0; c
< 4; ++c
)
259 ti
->input_loc
[i
][c
] = nvc0_varying_location(sn
, si
) + c
* 4;
261 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
)
262 ti
->interp_mode
[i
] = nvc0_interp_mode(decl
);
265 case TGSI_FILE_OUTPUT
:
/* the semantic index advances together with the register index */
266 for (i
= first
; i
<= last
; ++i
, ++si
) {
267 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
) {
/* depth output: only component 2 gets a location, at (num_outputs - 1) * 4 */
269 if (i
== ti
->fp_depth_output
) {
270 ti
->output_loc
[i
][2] = (ti
->scan
.num_outputs
- 1) * 4;
272 if (i
> ti
->fp_depth_output
)
/* fragment outputs are packed: 4 consecutive locations per output */
274 for (c
= 0; c
< 4; ++c
)
275 ti
->output_loc
[i
][c
] = si
* 4 + c
;
/* other program types use the varying location table */
278 for (c
= 0; c
< 4; ++c
)
279 ti
->output_loc
[i
][c
] = nvc0_varying_location(sn
, si
) + c
* 4;
283 case TGSI_FILE_SYSTEM_VALUE
:
/* NOTE(review): i is not assigned on this path in the visible code (only
 * the INPUT/OUTPUT loops set it); since first == last is asserted below,
 * first looks like the intended index - confirm. */
284 ti
->sysval_loc
[i
] = nvc0_system_value_location(sn
, si
);
285 assert(first
== last
);
/* register files that are accepted without any bookkeeping here */
288 case TGSI_FILE_CONSTANT
:
289 case TGSI_FILE_TEMPORARY
:
290 case TGSI_FILE_SAMPLER
:
291 case TGSI_FILE_ADDRESS
:
292 case TGSI_FILE_IMMEDIATE
:
293 case TGSI_FILE_PREDICATE
:
296 NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl
->Declaration
.File
);
/*
 * Record usage information for one instruction. id is the value passed by
 * nvc0_prog_scan, i.e. one more than the instruction's index in ti->insns.
 * Tracks: subroutine starts (BGNSUB), which instruction accesses each
 * output/input component, the edge flag source attribute, and whether
 * indirectly addressed temporaries require stores.
 * NOTE(review): the embedded numbering skips lines, so some guards and
 * continue statements are not visible in this listing.
 */
303 prog_inst(struct nvc0_translation_info
*ti
,
304 const struct tgsi_full_instruction
*inst
, int id
)
306 const struct tgsi_dst_register
*dst
;
307 const struct tgsi_src_register
*src
;
/* BGNSUB starts a new subroutine record */
311 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_BGNSUB
) {
/* id - 1 is the index of the BGNSUB instruction itself */
312 ti
->subr
[ti
->num_subrs
].first_insn
= id
- 1;
313 ti
->subr
[ti
->num_subrs
].id
= ti
->num_subrs
+ 1; /* id 0 is main program */
317 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
318 dst
= &inst
->Dst
[0].Register
;
320 for (c
= 0; c
< 4; ++c
) {
322 nvc0_indirect_outputs(ti
, id
);
323 if (!(dst
->WriteMask
& (1 << c
)))
/* remember this instruction as accessing the written output component */
325 ti
->output_access
[dst
->Index
][c
] = id
;
/* a MOV from an input into the edge flag output identifies the edge flag
 * input register */
328 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
329 inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
&&
330 dst
->Index
== ti
->edgeflag_out
)
331 ti
->prog
->vp
.edgeflag
= inst
->Src
[0].Register
.Index
;
/* indirectly addressed temporary destination */
333 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
334 if (inst
->Dst
[0].Register
.Indirect
)
335 ti
->require_stores
= TRUE
;
338 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
339 src
= &inst
->Src
[s
].Register
;
/* indirectly addressed temporary source */
340 if (src
->File
== TGSI_FILE_TEMPORARY
)
341 if (inst
->Src
[s
].Register
.Indirect
)
342 ti
->require_stores
= TRUE
;
343 if (src
->File
!= TGSI_FILE_INPUT
)
/* components of operand s that the instruction actually reads */
345 mask
= nvc0_tgsi_src_mask(inst
, s
);
347 if (inst
->Src
[s
].Register
.Indirect
)
348 nvc0_indirect_inputs(ti
, id
);
350 for (c
= 0; c
< 4; ++c
) {
351 if (!(mask
& (1 << c
)))
/* map the read component through the source swizzle */
353 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
354 if (k
<= TGSI_SWIZZLE_W
)
355 ti
->input_access
[src
->Index
][k
] = id
;
360 /* Probably should introduce something like struct tgsi_function_declaration
361 * instead of trying to guess inputs/outputs.
364 prog_subroutine_inst(struct nvc0_subroutine
*subr
,
365 const struct tgsi_full_instruction
*inst
)
367 const struct tgsi_dst_register
*dst
;
368 const struct tgsi_src_register
*src
;
372 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
373 src
= &inst
->Src
[s
].Register
;
374 if (src
->File
!= TGSI_FILE_TEMPORARY
)
376 mask
= nvc0_tgsi_src_mask(inst
, s
);
378 for (c
= 0; c
< 4; ++c
) {
379 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
381 if ((mask
& (1 << c
)) && k
< TGSI_SWIZZLE_W
)
382 if (!(subr
->retv
[src
->Index
/ 32][k
] & (1 << (src
->Index
% 32))))
383 subr
->argv
[src
->Index
/ 32][k
] |= 1 << (src
->Index
% 32);
387 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
388 dst
= &inst
->Dst
[0].Register
;
390 for (c
= 0; c
< 4; ++c
)
391 if (dst
->WriteMask
& (1 << c
))
392 subr
->retv
[dst
->Index
/ 32][c
] |= 1 << (dst
->Index
% 32);
397 nvc0_vp_gp_gen_header(struct nvc0_program
*vp
, struct nvc0_translation_info
*ti
)
402 for (a
= 0x80/4, i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
403 for (c
= 0; c
< 4; ++c
, ++a
)
404 if (ti
->input_access
[i
][c
])
405 vp
->hdr
[5 + a
/ 32] |= 1 << (a
% 32); /* VP_ATTR_EN */
408 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
409 a
= (ti
->output_loc
[i
][0] - 0x40) / 4;
410 for (c
= 0; c
< 4; ++c
, ++a
) {
411 if (!ti
->output_access
[i
][c
])
413 vp
->hdr
[13 + a
/ 32] |= 1 << (a
% 32); /* VP_EXPORT_EN */
421 nvc0_vp_gen_header(struct nvc0_program
*vp
, struct nvc0_translation_info
*ti
)
423 vp
->hdr
[0] = 0x20461;
424 vp
->hdr
[4] = 0xff000;
426 vp
->hdr
[18] = (1 << vp
->vp
.num_ucps
) - 1;
428 return nvc0_vp_gp_gen_header(vp
, ti
);
/*
 * Build the header of a geometry program from the shader's GS properties
 * (output primitive, maximum output vertices, invocation count), then add
 * the attribute enables shared with vertex programs.
 * NOTE(review): output_prim and max_output_verts are declared without an
 * initializer and, in the visible code, are only assigned inside the
 * property loop; confirm they cannot be read unassigned when a shader lacks
 * those properties.
 * NOTE(review): break/default lines of the switches are not visible in this
 * listing.
 */
432 nvc0_gp_gen_header(struct nvc0_program
*gp
, struct nvc0_translation_info
*ti
)
434 unsigned invocations
= 1;
435 unsigned max_output_verts
, output_prim
;
438 gp
->hdr
[0] = 0x21061;
/* pick up the geometry shader properties gathered by the scan */
440 for (i
= 0; i
< ti
->scan
.num_properties
; ++i
) {
441 switch (ti
->scan
.properties
[i
].name
) {
442 case TGSI_PROPERTY_GS_OUTPUT_PRIM
:
443 output_prim
= ti
->scan
.properties
[i
].data
[0];
445 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
:
446 max_output_verts
= ti
->scan
.properties
[i
].data
[0];
/* matches the 0x1ff mask applied when the value is stored in hdr[4] */
447 assert(max_output_verts
< 512);
450 case TGSI_PROPERTY_GS_INVOCATIONS:
451 invocations = ti->scan.properties[i].data[0];
452 assert(invocations <= 32);
/* invocation count, clamped to 32, goes into the top byte of hdr[2] */
460 gp
->hdr
[2] = MIN2(invocations
, 32) << 24;
/* per output primitive: hdr[3] value plus extra bits in hdr[0] */
462 switch (output_prim
) {
463 case PIPE_PRIM_POINTS
:
464 gp
->hdr
[3] = 0x01000000;
465 gp
->hdr
[0] |= 0xf0000000;
467 case PIPE_PRIM_LINE_STRIP
:
468 gp
->hdr
[3] = 0x06000000;
469 gp
->hdr
[0] |= 0x10000000;
471 case PIPE_PRIM_TRIANGLE_STRIP
:
472 gp
->hdr
[3] = 0x07000000;
473 gp
->hdr
[0] |= 0x10000000;
480 gp
->hdr
[4] = max_output_verts
& 0x1ff;
482 return nvc0_vp_gp_gen_header(gp
, ti
);
/*
 * Build the header of a fragment program: base flags, kill and
 * multiple-color-output flags, per-input interpolation modes and the
 * per-output enable mask in hdr[18].
 * NOTE(review): the embedded numbering skips lines; in particular one
 * statement inside the writes_z branch is not visible in this listing.
 */
486 nvc0_fp_gen_header(struct nvc0_program
*fp
, struct nvc0_translation_info
*ti
)
491 fp
->hdr
[0] = 0x21462;
492 fp
->hdr
[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
494 if (ti
->scan
.uses_kill
)
495 fp
->hdr
[0] |= 0x8000;
/* when depth is written the output-count threshold is one higher */
496 if (ti
->scan
.writes_z
) {
498 if (ti
->scan
.num_outputs
> 2)
499 fp
->hdr
[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
501 if (ti
->scan
.num_outputs
> 1)
502 fp
->hdr
[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
505 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
506 m
= ti
->interp_mode
[i
];
507 for (c
= 0; c
< 4; ++c
) {
508 if (!ti
->input_access
[i
][c
])
/* a is half the input location */
510 a
= ti
->input_loc
[i
][c
] / 2;
/* locations 0x70..0x7f set one of the four FRAG_COORD_UMASK bits 28..31 */
511 if ((a
& ~7) == 0x70/2)
512 fp
->hdr
[5] |= 1 << (28 + (a
& 7) / 2); /* FRAG_COORD_UMASK */
/* the interpolation mode is placed at bit a % 32 of word 4 + a / 32 */
514 fp
->hdr
[4 + a
/ 32] |= m
<< (a
% 32);
/* every output except the depth output enables 4 bits in hdr[18] */
518 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
519 if (i
!= ti
->fp_depth_output
)
520 fp
->hdr
[18] |= 0xf << ti
->output_loc
[i
][0];
/*
 * First pass over the TGSI tokens of ti->prog: scans the shader, allocates
 * the subroutine, immediate and instruction arrays, feeds every token to
 * prog_immediate / prog_decl / prog_inst, derives subroutine
 * arguments/return values, and finally generates the program header for the
 * shader type.
 * NOTE(review): none of the allocations below is checked for failure in the
 * visible code.
 * NOTE(review): the embedded numbering skips lines (break statements, the
 * left-hand side of the CALLOC, the function tail).
 */
527 nvc0_prog_scan(struct nvc0_translation_info
*ti
)
529 struct nvc0_program
*prog
= ti
->prog
;
530 struct tgsi_parse_context parse
;
535 tgsi_dump(prog
->pipe
.tokens
, 0);
538 tgsi_scan_shader(prog
->pipe
.tokens
, &ti
->scan
);
/* find the fragment depth output: the output with POSITION semantic;
 * 255 is the value used when there is none */
540 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
) {
541 ti
->fp_depth_output
= 255;
542 for (i
= 0; i
< ti
->scan
.num_outputs
; ++i
)
543 if (ti
->scan
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
)
544 ti
->fp_depth_output
= i
;
/* one zeroed subroutine record per BGNSUB opcode in the shader */
548 CALLOC(ti
->scan
.opcode_count
[TGSI_OPCODE_BGNSUB
], sizeof(ti
->subr
[0]));
/* 16 bytes (four 32-bit values) and one type byte per immediate */
550 ti
->immd32
= (uint32_t *)MALLOC(ti
->scan
.immediate_count
* 16);
551 ti
->immd32_ty
= (ubyte
*)MALLOC(ti
->scan
.immediate_count
* sizeof(ubyte
));
553 ti
->insns
= MALLOC(ti
->scan
.num_instructions
* sizeof(ti
->insns
[0]));
555 tgsi_parse_init(&parse
, prog
->pipe
.tokens
);
556 while (!tgsi_parse_end_of_tokens(&parse
)) {
557 tgsi_parse_token(&parse
);
559 switch (parse
.FullToken
.Token
.Type
) {
560 case TGSI_TOKEN_TYPE_IMMEDIATE
:
561 prog_immediate(ti
, &parse
.FullToken
.FullImmediate
);
563 case TGSI_TOKEN_TYPE_DECLARATION
:
564 prog_decl(ti
, &parse
.FullToken
.FullDeclaration
);
566 case TGSI_TOKEN_TYPE_INSTRUCTION
:
/* keep a copy of the instruction; prog_inst receives index + 1 as id */
567 ti
->insns
[ti
->num_insns
] = parse
.FullToken
.FullInstruction
;
568 prog_inst(ti
, &parse
.FullToken
.FullInstruction
, ++ti
->num_insns
);
/* walk each subroutine up to its ENDSUB to collect arguments and results */
575 for (i
= 0; i
< ti
->num_subrs
; ++i
) {
/* NOTE(review): the walk starts at subr[i].id, while prog_inst stores the
 * BGNSUB position in subr[i].first_insn and a sequential number in id -
 * confirm which field is intended here. */
576 unsigned pc
= ti
->subr
[i
].id
;
577 while (ti
->insns
[pc
].Instruction
.Opcode
!= TGSI_OPCODE_ENDSUB
)
578 prog_subroutine_inst(&ti
->subr
[i
], &ti
->insns
[pc
++]);
/* select register files and generate the header for the program type */
581 switch (prog
->type
) {
582 case PIPE_SHADER_VERTEX
:
583 ti
->input_file
= NV_FILE_MEM_A
;
584 ti
->output_file
= NV_FILE_MEM_V
;
585 ret
= nvc0_vp_gen_header(prog
, ti
);
/* NOTE(review): the three calls below pass only ti, whereas
 * nvc0_gp_gen_header is defined in this file with (gp, ti) parameters -
 * confirm these cases are compiled. */
588 case PIPE_SHADER_TESSELLATION_CONTROL:
589 ret = nvc0_tcp_gen_header(ti);
591 case PIPE_SHADER_TESSELLATION_EVALUATION:
592 ret = nvc0_tep_gen_header(ti);
594 case PIPE_SHADER_GEOMETRY:
595 ret = nvc0_gp_gen_header(ti);
598 case PIPE_SHADER_FRAGMENT
:
599 ti
->input_file
= NV_FILE_MEM_V
;
600 ti
->output_file
= NV_FILE_GPR
;
602 if (ti
->scan
.writes_z
)
603 prog
->flags
[0] = 0x11; /* ? */
/* early Z is enabled when the program has no global stores */
605 if (!ti
->global_stores
)
606 prog
->fp
.early_z
= 1;
608 ret
= nvc0_fp_gen_header(prog
, ti
);
611 assert(!"unsupported program type");
/*
 * Translate prog: allocate a zeroed translation info, scan the TGSI
 * (nvc0_prog_scan), generate code (nvc0_generate_code) and print the
 * resulting header words. Returns TRUE when ret is 0, FALSE otherwise.
 * NOTE(review): the embedded numbering skips lines; the conditions guarding
 * the two error messages and any cleanup code are not visible in this
 * listing.
 */
621 nvc0_program_translate(struct nvc0_program
*prog
)
623 struct nvc0_translation_info
*ti
;
626 ti
= CALLOC_STRUCT(nvc0_translation_info
);
/* initial edge flag output index; prog_inst compares destination indices
 * against it */
629 ti
->edgeflag_out
= PIPE_MAX_SHADER_OUTPUTS
;
/* vertex programs with user clip planes get append_ucp set */
631 if (prog
->type
== PIPE_SHADER_VERTEX
&& prog
->vp
.num_ucps
)
632 ti
->append_ucp
= TRUE
;
634 ret
= nvc0_prog_scan(ti
);
636 NOUVEAU_ERR("unsupported shader program\n");
640 ret
= nvc0_generate_code(ti
);
642 NOUVEAU_ERR("shader translation failed\n");
/* print every header word together with its byte offset
 * NOTE(review): %lx is given a size_t argument (i * sizeof(...)); confirm
 * this matches on all supported targets. */
646 for (i
= 0; i
< sizeof(prog
->hdr
) / sizeof(prog
->hdr
[0]); ++i
)
647 debug_printf("HDR[%02lx] = 0x%08x\n",
648 i
* sizeof(prog
->hdr
[0]), prog
->hdr
[i
]);
661 return ret
? FALSE
: TRUE
;
/*
 * Release what a translated program holds and return it to the untranslated
 * state: frees prog->res, zeroes the header and clears prog->translated.
 * NOTE(review): the embedded numbering skips lines between the visible
 * statements, so further cleanup may exist that is not shown here.
 */
665 nvc0_program_destroy(struct nvc0_context
*nvc0
, struct nvc0_program
*prog
)
668 nouveau_resource_free(&prog
->res
);
/* header words are rebuilt on the next translation */
675 memset(prog
->hdr
, 0, sizeof(prog
->hdr
));
677 prog
->translated
= FALSE
;