/*
 * Copyright 2010 Christoph Bumiller
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 #include "pipe/p_shader_tokens.h"
24 #include "pipe/p_defines.h"
26 #include "tgsi/tgsi_parse.h"
27 #include "tgsi/tgsi_util.h"
28 #include "tgsi/tgsi_dump.h"
30 #include "nvc0_context.h"
34 nvc0_tgsi_src_mask(const struct tgsi_full_instruction
*inst
, int c
)
36 unsigned mask
= inst
->Dst
[0].Register
.WriteMask
;
38 switch (inst
->Instruction
.Opcode
) {
41 return (mask
& 0x8) | ((mask
& 0x7) ? 0x1 : 0x0);
46 case TGSI_OPCODE_KIL
: /* WriteMask ignored */
49 return mask
& (c
? 0xa : 0x6);
68 const struct tgsi_instruction_texture
*tex
;
70 assert(inst
->Instruction
.Texture
);
74 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_TEX
&&
75 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXD
)
76 mask
|= 0x8; /* bias, lod or proj */
78 switch (tex
->Texture
) {
82 case TGSI_TEXTURE_SHADOW1D
:
96 if (mask
& 1) x
|= 0x6;
97 if (mask
& 2) x
|= 0x5;
98 if (mask
& 4) x
|= 0x3;
109 nvc0_indirect_inputs(struct nvc0_translation_info
*ti
, int id
)
113 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; ++i
)
114 for (c
= 0; c
< 4; ++c
)
115 ti
->input_access
[i
][c
] = id
;
117 ti
->indirect_inputs
= TRUE
;
121 nvc0_indirect_outputs(struct nvc0_translation_info
*ti
, int id
)
125 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
)
126 for (c
= 0; c
< 4; ++c
)
127 ti
->output_access
[i
][c
] = id
;
129 ti
->indirect_outputs
= TRUE
;
132 static INLINE
unsigned
133 nvc0_system_value_location(unsigned sn
, unsigned si
)
137 case TGSI_SEMANTIC_VERTEXID:
140 case TGSI_SEMANTIC_PRIMID
:
143 case TGSI_SEMANTIC_LAYER_INDEX:
145 case TGSI_SEMANTIC_VIEWPORT_INDEX:
148 case TGSI_SEMANTIC_INSTANCEID
:
156 static INLINE
unsigned
157 nvc0_varying_location(unsigned sn
, unsigned si
)
160 case TGSI_SEMANTIC_POSITION
:
162 case TGSI_SEMANTIC_COLOR
:
163 return 0x280 + (si
* 16); /* are these hard-wired ? */
164 case TGSI_SEMANTIC_BCOLOR
:
165 return 0x2a0 + (si
* 16);
166 case TGSI_SEMANTIC_FOG
:
168 case TGSI_SEMANTIC_PSIZE
:
171 case TGSI_SEMANTIC_PNTC:
174 case TGSI_SEMANTIC_GENERIC
:
176 return 0x80 + (si
* 16);
177 case TGSI_SEMANTIC_NORMAL
:
179 case TGSI_SEMANTIC_PRIMID
:
182 case TGSI_SEMANTIC_CLIP_DISTANCE:
183 return 0x2c0 + (si * 4);
191 static INLINE
unsigned
192 nvc0_interp_mode(const struct tgsi_full_declaration
*decl
)
196 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_CONSTANT
)
197 mode
= NVC0_INTERP_FLAT
;
199 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
)
200 mode
= NVC0_INTERP_PERSPECTIVE
;
202 mode
= NVC0_INTERP_LINEAR
;
204 if (decl
->Declaration
.Centroid
)
205 mode
|= NVC0_INTERP_CENTROID
;
211 prog_immediate(struct nvc0_translation_info
*ti
,
212 const struct tgsi_full_immediate
*imm
)
215 unsigned n
= ti
->immd32_nr
++;
217 assert(ti
->immd32_nr
<= ti
->scan
.immediate_count
);
219 for (c
= 0; c
< 4; ++c
)
220 ti
->immd32
[n
* 4 + c
] = imm
->u
[c
].Uint
;
222 ti
->immd32_ty
[n
] = imm
->Immediate
.DataType
;
226 prog_decl(struct nvc0_translation_info
*ti
,
227 const struct tgsi_full_declaration
*decl
)
230 unsigned sn
= TGSI_SEMANTIC_GENERIC
;
232 const unsigned first
= decl
->Range
.First
;
233 const unsigned last
= decl
->Range
.Last
;
235 if (decl
->Declaration
.Semantic
) {
236 sn
= decl
->Semantic
.Name
;
237 si
= decl
->Semantic
.Index
;
240 switch (decl
->Declaration
.File
) {
241 case TGSI_FILE_INPUT
:
242 for (i
= first
; i
<= last
; ++i
) {
243 if (ti
->prog
->type
== PIPE_SHADER_VERTEX
) {
244 sn
= TGSI_SEMANTIC_GENERIC
;
247 for (c
= 0; c
< 4; ++c
)
248 ti
->input_loc
[i
][c
] = nvc0_varying_location(sn
, si
) + c
* 4;
250 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
)
251 ti
->interp_mode
[i
] = nvc0_interp_mode(decl
);
254 case TGSI_FILE_OUTPUT
:
255 for (i
= first
; i
<= last
; ++i
, ++si
) {
256 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
) {
258 if (i
== ti
->fp_depth_output
) {
259 ti
->output_loc
[i
][2] = (ti
->scan
.num_outputs
- 1) * 4;
261 if (i
> ti
->fp_depth_output
)
263 for (c
= 0; c
< 4; ++c
)
264 ti
->output_loc
[i
][c
] = si
* 4 + c
;
267 for (c
= 0; c
< 4; ++c
)
268 ti
->output_loc
[i
][c
] = nvc0_varying_location(sn
, si
) + c
* 4;
272 case TGSI_FILE_SYSTEM_VALUE
:
273 ti
->sysval_loc
[i
] = nvc0_system_value_location(sn
, si
);
274 assert(first
== last
);
277 case TGSI_FILE_CONSTANT
:
278 case TGSI_FILE_TEMPORARY
:
279 case TGSI_FILE_SAMPLER
:
280 case TGSI_FILE_ADDRESS
:
281 case TGSI_FILE_IMMEDIATE
:
282 case TGSI_FILE_PREDICATE
:
285 NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl
->Declaration
.File
);
292 prog_inst(struct nvc0_translation_info
*ti
,
293 const struct tgsi_full_instruction
*inst
, int id
)
295 const struct tgsi_dst_register
*dst
;
296 const struct tgsi_src_register
*src
;
300 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_BGNSUB
) {
301 ti
->subr
[ti
->num_subrs
].first_insn
= id
- 1;
302 ti
->subr
[ti
->num_subrs
].id
= ti
->num_subrs
+ 1; /* id 0 is main program */
306 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
307 dst
= &inst
->Dst
[0].Register
;
309 for (c
= 0; c
< 4; ++c
) {
311 nvc0_indirect_outputs(ti
, id
);
312 if (!(dst
->WriteMask
& (1 << c
)))
314 ti
->output_access
[dst
->Index
][c
] = id
;
317 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
318 inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
&&
319 dst
->Index
== ti
->edgeflag_out
)
320 ti
->prog
->vp
.edgeflag
= inst
->Src
[0].Register
.Index
;
322 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
323 if (inst
->Dst
[0].Register
.Indirect
)
324 ti
->require_stores
= TRUE
;
327 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
328 src
= &inst
->Src
[s
].Register
;
329 if (src
->File
== TGSI_FILE_TEMPORARY
)
330 if (inst
->Src
[s
].Register
.Indirect
)
331 ti
->require_stores
= TRUE
;
332 if (src
->File
!= TGSI_FILE_INPUT
)
334 mask
= nvc0_tgsi_src_mask(inst
, s
);
336 if (inst
->Src
[s
].Register
.Indirect
)
337 nvc0_indirect_inputs(ti
, id
);
339 for (c
= 0; c
< 4; ++c
) {
340 if (!(mask
& (1 << c
)))
342 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
343 if (k
<= TGSI_SWIZZLE_W
)
344 ti
->input_access
[src
->Index
][k
] = id
;
349 /* Probably should introduce something like struct tgsi_function_declaration
350 * instead of trying to guess inputs/outputs.
353 prog_subroutine_inst(struct nvc0_subroutine
*subr
,
354 const struct tgsi_full_instruction
*inst
)
356 const struct tgsi_dst_register
*dst
;
357 const struct tgsi_src_register
*src
;
361 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
362 src
= &inst
->Src
[s
].Register
;
363 if (src
->File
!= TGSI_FILE_TEMPORARY
)
365 mask
= nvc0_tgsi_src_mask(inst
, s
);
367 for (c
= 0; c
< 4; ++c
) {
368 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
370 if ((mask
& (1 << c
)) && k
< TGSI_SWIZZLE_W
)
371 if (!(subr
->retv
[src
->Index
/ 32][k
] & (1 << (src
->Index
% 32))))
372 subr
->argv
[src
->Index
/ 32][k
] |= 1 << (src
->Index
% 32);
376 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
377 dst
= &inst
->Dst
[0].Register
;
379 for (c
= 0; c
< 4; ++c
)
380 if (dst
->WriteMask
& (1 << c
))
381 subr
->retv
[dst
->Index
/ 32][c
] |= 1 << (dst
->Index
% 32);
386 nvc0_vp_gp_gen_header(struct nvc0_program
*vp
, struct nvc0_translation_info
*ti
)
391 for (a
= 0x80/4, i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
392 for (c
= 0; c
< 4; ++c
, ++a
)
393 if (ti
->input_access
[i
][c
])
394 vp
->hdr
[5 + a
/ 32] |= 1 << (a
% 32); /* VP_ATTR_EN */
397 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
398 a
= (ti
->output_loc
[i
][0] - 0x40) / 4;
399 for (c
= 0; c
< 4; ++c
, ++a
) {
400 if (!ti
->output_access
[i
][c
])
402 vp
->hdr
[13 + a
/ 32] |= 1 << (a
% 32); /* VP_EXPORT_EN */
410 nvc0_vp_gen_header(struct nvc0_program
*vp
, struct nvc0_translation_info
*ti
)
412 vp
->hdr
[0] = 0x20461;
413 vp
->hdr
[4] = 0xff000;
415 return nvc0_vp_gp_gen_header(vp
, ti
);
419 nvc0_gp_gen_header(struct nvc0_program
*gp
, struct nvc0_translation_info
*ti
)
421 unsigned max_output_verts
, output_prim
;
424 gp
->hdr
[0] = 0x00021061;
425 gp
->hdr
[2] = 0x01000000;
427 for (i
= 0; i
< ti
->scan
.num_properties
; ++i
) {
428 switch (ti
->scan
.properties
[i
].name
) {
429 case TGSI_PROPERTY_GS_OUTPUT_PRIM
:
430 output_prim
= ti
->scan
.properties
[i
].data
[0];
432 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
:
433 max_output_verts
= ti
->scan
.properties
[i
].data
[0];
440 switch (output_prim
) {
441 case PIPE_PRIM_POINTS
:
442 gp
->hdr
[3] = 0x01000000;
443 gp
->hdr
[0] |= 0xf0000000;
445 case PIPE_PRIM_LINE_STRIP
:
446 gp
->hdr
[3] = 0x06000000;
447 gp
->hdr
[0] |= 0x10000000;
449 case PIPE_PRIM_TRIANGLE_STRIP
:
450 gp
->hdr
[3] = 0x07000000;
451 gp
->hdr
[0] |= 0x10000000;
458 gp
->hdr
[4] = max_output_verts
& 0x1ff;
460 return nvc0_vp_gp_gen_header(gp
, ti
);
464 nvc0_fp_gen_header(struct nvc0_program
*fp
, struct nvc0_translation_info
*ti
)
469 fp
->hdr
[0] = 0x21462;
470 fp
->hdr
[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
472 if (ti
->scan
.uses_kill
)
473 fp
->hdr
[0] |= 0x8000;
474 if (ti
->scan
.writes_z
) {
476 if (ti
->scan
.num_outputs
> 2)
477 fp
->hdr
[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
479 if (ti
->scan
.num_outputs
> 1)
480 fp
->hdr
[0] |= 0x8000; /* FP_MULTIPLE_COLOR_OUTPUTS */
483 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
484 m
= ti
->interp_mode
[i
];
485 for (c
= 0; c
< 4; ++c
) {
486 if (!ti
->input_access
[i
][c
])
488 a
= ti
->input_loc
[i
][c
] / 2;
489 if ((a
& ~7) == 0x70/2)
490 fp
->hdr
[5] |= 1 << (28 + (a
& 7) / 2); /* FRAG_COORD_UMASK */
492 fp
->hdr
[4 + a
/ 32] |= m
<< (a
% 32);
496 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
497 if (i
!= ti
->fp_depth_output
)
498 fp
->hdr
[18] |= 0xf << ti
->output_loc
[i
][0];
505 nvc0_prog_scan(struct nvc0_translation_info
*ti
)
507 struct nvc0_program
*prog
= ti
->prog
;
508 struct tgsi_parse_context parse
;
513 tgsi_dump(prog
->pipe
.tokens
, 0);
516 tgsi_scan_shader(prog
->pipe
.tokens
, &ti
->scan
);
518 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
) {
519 ti
->fp_depth_output
= 255;
520 for (i
= 0; i
< ti
->scan
.num_outputs
; ++i
)
521 if (ti
->scan
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
)
522 ti
->fp_depth_output
= i
;
526 CALLOC(ti
->scan
.opcode_count
[TGSI_OPCODE_BGNSUB
], sizeof(ti
->subr
[0]));
528 ti
->immd32
= (uint32_t *)MALLOC(ti
->scan
.immediate_count
* 16);
529 ti
->immd32_ty
= (ubyte
*)MALLOC(ti
->scan
.immediate_count
* sizeof(ubyte
));
531 ti
->insns
= MALLOC(ti
->scan
.num_instructions
* sizeof(ti
->insns
[0]));
533 tgsi_parse_init(&parse
, prog
->pipe
.tokens
);
534 while (!tgsi_parse_end_of_tokens(&parse
)) {
535 tgsi_parse_token(&parse
);
537 switch (parse
.FullToken
.Token
.Type
) {
538 case TGSI_TOKEN_TYPE_IMMEDIATE
:
539 prog_immediate(ti
, &parse
.FullToken
.FullImmediate
);
541 case TGSI_TOKEN_TYPE_DECLARATION
:
542 prog_decl(ti
, &parse
.FullToken
.FullDeclaration
);
544 case TGSI_TOKEN_TYPE_INSTRUCTION
:
545 ti
->insns
[ti
->num_insns
] = parse
.FullToken
.FullInstruction
;
546 prog_inst(ti
, &parse
.FullToken
.FullInstruction
, ++ti
->num_insns
);
553 for (i
= 0; i
< ti
->num_subrs
; ++i
) {
554 unsigned pc
= ti
->subr
[i
].id
;
555 while (ti
->insns
[pc
].Instruction
.Opcode
!= TGSI_OPCODE_ENDSUB
)
556 prog_subroutine_inst(&ti
->subr
[i
], &ti
->insns
[pc
++]);
559 switch (prog
->type
) {
560 case PIPE_SHADER_VERTEX
:
561 ti
->input_file
= NV_FILE_MEM_A
;
562 ti
->output_file
= NV_FILE_MEM_V
;
563 ret
= nvc0_vp_gen_header(prog
, ti
);
566 case PIPE_SHADER_TESSELLATION_CONTROL:
567 ret = nvc0_tcp_gen_header(ti);
569 case PIPE_SHADER_TESSELLATION_EVALUATION:
570 ret = nvc0_tep_gen_header(ti);
572 case PIPE_SHADER_GEOMETRY:
573 ret = nvc0_gp_gen_header(ti);
576 case PIPE_SHADER_FRAGMENT
:
577 ti
->input_file
= NV_FILE_MEM_V
;
578 ti
->output_file
= NV_FILE_GPR
;
579 ret
= nvc0_fp_gen_header(prog
, ti
);
582 assert(!"unsupported program type");
592 nvc0_program_translate(struct nvc0_program
*prog
)
594 struct nvc0_translation_info
*ti
;
597 ti
= CALLOC_STRUCT(nvc0_translation_info
);
600 ti
->edgeflag_out
= PIPE_MAX_SHADER_OUTPUTS
;
602 ret
= nvc0_prog_scan(ti
);
604 NOUVEAU_ERR("unsupported shader program\n");
608 ret
= nvc0_generate_code(ti
);
610 NOUVEAU_ERR("shader translation failed\n");
614 for (i
= 0; i
< sizeof(prog
->hdr
) / sizeof(prog
->hdr
[0]); ++i
)
615 debug_printf("HDR[%02lx] = 0x%08x\n",
616 i
* sizeof(prog
->hdr
[0]), prog
->hdr
[i
]);
629 return ret
? FALSE
: TRUE
;
633 nvc0_program_destroy(struct nvc0_context
*nvc0
, struct nvc0_program
*prog
)
636 nouveau_resource_free(&prog
->res
);
643 prog
->translated
= FALSE
;