2 * Copyright 2010 Christoph Bumiller
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 #include "pipe/p_shader_tokens.h"
24 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_parse.h"
29 #include "tgsi/tgsi_util.h"
30 #include "tgsi/tgsi_dump.h"
32 #include "nvc0_context.h"
/* Compute a 4-bit component mask for source operand c of inst.
 *
 * The mask starts from the write mask of the first destination register and
 * is then adjusted per opcode.  prog_inst() and prog_subroutine_inst() test
 * bit n of the result to decide whether component n of the operand is read.
 *
 * NOTE(review): only some of the opcode cases are visible in this listing;
 * the comments below describe just the statements that can be seen.
 */
36 nvc0_tgsi_src_mask(const struct tgsi_full_instruction
*inst
, int c
)
/* Default: the operand is read wherever the destination is written. */
38 unsigned mask
= inst
->Dst
[0].Register
.WriteMask
;
40 switch (inst
->Instruction
.Opcode
) {
/* Keep bit 3 (w) as written and request bit 0 (x) if any of x/y/z is
 * written. */
43 return (mask
& 0x8) | ((mask
& 0x7) ? 0x1 : 0x0);
48 case TGSI_OPCODE_KIL
: /* WriteMask ignored */
/* Operand 0 is restricted to bits 0x6, every other operand to bits 0xa. */
51 return mask
& (c
? 0xa : 0x6);
70 const struct tgsi_instruction_texture
*tex
;
72 assert(inst
->Instruction
.Texture
);
/* Every texture opcode other than TEX and TXD also reads the w component. */
76 if (inst
->Instruction
.Opcode
!= TGSI_OPCODE_TEX
&&
77 inst
->Instruction
.Opcode
!= TGSI_OPCODE_TXD
)
78 mask
|= 0x8; /* bias, lod or proj */
80 switch (tex
->Texture
) {
84 case TGSI_TEXTURE_SHADOW1D
:
/* Each written x/y/z destination component requests the two *other*
 * components of x/y/z from the source (x -> yz, y -> xz, z -> xy). */
98 if (mask
& 1) x
|= 0x6;
99 if (mask
& 2) x
|= 0x5;
100 if (mask
& 4) x
|= 0x3;
/* Record that instruction id may access any shader input.
 *
 * Sets input_access[i][c] to id for every one of the PIPE_MAX_SHADER_INPUTS
 * slots and all 4 components, then flags ti->indirect_inputs.  prog_inst()
 * calls this when a source register uses indirect addressing.
 */
111 nvc0_indirect_inputs(struct nvc0_translation_info
*ti
, int id
)
115 for (i
= 0; i
< PIPE_MAX_SHADER_INPUTS
; ++i
)
116 for (c
= 0; c
< 4; ++c
)
117 ti
->input_access
[i
][c
] = id
;
119 ti
->indirect_inputs
= TRUE
;
/* Record that instruction id may access any shader output.
 *
 * Sets output_access[i][c] to id for every one of the
 * PIPE_MAX_SHADER_OUTPUTS slots and all 4 components, then flags
 * ti->indirect_outputs.
 */
123 nvc0_indirect_outputs(struct nvc0_translation_info
*ti
, int id
)
127 for (i
= 0; i
< PIPE_MAX_SHADER_OUTPUTS
; ++i
)
128 for (c
= 0; c
< 4; ++c
)
129 ti
->output_access
[i
][c
] = id
;
131 ti
->indirect_outputs
= TRUE
;
/* Map a system value semantic (name sn, index si) to a location.
 *
 * prog_decl() stores the return value in ti->sysval_loc[] and passes
 * &ti->sysval_in[] as is_input; the header generators later use sysval_in to
 * choose between the input and the output enable words.
 *
 * NOTE(review): only the case labels are visible in this listing; the
 * location returned for each semantic and the value written through
 * is_input cannot be confirmed from here.
 */
134 static INLINE
unsigned
135 nvc0_system_value_location(unsigned sn
, unsigned si
, boolean
*is_input
)
137 /* NOTE: locations 0xfxx indicate special regs */
140 case TGSI_SEMANTIC_VERTEXID:
144 case TGSI_SEMANTIC_PRIMID
:
148 case TGSI_SEMANTIC_LAYER_INDEX:
150 case TGSI_SEMANTIC_VIEWPORT_INDEX:
153 case TGSI_SEMANTIC_INSTANCEID
:
156 case TGSI_SEMANTIC_FACE
:
160 case TGSI_SEMANTIC_INVOCATIONID:
/* Map a varying semantic (name sn, index si) to the location of its first
 * component.
 *
 * Visible mappings:
 *   COLOR          0x280 + si * 16
 *   BCOLOR         0x2a0 + si * 16
 *   GENERIC        0x300 + si * 16, or 0x80 + (si - 8) * 16 (the condition
 *                  selecting between the two is not visible in this listing)
 *   CLIP_DISTANCE  0x2c0 + si * 4
 *
 * prog_decl() adds c * 4 to the result to obtain the location of component c.
 *
 * NOTE(review): the return statements for POSITION, FOG, PSIZE, PNTC, NORMAL,
 * PRIMID and FACE are not visible here.
 */
169 static INLINE
unsigned
170 nvc0_varying_location(unsigned sn
, unsigned si
)
173 case TGSI_SEMANTIC_POSITION
:
175 case TGSI_SEMANTIC_COLOR
:
176 return 0x280 + (si
* 16); /* are these hard-wired ? */
177 case TGSI_SEMANTIC_BCOLOR
:
178 return 0x2a0 + (si
* 16);
179 case TGSI_SEMANTIC_FOG
:
181 case TGSI_SEMANTIC_PSIZE
:
184 case TGSI_SEMANTIC_PNTC:
187 case TGSI_SEMANTIC_GENERIC
:
188 /* We'd really like to distinguish between TEXCOORD and GENERIC here,
189 * since only 0x300 to 0x37c can be replaced by sprite coordinates.
190 * Also, gl_PointCoord should be a system value and must be assigned to
191 * address 0x2e0. For now, let's cheat:
195 return 0x300 + si
* 16;
198 return 0x80 + ((si
- 8) * 16);
199 case TGSI_SEMANTIC_NORMAL
:
201 case TGSI_SEMANTIC_PRIMID
:
203 case TGSI_SEMANTIC_FACE
:
206 case TGSI_SEMANTIC_CLIP_DISTANCE:
207 return 0x2c0 + (si * 4);
/* Translate the interpolation settings of a TGSI declaration into an
 * NVC0_INTERP_* mode value.
 *
 * TGSI_INTERPOLATE_CONSTANT maps to NVC0_INTERP_FLAT and
 * TGSI_INTERPOLATE_PERSPECTIVE to NVC0_INTERP_PERSPECTIVE;
 * NVC0_INTERP_LINEAR is presumably the fallback (the else keywords are not
 * visible in this listing).  NVC0_INTERP_CENTROID is OR'ed in when the
 * declaration has its Centroid flag set.
 */
215 static INLINE
unsigned
216 nvc0_interp_mode(const struct tgsi_full_declaration
*decl
)
220 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_CONSTANT
)
221 mode
= NVC0_INTERP_FLAT
;
223 if (decl
->Declaration
.Interpolate
== TGSI_INTERPOLATE_PERSPECTIVE
)
224 mode
= NVC0_INTERP_PERSPECTIVE
;
226 mode
= NVC0_INTERP_LINEAR
;
/* Centroid is a modifier bit combined with the base mode. */
228 if (decl
->Declaration
.Centroid
)
229 mode
|= NVC0_INTERP_CENTROID
;
/* Append one TGSI immediate to the translation info.
 *
 * The immediate takes slot n = ti->immd32_nr (post-incremented): its four
 * 32-bit values are copied to ti->immd32[n * 4 .. n * 4 + 3] and its data
 * type to ti->immd32_ty[n].  The assert checks that the running count never
 * exceeds scan.immediate_count, the count nvc0_prog_scan() used to size both
 * arrays.
 */
235 prog_immediate(struct nvc0_translation_info
*ti
,
236 const struct tgsi_full_immediate
*imm
)
239 unsigned n
= ti
->immd32_nr
++;
241 assert(ti
->immd32_nr
<= ti
->scan
.immediate_count
);
/* Copy the raw bit patterns; the type is tracked separately below. */
243 for (c
= 0; c
< 4; ++c
)
244 ti
->immd32
[n
* 4 + c
] = imm
->u
[c
].Uint
;
246 ti
->immd32_ty
[n
] = imm
->Immediate
.DataType
;
/* Process one TGSI declaration and assign locations to the registers it
 * covers (decl->Range.First .. decl->Range.Last).
 *
 * The semantic name defaults to TGSI_SEMANTIC_GENERIC and is overridden,
 * together with the semantic index, when the declaration carries a semantic.
 * Handling then depends on the register file:
 *   INPUT         fills ti->input_loc[][], plus fp.in_pos[] and
 *                 ti->interp_mode[] where applicable
 *   OUTPUT        fills ti->output_loc[][], plus vp.out_pos[] where
 *                 applicable
 *   SYSTEM_VALUE  fills ti->sysval_loc[] and ti->sysval_in[]
 *   TEMPORARY     grows ti->temp128_nr to cover the declared range
 * CONSTANT, SAMPLER, ADDRESS, IMMEDIATE and PREDICATE declarations have no
 * visible handling; any other file is reported via NOUVEAU_ERR.
 *
 * NOTE(review): else branches, break statements and closing braces are not
 * visible in this listing, so the exact nesting below is inferred.
 */
250 prog_decl(struct nvc0_translation_info
*ti
,
251 const struct tgsi_full_declaration
*decl
)
254 unsigned sn
= TGSI_SEMANTIC_GENERIC
;
256 const unsigned first
= decl
->Range
.First
;
257 const unsigned last
= decl
->Range
.Last
;
259 if (decl
->Declaration
.Semantic
) {
260 sn
= decl
->Semantic
.Name
;
261 si
= decl
->Semantic
.Index
;
264 switch (decl
->Declaration
.File
) {
265 case TGSI_FILE_INPUT
:
/* NOTE(review): unlike the OUTPUT loop below, this loop does not advance si,
 * so every register of a multi-register range gets the location of the same
 * semantic index - confirm whether that is intended. */
266 for (i
= first
; i
<= last
; ++i
) {
/* Vertex shader inputs are laid out linearly from 0x80: 16 bytes per
 * register, 4 bytes per component. */
267 if (ti
->prog
->type
== PIPE_SHADER_VERTEX
) {
268 for (c
= 0; c
< 4; ++c
)
269 ti
->input_loc
[i
][c
] = 0x80 + i
* 16 + c
* 4;
/* Presumably the non-vertex branch: locations come from the semantic. */
271 for (c
= 0; c
< 4; ++c
)
272 ti
->input_loc
[i
][c
] = nvc0_varying_location(sn
, si
) + c
* 4;
273 /* for sprite coordinates: */
274 ti
->prog
->fp
.in_pos
[i
] = ti
->input_loc
[i
][0] / 4;
276 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
)
277 ti
->interp_mode
[i
] = nvc0_interp_mode(decl
);
280 case TGSI_FILE_OUTPUT
:
281 for (i
= first
; i
<= last
; ++i
, ++si
) {
282 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
) {
/* Component 2 of the depth output is placed after all other outputs. */
284 if (i
== ti
->fp_depth_output
) {
285 ti
->output_loc
[i
][2] = (ti
->scan
.num_outputs
- 1) * 4;
287 if (i
> ti
->fp_depth_output
)
/* Fragment outputs are numbered by semantic index: si * 4 + component. */
289 for (c
= 0; c
< 4; ++c
)
290 ti
->output_loc
[i
][c
] = si
* 4 + c
;
/* Presumably the non-fragment branch: locations come from the semantic. */
293 for (c
= 0; c
< 4; ++c
)
294 ti
->output_loc
[i
][c
] = nvc0_varying_location(sn
, si
) + c
* 4;
295 /* for TFB_VARYING_LOCS: */
296 ti
->prog
->vp
.out_pos
[i
] = ti
->output_loc
[i
][0] / 4;
300 case TGSI_FILE_SYSTEM_VALUE
:
/* NOTE(review): the assignment that gives i its value for this case is not
 * visible in this listing. */
302 ti
->sysval_loc
[i
] = nvc0_system_value_location(sn
, si
, &ti
->sysval_in
[i
]);
/* System value declarations are expected to cover a single register. */
303 assert(first
== last
);
305 case TGSI_FILE_TEMPORARY
:
306 ti
->temp128_nr
= MAX2(ti
->temp128_nr
, last
+ 1);
309 case TGSI_FILE_CONSTANT
:
310 case TGSI_FILE_SAMPLER
:
311 case TGSI_FILE_ADDRESS
:
312 case TGSI_FILE_IMMEDIATE
:
313 case TGSI_FILE_PREDICATE
:
316 NOUVEAU_ERR("unhandled TGSI_FILE %d\n", decl
->Declaration
.File
);
/* Record what a single TGSI instruction touches.
 *
 * id is the 1-based index of the instruction (nvc0_prog_scan() passes
 * ++ti->num_insns).  The function
 *   - registers a new subroutine entry on BGNSUB,
 *   - stores id in ti->output_access[][] for written output components,
 *   - remembers the input feeding the edge flag output,
 *   - sets ti->require_stores when a temporary is addressed indirectly,
 *   - stores id in ti->input_access[][] for input components that are read.
 *
 * NOTE(review): continue/else statements and closing braces are not visible
 * in this listing; the conditions guarding some calls are inferred.
 */
323 prog_inst(struct nvc0_translation_info
*ti
,
324 const struct tgsi_full_instruction
*inst
, int id
)
326 const struct tgsi_dst_register
*dst
;
327 const struct tgsi_src_register
*src
;
/* id is 1-based, so id - 1 is the index of the BGNSUB instruction itself. */
331 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_BGNSUB
) {
332 ti
->subr
[ti
->num_subrs
].first_insn
= id
- 1;
333 ti
->subr
[ti
->num_subrs
].id
= ti
->num_subrs
+ 1; /* id 0 is main program */
337 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
338 dst
= &inst
->Dst
[0].Register
;
340 for (c
= 0; c
< 4; ++c
) {
/* NOTE(review): the condition guarding this call is not visible; presumably
 * it applies only to indirectly addressed destinations. */
342 nvc0_indirect_outputs(ti
, id
);
/* Presumably skips components outside the write mask. */
343 if (!(dst
->WriteMask
& (1 << c
)))
345 ti
->output_access
[dst
->Index
][c
] = id
;
/* A MOV from an input into the edge flag output: remember which input
 * supplies the edge flag. */
348 if (inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
349 inst
->Src
[0].Register
.File
== TGSI_FILE_INPUT
&&
350 dst
->Index
== ti
->edgeflag_out
)
351 ti
->prog
->vp
.edgeflag
= inst
->Src
[0].Register
.Index
;
353 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
354 if (inst
->Dst
[0].Register
.Indirect
)
355 ti
->require_stores
= TRUE
;
358 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
359 src
= &inst
->Src
[s
].Register
;
360 if (src
->File
== TGSI_FILE_TEMPORARY
)
361 if (inst
->Src
[s
].Register
.Indirect
)
362 ti
->require_stores
= TRUE
;
/* Presumably only input registers are tracked past this point. */
363 if (src
->File
!= TGSI_FILE_INPUT
)
365 mask
= nvc0_tgsi_src_mask(inst
, s
);
367 if (inst
->Src
[s
].Register
.Indirect
)
368 nvc0_indirect_inputs(ti
, id
);
370 for (c
= 0; c
< 4; ++c
) {
371 if (!(mask
& (1 << c
)))
/* k is the register component selected by the swizzle for operand
 * component c; values above TGSI_SWIZZLE_W are not recorded. */
373 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
374 if (k
<= TGSI_SWIZZLE_W
)
375 ti
->input_access
[src
->Index
][k
] = id
;
380 /* Probably should introduce something like struct tgsi_function_declaration
381 * instead of trying to guess inputs/outputs.
/* Update the argument/return-value sets of a subroutine for one instruction.
 *
 * subr->argv and subr->retv are per-component bit sets over temporaries:
 * bit (Index % 32) of word [Index / 32][component].  A temporary component
 * that is read before the subroutine has written it is added to argv; every
 * temporary component that is written is added to retv.
 *
 * NOTE(review): continue statements and braces are not visible in this
 * listing; presumably non-temporary sources are skipped.
 */
384 prog_subroutine_inst(struct nvc0_subroutine
*subr
,
385 const struct tgsi_full_instruction
*inst
)
387 const struct tgsi_dst_register
*dst
;
388 const struct tgsi_src_register
*src
;
392 for (s
= 0; s
< inst
->Instruction
.NumSrcRegs
; ++s
) {
393 src
= &inst
->Src
[s
].Register
;
394 if (src
->File
!= TGSI_FILE_TEMPORARY
)
396 mask
= nvc0_tgsi_src_mask(inst
, s
);
398 for (c
= 0; c
< 4; ++c
) {
399 k
= tgsi_util_get_full_src_register_swizzle(&inst
->Src
[s
], c
);
/* NOTE(review): this tests k < TGSI_SWIZZLE_W, so reads of the w component
 * are never added to argv, whereas prog_inst() uses k <= TGSI_SWIZZLE_W -
 * confirm whether excluding w is intended. */
401 if ((mask
& (1 << c
)) && k
< TGSI_SWIZZLE_W
)
/* Only a value not already produced inside the subroutine is an argument. */
402 if (!(subr
->retv
[src
->Index
/ 32][k
] & (1 << (src
->Index
% 32))))
403 subr
->argv
[src
->Index
/ 32][k
] |= 1 << (src
->Index
% 32);
407 if (inst
->Dst
[0].Register
.File
== TGSI_FILE_TEMPORARY
) {
408 dst
= &inst
->Dst
[0].Register
;
410 for (c
= 0; c
< 4; ++c
)
411 if (dst
->WriteMask
& (1 << c
))
412 subr
->retv
[dst
->Index
/ 32][c
] |= 1 << (dst
->Index
% 32);
/* Fill the attribute-enable and export-enable bits of the program header;
 * called by both nvc0_vp_gen_header() and nvc0_gp_gen_header().
 *
 * Each bit stands for one 4-byte slot: slot a lives in bit (a % 32) of word
 * hdr[base + a / 32], with base 5 for inputs (VP_ATTR_EN) and base 13 for
 * outputs (VP_EXPORT_EN).
 */
417 nvc0_vp_gp_gen_header(struct nvc0_program
*vp
, struct nvc0_translation_info
*ti
)
/* Inputs: slots are numbered consecutively from 0x80 / 4, four per register;
 * a bit is set for every component some instruction accessed. */
422 for (a
= 0x80/4, i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
423 for (c
= 0; c
< 4; ++c
, ++a
)
424 if (ti
->input_access
[i
][c
])
425 vp
->hdr
[5 + a
/ 32] |= 1 << (a
% 32); /* VP_ATTR_EN */
/* Outputs: the slot is derived from the location of the register's first
 * component, rebased by 0x40. */
428 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
429 a
= (ti
->output_loc
[i
][0] - 0x40) / 4;
430 for (c
= 0; c
< 4; ++c
, ++a
) {
/* Presumably skips components that no instruction writes. */
431 if (!ti
->output_access
[i
][c
])
433 vp
->hdr
[13 + a
/ 32] |= 1 << (a
% 32); /* VP_EXPORT_EN */
/* System values: locations of 0 and locations at or above 0xf00 are not
 * enabled here; sysval_in selects the input (5) or output (13) words. */
437 for (i
= 0; i
< TGSI_SEMANTIC_COUNT
; ++i
) {
438 a
= ti
->sysval_loc
[i
] / 4;
439 if (a
> 0 && a
< (0xf00 / 4))
440 vp
->hdr
[(ti
->sysval_in
[i
] ? 5 : 13) + a
/ 32] |= 1 << (a
% 32);
/* Build the program header for a vertex program.
 *
 * Writes fixed values to hdr[0] and hdr[4], stores in hdr[18] a mask with one
 * bit per user clip plane (the low vp.num_ucps bits), and then lets
 * nvc0_vp_gp_gen_header() fill in the attribute/export enables, returning
 * that function's result.
 */
447 nvc0_vp_gen_header(struct nvc0_program
*vp
, struct nvc0_translation_info
*ti
)
449 vp
->hdr
[0] = 0x20461;
450 vp
->hdr
[4] = 0xff000;
452 vp
->hdr
[18] = (1 << vp
->vp
.num_ucps
) - 1;
454 return nvc0_vp_gp_gen_header(vp
, ti
);
/* Build the program header for a geometry program.
 *
 * Reads the output primitive, maximum output vertex count and invocation
 * count from the scanned shader properties, encodes them into hdr[0],
 * hdr[2], hdr[3] and hdr[4], and then lets nvc0_vp_gp_gen_header() fill in
 * the attribute/export enables, returning that function's result.
 *
 * NOTE(review): output_prim and max_output_verts are declared without
 * initializers here; if the shader lacks the corresponding property they
 * appear to be read uninitialized below - confirm against the full source.
 */
458 nvc0_gp_gen_header(struct nvc0_program
*gp
, struct nvc0_translation_info
*ti
)
460 unsigned invocations
= 1;
461 unsigned max_output_verts
, output_prim
;
464 gp
->hdr
[0] = 0x21061;
466 for (i
= 0; i
< ti
->scan
.num_properties
; ++i
) {
467 switch (ti
->scan
.properties
[i
].name
) {
468 case TGSI_PROPERTY_GS_OUTPUT_PRIM
:
469 output_prim
= ti
->scan
.properties
[i
].data
[0];
471 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES
:
472 max_output_verts
= ti
->scan
.properties
[i
].data
[0];
/* The count is stored in 9 bits (masked with 0x1ff below). */
473 assert(max_output_verts
< 512);
476 case TGSI_PROPERTY_GS_INVOCATIONS:
477 invocations = ti->scan.properties[i].data[0];
478 assert(invocations <= 32);
/* Invocation count, clamped to 32, goes into the top byte of hdr[2]. */
486 gp
->hdr
[2] = MIN2(invocations
, 32) << 24;
/* The output primitive selects the value of hdr[3] and extra bits in the top
 * nibble of hdr[0]. */
488 switch (output_prim
) {
489 case PIPE_PRIM_POINTS
:
490 gp
->hdr
[3] = 0x01000000;
491 gp
->hdr
[0] |= 0xf0000000;
493 case PIPE_PRIM_LINE_STRIP
:
494 gp
->hdr
[3] = 0x06000000;
495 gp
->hdr
[0] |= 0x10000000;
497 case PIPE_PRIM_TRIANGLE_STRIP
:
498 gp
->hdr
[3] = 0x07000000;
499 gp
->hdr
[0] |= 0x10000000;
506 gp
->hdr
[4] = max_output_verts
& 0x1ff;
508 return nvc0_vp_gp_gen_header(gp
, ti
);
/* Build the program header for a fragment program.
 *
 * Sets the fixed header words, the kill and multiple-colour-output flags in
 * hdr[0], the interpolation mode of every accessed input component (starting
 * at hdr[4]), the fragment coordinate and point coordinate masks, the colour
 * output mask in hdr[18], and flat interpolation for system values.
 *
 * NOTE(review): continue/else statements and braces are not visible in this
 * listing; the nesting described below is inferred.
 */
512 nvc0_fp_gen_header(struct nvc0_program
*fp
, struct nvc0_translation_info
*ti
)
517 fp
->hdr
[0] = 0x21462;
518 fp
->hdr
[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
520 if (ti
->scan
.uses_kill
)
521 fp
->hdr
[0] |= 0x8000;
/* When depth is written it counts as one of the outputs, so the threshold
 * for "multiple colour outputs" is one higher in that case. */
522 if (ti
->scan
.writes_z
) {
524 if (ti
->scan
.num_outputs
> 2)
525 fp
->hdr
[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
527 if (ti
->scan
.num_outputs
> 1)
528 fp
->hdr
[0] |= 0x4000; /* FP_MULTIPLE_COLOR_OUTPUTS */
531 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_INPUT
]; ++i
) {
532 m
= ti
->interp_mode
[i
];
533 for (c
= 0; c
< 4; ++c
) {
/* Presumably skips components that no instruction reads. */
534 if (!ti
->input_access
[i
][c
])
/* Location / 2 gives the bit position of the component's mode field: each
 * 4-byte slot owns two bits. */
536 a
= ti
->input_loc
[i
][c
] / 2;
/* NOTE(review): the statement controlled by this test is not visible. */
537 if (ti
->input_loc
[i
][c
] >= 0x2c0)
539 if (ti
->input_loc
[i
][0] == 0x70)
540 fp
->hdr
[5] |= 1 << (28 + c
); /* FRAG_COORD_UMASK */
542 if (ti
->input_loc
[i
][0] == 0x2e0)
543 fp
->hdr
[14] |= 1 << (24 + c
); /* POINT_COORD */
545 fp
->hdr
[4 + a
/ 32] |= m
<< (a
% 32);
/* Every output other than depth enables four bits of hdr[18], starting at
 * the location of its first component. */
549 for (i
= 0; i
<= ti
->scan
.file_max
[TGSI_FILE_OUTPUT
]; ++i
) {
550 if (i
!= ti
->fp_depth_output
)
551 fp
->hdr
[18] |= 0xf << ti
->output_loc
[i
][0];
/* System values with a location below 0xf00 (and not 0) are marked as flat. */
554 for (i
= 0; i
< TGSI_SEMANTIC_COUNT
; ++i
) {
555 a
= ti
->sysval_loc
[i
] / 2;
556 if ((a
> 0) && (a
< 0xf00 / 2))
557 fp
->hdr
[4 + a
/ 32] |= NVC0_INTERP_FLAT
<< (a
% 32);
/* Scan the TGSI tokens of ti->prog and collect everything the code generator
 * needs.
 *
 * Steps, in order:
 *   1. run tgsi_scan_shader() to fill ti->scan,
 *   2. for fragment programs, find the output carrying the POSITION semantic
 *      (the depth output),
 *   3. allocate the subroutine, immediate and instruction arrays,
 *   4. walk all tokens, dispatching to prog_immediate(), prog_decl() and
 *      prog_inst(),
 *   5. derive argument/return sets for every subroutine,
 *   6. build the program header for the shader type,
 *   7. reserve local memory if indirect temporary access was seen.
 *
 * NOTE(review): break statements, default labels, preprocessor guards and
 * the return statement are not visible in this listing.
 */
564 nvc0_prog_scan(struct nvc0_translation_info
*ti
)
566 struct nvc0_program
*prog
= ti
->prog
;
567 struct tgsi_parse_context parse
;
572 tgsi_dump(prog
->pipe
.tokens
, 0);
575 tgsi_scan_shader(prog
->pipe
.tokens
, &ti
->scan
);
/* 255 marks "no depth output"; otherwise the index of the (last) output with
 * the POSITION semantic is stored. */
577 if (ti
->prog
->type
== PIPE_SHADER_FRAGMENT
) {
578 ti
->fp_depth_output
= 255;
579 for (i
= 0; i
< ti
->scan
.num_outputs
; ++i
)
580 if (ti
->scan
.output_semantic_name
[i
] == TGSI_SEMANTIC_POSITION
)
581 ti
->fp_depth_output
= i
;
/* One subroutine record per BGNSUB opcode counted by the scan.
 * NOTE(review): the assignment target of this CALLOC is not visible, and none
 * of the allocations below is visibly checked for failure - confirm. */
585 CALLOC(ti
->scan
.opcode_count
[TGSI_OPCODE_BGNSUB
], sizeof(ti
->subr
[0]));
/* 16 bytes (four 32-bit values) and one type byte per immediate. */
587 ti
->immd32
= (uint32_t *)MALLOC(ti
->scan
.immediate_count
* 16);
588 ti
->immd32_ty
= (ubyte
*)MALLOC(ti
->scan
.immediate_count
* sizeof(ubyte
));
590 ti
->insns
= MALLOC(ti
->scan
.num_instructions
* sizeof(ti
->insns
[0]));
592 tgsi_parse_init(&parse
, prog
->pipe
.tokens
);
593 while (!tgsi_parse_end_of_tokens(&parse
)) {
594 tgsi_parse_token(&parse
);
596 switch (parse
.FullToken
.Token
.Type
) {
597 case TGSI_TOKEN_TYPE_IMMEDIATE
:
598 prog_immediate(ti
, &parse
.FullToken
.FullImmediate
);
600 case TGSI_TOKEN_TYPE_DECLARATION
:
601 prog_decl(ti
, &parse
.FullToken
.FullDeclaration
);
603 case TGSI_TOKEN_TYPE_INSTRUCTION
:
/* Keep a copy of the instruction, then analyse it under its 1-based id
 * (num_insns is pre-incremented in the call). */
604 ti
->insns
[ti
->num_insns
] = parse
.FullToken
.FullInstruction
;
605 prog_inst(ti
, &parse
.FullToken
.FullInstruction
, ++ti
->num_insns
);
/* NOTE(review): pc is seeded from subr[i].id, which prog_inst() sets to the
 * subroutine number (num_subrs + 1), while the instruction index of the
 * BGNSUB is stored in subr[i].first_insn.  Indexing ti->insns[] with the
 * subroutine number looks unintended - confirm whether first_insn should be
 * used here. */
612 for (i
= 0; i
< ti
->num_subrs
; ++i
) {
613 unsigned pc
= ti
->subr
[i
].id
;
614 while (ti
->insns
[pc
].Instruction
.Opcode
!= TGSI_OPCODE_ENDSUB
)
615 prog_subroutine_inst(&ti
->subr
[i
], &ti
->insns
[pc
++]);
618 switch (prog
->type
) {
619 case PIPE_SHADER_VERTEX
:
620 ti
->input_file
= NV_FILE_MEM_A
;
621 ti
->output_file
= NV_FILE_MEM_V
;
622 ret
= nvc0_vp_gen_header(prog
, ti
);
/* NOTE(review): the three calls below pass only ti, whereas
 * nvc0_gp_gen_header() is defined in this file with (gp, ti); presumably this
 * section is compiled out - confirm against the full source. */
625 case PIPE_SHADER_TESSELLATION_CONTROL:
626 ret = nvc0_tcp_gen_header(ti);
628 case PIPE_SHADER_TESSELLATION_EVALUATION:
629 ret = nvc0_tep_gen_header(ti);
631 case PIPE_SHADER_GEOMETRY:
632 ret = nvc0_gp_gen_header(ti);
635 case PIPE_SHADER_FRAGMENT
:
636 ti
->input_file
= NV_FILE_MEM_V
;
637 ti
->output_file
= NV_FILE_GPR
;
639 if (ti
->scan
.writes_z
)
640 prog
->flags
[0] = 0x11; /* ? */
/* Early depth testing is enabled only when the shader neither kills
 * fragments nor has global stores. */
642 if (!ti
->scan
.uses_kill
&& !ti
->global_stores
)
643 prog
->fp
.early_z
= 1;
645 ret
= nvc0_fp_gen_header(prog
, ti
);
648 assert(!"unsupported program type");
/* Indirectly addressed temporaries need local memory: set bit 26 of hdr[0]
 * and put 16 bytes per 128-bit temporary into hdr[1]. */
653 if (ti
->require_stores
) {
654 prog
->hdr
[0] |= 1 << 26;
655 prog
->hdr
[1] |= ti
->temp128_nr
* 16; /* l[] size */
/* Translate prog: scan it, generate code and dump the resulting header.
 *
 * Allocates a zero-initialised nvc0_translation_info, runs nvc0_prog_scan()
 * and nvc0_generate_code(), logging an error when either step fails, and
 * returns TRUE when ret is zero and FALSE otherwise.
 *
 * NOTE(review): the error checks, the cleanup code and the initialisation of
 * ti->prog are not visible in this listing.
 */
663 nvc0_program_translate(struct nvc0_program
*prog
)
665 struct nvc0_translation_info
*ti
;
668 ti
= CALLOC_STRUCT(nvc0_translation_info
);
/* Presumably PIPE_MAX_SHADER_OUTPUTS serves as a "no edge flag output"
 * marker, since no valid output index can equal it - confirm. */
671 ti
->edgeflag_out
= PIPE_MAX_SHADER_OUTPUTS
;
673 if (prog
->type
== PIPE_SHADER_VERTEX
&& prog
->vp
.num_ucps
)
674 ti
->append_ucp
= TRUE
;
676 ret
= nvc0_prog_scan(ti
);
678 NOUVEAU_ERR("unsupported shader program\n");
682 ret
= nvc0_generate_code(ti
);
684 NOUVEAU_ERR("shader translation failed\n");
/* Dump every header word together with its byte offset.
 * NOTE(review): the offset argument involves sizeof and therefore has type
 * size_t, but it is printed with %lx; that only matches where size_t is
 * unsigned long - confirm, or cast the argument. */
688 for (i
= 0; i
< sizeof(prog
->hdr
) / sizeof(prog
->hdr
[0]); ++i
)
689 debug_printf("HDR[%02lx] = 0x%08x\n",
690 i
* sizeof(prog
->hdr
[0]), prog
->hdr
[i
]);
703 return ret
? FALSE
: TRUE
;
/* Release the resources of a translated program and reset it.
 *
 * Frees prog->res, clears the whole program header and marks the program as
 * not translated, so it can be translated again later.
 *
 * NOTE(review): other cleanup statements may exist between the visible lines
 * of this listing.
 */
707 nvc0_program_destroy(struct nvc0_context
*nvc0
, struct nvc0_program
*prog
)
710 nouveau_resource_free(&prog
->res
);
717 memset(prog
->hdr
, 0, sizeof(prog
->hdr
));
719 prog
->translated
= FALSE
;