/*
 * Copyright (C) 2016 Miklós Máté
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

23 #include "main/mtypes.h"
24 #include "main/atifragshader.h"
25 #include "main/errors.h"
26 #include "program/prog_parameter.h"
28 #include "tgsi/tgsi_ureg.h"
29 #include "tgsi/tgsi_scan.h"
30 #include "tgsi/tgsi_transform.h"
32 #include "st_program.h"
33 #include "st_atifs_to_tgsi.h"
36 * Intermediate state used during shader translation.
39 struct ureg_program
*ureg
;
40 struct ati_fragment_shader
*atifs
;
42 struct ureg_dst temps
[MAX_PROGRAM_TEMPS
];
43 struct ureg_src
*constants
;
44 struct ureg_dst outputs
[PIPE_MAX_SHADER_OUTPUTS
];
45 struct ureg_src inputs
[PIPE_MAX_SHADER_INPUTS
];
46 struct ureg_src samplers
[PIPE_MAX_SAMPLERS
];
48 const ubyte
*inputMapping
;
49 const ubyte
*outputMapping
;
51 unsigned current_pass
;
53 bool regs_written
[MAX_NUM_PASSES_ATI
][MAX_NUM_FRAGMENT_REGISTERS_ATI
];
58 struct instruction_desc
{
61 unsigned char arg_count
;
64 static const struct instruction_desc inst_desc
[] = {
65 {TGSI_OPCODE_MOV
, "MOV", 1},
66 {TGSI_OPCODE_NOP
, "UND", 0}, /* unused */
67 {TGSI_OPCODE_ADD
, "ADD", 2},
68 {TGSI_OPCODE_MUL
, "MUL", 2},
69 {TGSI_OPCODE_NOP
, "SUB", 2},
70 {TGSI_OPCODE_DP3
, "DOT3", 2},
71 {TGSI_OPCODE_DP4
, "DOT4", 2},
72 {TGSI_OPCODE_MAD
, "MAD", 3},
73 {TGSI_OPCODE_LRP
, "LERP", 3},
74 {TGSI_OPCODE_NOP
, "CND", 3},
75 {TGSI_OPCODE_NOP
, "CND0", 3},
76 {TGSI_OPCODE_NOP
, "DOT2_ADD", 3}
79 static struct ureg_dst
80 get_temp(struct st_translate
*t
, unsigned index
)
82 if (ureg_dst_is_undef(t
->temps
[index
]))
83 t
->temps
[index
] = ureg_DECL_temporary(t
->ureg
);
84 return t
->temps
[index
];
87 static struct ureg_src
88 apply_swizzle(struct st_translate
*t
,
89 struct ureg_src src
, GLuint swizzle
)
91 if (swizzle
== GL_SWIZZLE_STR_ATI
) {
93 } else if (swizzle
== GL_SWIZZLE_STQ_ATI
) {
94 return ureg_swizzle(src
,
100 struct ureg_dst tmp
[2];
101 struct ureg_src imm
[3];
103 tmp
[0] = get_temp(t
, MAX_NUM_FRAGMENT_REGISTERS_ATI
);
104 tmp
[1] = get_temp(t
, MAX_NUM_FRAGMENT_REGISTERS_ATI
+ 1);
106 imm
[1] = ureg_imm4f(t
->ureg
, 1.0f
, 1.0f
, 0.0f
, 0.0f
);
107 imm
[2] = ureg_imm4f(t
->ureg
, 0.0f
, 0.0f
, 1.0f
, 1.0f
);
108 ureg_insn(t
->ureg
, TGSI_OPCODE_MAD
, &tmp
[0], 1, imm
, 3, 0);
110 if (swizzle
== GL_SWIZZLE_STR_DR_ATI
) {
111 imm
[0] = ureg_scalar(src
, TGSI_SWIZZLE_Z
);
113 imm
[0] = ureg_scalar(src
, TGSI_SWIZZLE_W
);
115 ureg_insn(t
->ureg
, TGSI_OPCODE_RCP
, &tmp
[1], 1, &imm
[0], 1, 0);
117 imm
[0] = ureg_src(tmp
[0]);
118 imm
[1] = ureg_src(tmp
[1]);
119 ureg_insn(t
->ureg
, TGSI_OPCODE_MUL
, &tmp
[0], 1, imm
, 2, 0);
121 return ureg_src(tmp
[0]);
125 static struct ureg_src
126 get_source(struct st_translate
*t
, GLuint src_type
)
128 if (src_type
>= GL_REG_0_ATI
&& src_type
<= GL_REG_5_ATI
) {
129 if (t
->regs_written
[t
->current_pass
][src_type
- GL_REG_0_ATI
]) {
130 return ureg_src(get_temp(t
, src_type
- GL_REG_0_ATI
));
132 return ureg_imm1f(t
->ureg
, 0.0f
);
134 } else if (src_type
>= GL_CON_0_ATI
&& src_type
<= GL_CON_7_ATI
) {
135 return t
->constants
[src_type
- GL_CON_0_ATI
];
136 } else if (src_type
== GL_ZERO
) {
137 return ureg_imm1f(t
->ureg
, 0.0f
);
138 } else if (src_type
== GL_ONE
) {
139 return ureg_imm1f(t
->ureg
, 1.0f
);
140 } else if (src_type
== GL_PRIMARY_COLOR_ARB
) {
141 return t
->inputs
[t
->inputMapping
[VARYING_SLOT_COL0
]];
142 } else if (src_type
== GL_SECONDARY_INTERPOLATOR_ATI
) {
143 return t
->inputs
[t
->inputMapping
[VARYING_SLOT_COL1
]];
145 /* frontend prevents this */
146 unreachable("unknown source");
150 static struct ureg_src
151 prepare_argument(struct st_translate
*t
, const unsigned argId
,
152 const struct atifragshader_src_register
*srcReg
)
154 struct ureg_src src
= get_source(t
, srcReg
->Index
);
155 struct ureg_dst arg
= get_temp(t
, MAX_NUM_FRAGMENT_REGISTERS_ATI
+ argId
);
157 switch (srcReg
->argRep
) {
161 src
= ureg_scalar(src
, TGSI_SWIZZLE_X
);
164 src
= ureg_scalar(src
, TGSI_SWIZZLE_Y
);
167 src
= ureg_scalar(src
, TGSI_SWIZZLE_Z
);
170 src
= ureg_scalar(src
, TGSI_SWIZZLE_W
);
173 ureg_insn(t
->ureg
, TGSI_OPCODE_MOV
, &arg
, 1, &src
, 1, 0);
175 if (srcReg
->argMod
& GL_COMP_BIT_ATI
) {
176 struct ureg_src modsrc
[2];
177 modsrc
[0] = ureg_imm1f(t
->ureg
, 1.0f
);
178 modsrc
[1] = ureg_negate(ureg_src(arg
));
180 ureg_insn(t
->ureg
, TGSI_OPCODE_ADD
, &arg
, 1, modsrc
, 2, 0);
182 if (srcReg
->argMod
& GL_BIAS_BIT_ATI
) {
183 struct ureg_src modsrc
[2];
184 modsrc
[0] = ureg_src(arg
);
185 modsrc
[1] = ureg_imm1f(t
->ureg
, -0.5f
);
187 ureg_insn(t
->ureg
, TGSI_OPCODE_ADD
, &arg
, 1, modsrc
, 2, 0);
189 if (srcReg
->argMod
& GL_2X_BIT_ATI
) {
190 struct ureg_src modsrc
[2];
191 modsrc
[0] = ureg_src(arg
);
192 modsrc
[1] = ureg_src(arg
);
194 ureg_insn(t
->ureg
, TGSI_OPCODE_ADD
, &arg
, 1, modsrc
, 2, 0);
196 if (srcReg
->argMod
& GL_NEGATE_BIT_ATI
) {
197 struct ureg_src modsrc
[2];
198 modsrc
[0] = ureg_src(arg
);
199 modsrc
[1] = ureg_imm1f(t
->ureg
, -1.0f
);
201 ureg_insn(t
->ureg
, TGSI_OPCODE_MUL
, &arg
, 1, modsrc
, 2, 0);
203 return ureg_src(arg
);
206 /* These instructions need special treatment */
208 emit_special_inst(struct st_translate
*t
, const struct instruction_desc
*desc
,
209 struct ureg_dst
*dst
, struct ureg_src
*args
, unsigned argcount
)
211 struct ureg_dst tmp
[1];
212 struct ureg_src src
[3];
214 if (!strcmp(desc
->name
, "SUB")) {
215 ureg_ADD(t
->ureg
, *dst
, args
[0], ureg_negate(args
[1]));
216 } else if (!strcmp(desc
->name
, "CND")) {
217 tmp
[0] = get_temp(t
, MAX_NUM_FRAGMENT_REGISTERS_ATI
+ 2); /* re-purpose a3 */
218 src
[0] = ureg_imm1f(t
->ureg
, 0.5f
);
219 src
[1] = ureg_negate(args
[2]);
220 ureg_insn(t
->ureg
, TGSI_OPCODE_ADD
, tmp
, 1, src
, 2, 0);
221 src
[0] = ureg_src(tmp
[0]);
224 ureg_insn(t
->ureg
, TGSI_OPCODE_CMP
, dst
, 1, src
, 3, 0);
225 } else if (!strcmp(desc
->name
, "CND0")) {
229 ureg_insn(t
->ureg
, TGSI_OPCODE_CMP
, dst
, 1, src
, 3, 0);
230 } else if (!strcmp(desc
->name
, "DOT2_ADD")) {
231 tmp
[0] = get_temp(t
, MAX_NUM_FRAGMENT_REGISTERS_ATI
); /* re-purpose a1 */
234 ureg_insn(t
->ureg
, TGSI_OPCODE_DP2
, tmp
, 1, src
, 2, 0);
235 src
[0] = ureg_src(tmp
[0]);
236 src
[1] = ureg_scalar(args
[2], TGSI_SWIZZLE_Z
);
237 ureg_insn(t
->ureg
, TGSI_OPCODE_ADD
, dst
, 1, src
, 2, 0);
242 emit_arith_inst(struct st_translate
*t
,
243 const struct instruction_desc
*desc
,
244 struct ureg_dst
*dst
, struct ureg_src
*args
, unsigned argcount
)
246 if (desc
->TGSI_opcode
== TGSI_OPCODE_NOP
) {
247 emit_special_inst(t
, desc
, dst
, args
, argcount
);
251 ureg_insn(t
->ureg
, desc
->TGSI_opcode
, dst
, 1, args
, argcount
, 0);
255 emit_dstmod(struct st_translate
*t
,
256 struct ureg_dst dst
, GLuint dstMod
)
259 struct ureg_src src
[3];
260 GLuint scale
= dstMod
& ~GL_SATURATE_BIT_ATI
;
262 if (dstMod
== GL_NONE
) {
276 case GL_HALF_BIT_ATI
:
279 case GL_QUARTER_BIT_ATI
:
282 case GL_EIGHTH_BIT_ATI
:
289 src
[0] = ureg_src(dst
);
290 src
[1] = ureg_imm1f(t
->ureg
, imm
);
291 if (dstMod
& GL_SATURATE_BIT_ATI
) {
292 dst
= ureg_saturate(dst
);
294 ureg_insn(t
->ureg
, TGSI_OPCODE_MUL
, &dst
, 1, src
, 2, 0);
298 * Compile one setup instruction to TGSI instructions.
301 compile_setupinst(struct st_translate
*t
,
303 const struct atifs_setupinst
*texinst
)
305 struct ureg_dst dst
[1];
306 struct ureg_src src
[2];
308 if (!texinst
->Opcode
)
311 dst
[0] = get_temp(t
, r
);
313 GLuint pass_tex
= texinst
->src
;
315 if (pass_tex
>= GL_TEXTURE0_ARB
&& pass_tex
<= GL_TEXTURE7_ARB
) {
316 unsigned attr
= pass_tex
- GL_TEXTURE0_ARB
+ VARYING_SLOT_TEX0
;
318 src
[0] = t
->inputs
[t
->inputMapping
[attr
]];
319 } else if (pass_tex
>= GL_REG_0_ATI
&& pass_tex
<= GL_REG_5_ATI
) {
320 unsigned reg
= pass_tex
- GL_REG_0_ATI
;
322 /* the frontend already validated that REG is only allowed in second pass */
323 if (t
->regs_written
[0][reg
]) {
324 src
[0] = ureg_src(t
->temps
[reg
]);
326 src
[0] = ureg_imm1f(t
->ureg
, 0.0f
);
329 src
[0] = apply_swizzle(t
, src
[0], texinst
->swizzle
);
331 if (texinst
->Opcode
== ATI_FRAGMENT_SHADER_SAMPLE_OP
) {
332 /* by default texture and sampler indexes are the same */
333 src
[1] = t
->samplers
[r
];
334 /* the texture target is still unknown, it will be fixed in the draw call */
335 ureg_tex_insn(t
->ureg
, TGSI_OPCODE_TEX
, dst
, 1, TGSI_TEXTURE_2D
,
336 TGSI_RETURN_TYPE_FLOAT
, NULL
, 0, src
, 2);
337 } else if (texinst
->Opcode
== ATI_FRAGMENT_SHADER_PASS_OP
) {
338 ureg_insn(t
->ureg
, TGSI_OPCODE_MOV
, dst
, 1, src
, 1, 0);
341 t
->regs_written
[t
->current_pass
][r
] = true;
345 * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions.
348 compile_instruction(struct st_translate
*t
,
349 const struct atifs_instruction
*inst
)
353 for (optype
= 0; optype
< 2; optype
++) { /* color, alpha */
354 const struct instruction_desc
*desc
;
355 struct ureg_dst dst
[1];
356 struct ureg_src args
[3]; /* arguments for the main operation */
358 unsigned dstreg
= inst
->DstReg
[optype
].Index
- GL_REG_0_ATI
;
360 if (!inst
->Opcode
[optype
])
363 desc
= &inst_desc
[inst
->Opcode
[optype
] - GL_MOV_ATI
];
365 /* prepare the arguments */
366 for (arg
= 0; arg
< desc
->arg_count
; arg
++) {
367 if (arg
>= inst
->ArgCount
[optype
]) {
368 _mesa_warning(0, "Using 0 for missing argument %d of %s\n",
370 args
[arg
] = ureg_imm1f(t
->ureg
, 0.0f
);
372 args
[arg
] = prepare_argument(t
, arg
,
373 &inst
->SrcReg
[optype
][arg
]);
378 dst
[0] = get_temp(t
, dstreg
);
381 dst
[0] = ureg_writemask(dst
[0], TGSI_WRITEMASK_W
);
383 GLuint dstMask
= inst
->DstReg
[optype
].dstMask
;
384 if (dstMask
== GL_NONE
) {
385 dst
[0] = ureg_writemask(dst
[0], TGSI_WRITEMASK_XYZ
);
387 dst
[0] = ureg_writemask(dst
[0], dstMask
); /* the enum values match */
391 /* emit the main instruction */
392 emit_arith_inst(t
, desc
, dst
, args
, arg
);
394 emit_dstmod(t
, *dst
, inst
->DstReg
[optype
].dstMod
);
396 t
->regs_written
[t
->current_pass
][dstreg
] = true;
401 finalize_shader(struct st_translate
*t
, unsigned numPasses
)
403 struct ureg_dst dst
[1] = { { 0 } };
404 struct ureg_src src
[1] = { { 0 } };
406 if (t
->regs_written
[numPasses
-1][0]) {
407 /* copy the result into the OUT slot */
408 dst
[0] = t
->outputs
[t
->outputMapping
[FRAG_RESULT_COLOR
]];
409 src
[0] = ureg_src(t
->temps
[0]);
410 ureg_insn(t
->ureg
, TGSI_OPCODE_MOV
, dst
, 1, src
, 1, 0);
413 /* signal the end of the program */
414 ureg_insn(t
->ureg
, TGSI_OPCODE_END
, dst
, 0, src
, 0, 0);
418 * Called when a new variant is needed, we need to translate
419 * the ATI fragment shader to TGSI
422 st_translate_atifs_program(
423 struct ureg_program
*ureg
,
424 struct ati_fragment_shader
*atifs
,
425 struct gl_program
*program
,
427 const ubyte inputMapping
[],
428 const ubyte inputSemanticName
[],
429 const ubyte inputSemanticIndex
[],
430 const ubyte interpMode
[],
432 const ubyte outputMapping
[],
433 const ubyte outputSemanticName
[],
434 const ubyte outputSemanticIndex
[])
436 enum pipe_error ret
= PIPE_OK
;
440 struct st_translate translate
, *t
;
442 memset(t
, 0, sizeof *t
);
444 t
->inputMapping
= inputMapping
;
445 t
->outputMapping
= outputMapping
;
450 * Declare input attributes.
452 for (i
= 0; i
< numInputs
; i
++) {
453 t
->inputs
[i
] = ureg_DECL_fs_input(ureg
,
454 inputSemanticName
[i
],
455 inputSemanticIndex
[i
],
460 * Declare output attributes:
461 * we always have numOutputs=1 and it's FRAG_RESULT_COLOR
463 t
->outputs
[0] = ureg_DECL_output(ureg
,
465 outputSemanticIndex
[0]);
467 /* Emit constants and immediates. Mesa uses a single index space
468 * for these, so we put all the translated regs in t->constants.
470 if (program
->Parameters
) {
471 t
->constants
= calloc(program
->Parameters
->NumParameters
,
472 sizeof t
->constants
[0]);
473 if (t
->constants
== NULL
) {
474 ret
= PIPE_ERROR_OUT_OF_MEMORY
;
478 for (i
= 0; i
< program
->Parameters
->NumParameters
; i
++) {
479 switch (program
->Parameters
->Parameters
[i
].Type
) {
480 case PROGRAM_STATE_VAR
:
481 case PROGRAM_UNIFORM
:
482 t
->constants
[i
] = ureg_DECL_constant(ureg
, i
);
484 case PROGRAM_CONSTANT
:
486 ureg_DECL_immediate(ureg
,
487 (const float*)program
->Parameters
->ParameterValues
[i
],
496 /* texture samplers */
497 for (i
= 0; i
< MAX_NUM_FRAGMENT_REGISTERS_ATI
; i
++) {
498 if (program
->SamplersUsed
& (1 << i
)) {
499 t
->samplers
[i
] = ureg_DECL_sampler(ureg
, i
);
500 /* the texture target is still unknown, it will be fixed in the draw call */
501 ureg_DECL_sampler_view(ureg
, i
, TGSI_TEXTURE_2D
,
502 TGSI_RETURN_TYPE_FLOAT
,
503 TGSI_RETURN_TYPE_FLOAT
,
504 TGSI_RETURN_TYPE_FLOAT
,
505 TGSI_RETURN_TYPE_FLOAT
);
509 /* emit instructions */
510 for (pass
= 0; pass
< atifs
->NumPasses
; pass
++) {
511 t
->current_pass
= pass
;
512 for (r
= 0; r
< MAX_NUM_FRAGMENT_REGISTERS_ATI
; r
++) {
513 struct atifs_setupinst
*texinst
= &atifs
->SetupInst
[pass
][r
];
514 compile_setupinst(t
, r
, texinst
);
516 for (i
= 0; i
< atifs
->numArithInstr
[pass
]; i
++) {
517 struct atifs_instruction
*inst
= &atifs
->Instructions
[pass
][i
];
518 compile_instruction(t
, inst
);
522 finalize_shader(t
, atifs
->NumPasses
);
528 debug_printf("%s: translate error flag set\n", __func__
);
535 * Called in ProgramStringNotify, we need to fill the metadata of the
536 * gl_program attached to the ati_fragment_shader
539 st_init_atifs_prog(struct gl_context
*ctx
, struct gl_program
*prog
)
541 /* we know this is st_fragment_program, because of st_new_ati_fs() */
542 struct st_fragment_program
*stfp
= (struct st_fragment_program
*) prog
;
543 struct ati_fragment_shader
*atifs
= stfp
->ati_fs
;
545 unsigned pass
, i
, r
, optype
, arg
;
547 static const gl_state_index fog_params_state
[STATE_LENGTH
] =
548 {STATE_INTERNAL
, STATE_FOG_PARAMS_OPTIMIZED
, 0, 0, 0};
549 static const gl_state_index fog_color
[STATE_LENGTH
] =
550 {STATE_FOG_COLOR
, 0, 0, 0, 0};
552 prog
->info
.inputs_read
= 0;
553 prog
->info
.outputs_written
= BITFIELD64_BIT(FRAG_RESULT_COLOR
);
554 prog
->SamplersUsed
= 0;
555 prog
->Parameters
= _mesa_new_parameter_list();
557 /* fill in inputs_read, SamplersUsed, TexturesUsed */
558 for (pass
= 0; pass
< atifs
->NumPasses
; pass
++) {
559 for (r
= 0; r
< MAX_NUM_FRAGMENT_REGISTERS_ATI
; r
++) {
560 struct atifs_setupinst
*texinst
= &atifs
->SetupInst
[pass
][r
];
561 GLuint pass_tex
= texinst
->src
;
563 if (texinst
->Opcode
== ATI_FRAGMENT_SHADER_SAMPLE_OP
) {
564 /* mark which texcoords are used */
565 prog
->info
.inputs_read
|= BITFIELD64_BIT(VARYING_SLOT_TEX0
+ pass_tex
- GL_TEXTURE0_ARB
);
566 /* by default there is 1:1 mapping between samplers and textures */
567 prog
->SamplersUsed
|= (1 << r
);
568 /* the target is unknown here, it will be fixed in the draw call */
569 prog
->TexturesUsed
[r
] = TEXTURE_2D_BIT
;
570 } else if (texinst
->Opcode
== ATI_FRAGMENT_SHADER_PASS_OP
) {
571 if (pass_tex
>= GL_TEXTURE0_ARB
&& pass_tex
<= GL_TEXTURE7_ARB
) {
572 prog
->info
.inputs_read
|= BITFIELD64_BIT(VARYING_SLOT_TEX0
+ pass_tex
- GL_TEXTURE0_ARB
);
577 for (pass
= 0; pass
< atifs
->NumPasses
; pass
++) {
578 for (i
= 0; i
< atifs
->numArithInstr
[pass
]; i
++) {
579 struct atifs_instruction
*inst
= &atifs
->Instructions
[pass
][i
];
581 for (optype
= 0; optype
< 2; optype
++) { /* color, alpha */
582 if (inst
->Opcode
[optype
]) {
583 for (arg
= 0; arg
< inst
->ArgCount
[optype
]; arg
++) {
584 GLint index
= inst
->SrcReg
[optype
][arg
].Index
;
585 if (index
== GL_PRIMARY_COLOR_EXT
) {
586 prog
->info
.inputs_read
|= BITFIELD64_BIT(VARYING_SLOT_COL0
);
587 } else if (index
== GL_SECONDARY_INTERPOLATOR_ATI
) {
588 /* note: ATI_fragment_shader.txt never specifies what
589 * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses
590 * VARYING_SLOT_COL1 for this input */
591 prog
->info
.inputs_read
|= BITFIELD64_BIT(VARYING_SLOT_COL1
);
598 /* we may need fog */
599 prog
->info
.inputs_read
|= BITFIELD64_BIT(VARYING_SLOT_FOGC
);
601 /* we always have the ATI_fs constants, and the fog params */
602 for (i
= 0; i
< MAX_NUM_FRAGMENT_CONSTANTS_ATI
; i
++) {
603 _mesa_add_parameter(prog
->Parameters
, PROGRAM_UNIFORM
,
604 NULL
, 4, GL_FLOAT
, NULL
, NULL
);
606 _mesa_add_state_reference(prog
->Parameters
, fog_params_state
);
607 _mesa_add_state_reference(prog
->Parameters
, fog_color
);
609 prog
->arb
.NumInstructions
= 0;
610 prog
->arb
.NumTemporaries
= MAX_NUM_FRAGMENT_REGISTERS_ATI
+ 3; /* 3 input temps for arith ops */
611 prog
->arb
.NumParameters
= MAX_NUM_FRAGMENT_CONSTANTS_ATI
+ 2; /* 2 state variables for fog */
615 struct tgsi_atifs_transform
{
616 struct tgsi_transform_context base
;
617 struct tgsi_shader_info info
;
618 const struct st_fp_variant_key
*key
;
619 bool first_instruction_emitted
;
620 unsigned fog_factor_temp
;
623 static inline struct tgsi_atifs_transform
*
624 tgsi_atifs_transform(struct tgsi_transform_context
*tctx
)
626 return (struct tgsi_atifs_transform
*)tctx
;
629 /* copied from st_cb_drawpixels_shader.c */
631 set_src(struct tgsi_full_instruction
*inst
, unsigned i
, unsigned file
, unsigned index
,
632 unsigned x
, unsigned y
, unsigned z
, unsigned w
)
634 inst
->Src
[i
].Register
.File
= file
;
635 inst
->Src
[i
].Register
.Index
= index
;
636 inst
->Src
[i
].Register
.SwizzleX
= x
;
637 inst
->Src
[i
].Register
.SwizzleY
= y
;
638 inst
->Src
[i
].Register
.SwizzleZ
= z
;
639 inst
->Src
[i
].Register
.SwizzleW
= w
;
640 if (file
== TGSI_FILE_CONSTANT
) {
641 inst
->Src
[i
].Register
.Dimension
= 1;
642 inst
->Src
[i
].Dimension
.Index
= 0;
646 #define SET_SRC(inst, i, file, index, x, y, z, w) \
647 set_src(inst, i, file, index, TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, \
648 TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w)
651 transform_decl(struct tgsi_transform_context
*tctx
,
652 struct tgsi_full_declaration
*decl
)
654 struct tgsi_atifs_transform
*ctx
= tgsi_atifs_transform(tctx
);
656 if (decl
->Declaration
.File
== TGSI_FILE_SAMPLER_VIEW
) {
657 /* fix texture target */
658 unsigned newtarget
= ctx
->key
->texture_targets
[decl
->Range
.First
];
660 decl
->SamplerView
.Resource
= newtarget
;
663 tctx
->emit_declaration(tctx
, decl
);
667 transform_instr(struct tgsi_transform_context
*tctx
,
668 struct tgsi_full_instruction
*current_inst
)
670 struct tgsi_atifs_transform
*ctx
= tgsi_atifs_transform(tctx
);
672 if (ctx
->first_instruction_emitted
)
675 ctx
->first_instruction_emitted
= true;
678 /* add a new temp for the fog factor */
679 ctx
->fog_factor_temp
= ctx
->info
.file_max
[TGSI_FILE_TEMPORARY
] + 1;
680 tgsi_transform_temp_decl(tctx
, ctx
->fog_factor_temp
);
684 if (current_inst
->Instruction
.Opcode
== TGSI_OPCODE_TEX
) {
685 /* fix texture target */
686 unsigned newtarget
= ctx
->key
->texture_targets
[current_inst
->Src
[1].Register
.Index
];
688 current_inst
->Texture
.Texture
= newtarget
;
690 } else if (ctx
->key
->fog
&& current_inst
->Instruction
.Opcode
== TGSI_OPCODE_MOV
&&
691 current_inst
->Dst
[0].Register
.File
== TGSI_FILE_OUTPUT
) {
692 struct tgsi_full_instruction inst
;
695 int reg0_index
= current_inst
->Src
[0].Register
.Index
;
697 /* find FOGC input */
698 for (i
= 0; i
< ctx
->info
.num_inputs
; i
++) {
699 if (ctx
->info
.input_semantic_name
[i
] == TGSI_SEMANTIC_FOG
) {
704 if (fogc_index
< 0) {
705 /* should never be reached, because fog coord input is always declared */
706 tctx
->emit_instruction(tctx
, current_inst
);
710 /* compute the 1 component fog factor f */
711 if (ctx
->key
->fog
== FOG_LINEAR
) {
712 /* LINEAR formula: f = (end - z) / (end - start)
713 * with optimized parameters:
714 * f = MAD(fogcoord, oparams.x, oparams.y)
716 inst
= tgsi_default_full_instruction();
717 inst
.Instruction
.Opcode
= TGSI_OPCODE_MAD
;
718 inst
.Instruction
.NumDstRegs
= 1;
719 inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
720 inst
.Dst
[0].Register
.Index
= ctx
->fog_factor_temp
;
721 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
722 inst
.Instruction
.NumSrcRegs
= 3;
723 SET_SRC(&inst
, 0, TGSI_FILE_INPUT
, fogc_index
, X
, Y
, Z
, W
);
724 SET_SRC(&inst
, 1, TGSI_FILE_CONSTANT
, MAX_NUM_FRAGMENT_CONSTANTS_ATI
, X
, X
, X
, X
);
725 SET_SRC(&inst
, 2, TGSI_FILE_CONSTANT
, MAX_NUM_FRAGMENT_CONSTANTS_ATI
, Y
, Y
, Y
, Y
);
726 tctx
->emit_instruction(tctx
, &inst
);
727 } else if (ctx
->key
->fog
== FOG_EXP
) {
728 /* EXP formula: f = exp(-dens * z)
729 * with optimized parameters:
730 * f = MUL(fogcoord, oparams.z); f= EX2(-f)
732 inst
= tgsi_default_full_instruction();
733 inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
734 inst
.Instruction
.NumDstRegs
= 1;
735 inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
736 inst
.Dst
[0].Register
.Index
= ctx
->fog_factor_temp
;
737 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
738 inst
.Instruction
.NumSrcRegs
= 2;
739 SET_SRC(&inst
, 0, TGSI_FILE_INPUT
, fogc_index
, X
, Y
, Z
, W
);
740 SET_SRC(&inst
, 1, TGSI_FILE_CONSTANT
, MAX_NUM_FRAGMENT_CONSTANTS_ATI
, Z
, Z
, Z
, Z
);
741 tctx
->emit_instruction(tctx
, &inst
);
743 inst
= tgsi_default_full_instruction();
744 inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
745 inst
.Instruction
.NumDstRegs
= 1;
746 inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
747 inst
.Dst
[0].Register
.Index
= ctx
->fog_factor_temp
;
748 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
749 inst
.Instruction
.NumSrcRegs
= 1;
750 SET_SRC(&inst
, 0, TGSI_FILE_TEMPORARY
, ctx
->fog_factor_temp
, X
, Y
, Z
, W
);
751 inst
.Src
[0].Register
.Negate
= 1;
752 tctx
->emit_instruction(tctx
, &inst
);
753 } else if (ctx
->key
->fog
== FOG_EXP2
) {
754 /* EXP2 formula: f = exp(-(dens * z)^2)
755 * with optimized parameters:
756 * f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f)
758 inst
= tgsi_default_full_instruction();
759 inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
760 inst
.Instruction
.NumDstRegs
= 1;
761 inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
762 inst
.Dst
[0].Register
.Index
= ctx
->fog_factor_temp
;
763 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
764 inst
.Instruction
.NumSrcRegs
= 2;
765 SET_SRC(&inst
, 0, TGSI_FILE_INPUT
, fogc_index
, X
, Y
, Z
, W
);
766 SET_SRC(&inst
, 1, TGSI_FILE_CONSTANT
, MAX_NUM_FRAGMENT_CONSTANTS_ATI
, W
, W
, W
, W
);
767 tctx
->emit_instruction(tctx
, &inst
);
769 inst
= tgsi_default_full_instruction();
770 inst
.Instruction
.Opcode
= TGSI_OPCODE_MUL
;
771 inst
.Instruction
.NumDstRegs
= 1;
772 inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
773 inst
.Dst
[0].Register
.Index
= ctx
->fog_factor_temp
;
774 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
775 inst
.Instruction
.NumSrcRegs
= 2;
776 SET_SRC(&inst
, 0, TGSI_FILE_TEMPORARY
, ctx
->fog_factor_temp
, X
, Y
, Z
, W
);
777 SET_SRC(&inst
, 1, TGSI_FILE_TEMPORARY
, ctx
->fog_factor_temp
, X
, Y
, Z
, W
);
778 tctx
->emit_instruction(tctx
, &inst
);
780 inst
= tgsi_default_full_instruction();
781 inst
.Instruction
.Opcode
= TGSI_OPCODE_EX2
;
782 inst
.Instruction
.NumDstRegs
= 1;
783 inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
784 inst
.Dst
[0].Register
.Index
= ctx
->fog_factor_temp
;
785 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
786 inst
.Instruction
.NumSrcRegs
= 1;
787 SET_SRC(&inst
, 0, TGSI_FILE_TEMPORARY
, ctx
->fog_factor_temp
, X
, Y
, Z
, W
);
788 inst
.Src
[0].Register
.Negate
^= 1;
789 tctx
->emit_instruction(tctx
, &inst
);
791 /* f = saturate(f) */
792 inst
= tgsi_default_full_instruction();
793 inst
.Instruction
.Opcode
= TGSI_OPCODE_MOV
;
794 inst
.Instruction
.NumDstRegs
= 1;
795 inst
.Instruction
.Saturate
= 1;
796 inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
797 inst
.Dst
[0].Register
.Index
= ctx
->fog_factor_temp
;
798 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
799 inst
.Instruction
.NumSrcRegs
= 1;
800 SET_SRC(&inst
, 0, TGSI_FILE_TEMPORARY
, ctx
->fog_factor_temp
, X
, Y
, Z
, W
);
801 tctx
->emit_instruction(tctx
, &inst
);
803 /* REG0 = LRP(f, REG0, fogcolor) */
804 inst
= tgsi_default_full_instruction();
805 inst
.Instruction
.Opcode
= TGSI_OPCODE_LRP
;
806 inst
.Instruction
.NumDstRegs
= 1;
807 inst
.Dst
[0].Register
.File
= TGSI_FILE_TEMPORARY
;
808 inst
.Dst
[0].Register
.Index
= reg0_index
;
809 inst
.Dst
[0].Register
.WriteMask
= TGSI_WRITEMASK_XYZW
;
810 inst
.Instruction
.NumSrcRegs
= 3;
811 SET_SRC(&inst
, 0, TGSI_FILE_TEMPORARY
, ctx
->fog_factor_temp
, X
, X
, X
, Y
);
812 SET_SRC(&inst
, 1, TGSI_FILE_TEMPORARY
, reg0_index
, X
, Y
, Z
, W
);
813 SET_SRC(&inst
, 2, TGSI_FILE_CONSTANT
, MAX_NUM_FRAGMENT_CONSTANTS_ATI
+ 1, X
, Y
, Z
, W
);
814 tctx
->emit_instruction(tctx
, &inst
);
817 tctx
->emit_instruction(tctx
, current_inst
);
821 * A post-process step in the draw call to fix texture targets and
822 * insert code for fog.
824 const struct tgsi_token
*
825 st_fixup_atifs(const struct tgsi_token
*tokens
,
826 const struct st_fp_variant_key
*key
)
828 struct tgsi_atifs_transform ctx
;
829 struct tgsi_token
*newtoks
;
832 memset(&ctx
, 0, sizeof(ctx
));
833 ctx
.base
.transform_declaration
= transform_decl
;
834 ctx
.base
.transform_instruction
= transform_instr
;
836 tgsi_scan_shader(tokens
, &ctx
.info
);
838 newlen
= tgsi_num_tokens(tokens
) + 30;
839 newtoks
= tgsi_alloc_tokens(newlen
);
843 tgsi_transform_shader(tokens
, newtoks
, newlen
, &ctx
.base
);