st/mesa: use correct return statement for a void function
[mesa.git] / src / mesa / state_tracker / st_atifs_to_tgsi.c
1 /*
2 * Copyright (C) 2016 Miklós Máté
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "main/mtypes.h"
24 #include "main/atifragshader.h"
25 #include "main/errors.h"
26 #include "program/prog_parameter.h"
27
28 #include "tgsi/tgsi_ureg.h"
29 #include "tgsi/tgsi_scan.h"
30 #include "tgsi/tgsi_transform.h"
31
32 #include "st_program.h"
33 #include "st_atifs_to_tgsi.h"
34
35 /**
36 * Intermediate state used during shader translation.
37 */
38 struct st_translate {
39 struct ureg_program *ureg;
40 struct ati_fragment_shader *atifs;
41
42 struct ureg_dst temps[MAX_PROGRAM_TEMPS];
43 struct ureg_src *constants;
44 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
45 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
46 struct ureg_src samplers[PIPE_MAX_SAMPLERS];
47
48 const GLuint *inputMapping;
49 const GLuint *outputMapping;
50
51 unsigned current_pass;
52
53 bool regs_written[MAX_NUM_PASSES_ATI][MAX_NUM_FRAGMENT_REGISTERS_ATI];
54
55 boolean error;
56 };
57
/**
 * Describes how one ATI arithmetic opcode maps onto TGSI.
 */
struct instruction_desc {
   unsigned TGSI_opcode;      /* TGSI_OPCODE_NOP means "needs special handling" */
   const char *name;          /* mnemonic, also used to select the special case */
   unsigned char arg_count;   /* number of source arguments the operation takes */
};
63
64 static const struct instruction_desc inst_desc[] = {
65 {TGSI_OPCODE_MOV, "MOV", 1},
66 {TGSI_OPCODE_NOP, "UND", 0}, /* unused */
67 {TGSI_OPCODE_ADD, "ADD", 2},
68 {TGSI_OPCODE_MUL, "MUL", 2},
69 {TGSI_OPCODE_NOP, "SUB", 2},
70 {TGSI_OPCODE_DP3, "DOT3", 2},
71 {TGSI_OPCODE_DP4, "DOT4", 2},
72 {TGSI_OPCODE_MAD, "MAD", 3},
73 {TGSI_OPCODE_LRP, "LERP", 3},
74 {TGSI_OPCODE_NOP, "CND", 3},
75 {TGSI_OPCODE_NOP, "CND0", 3},
76 {TGSI_OPCODE_NOP, "DOT2_ADD", 3}
77 };
78
79 static struct ureg_dst
80 get_temp(struct st_translate *t, unsigned index)
81 {
82 if (ureg_dst_is_undef(t->temps[index]))
83 t->temps[index] = ureg_DECL_temporary(t->ureg);
84 return t->temps[index];
85 }
86
87 static struct ureg_src
88 apply_swizzle(struct st_translate *t,
89 struct ureg_src src, GLuint swizzle)
90 {
91 if (swizzle == GL_SWIZZLE_STR_ATI) {
92 return src;
93 } else if (swizzle == GL_SWIZZLE_STQ_ATI) {
94 return ureg_swizzle(src,
95 TGSI_SWIZZLE_X,
96 TGSI_SWIZZLE_Y,
97 TGSI_SWIZZLE_W,
98 TGSI_SWIZZLE_Z);
99 } else {
100 struct ureg_dst tmp[2];
101 struct ureg_src imm[3];
102
103 tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI);
104 tmp[1] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 1);
105 imm[0] = src;
106 imm[1] = ureg_imm4f(t->ureg, 1.0f, 1.0f, 0.0f, 0.0f);
107 imm[2] = ureg_imm4f(t->ureg, 0.0f, 0.0f, 1.0f, 1.0f);
108 ureg_insn(t->ureg, TGSI_OPCODE_MAD, &tmp[0], 1, imm, 3);
109
110 if (swizzle == GL_SWIZZLE_STR_DR_ATI) {
111 imm[0] = ureg_scalar(src, TGSI_SWIZZLE_Z);
112 } else {
113 imm[0] = ureg_scalar(src, TGSI_SWIZZLE_W);
114 }
115 ureg_insn(t->ureg, TGSI_OPCODE_RCP, &tmp[1], 1, &imm[0], 1);
116
117 imm[0] = ureg_src(tmp[0]);
118 imm[1] = ureg_src(tmp[1]);
119 ureg_insn(t->ureg, TGSI_OPCODE_MUL, &tmp[0], 1, imm, 2);
120
121 return ureg_src(tmp[0]);
122 }
123 }
124
125 static struct ureg_src
126 get_source(struct st_translate *t, GLuint src_type)
127 {
128 if (src_type >= GL_REG_0_ATI && src_type <= GL_REG_5_ATI) {
129 if (t->regs_written[t->current_pass][src_type - GL_REG_0_ATI]) {
130 return ureg_src(get_temp(t, src_type - GL_REG_0_ATI));
131 } else {
132 return ureg_imm1f(t->ureg, 0.0f);
133 }
134 } else if (src_type >= GL_CON_0_ATI && src_type <= GL_CON_7_ATI) {
135 return t->constants[src_type - GL_CON_0_ATI];
136 } else if (src_type == GL_ZERO) {
137 return ureg_imm1f(t->ureg, 0.0f);
138 } else if (src_type == GL_ONE) {
139 return ureg_imm1f(t->ureg, 1.0f);
140 } else if (src_type == GL_PRIMARY_COLOR_ARB) {
141 return t->inputs[t->inputMapping[VARYING_SLOT_COL0]];
142 } else if (src_type == GL_SECONDARY_INTERPOLATOR_ATI) {
143 return t->inputs[t->inputMapping[VARYING_SLOT_COL1]];
144 } else {
145 /* frontend prevents this */
146 unreachable("unknown source");
147 }
148 }
149
150 static struct ureg_src
151 prepare_argument(struct st_translate *t, const unsigned argId,
152 const struct atifragshader_src_register *srcReg)
153 {
154 struct ureg_src src = get_source(t, srcReg->Index);
155 struct ureg_dst arg = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + argId);
156
157 switch (srcReg->argRep) {
158 case GL_NONE:
159 break;
160 case GL_RED:
161 src = ureg_scalar(src, TGSI_SWIZZLE_X);
162 break;
163 case GL_GREEN:
164 src = ureg_scalar(src, TGSI_SWIZZLE_Y);
165 break;
166 case GL_BLUE:
167 src = ureg_scalar(src, TGSI_SWIZZLE_Z);
168 break;
169 case GL_ALPHA:
170 src = ureg_scalar(src, TGSI_SWIZZLE_W);
171 break;
172 }
173 ureg_insn(t->ureg, TGSI_OPCODE_MOV, &arg, 1, &src, 1);
174
175 if (srcReg->argMod & GL_COMP_BIT_ATI) {
176 struct ureg_src modsrc[2];
177 modsrc[0] = ureg_imm1f(t->ureg, 1.0f);
178 modsrc[1] = ureg_negate(ureg_src(arg));
179
180 ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
181 }
182 if (srcReg->argMod & GL_BIAS_BIT_ATI) {
183 struct ureg_src modsrc[2];
184 modsrc[0] = ureg_src(arg);
185 modsrc[1] = ureg_imm1f(t->ureg, -0.5f);
186
187 ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
188 }
189 if (srcReg->argMod & GL_2X_BIT_ATI) {
190 struct ureg_src modsrc[2];
191 modsrc[0] = ureg_src(arg);
192 modsrc[1] = ureg_src(arg);
193
194 ureg_insn(t->ureg, TGSI_OPCODE_ADD, &arg, 1, modsrc, 2);
195 }
196 if (srcReg->argMod & GL_NEGATE_BIT_ATI) {
197 struct ureg_src modsrc[2];
198 modsrc[0] = ureg_src(arg);
199 modsrc[1] = ureg_imm1f(t->ureg, -1.0f);
200
201 ureg_insn(t->ureg, TGSI_OPCODE_MUL, &arg, 1, modsrc, 2);
202 }
203 return ureg_src(arg);
204 }
205
206 /* These instructions need special treatment */
207 static void
208 emit_special_inst(struct st_translate *t, const struct instruction_desc *desc,
209 struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
210 {
211 struct ureg_dst tmp[1];
212 struct ureg_src src[3];
213
214 if (!strcmp(desc->name, "SUB")) {
215 ureg_ADD(t->ureg, *dst, args[0], ureg_negate(args[1]));
216 } else if (!strcmp(desc->name, "CND")) {
217 tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI + 2); /* re-purpose a3 */
218 src[0] = ureg_imm1f(t->ureg, 0.5f);
219 src[1] = ureg_negate(args[2]);
220 ureg_insn(t->ureg, TGSI_OPCODE_ADD, tmp, 1, src, 2);
221 src[0] = ureg_src(tmp[0]);
222 src[1] = args[0];
223 src[2] = args[1];
224 ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3);
225 } else if (!strcmp(desc->name, "CND0")) {
226 src[0] = args[2];
227 src[1] = args[1];
228 src[2] = args[0];
229 ureg_insn(t->ureg, TGSI_OPCODE_CMP, dst, 1, src, 3);
230 } else if (!strcmp(desc->name, "DOT2_ADD")) {
231 /* note: DP2A is not implemented in most pipe drivers */
232 tmp[0] = get_temp(t, MAX_NUM_FRAGMENT_REGISTERS_ATI); /* re-purpose a1 */
233 src[0] = args[0];
234 src[1] = args[1];
235 ureg_insn(t->ureg, TGSI_OPCODE_DP2, tmp, 1, src, 2);
236 src[0] = ureg_src(tmp[0]);
237 src[1] = ureg_scalar(args[2], TGSI_SWIZZLE_Z);
238 ureg_insn(t->ureg, TGSI_OPCODE_ADD, dst, 1, src, 2);
239 }
240 }
241
242 static void
243 emit_arith_inst(struct st_translate *t,
244 const struct instruction_desc *desc,
245 struct ureg_dst *dst, struct ureg_src *args, unsigned argcount)
246 {
247 if (desc->TGSI_opcode == TGSI_OPCODE_NOP) {
248 emit_special_inst(t, desc, dst, args, argcount);
249 return;
250 }
251
252 ureg_insn(t->ureg, desc->TGSI_opcode, dst, 1, args, argcount);
253 }
254
255 static void
256 emit_dstmod(struct st_translate *t,
257 struct ureg_dst dst, GLuint dstMod)
258 {
259 float imm;
260 struct ureg_src src[3];
261 GLuint scale = dstMod & ~GL_SATURATE_BIT_ATI;
262
263 if (dstMod == GL_NONE) {
264 return;
265 }
266
267 switch (scale) {
268 case GL_2X_BIT_ATI:
269 imm = 2.0f;
270 break;
271 case GL_4X_BIT_ATI:
272 imm = 4.0f;
273 break;
274 case GL_8X_BIT_ATI:
275 imm = 8.0f;
276 break;
277 case GL_HALF_BIT_ATI:
278 imm = 0.5f;
279 break;
280 case GL_QUARTER_BIT_ATI:
281 imm = 0.25f;
282 break;
283 case GL_EIGHTH_BIT_ATI:
284 imm = 0.125f;
285 break;
286 default:
287 imm = 1.0f;
288 }
289
290 src[0] = ureg_src(dst);
291 src[1] = ureg_imm1f(t->ureg, imm);
292 if (dstMod & GL_SATURATE_BIT_ATI) {
293 dst = ureg_saturate(dst);
294 }
295 ureg_insn(t->ureg, TGSI_OPCODE_MUL, &dst, 1, src, 2);
296 }
297
298 /**
299 * Compile one setup instruction to TGSI instructions.
300 */
301 static void
302 compile_setupinst(struct st_translate *t,
303 const unsigned r,
304 const struct atifs_setupinst *texinst)
305 {
306 struct ureg_dst dst[1];
307 struct ureg_src src[2];
308
309 if (!texinst->Opcode)
310 return;
311
312 dst[0] = get_temp(t, r);
313
314 GLuint pass_tex = texinst->src;
315
316 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
317 unsigned attr = pass_tex - GL_TEXTURE0_ARB + VARYING_SLOT_TEX0;
318
319 src[0] = t->inputs[t->inputMapping[attr]];
320 } else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
321 unsigned reg = pass_tex - GL_REG_0_ATI;
322
323 /* the frontend already validated that REG is only allowed in second pass */
324 if (t->regs_written[0][reg]) {
325 src[0] = ureg_src(t->temps[reg]);
326 } else {
327 src[0] = ureg_imm1f(t->ureg, 0.0f);
328 }
329 }
330 src[0] = apply_swizzle(t, src[0], texinst->swizzle);
331
332 if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
333 /* by default texture and sampler indexes are the same */
334 src[1] = t->samplers[r];
335 /* the texture target is still unknown, it will be fixed in the draw call */
336 ureg_tex_insn(t->ureg, TGSI_OPCODE_TEX, dst, 1, TGSI_TEXTURE_2D,
337 NULL, 0, src, 2);
338 } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
339 ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
340 }
341
342 t->regs_written[t->current_pass][r] = true;
343 }
344
345 /**
346 * Compile one arithmetic operation COLOR&ALPHA pair into TGSI instructions.
347 */
348 static void
349 compile_instruction(struct st_translate *t,
350 const struct atifs_instruction *inst)
351 {
352 unsigned optype;
353
354 for (optype = 0; optype < 2; optype++) { /* color, alpha */
355 const struct instruction_desc *desc;
356 struct ureg_dst dst[1];
357 struct ureg_src args[3]; /* arguments for the main operation */
358 unsigned arg;
359 unsigned dstreg = inst->DstReg[optype].Index - GL_REG_0_ATI;
360
361 if (!inst->Opcode[optype])
362 continue;
363
364 desc = &inst_desc[inst->Opcode[optype] - GL_MOV_ATI];
365
366 /* prepare the arguments */
367 for (arg = 0; arg < desc->arg_count; arg++) {
368 if (arg >= inst->ArgCount[optype]) {
369 _mesa_warning(0, "Using 0 for missing argument %d of %s\n",
370 arg, desc->name);
371 args[arg] = ureg_imm1f(t->ureg, 0.0f);
372 } else {
373 args[arg] = prepare_argument(t, arg,
374 &inst->SrcReg[optype][arg]);
375 }
376 }
377
378 /* prepare dst */
379 dst[0] = get_temp(t, dstreg);
380
381 if (optype) {
382 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_W);
383 } else {
384 GLuint dstMask = inst->DstReg[optype].dstMask;
385 if (dstMask == GL_NONE) {
386 dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ);
387 } else {
388 dst[0] = ureg_writemask(dst[0], dstMask); /* the enum values match */
389 }
390 }
391
392 /* emit the main instruction */
393 emit_arith_inst(t, desc, dst, args, arg);
394
395 emit_dstmod(t, *dst, inst->DstReg[optype].dstMod);
396
397 t->regs_written[t->current_pass][dstreg] = true;
398 }
399 }
400
401 static void
402 finalize_shader(struct st_translate *t, unsigned numPasses)
403 {
404 struct ureg_dst dst[1] = { { 0 } };
405 struct ureg_src src[1] = { { 0 } };
406
407 if (t->regs_written[numPasses-1][0]) {
408 /* copy the result into the OUT slot */
409 dst[0] = t->outputs[t->outputMapping[FRAG_RESULT_COLOR]];
410 src[0] = ureg_src(t->temps[0]);
411 ureg_insn(t->ureg, TGSI_OPCODE_MOV, dst, 1, src, 1);
412 }
413
414 /* signal the end of the program */
415 ureg_insn(t->ureg, TGSI_OPCODE_END, dst, 0, src, 0);
416 }
417
418 /**
419 * Called when a new variant is needed, we need to translate
420 * the ATI fragment shader to TGSI
421 */
422 enum pipe_error
423 st_translate_atifs_program(
424 struct ureg_program *ureg,
425 struct ati_fragment_shader *atifs,
426 struct gl_program *program,
427 GLuint numInputs,
428 const GLuint inputMapping[],
429 const ubyte inputSemanticName[],
430 const ubyte inputSemanticIndex[],
431 const GLuint interpMode[],
432 GLuint numOutputs,
433 const GLuint outputMapping[],
434 const ubyte outputSemanticName[],
435 const ubyte outputSemanticIndex[])
436 {
437 enum pipe_error ret = PIPE_OK;
438
439 unsigned pass, i, r;
440
441 struct st_translate translate, *t;
442 t = &translate;
443 memset(t, 0, sizeof *t);
444
445 t->inputMapping = inputMapping;
446 t->outputMapping = outputMapping;
447 t->ureg = ureg;
448 t->atifs = atifs;
449
450 /*
451 * Declare input attributes.
452 */
453 for (i = 0; i < numInputs; i++) {
454 t->inputs[i] = ureg_DECL_fs_input(ureg,
455 inputSemanticName[i],
456 inputSemanticIndex[i],
457 interpMode[i]);
458 }
459
460 /*
461 * Declare output attributes:
462 * we always have numOutputs=1 and it's FRAG_RESULT_COLOR
463 */
464 t->outputs[0] = ureg_DECL_output(ureg,
465 TGSI_SEMANTIC_COLOR,
466 outputSemanticIndex[0]);
467
468 /* Emit constants and immediates. Mesa uses a single index space
469 * for these, so we put all the translated regs in t->constants.
470 */
471 if (program->Parameters) {
472 t->constants = calloc(program->Parameters->NumParameters,
473 sizeof t->constants[0]);
474 if (t->constants == NULL) {
475 ret = PIPE_ERROR_OUT_OF_MEMORY;
476 goto out;
477 }
478
479 for (i = 0; i < program->Parameters->NumParameters; i++) {
480 switch (program->Parameters->Parameters[i].Type) {
481 case PROGRAM_STATE_VAR:
482 case PROGRAM_UNIFORM:
483 t->constants[i] = ureg_DECL_constant(ureg, i);
484 break;
485 case PROGRAM_CONSTANT:
486 t->constants[i] =
487 ureg_DECL_immediate(ureg,
488 (const float*)program->Parameters->ParameterValues[i],
489 4);
490 break;
491 default:
492 break;
493 }
494 }
495 }
496
497 /* texture samplers */
498 for (i = 0; i < MAX_NUM_FRAGMENT_REGISTERS_ATI; i++) {
499 if (program->SamplersUsed & (1 << i)) {
500 t->samplers[i] = ureg_DECL_sampler(ureg, i);
501 /* the texture target is still unknown, it will be fixed in the draw call */
502 ureg_DECL_sampler_view(ureg, i, TGSI_TEXTURE_2D,
503 TGSI_RETURN_TYPE_FLOAT,
504 TGSI_RETURN_TYPE_FLOAT,
505 TGSI_RETURN_TYPE_FLOAT,
506 TGSI_RETURN_TYPE_FLOAT);
507 }
508 }
509
510 /* emit instructions */
511 for (pass = 0; pass < atifs->NumPasses; pass++) {
512 t->current_pass = pass;
513 for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
514 struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
515 compile_setupinst(t, r, texinst);
516 }
517 for (i = 0; i < atifs->numArithInstr[pass]; i++) {
518 struct atifs_instruction *inst = &atifs->Instructions[pass][i];
519 compile_instruction(t, inst);
520 }
521 }
522
523 finalize_shader(t, atifs->NumPasses);
524
525 out:
526 free(t->constants);
527
528 if (t->error) {
529 debug_printf("%s: translate error flag set\n", __func__);
530 }
531
532 return ret;
533 }
534
535 /**
536 * Called in ProgramStringNotify, we need to fill the metadata of the
537 * gl_program attached to the ati_fragment_shader
538 */
539 void
540 st_init_atifs_prog(struct gl_context *ctx, struct gl_program *prog)
541 {
542 /* we know this is st_fragment_program, because of st_new_ati_fs() */
543 struct st_fragment_program *stfp = (struct st_fragment_program *) prog;
544 struct ati_fragment_shader *atifs = stfp->ati_fs;
545
546 unsigned pass, i, r, optype, arg;
547
548 static const gl_state_index fog_params_state[STATE_LENGTH] =
549 {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED, 0, 0, 0};
550 static const gl_state_index fog_color[STATE_LENGTH] =
551 {STATE_FOG_COLOR, 0, 0, 0, 0};
552
553 prog->info.inputs_read = 0;
554 prog->info.outputs_written = BITFIELD64_BIT(FRAG_RESULT_COLOR);
555 prog->SamplersUsed = 0;
556 prog->Parameters = _mesa_new_parameter_list();
557
558 /* fill in inputs_read, SamplersUsed, TexturesUsed */
559 for (pass = 0; pass < atifs->NumPasses; pass++) {
560 for (r = 0; r < MAX_NUM_FRAGMENT_REGISTERS_ATI; r++) {
561 struct atifs_setupinst *texinst = &atifs->SetupInst[pass][r];
562 GLuint pass_tex = texinst->src;
563
564 if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP) {
565 /* mark which texcoords are used */
566 prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
567 /* by default there is 1:1 mapping between samplers and textures */
568 prog->SamplersUsed |= (1 << r);
569 /* the target is unknown here, it will be fixed in the draw call */
570 prog->TexturesUsed[r] = TEXTURE_2D_BIT;
571 } else if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP) {
572 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
573 prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + pass_tex - GL_TEXTURE0_ARB);
574 }
575 }
576 }
577 }
578 for (pass = 0; pass < atifs->NumPasses; pass++) {
579 for (i = 0; i < atifs->numArithInstr[pass]; i++) {
580 struct atifs_instruction *inst = &atifs->Instructions[pass][i];
581
582 for (optype = 0; optype < 2; optype++) { /* color, alpha */
583 if (inst->Opcode[optype]) {
584 for (arg = 0; arg < inst->ArgCount[optype]; arg++) {
585 GLint index = inst->SrcReg[optype][arg].Index;
586 if (index == GL_PRIMARY_COLOR_EXT) {
587 prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_COL0);
588 } else if (index == GL_SECONDARY_INTERPOLATOR_ATI) {
589 /* note: ATI_fragment_shader.txt never specifies what
590 * GL_SECONDARY_INTERPOLATOR_ATI is, swrast uses
591 * VARYING_SLOT_COL1 for this input */
592 prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_COL1);
593 }
594 }
595 }
596 }
597 }
598 }
599 /* we may need fog */
600 prog->info.inputs_read |= BITFIELD64_BIT(VARYING_SLOT_FOGC);
601
602 /* we always have the ATI_fs constants, and the fog params */
603 for (i = 0; i < MAX_NUM_FRAGMENT_CONSTANTS_ATI; i++) {
604 _mesa_add_parameter(prog->Parameters, PROGRAM_UNIFORM,
605 NULL, 4, GL_FLOAT, NULL, NULL);
606 }
607 _mesa_add_state_reference(prog->Parameters, fog_params_state);
608 _mesa_add_state_reference(prog->Parameters, fog_color);
609
610 prog->arb.NumInstructions = 0;
611 prog->arb.NumTemporaries = MAX_NUM_FRAGMENT_REGISTERS_ATI + 3; /* 3 input temps for arith ops */
612 prog->arb.NumParameters = MAX_NUM_FRAGMENT_CONSTANTS_ATI + 2; /* 2 state variables for fog */
613 }
614
615
616 struct tgsi_atifs_transform {
617 struct tgsi_transform_context base;
618 struct tgsi_shader_info info;
619 const struct st_fp_variant_key *key;
620 bool first_instruction_emitted;
621 unsigned fog_factor_temp;
622 unsigned fog_clamp_imm;
623 };
624
/* Downcast a generic transform context to the ATI fs one (base is its first member). */
static inline struct tgsi_atifs_transform *
tgsi_atifs_transform(struct tgsi_transform_context *tctx)
{
   struct tgsi_atifs_transform *ctx = (struct tgsi_atifs_transform *)tctx;

   return ctx;
}
630
631 /* copied from st_cb_drawpixels_shader.c */
632 static void
633 set_src(struct tgsi_full_instruction *inst, unsigned i, unsigned file, unsigned index,
634 unsigned x, unsigned y, unsigned z, unsigned w)
635 {
636 inst->Src[i].Register.File = file;
637 inst->Src[i].Register.Index = index;
638 inst->Src[i].Register.SwizzleX = x;
639 inst->Src[i].Register.SwizzleY = y;
640 inst->Src[i].Register.SwizzleZ = z;
641 inst->Src[i].Register.SwizzleW = w;
642 }
643
644 #define SET_SRC(inst, i, file, index, x, y, z, w) \
645 set_src(inst, i, file, index, TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, \
646 TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w)
647
648 static void
649 transform_decl(struct tgsi_transform_context *tctx,
650 struct tgsi_full_declaration *decl)
651 {
652 struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx);
653
654 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
655 /* fix texture target */
656 unsigned newtarget = ctx->key->texture_targets[decl->Range.First];
657 if (newtarget)
658 decl->SamplerView.Resource = newtarget;
659 }
660
661 tctx->emit_declaration(tctx, decl);
662 }
663
664 static void
665 transform_instr(struct tgsi_transform_context *tctx,
666 struct tgsi_full_instruction *current_inst)
667 {
668 struct tgsi_atifs_transform *ctx = tgsi_atifs_transform(tctx);
669
670 if (ctx->first_instruction_emitted)
671 goto transform_inst;
672
673 ctx->first_instruction_emitted = true;
674
675 if (ctx->key->fog) {
676 /* add a new temp for the fog factor */
677 ctx->fog_factor_temp = ctx->info.file_max[TGSI_FILE_TEMPORARY] + 1;
678 tgsi_transform_temp_decl(tctx, ctx->fog_factor_temp);
679
680 /* add immediates for clamp */
681 ctx->fog_clamp_imm = ctx->info.immediate_count;
682 tgsi_transform_immediate_decl(tctx, 1.0f, 0.0f, 0.0f, 0.0f);
683 }
684
685 transform_inst:
686 if (current_inst->Instruction.Opcode == TGSI_OPCODE_TEX) {
687 /* fix texture target */
688 unsigned newtarget = ctx->key->texture_targets[current_inst->Src[1].Register.Index];
689 if (newtarget)
690 current_inst->Texture.Texture = newtarget;
691
692 } else if (ctx->key->fog && current_inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
693 current_inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
694 struct tgsi_full_instruction inst;
695 unsigned i;
696 int fogc_index = -1;
697 int reg0_index = current_inst->Src[0].Register.Index;
698
699 /* find FOGC input */
700 for (i = 0; i < ctx->info.num_inputs; i++) {
701 if (ctx->info.input_semantic_name[i] == TGSI_SEMANTIC_FOG) {
702 fogc_index = i;
703 break;
704 }
705 }
706 if (fogc_index < 0) {
707 /* should never be reached, because fog coord input is always declared */
708 tctx->emit_instruction(tctx, current_inst);
709 return;
710 }
711
712 /* compute the 1 component fog factor f */
713 if (ctx->key->fog == 1) {
714 /* LINEAR formula: f = (end - z) / (end - start)
715 * with optimized parameters:
716 * f = MAD(fogcoord, oparams.x, oparams.y)
717 */
718 inst = tgsi_default_full_instruction();
719 inst.Instruction.Opcode = TGSI_OPCODE_MAD;
720 inst.Instruction.NumDstRegs = 1;
721 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
722 inst.Dst[0].Register.Index = ctx->fog_factor_temp;
723 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
724 inst.Instruction.NumSrcRegs = 3;
725 SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
726 SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, X, X, X, X);
727 SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Y, Y, Y, Y);
728 tctx->emit_instruction(tctx, &inst);
729 } else if (ctx->key->fog == 2) {
730 /* EXP formula: f = exp(-dens * z)
731 * with optimized parameters:
732 * f = MUL(fogcoord, oparams.z); f= EX2(-f)
733 */
734 inst = tgsi_default_full_instruction();
735 inst.Instruction.Opcode = TGSI_OPCODE_MUL;
736 inst.Instruction.NumDstRegs = 1;
737 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
738 inst.Dst[0].Register.Index = ctx->fog_factor_temp;
739 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
740 inst.Instruction.NumSrcRegs = 2;
741 SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
742 SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, Z, Z, Z, Z);
743 tctx->emit_instruction(tctx, &inst);
744
745 inst = tgsi_default_full_instruction();
746 inst.Instruction.Opcode = TGSI_OPCODE_EX2;
747 inst.Instruction.NumDstRegs = 1;
748 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
749 inst.Dst[0].Register.Index = ctx->fog_factor_temp;
750 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
751 inst.Instruction.NumSrcRegs = 1;
752 SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
753 inst.Src[0].Register.Negate = 1;
754 tctx->emit_instruction(tctx, &inst);
755 } else if (ctx->key->fog == 3) {
756 /* EXP2 formula: f = exp(-(dens * z)^2)
757 * with optimized parameters:
758 * f = MUL(fogcoord, oparams.w); f=MUL(f, f); f= EX2(-f)
759 */
760 inst = tgsi_default_full_instruction();
761 inst.Instruction.Opcode = TGSI_OPCODE_MUL;
762 inst.Instruction.NumDstRegs = 1;
763 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
764 inst.Dst[0].Register.Index = ctx->fog_factor_temp;
765 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
766 inst.Instruction.NumSrcRegs = 2;
767 SET_SRC(&inst, 0, TGSI_FILE_INPUT, fogc_index, X, Y, Z, W);
768 SET_SRC(&inst, 1, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI, W, W, W, W);
769 tctx->emit_instruction(tctx, &inst);
770
771 inst = tgsi_default_full_instruction();
772 inst.Instruction.Opcode = TGSI_OPCODE_MUL;
773 inst.Instruction.NumDstRegs = 1;
774 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
775 inst.Dst[0].Register.Index = ctx->fog_factor_temp;
776 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
777 inst.Instruction.NumSrcRegs = 2;
778 SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
779 SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
780 tctx->emit_instruction(tctx, &inst);
781
782 inst = tgsi_default_full_instruction();
783 inst.Instruction.Opcode = TGSI_OPCODE_EX2;
784 inst.Instruction.NumDstRegs = 1;
785 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
786 inst.Dst[0].Register.Index = ctx->fog_factor_temp;
787 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
788 inst.Instruction.NumSrcRegs = 1;
789 SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
790 inst.Src[0].Register.Negate ^= 1;
791 tctx->emit_instruction(tctx, &inst);
792 }
793 /* f = CLAMP(f, 0.0, 1.0) */
794 inst = tgsi_default_full_instruction();
795 inst.Instruction.Opcode = TGSI_OPCODE_CLAMP;
796 inst.Instruction.NumDstRegs = 1;
797 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
798 inst.Dst[0].Register.Index = ctx->fog_factor_temp;
799 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
800 inst.Instruction.NumSrcRegs = 3;
801 SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, Y, Z, W);
802 SET_SRC(&inst, 1, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, Y, Y, Y, Y); // 0.0
803 SET_SRC(&inst, 2, TGSI_FILE_IMMEDIATE, ctx->fog_clamp_imm, X, X, X, X); // 1.0
804 tctx->emit_instruction(tctx, &inst);
805
806 /* REG0 = LRP(f, REG0, fogcolor) */
807 inst = tgsi_default_full_instruction();
808 inst.Instruction.Opcode = TGSI_OPCODE_LRP;
809 inst.Instruction.NumDstRegs = 1;
810 inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
811 inst.Dst[0].Register.Index = reg0_index;
812 inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
813 inst.Instruction.NumSrcRegs = 3;
814 SET_SRC(&inst, 0, TGSI_FILE_TEMPORARY, ctx->fog_factor_temp, X, X, X, Y);
815 SET_SRC(&inst, 1, TGSI_FILE_TEMPORARY, reg0_index, X, Y, Z, W);
816 SET_SRC(&inst, 2, TGSI_FILE_CONSTANT, MAX_NUM_FRAGMENT_CONSTANTS_ATI + 1, X, Y, Z, W);
817 tctx->emit_instruction(tctx, &inst);
818 }
819
820 tctx->emit_instruction(tctx, current_inst);
821 }
822
823 /*
824 * A post-process step in the draw call to fix texture targets and
825 * insert code for fog.
826 */
827 const struct tgsi_token *
828 st_fixup_atifs(const struct tgsi_token *tokens,
829 const struct st_fp_variant_key *key)
830 {
831 struct tgsi_atifs_transform ctx;
832 struct tgsi_token *newtoks;
833 int newlen;
834
835 memset(&ctx, 0, sizeof(ctx));
836 ctx.base.transform_declaration = transform_decl;
837 ctx.base.transform_instruction = transform_instr;
838 ctx.key = key;
839 tgsi_scan_shader(tokens, &ctx.info);
840
841 newlen = tgsi_num_tokens(tokens) + 30;
842 newtoks = tgsi_alloc_tokens(newlen);
843 if (!newtoks)
844 return NULL;
845
846 tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
847 return newtoks;
848 }
849