i965/fs: Move brw_wm_compile::fp to fs_visitor.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_fp.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_fp.cpp
25 *
26 * Implementation of the compiler for GL_ARB_fragment_program shaders on top
27 * of the GLSL compiler backend.
28 */
29
30 #include "brw_context.h"
31 #include "brw_fs.h"
32
33 static fs_reg
34 regoffset(fs_reg reg, int i)
35 {
36 reg.reg_offset += i;
37 return reg;
38 }
39
40 void
41 fs_visitor::emit_fp_alu1(enum opcode opcode,
42 const struct prog_instruction *fpi,
43 fs_reg dst, fs_reg src)
44 {
45 for (int i = 0; i < 4; i++) {
46 if (fpi->DstReg.WriteMask & (1 << i))
47 emit(opcode, regoffset(dst, i), regoffset(src, i));
48 }
49 }
50
51 void
52 fs_visitor::emit_fp_alu2(enum opcode opcode,
53 const struct prog_instruction *fpi,
54 fs_reg dst, fs_reg src0, fs_reg src1)
55 {
56 for (int i = 0; i < 4; i++) {
57 if (fpi->DstReg.WriteMask & (1 << i))
58 emit(opcode, regoffset(dst, i),
59 regoffset(src0, i), regoffset(src1, i));
60 }
61 }
62
63 void
64 fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
65 fs_reg dst, fs_reg src0, fs_reg src1)
66 {
67 uint32_t conditionalmod;
68 if (fpi->Opcode == OPCODE_MIN)
69 conditionalmod = BRW_CONDITIONAL_L;
70 else
71 conditionalmod = BRW_CONDITIONAL_GE;
72
73 for (int i = 0; i < 4; i++) {
74 if (fpi->DstReg.WriteMask & (1 << i)) {
75 emit_minmax(conditionalmod, regoffset(dst, i),
76 regoffset(src0, i), regoffset(src1, i));
77 }
78 }
79 }
80
81 void
82 fs_visitor::emit_fp_sop(uint32_t conditional_mod,
83 const struct prog_instruction *fpi,
84 fs_reg dst, fs_reg src0, fs_reg src1,
85 fs_reg one)
86 {
87 for (int i = 0; i < 4; i++) {
88 if (fpi->DstReg.WriteMask & (1 << i)) {
89 fs_inst *inst;
90
91 emit(CMP(reg_null_d, regoffset(src0, i), regoffset(src1, i),
92 conditional_mod));
93
94 inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f));
95 inst->predicate = BRW_PREDICATE_NORMAL;
96 }
97 }
98 }
99
100 void
101 fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
102 fs_reg dst, fs_reg src)
103 {
104 for (int i = 0; i < 4; i++) {
105 if (fpi->DstReg.WriteMask & (1 << i))
106 emit(MOV(regoffset(dst, i), src));
107 }
108 }
109
110 void
111 fs_visitor::emit_fp_scalar_math(enum opcode opcode,
112 const struct prog_instruction *fpi,
113 fs_reg dst, fs_reg src)
114 {
115 fs_reg temp = fs_reg(this, glsl_type::float_type);
116 emit_math(opcode, temp, src);
117 emit_fp_scalar_write(fpi, dst, temp);
118 }
119
120 void
121 fs_visitor::emit_fragment_program_code()
122 {
123 setup_fp_regs();
124
125 fs_reg null = fs_reg(brw_null_reg());
126
127 /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
128 * be:
129 *
130 * sel.f0 dst 1.0 0.0
131 *
132 * instead of
133 *
134 * mov dst 0.0
135 * mov.f0 dst 1.0
136 */
137 fs_reg one = fs_reg(this, glsl_type::float_type);
138 emit(MOV(one, fs_reg(1.0f)));
139
140 for (unsigned int insn = 0; insn < fp->Base.NumInstructions; insn++) {
141 const struct prog_instruction *fpi = &fp->Base.Instructions[insn];
142 base_ir = fpi;
143
144 //_mesa_print_instruction(fpi);
145
146 fs_reg dst;
147 fs_reg src[3];
148
149 /* We always emit into a temporary destination register to avoid
150 * aliasing issues.
151 */
152 dst = fs_reg(this, glsl_type::vec4_type);
153
154 for (int i = 0; i < 3; i++)
155 src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
156
157 switch (fpi->Opcode) {
158 case OPCODE_ABS:
159 src[0].abs = true;
160 src[0].negate = false;
161 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
162 break;
163
164 case OPCODE_ADD:
165 emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
166 break;
167
168 case OPCODE_CMP:
169 for (int i = 0; i < 4; i++) {
170 if (fpi->DstReg.WriteMask & (1 << i)) {
171 fs_inst *inst;
172
173 emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
174 BRW_CONDITIONAL_L));
175
176 inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
177 regoffset(src[1], i), regoffset(src[2], i));
178 inst->predicate = BRW_PREDICATE_NORMAL;
179 }
180 }
181 break;
182
183 case OPCODE_COS:
184 emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
185 break;
186
187 case OPCODE_DP2:
188 case OPCODE_DP3:
189 case OPCODE_DP4:
190 case OPCODE_DPH: {
191 fs_reg mul = fs_reg(this, glsl_type::float_type);
192 fs_reg acc = fs_reg(this, glsl_type::float_type);
193 int count;
194
195 switch (fpi->Opcode) {
196 case OPCODE_DP2: count = 2; break;
197 case OPCODE_DP3: count = 3; break;
198 case OPCODE_DP4: count = 4; break;
199 case OPCODE_DPH: count = 3; break;
200 default: assert(!"not reached"); count = 0; break;
201 }
202
203 emit(MUL(acc, regoffset(src[0], 0), regoffset(src[1], 0)));
204 for (int i = 1; i < count; i++) {
205 emit(MUL(mul, regoffset(src[0], i), regoffset(src[1], i)));
206 emit(ADD(acc, acc, mul));
207 }
208
209 if (fpi->Opcode == OPCODE_DPH)
210 emit(ADD(acc, acc, regoffset(src[1], 3)));
211
212 emit_fp_scalar_write(fpi, dst, acc);
213 break;
214 }
215
216 case OPCODE_DST:
217 if (fpi->DstReg.WriteMask & WRITEMASK_X)
218 emit(MOV(dst, fs_reg(1.0f)));
219 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
220 emit(MUL(regoffset(dst, 1),
221 regoffset(src[0], 1), regoffset(src[1], 1)));
222 }
223 if (fpi->DstReg.WriteMask & WRITEMASK_Z)
224 emit(MOV(regoffset(dst, 2), regoffset(src[0], 2)));
225 if (fpi->DstReg.WriteMask & WRITEMASK_W)
226 emit(MOV(regoffset(dst, 3), regoffset(src[1], 3)));
227 break;
228
229 case OPCODE_EX2:
230 emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
231 break;
232
233 case OPCODE_FLR:
234 emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
235 break;
236
237 case OPCODE_FRC:
238 emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
239 break;
240
241 case OPCODE_KIL: {
242 for (int i = 0; i < 4; i++) {
243 /* In most cases the argument to a KIL will be something like
244 * TEMP[0].wwww, so there's no point in checking whether .w is < 0
245 * 4 times in a row.
246 */
247 if (i > 0 &&
248 GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
249 GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
250 ((fpi->SrcReg[0].Negate >> i) & 1) ==
251 ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
252 continue;
253 }
254
255 emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
256 BRW_CONDITIONAL_L));
257
258 if (intel->gen < 6 && dispatch_width == 16)
259 fail("Can't support (non-uniform) control flow on 16-wide");
260 emit(IF(BRW_PREDICATE_NORMAL));
261 emit(FS_OPCODE_DISCARD);
262 emit(BRW_OPCODE_ENDIF);
263 }
264 break;
265 }
266
267 case OPCODE_LG2:
268 emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
269 break;
270
271 case OPCODE_LIT:
272 /* From the ARB_fragment_program spec:
273 *
274 * tmp = VectorLoad(op0);
275 * if (tmp.x < 0) tmp.x = 0;
276 * if (tmp.y < 0) tmp.y = 0;
277 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
278 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
279 * result.x = 1.0;
280 * result.y = tmp.x;
281 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
282 * result.w = 1.0;
283 *
284 * Note that we don't do the clamping to +/- 128. We didn't in
285 * brw_wm_emit.c either.
286 */
287 if (fpi->DstReg.WriteMask & WRITEMASK_X)
288 emit(MOV(regoffset(dst, 0), fs_reg(1.0f)));
289
290 if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
291 fs_inst *inst;
292 emit(CMP(null, regoffset(src[0], 0), fs_reg(0.0f),
293 BRW_CONDITIONAL_LE));
294
295 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
296 emit(MOV(regoffset(dst, 1), regoffset(src[0], 0)));
297 inst = emit(MOV(regoffset(dst, 1), fs_reg(0.0f)));
298 inst->predicate = BRW_PREDICATE_NORMAL;
299 }
300
301 if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
302 emit_math(SHADER_OPCODE_POW, regoffset(dst, 2),
303 regoffset(src[0], 1), regoffset(src[0], 3));
304
305 inst = emit(MOV(regoffset(dst, 2), fs_reg(0.0f)));
306 inst->predicate = BRW_PREDICATE_NORMAL;
307 }
308 }
309
310 if (fpi->DstReg.WriteMask & WRITEMASK_W)
311 emit(MOV(regoffset(dst, 3), fs_reg(1.0f)));
312
313 break;
314
315 case OPCODE_LRP:
316 for (int i = 0; i < 4; i++) {
317 if (fpi->DstReg.WriteMask & (1 << i)) {
318 fs_reg neg_src0 = regoffset(src[0], i);
319 neg_src0.negate = !neg_src0.negate;
320 fs_reg temp = fs_reg(this, glsl_type::float_type);
321 fs_reg temp2 = fs_reg(this, glsl_type::float_type);
322 emit(ADD(temp, neg_src0, fs_reg(1.0f)));
323 emit(MUL(temp, temp, regoffset(src[2], i)));
324 emit(MUL(temp2, regoffset(src[0], i), regoffset(src[1], i)));
325 emit(ADD(regoffset(dst, i), temp, temp2));
326 }
327 }
328 break;
329
330 case OPCODE_MAD:
331 for (int i = 0; i < 4; i++) {
332 if (fpi->DstReg.WriteMask & (1 << i)) {
333 fs_reg temp = fs_reg(this, glsl_type::float_type);
334 emit(MUL(temp, regoffset(src[0], i), regoffset(src[1], i)));
335 emit(ADD(regoffset(dst, i), temp, regoffset(src[2], i)));
336 }
337 }
338 break;
339
340 case OPCODE_MAX:
341 emit_fp_minmax(fpi, dst, src[0], src[1]);
342 break;
343
344 case OPCODE_MOV:
345 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
346 break;
347
348 case OPCODE_MIN:
349 emit_fp_minmax(fpi, dst, src[0], src[1]);
350 break;
351
352 case OPCODE_MUL:
353 emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
354 break;
355
356 case OPCODE_POW: {
357 fs_reg temp = fs_reg(this, glsl_type::float_type);
358 emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
359 emit_fp_scalar_write(fpi, dst, temp);
360 break;
361 }
362
363 case OPCODE_RCP:
364 emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
365 break;
366
367 case OPCODE_RSQ:
368 emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
369 break;
370
371 case OPCODE_SCS:
372 if (fpi->DstReg.WriteMask & WRITEMASK_X) {
373 emit_math(SHADER_OPCODE_COS, regoffset(dst, 0),
374 regoffset(src[0], 0));
375 }
376
377 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
378 emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1),
379 regoffset(src[0], 1));
380 }
381 break;
382
383 case OPCODE_SGE:
384 emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
385 break;
386
387 case OPCODE_SIN:
388 emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
389 break;
390
391 case OPCODE_SLT:
392 emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
393 break;
394
395 case OPCODE_SUB: {
396 fs_reg neg_src1 = src[1];
397 neg_src1.negate = !src[1].negate;
398
399 emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
400 break;
401 }
402
403 case OPCODE_TEX:
404 case OPCODE_TXB:
405 case OPCODE_TXP: {
406 /* We piggy-back on the GLSL IR support for texture setup. To do so,
407 * we have to cook up an ir_texture that has the coordinate field
408 * with appropriate type, and shadow_comparitor set or not. All the
409 * other properties of ir_texture are passed in as arguments to the
410 * emit_texture_gen* function.
411 */
412 ir_texture *ir = NULL;
413
414 fs_reg lod;
415 fs_reg dpdy;
416 fs_reg coordinate = src[0];
417 fs_reg shadow_c;
418
419 switch (fpi->Opcode) {
420 case OPCODE_TEX:
421 ir = new(mem_ctx) ir_texture(ir_tex);
422 break;
423 case OPCODE_TXP: {
424 ir = new(mem_ctx) ir_texture(ir_tex);
425
426 coordinate = fs_reg(this, glsl_type::vec3_type);
427 fs_reg invproj = fs_reg(this, glsl_type::float_type);
428 emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3));
429 for (int i = 0; i < 3; i++) {
430 emit(MUL(regoffset(coordinate, i),
431 regoffset(src[0], i), invproj));
432 }
433 break;
434 }
435 case OPCODE_TXB:
436 ir = new(mem_ctx) ir_texture(ir_txb);
437 lod = regoffset(src[0], 3);
438 break;
439 default:
440 assert(!"not reached");
441 break;
442 }
443
444 const glsl_type *coordinate_type;
445 switch (fpi->TexSrcTarget) {
446 case TEXTURE_1D_INDEX:
447 coordinate_type = glsl_type::float_type;
448 break;
449
450 case TEXTURE_2D_INDEX:
451 case TEXTURE_1D_ARRAY_INDEX:
452 case TEXTURE_RECT_INDEX:
453 case TEXTURE_EXTERNAL_INDEX:
454 coordinate_type = glsl_type::vec2_type;
455 break;
456
457 case TEXTURE_3D_INDEX:
458 case TEXTURE_2D_ARRAY_INDEX:
459 coordinate_type = glsl_type::vec3_type;
460 break;
461
462 case TEXTURE_CUBE_INDEX: {
463 coordinate_type = glsl_type::vec3_type;
464
465 fs_reg temp = fs_reg(this, glsl_type::float_type);
466 fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
467 fs_reg abscoord = coordinate;
468 abscoord.negate = false;
469 abscoord.abs = true;
470 emit_minmax(BRW_CONDITIONAL_GE, temp,
471 regoffset(abscoord, 0), regoffset(abscoord, 1));
472 emit_minmax(BRW_CONDITIONAL_GE, temp,
473 temp, regoffset(abscoord, 2));
474 emit_math(SHADER_OPCODE_RCP, temp, temp);
475 for (int i = 0; i < 3; i++) {
476 emit(MUL(regoffset(cubecoord, i),
477 regoffset(coordinate, i), temp));
478 }
479
480 coordinate = cubecoord;
481 break;
482 }
483
484 default:
485 assert(!"not reached");
486 coordinate_type = glsl_type::vec2_type;
487 break;
488 }
489
490 ir_constant_data junk_data;
491 ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);
492
493 coordinate = rescale_texcoord(ir, coordinate,
494 fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
495 fpi->TexSrcUnit, fpi->TexSrcUnit);
496
497 if (fpi->TexShadow) {
498 shadow_c = regoffset(coordinate, 2);
499 ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
500 }
501
502 fs_inst *inst;
503 if (intel->gen >= 7) {
504 inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy);
505 } else if (intel->gen >= 5) {
506 inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy);
507 } else {
508 inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
509 }
510
511 inst->sampler = fpi->TexSrcUnit;
512 inst->shadow_compare = fpi->TexShadow;
513
514 /* Reuse the GLSL swizzle_result() handler. */
515 swizzle_result(ir, dst, fpi->TexSrcUnit);
516 dst = this->result;
517
518 break;
519 }
520
521 case OPCODE_SWZ:
522 /* Note that SWZ's extended swizzles are handled in the general
523 * get_src_reg() code.
524 */
525 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
526 break;
527
528 case OPCODE_XPD:
529 for (int i = 0; i < 3; i++) {
530 if (fpi->DstReg.WriteMask & (1 << i)) {
531 int i1 = (i + 1) % 3;
532 int i2 = (i + 2) % 3;
533
534 fs_reg temp = fs_reg(this, glsl_type::float_type);
535 fs_reg neg_src1_1 = regoffset(src[1], i1);
536 neg_src1_1.negate = !neg_src1_1.negate;
537 emit(MUL(temp, regoffset(src[0], i2), neg_src1_1));
538 emit(MUL(regoffset(dst, i),
539 regoffset(src[0], i1), regoffset(src[1], i2)));
540 emit(ADD(regoffset(dst, i), regoffset(dst, i), temp));
541 }
542 }
543 break;
544
545 case OPCODE_END:
546 break;
547
548 default:
549 _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
550 _mesa_opcode_string(fpi->Opcode));
551 }
552
553 /* To handle saturates, we emit a MOV with a saturate bit, which
554 * optimization should fold into the preceding instructions when safe.
555 */
556 if (fpi->Opcode != OPCODE_END) {
557 fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
558
559 for (int i = 0; i < 4; i++) {
560 if (fpi->DstReg.WriteMask & (1 << i)) {
561 fs_inst *inst = emit(MOV(regoffset(real_dst, i),
562 regoffset(dst, i)));
563 inst->saturate = fpi->SaturateMode;
564 }
565 }
566 }
567 }
568
569 /* Epilogue:
570 *
571 * Fragment depth has this strange convention of being the .z component of
572 * a vec4. emit_fb_write() wants to see a float value, instead.
573 */
574 this->current_annotation = "result.depth write";
575 if (frag_depth.file != BAD_FILE) {
576 fs_reg temp = fs_reg(this, glsl_type::float_type);
577 emit(MOV(temp, regoffset(frag_depth, 2)));
578 frag_depth = temp;
579 }
580 }
581
582 void
583 fs_visitor::setup_fp_regs()
584 {
585 /* PROGRAM_TEMPORARY */
586 int num_temp = fp->Base.NumTemporaries;
587 fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
588 for (int i = 0; i < num_temp; i++)
589 fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
590
591 /* PROGRAM_STATE_VAR etc. */
592 if (dispatch_width == 8) {
593 for (unsigned p = 0;
594 p < fp->Base.Parameters->NumParameters; p++) {
595 for (unsigned int i = 0; i < 4; i++) {
596 this->param_index[c->prog_data.nr_params] = p;
597 this->param_offset[c->prog_data.nr_params] = i;
598 c->prog_data.nr_params++;
599 }
600 }
601 }
602
603 fp_input_regs = rzalloc_array(mem_ctx, fs_reg, FRAG_ATTRIB_MAX);
604 for (int i = 0; i < FRAG_ATTRIB_MAX; i++) {
605 if (fp->Base.InputsRead & BITFIELD64_BIT(i)) {
606 /* Make up a dummy instruction to reuse code for emitting
607 * interpolation.
608 */
609 ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
610 "fp_input",
611 ir_var_in);
612 ir->location = i;
613
614 this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
615 i);
616
617 switch (i) {
618 case FRAG_ATTRIB_WPOS:
619 ir->pixel_center_integer = fp->PixelCenterInteger;
620 ir->origin_upper_left = fp->OriginUpperLeft;
621 fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
622 break;
623 case FRAG_ATTRIB_FACE:
624 fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
625 break;
626 default:
627 fp_input_regs[i] = *emit_general_interpolation(ir);
628
629 if (i == FRAG_ATTRIB_FOGC) {
630 emit(MOV(regoffset(fp_input_regs[i], 1), fs_reg(0.0f)));
631 emit(MOV(regoffset(fp_input_regs[i], 2), fs_reg(0.0f)));
632 emit(MOV(regoffset(fp_input_regs[i], 3), fs_reg(1.0f)));
633 }
634
635 break;
636 }
637
638 this->current_annotation = NULL;
639 }
640 }
641 }
642
643 fs_reg
644 fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
645 {
646 switch (dst->File) {
647 case PROGRAM_TEMPORARY:
648 return fp_temp_regs[dst->Index];
649
650 case PROGRAM_OUTPUT:
651 if (dst->Index == FRAG_RESULT_DEPTH) {
652 if (frag_depth.file == BAD_FILE)
653 frag_depth = fs_reg(this, glsl_type::vec4_type);
654 return frag_depth;
655 } else if (dst->Index == FRAG_RESULT_COLOR) {
656 if (outputs[0].file == BAD_FILE) {
657 outputs[0] = fs_reg(this, glsl_type::vec4_type);
658 output_components[0] = 4;
659
660 /* Tell emit_fb_writes() to smear fragment.color across all the
661 * color attachments.
662 */
663 for (int i = 1; i < c->key.nr_color_regions; i++) {
664 outputs[i] = outputs[0];
665 output_components[i] = output_components[0];
666 }
667 }
668 return outputs[0];
669 } else {
670 int output_index = dst->Index - FRAG_RESULT_DATA0;
671 if (outputs[output_index].file == BAD_FILE) {
672 outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
673 }
674 output_components[output_index] = 4;
675 return outputs[output_index];
676 }
677
678 case PROGRAM_UNDEFINED:
679 return fs_reg();
680
681 default:
682 _mesa_problem(ctx, "bad dst register file: %s\n",
683 _mesa_register_file_name((gl_register_file)dst->File));
684 return fs_reg(this, glsl_type::vec4_type);
685 }
686 }
687
688 fs_reg
689 fs_visitor::get_fp_src_reg(const prog_src_register *src)
690 {
691 struct gl_program_parameter_list *plist = fp->Base.Parameters;
692
693 fs_reg result;
694
695 assert(!src->Abs);
696
697 switch (src->File) {
698 case PROGRAM_UNDEFINED:
699 return fs_reg();
700 case PROGRAM_TEMPORARY:
701 result = fp_temp_regs[src->Index];
702 break;
703
704 case PROGRAM_INPUT:
705 result = fp_input_regs[src->Index];
706 break;
707
708 case PROGRAM_STATE_VAR:
709 case PROGRAM_UNIFORM:
710 case PROGRAM_CONSTANT:
711 /* We actually want to look at the type in the Parameters list for this,
712 * because this lets us upload constant builtin uniforms, as actual
713 * constants.
714 */
715 switch (plist->Parameters[src->Index].Type) {
716 case PROGRAM_CONSTANT: {
717 result = fs_reg(this, glsl_type::vec4_type);
718
719 for (int i = 0; i < 4; i++) {
720 emit(MOV(regoffset(result, i),
721 fs_reg(plist->ParameterValues[src->Index][i].f)));
722 }
723 break;
724 }
725
726 case PROGRAM_STATE_VAR:
727 case PROGRAM_UNIFORM:
728 result = fs_reg(UNIFORM, src->Index * 4);
729 break;
730
731 default:
732 _mesa_problem(ctx, "bad uniform src register file: %s\n",
733 _mesa_register_file_name((gl_register_file)src->File));
734 return fs_reg(this, glsl_type::vec4_type);
735 }
736 break;
737
738 default:
739 _mesa_problem(ctx, "bad src register file: %s\n",
740 _mesa_register_file_name((gl_register_file)src->File));
741 return fs_reg(this, glsl_type::vec4_type);
742 }
743
744 if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
745 fs_reg unswizzled = result;
746 result = fs_reg(this, glsl_type::vec4_type);
747 for (int i = 0; i < 4; i++) {
748 bool negate = src->Negate & (1 << i);
749 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
750 * but it costs us nothing to support it.
751 */
752 int src_swiz = GET_SWZ(src->Swizzle, i);
753 if (src_swiz == SWIZZLE_ZERO) {
754 emit(MOV(regoffset(result, i), fs_reg(0.0f)));
755 } else if (src_swiz == SWIZZLE_ONE) {
756 emit(MOV(regoffset(result, i),
757 negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
758 } else {
759 fs_reg src = regoffset(unswizzled, src_swiz);
760 if (negate)
761 src.negate = !src.negate;
762 emit(MOV(regoffset(result, i), src));
763 }
764 }
765 }
766
767 return result;
768 }