i965: Move pre-draw resolve buffers to dd::UpdateState
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_fp.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_fp.cpp
25 *
26 * Implementation of the compiler for GL_ARB_fragment_program shaders on top
27 * of the GLSL compiler backend.
28 */
29
30 #include "brw_context.h"
31 #include "brw_fs.h"
32
33 void
34 fs_visitor::emit_fp_alu1(enum opcode opcode,
35 const struct prog_instruction *fpi,
36 fs_reg dst, fs_reg src)
37 {
38 for (int i = 0; i < 4; i++) {
39 if (fpi->DstReg.WriteMask & (1 << i))
40 emit(opcode, offset(dst, i), offset(src, i));
41 }
42 }
43
44 void
45 fs_visitor::emit_fp_alu2(enum opcode opcode,
46 const struct prog_instruction *fpi,
47 fs_reg dst, fs_reg src0, fs_reg src1)
48 {
49 for (int i = 0; i < 4; i++) {
50 if (fpi->DstReg.WriteMask & (1 << i))
51 emit(opcode, offset(dst, i),
52 offset(src0, i), offset(src1, i));
53 }
54 }
55
56 void
57 fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
58 fs_reg dst, fs_reg src0, fs_reg src1)
59 {
60 enum brw_conditional_mod conditionalmod;
61 if (fpi->Opcode == OPCODE_MIN)
62 conditionalmod = BRW_CONDITIONAL_L;
63 else
64 conditionalmod = BRW_CONDITIONAL_GE;
65
66 for (int i = 0; i < 4; i++) {
67 if (fpi->DstReg.WriteMask & (1 << i)) {
68 emit_minmax(conditionalmod, offset(dst, i),
69 offset(src0, i), offset(src1, i));
70 }
71 }
72 }
73
74 void
75 fs_visitor::emit_fp_sop(enum brw_conditional_mod conditional_mod,
76 const struct prog_instruction *fpi,
77 fs_reg dst, fs_reg src0, fs_reg src1,
78 fs_reg one)
79 {
80 for (int i = 0; i < 4; i++) {
81 if (fpi->DstReg.WriteMask & (1 << i)) {
82 fs_inst *inst;
83
84 emit(CMP(reg_null_d, offset(src0, i), offset(src1, i),
85 conditional_mod));
86
87 inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f));
88 inst->predicate = BRW_PREDICATE_NORMAL;
89 }
90 }
91 }
92
93 void
94 fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
95 fs_reg dst, fs_reg src)
96 {
97 for (int i = 0; i < 4; i++) {
98 if (fpi->DstReg.WriteMask & (1 << i))
99 emit(MOV(offset(dst, i), src));
100 }
101 }
102
103 void
104 fs_visitor::emit_fp_scalar_math(enum opcode opcode,
105 const struct prog_instruction *fpi,
106 fs_reg dst, fs_reg src)
107 {
108 fs_reg temp = fs_reg(this, glsl_type::float_type);
109 emit_math(opcode, temp, src);
110 emit_fp_scalar_write(fpi, dst, temp);
111 }
112
113 void
114 fs_visitor::emit_fragment_program_code()
115 {
116 setup_fp_regs();
117
118 fs_reg null = fs_reg(brw_null_reg());
119
120 /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
121 * be:
122 *
123 * sel.f0 dst 1.0 0.0
124 *
125 * instead of
126 *
127 * mov dst 0.0
128 * mov.f0 dst 1.0
129 */
130 fs_reg one = fs_reg(this, glsl_type::float_type);
131 emit(MOV(one, fs_reg(1.0f)));
132
133 for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
134 const struct prog_instruction *fpi = &prog->Instructions[insn];
135 base_ir = fpi;
136
137 //_mesa_print_instruction(fpi);
138
139 fs_reg dst;
140 fs_reg src[3];
141
142 /* We always emit into a temporary destination register to avoid
143 * aliasing issues.
144 */
145 dst = fs_reg(this, glsl_type::vec4_type);
146
147 for (int i = 0; i < 3; i++)
148 src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
149
150 switch (fpi->Opcode) {
151 case OPCODE_ABS:
152 src[0].abs = true;
153 src[0].negate = false;
154 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
155 break;
156
157 case OPCODE_ADD:
158 emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
159 break;
160
161 case OPCODE_CMP:
162 for (int i = 0; i < 4; i++) {
163 if (fpi->DstReg.WriteMask & (1 << i)) {
164 fs_inst *inst;
165
166 emit(CMP(null, offset(src[0], i), fs_reg(0.0f),
167 BRW_CONDITIONAL_L));
168
169 inst = emit(BRW_OPCODE_SEL, offset(dst, i),
170 offset(src[1], i), offset(src[2], i));
171 inst->predicate = BRW_PREDICATE_NORMAL;
172 }
173 }
174 break;
175
176 case OPCODE_COS:
177 emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
178 break;
179
180 case OPCODE_DP2:
181 case OPCODE_DP3:
182 case OPCODE_DP4:
183 case OPCODE_DPH: {
184 fs_reg mul = fs_reg(this, glsl_type::float_type);
185 fs_reg acc = fs_reg(this, glsl_type::float_type);
186 int count;
187
188 switch (fpi->Opcode) {
189 case OPCODE_DP2: count = 2; break;
190 case OPCODE_DP3: count = 3; break;
191 case OPCODE_DP4: count = 4; break;
192 case OPCODE_DPH: count = 3; break;
193 default: unreachable("not reached");
194 }
195
196 emit(MUL(acc, offset(src[0], 0), offset(src[1], 0)));
197 for (int i = 1; i < count; i++) {
198 emit(MUL(mul, offset(src[0], i), offset(src[1], i)));
199 emit(ADD(acc, acc, mul));
200 }
201
202 if (fpi->Opcode == OPCODE_DPH)
203 emit(ADD(acc, acc, offset(src[1], 3)));
204
205 emit_fp_scalar_write(fpi, dst, acc);
206 break;
207 }
208
209 case OPCODE_DST:
210 if (fpi->DstReg.WriteMask & WRITEMASK_X)
211 emit(MOV(dst, fs_reg(1.0f)));
212 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
213 emit(MUL(offset(dst, 1),
214 offset(src[0], 1), offset(src[1], 1)));
215 }
216 if (fpi->DstReg.WriteMask & WRITEMASK_Z)
217 emit(MOV(offset(dst, 2), offset(src[0], 2)));
218 if (fpi->DstReg.WriteMask & WRITEMASK_W)
219 emit(MOV(offset(dst, 3), offset(src[1], 3)));
220 break;
221
222 case OPCODE_EX2:
223 emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
224 break;
225
226 case OPCODE_FLR:
227 emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
228 break;
229
230 case OPCODE_FRC:
231 emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
232 break;
233
234 case OPCODE_KIL: {
235 for (int i = 0; i < 4; i++) {
236 /* In most cases the argument to a KIL will be something like
237 * TEMP[0].wwww, so there's no point in checking whether .w is < 0
238 * 4 times in a row.
239 */
240 if (i > 0 &&
241 GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
242 GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
243 ((fpi->SrcReg[0].Negate >> i) & 1) ==
244 ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
245 continue;
246 }
247
248
249 /* Emit an instruction that's predicated on the current
250 * undiscarded pixels, and updates just those pixels to be
251 * turned off.
252 */
253 fs_inst *cmp = emit(CMP(null, offset(src[0], i), fs_reg(0.0f),
254 BRW_CONDITIONAL_GE));
255 cmp->predicate = BRW_PREDICATE_NORMAL;
256 cmp->flag_subreg = 1;
257 }
258 break;
259 }
260
261 case OPCODE_LG2:
262 emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
263 break;
264
265 case OPCODE_LIT:
266 /* From the ARB_fragment_program spec:
267 *
268 * tmp = VectorLoad(op0);
269 * if (tmp.x < 0) tmp.x = 0;
270 * if (tmp.y < 0) tmp.y = 0;
271 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
272 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
273 * result.x = 1.0;
274 * result.y = tmp.x;
275 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
276 * result.w = 1.0;
277 *
278 * Note that we don't do the clamping to +/- 128. We didn't in
279 * brw_wm_emit.c either.
280 */
281 if (fpi->DstReg.WriteMask & WRITEMASK_X)
282 emit(MOV(offset(dst, 0), fs_reg(1.0f)));
283
284 if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
285 fs_inst *inst;
286 emit(CMP(null, offset(src[0], 0), fs_reg(0.0f),
287 BRW_CONDITIONAL_LE));
288
289 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
290 emit(MOV(offset(dst, 1), offset(src[0], 0)));
291 inst = emit(MOV(offset(dst, 1), fs_reg(0.0f)));
292 inst->predicate = BRW_PREDICATE_NORMAL;
293 }
294
295 if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
296 emit_math(SHADER_OPCODE_POW, offset(dst, 2),
297 offset(src[0], 1), offset(src[0], 3));
298
299 inst = emit(MOV(offset(dst, 2), fs_reg(0.0f)));
300 inst->predicate = BRW_PREDICATE_NORMAL;
301 }
302 }
303
304 if (fpi->DstReg.WriteMask & WRITEMASK_W)
305 emit(MOV(offset(dst, 3), fs_reg(1.0f)));
306
307 break;
308
309 case OPCODE_LRP:
310 for (int i = 0; i < 4; i++) {
311 if (fpi->DstReg.WriteMask & (1 << i)) {
312 fs_reg a = offset(src[0], i);
313 fs_reg y = offset(src[1], i);
314 fs_reg x = offset(src[2], i);
315 emit_lrp(offset(dst, i), x, y, a);
316 }
317 }
318 break;
319
320 case OPCODE_MAD:
321 for (int i = 0; i < 4; i++) {
322 if (fpi->DstReg.WriteMask & (1 << i)) {
323 fs_reg temp = fs_reg(this, glsl_type::float_type);
324 emit(MUL(temp, offset(src[0], i), offset(src[1], i)));
325 emit(ADD(offset(dst, i), temp, offset(src[2], i)));
326 }
327 }
328 break;
329
330 case OPCODE_MAX:
331 emit_fp_minmax(fpi, dst, src[0], src[1]);
332 break;
333
334 case OPCODE_MOV:
335 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
336 break;
337
338 case OPCODE_MIN:
339 emit_fp_minmax(fpi, dst, src[0], src[1]);
340 break;
341
342 case OPCODE_MUL:
343 emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
344 break;
345
346 case OPCODE_POW: {
347 fs_reg temp = fs_reg(this, glsl_type::float_type);
348 emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
349 emit_fp_scalar_write(fpi, dst, temp);
350 break;
351 }
352
353 case OPCODE_RCP:
354 emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
355 break;
356
357 case OPCODE_RSQ:
358 emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
359 break;
360
361 case OPCODE_SCS:
362 if (fpi->DstReg.WriteMask & WRITEMASK_X) {
363 emit_math(SHADER_OPCODE_COS, offset(dst, 0),
364 offset(src[0], 0));
365 }
366
367 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
368 emit_math(SHADER_OPCODE_SIN, offset(dst, 1),
369 offset(src[0], 1));
370 }
371 break;
372
373 case OPCODE_SGE:
374 emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
375 break;
376
377 case OPCODE_SIN:
378 emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
379 break;
380
381 case OPCODE_SLT:
382 emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
383 break;
384
385 case OPCODE_SUB: {
386 fs_reg neg_src1 = src[1];
387 neg_src1.negate = !src[1].negate;
388
389 emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
390 break;
391 }
392
393 case OPCODE_TEX:
394 case OPCODE_TXB:
395 case OPCODE_TXP: {
396 /* We piggy-back on the GLSL IR support for texture setup. To do so,
397 * we have to cook up an ir_texture that has the coordinate field
398 * with appropriate type, and shadow_comparitor set or not. All the
399 * other properties of ir_texture are passed in as arguments to the
400 * emit_texture_gen* function.
401 */
402 ir_texture *ir = NULL;
403
404 fs_reg lod;
405 fs_reg dpdy;
406 fs_reg coordinate = src[0];
407 fs_reg shadow_c;
408 fs_reg sample_index;
409
410 switch (fpi->Opcode) {
411 case OPCODE_TEX:
412 ir = new(mem_ctx) ir_texture(ir_tex);
413 break;
414 case OPCODE_TXP: {
415 ir = new(mem_ctx) ir_texture(ir_tex);
416
417 coordinate = fs_reg(this, glsl_type::vec3_type);
418 fs_reg invproj = fs_reg(this, glsl_type::float_type);
419 emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3));
420 for (int i = 0; i < 3; i++) {
421 emit(MUL(offset(coordinate, i),
422 offset(src[0], i), invproj));
423 }
424 break;
425 }
426 case OPCODE_TXB:
427 ir = new(mem_ctx) ir_texture(ir_txb);
428 lod = offset(src[0], 3);
429 break;
430 default:
431 unreachable("not reached");
432 }
433
434 ir->type = glsl_type::vec4_type;
435
436 const glsl_type *coordinate_type;
437 switch (fpi->TexSrcTarget) {
438 case TEXTURE_1D_INDEX:
439 coordinate_type = glsl_type::float_type;
440 break;
441
442 case TEXTURE_2D_INDEX:
443 case TEXTURE_1D_ARRAY_INDEX:
444 case TEXTURE_RECT_INDEX:
445 case TEXTURE_EXTERNAL_INDEX:
446 coordinate_type = glsl_type::vec2_type;
447 break;
448
449 case TEXTURE_3D_INDEX:
450 case TEXTURE_2D_ARRAY_INDEX:
451 coordinate_type = glsl_type::vec3_type;
452 break;
453
454 case TEXTURE_CUBE_INDEX: {
455 coordinate_type = glsl_type::vec3_type;
456
457 fs_reg temp = fs_reg(this, glsl_type::float_type);
458 fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
459 fs_reg abscoord = coordinate;
460 abscoord.negate = false;
461 abscoord.abs = true;
462 emit_minmax(BRW_CONDITIONAL_GE, temp,
463 offset(abscoord, 0), offset(abscoord, 1));
464 emit_minmax(BRW_CONDITIONAL_GE, temp,
465 temp, offset(abscoord, 2));
466 emit_math(SHADER_OPCODE_RCP, temp, temp);
467 for (int i = 0; i < 3; i++) {
468 emit(MUL(offset(cubecoord, i),
469 offset(coordinate, i), temp));
470 }
471
472 coordinate = cubecoord;
473 break;
474 }
475
476 default:
477 unreachable("not reached");
478 }
479
480 ir_constant_data junk_data;
481 ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);
482
483 if (fpi->TexShadow) {
484 shadow_c = offset(coordinate, 2);
485 ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
486 }
487
488 coordinate = rescale_texcoord(ir, coordinate,
489 fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
490 fpi->TexSrcUnit, fpi->TexSrcUnit);
491
492 fs_inst *inst;
493 if (brw->gen >= 7) {
494 inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index, fs_reg(0u), fpi->TexSrcUnit);
495 } else if (brw->gen >= 5) {
496 inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index, fpi->TexSrcUnit);
497 } else {
498 inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy, fpi->TexSrcUnit);
499 }
500
501 inst->shadow_compare = fpi->TexShadow;
502
503 /* Reuse the GLSL swizzle_result() handler. */
504 swizzle_result(ir, dst, fpi->TexSrcUnit);
505 dst = this->result;
506
507 break;
508 }
509
510 case OPCODE_SWZ:
511 /* Note that SWZ's extended swizzles are handled in the general
512 * get_src_reg() code.
513 */
514 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
515 break;
516
517 case OPCODE_XPD:
518 for (int i = 0; i < 3; i++) {
519 if (fpi->DstReg.WriteMask & (1 << i)) {
520 int i1 = (i + 1) % 3;
521 int i2 = (i + 2) % 3;
522
523 fs_reg temp = fs_reg(this, glsl_type::float_type);
524 fs_reg neg_src1_1 = offset(src[1], i1);
525 neg_src1_1.negate = !neg_src1_1.negate;
526 emit(MUL(temp, offset(src[0], i2), neg_src1_1));
527 emit(MUL(offset(dst, i),
528 offset(src[0], i1), offset(src[1], i2)));
529 emit(ADD(offset(dst, i), offset(dst, i), temp));
530 }
531 }
532 break;
533
534 case OPCODE_END:
535 break;
536
537 default:
538 _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
539 _mesa_opcode_string(fpi->Opcode));
540 }
541
542 /* To handle saturates, we emit a MOV with a saturate bit, which
543 * optimization should fold into the preceding instructions when safe.
544 */
545 if (fpi->Opcode != OPCODE_END) {
546 fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
547
548 for (int i = 0; i < 4; i++) {
549 if (fpi->DstReg.WriteMask & (1 << i)) {
550 fs_inst *inst = emit(MOV(offset(real_dst, i),
551 offset(dst, i)));
552 inst->saturate = fpi->SaturateMode;
553 }
554 }
555 }
556 }
557
558 /* Epilogue:
559 *
560 * Fragment depth has this strange convention of being the .z component of
561 * a vec4. emit_fb_write() wants to see a float value, instead.
562 */
563 this->current_annotation = "result.depth write";
564 if (frag_depth.file != BAD_FILE) {
565 fs_reg temp = fs_reg(this, glsl_type::float_type);
566 emit(MOV(temp, offset(frag_depth, 2)));
567 frag_depth = temp;
568 }
569 }
570
571 void
572 fs_visitor::setup_fp_regs()
573 {
574 /* PROGRAM_TEMPORARY */
575 int num_temp = prog->NumTemporaries;
576 fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
577 for (int i = 0; i < num_temp; i++)
578 fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
579
580 /* PROGRAM_STATE_VAR etc. */
581 if (dispatch_width == 8) {
582 for (unsigned p = 0;
583 p < prog->Parameters->NumParameters; p++) {
584 for (unsigned int i = 0; i < 4; i++) {
585 stage_prog_data->param[uniforms++] =
586 &prog->Parameters->ParameterValues[p][i];
587 }
588 }
589 }
590
591 fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX);
592 for (int i = 0; i < VARYING_SLOT_MAX; i++) {
593 if (prog->InputsRead & BITFIELD64_BIT(i)) {
594 /* Make up a dummy instruction to reuse code for emitting
595 * interpolation.
596 */
597 ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
598 "fp_input",
599 ir_var_shader_in);
600 ir->data.location = i;
601
602 this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
603 i);
604
605 switch (i) {
606 case VARYING_SLOT_POS:
607 ir->data.pixel_center_integer = fp->PixelCenterInteger;
608 ir->data.origin_upper_left = fp->OriginUpperLeft;
609 fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
610 break;
611 case VARYING_SLOT_FACE:
612 fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
613 break;
614 default:
615 fp_input_regs[i] = *emit_general_interpolation(ir);
616
617 if (i == VARYING_SLOT_FOGC) {
618 emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f)));
619 emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f)));
620 emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f)));
621 }
622
623 break;
624 }
625
626 this->current_annotation = NULL;
627 }
628 }
629 }
630
631 fs_reg
632 fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
633 {
634 switch (dst->File) {
635 case PROGRAM_TEMPORARY:
636 return fp_temp_regs[dst->Index];
637
638 case PROGRAM_OUTPUT:
639 if (dst->Index == FRAG_RESULT_DEPTH) {
640 if (frag_depth.file == BAD_FILE)
641 frag_depth = fs_reg(this, glsl_type::vec4_type);
642 return frag_depth;
643 } else if (dst->Index == FRAG_RESULT_COLOR) {
644 if (outputs[0].file == BAD_FILE) {
645 outputs[0] = fs_reg(this, glsl_type::vec4_type);
646 output_components[0] = 4;
647
648 /* Tell emit_fb_writes() to smear fragment.color across all the
649 * color attachments.
650 */
651 for (int i = 1; i < key->nr_color_regions; i++) {
652 outputs[i] = outputs[0];
653 output_components[i] = output_components[0];
654 }
655 }
656 return outputs[0];
657 } else {
658 int output_index = dst->Index - FRAG_RESULT_DATA0;
659 if (outputs[output_index].file == BAD_FILE) {
660 outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
661 }
662 output_components[output_index] = 4;
663 return outputs[output_index];
664 }
665
666 case PROGRAM_UNDEFINED:
667 return fs_reg();
668
669 default:
670 _mesa_problem(ctx, "bad dst register file: %s\n",
671 _mesa_register_file_name((gl_register_file)dst->File));
672 return fs_reg(this, glsl_type::vec4_type);
673 }
674 }
675
676 fs_reg
677 fs_visitor::get_fp_src_reg(const prog_src_register *src)
678 {
679 struct gl_program_parameter_list *plist = prog->Parameters;
680
681 fs_reg result;
682
683 assert(!src->Abs);
684
685 switch (src->File) {
686 case PROGRAM_UNDEFINED:
687 return fs_reg();
688 case PROGRAM_TEMPORARY:
689 result = fp_temp_regs[src->Index];
690 break;
691
692 case PROGRAM_INPUT:
693 result = fp_input_regs[src->Index];
694 break;
695
696 case PROGRAM_STATE_VAR:
697 case PROGRAM_UNIFORM:
698 case PROGRAM_CONSTANT:
699 /* We actually want to look at the type in the Parameters list for this,
700 * because this lets us upload constant builtin uniforms, as actual
701 * constants.
702 */
703 switch (plist->Parameters[src->Index].Type) {
704 case PROGRAM_CONSTANT: {
705 result = fs_reg(this, glsl_type::vec4_type);
706
707 for (int i = 0; i < 4; i++) {
708 emit(MOV(offset(result, i),
709 fs_reg(plist->ParameterValues[src->Index][i].f)));
710 }
711 break;
712 }
713
714 case PROGRAM_STATE_VAR:
715 case PROGRAM_UNIFORM:
716 result = fs_reg(UNIFORM, src->Index * 4);
717 break;
718
719 default:
720 _mesa_problem(ctx, "bad uniform src register file: %s\n",
721 _mesa_register_file_name((gl_register_file)src->File));
722 return fs_reg(this, glsl_type::vec4_type);
723 }
724 break;
725
726 default:
727 _mesa_problem(ctx, "bad src register file: %s\n",
728 _mesa_register_file_name((gl_register_file)src->File));
729 return fs_reg(this, glsl_type::vec4_type);
730 }
731
732 if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
733 fs_reg unswizzled = result;
734 result = fs_reg(this, glsl_type::vec4_type);
735 for (int i = 0; i < 4; i++) {
736 bool negate = src->Negate & (1 << i);
737 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
738 * but it costs us nothing to support it.
739 */
740 int src_swiz = GET_SWZ(src->Swizzle, i);
741 if (src_swiz == SWIZZLE_ZERO) {
742 emit(MOV(offset(result, i), fs_reg(0.0f)));
743 } else if (src_swiz == SWIZZLE_ONE) {
744 emit(MOV(offset(result, i),
745 negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
746 } else {
747 fs_reg src = offset(unswizzled, src_swiz);
748 if (negate)
749 src.negate = !src.negate;
750 emit(MOV(offset(result, i), src));
751 }
752 }
753 }
754
755 return result;
756 }