i965: Assert array index on access to vec4_visitor's arrays.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_fp.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_fp.cpp
25 *
26 * Implementation of the compiler for GL_ARB_fragment_program shaders on top
27 * of the GLSL compiler backend.
28 */
29
30 #include "brw_context.h"
31 #include "brw_fs.h"
32
33 void
34 fs_visitor::emit_fp_alu1(enum opcode opcode,
35 const struct prog_instruction *fpi,
36 fs_reg dst, fs_reg src)
37 {
38 for (int i = 0; i < 4; i++) {
39 if (fpi->DstReg.WriteMask & (1 << i))
40 emit(opcode, offset(dst, i), offset(src, i));
41 }
42 }
43
44 void
45 fs_visitor::emit_fp_alu2(enum opcode opcode,
46 const struct prog_instruction *fpi,
47 fs_reg dst, fs_reg src0, fs_reg src1)
48 {
49 for (int i = 0; i < 4; i++) {
50 if (fpi->DstReg.WriteMask & (1 << i))
51 emit(opcode, offset(dst, i),
52 offset(src0, i), offset(src1, i));
53 }
54 }
55
56 void
57 fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
58 fs_reg dst, fs_reg src0, fs_reg src1)
59 {
60 uint32_t conditionalmod;
61 if (fpi->Opcode == OPCODE_MIN)
62 conditionalmod = BRW_CONDITIONAL_L;
63 else
64 conditionalmod = BRW_CONDITIONAL_GE;
65
66 for (int i = 0; i < 4; i++) {
67 if (fpi->DstReg.WriteMask & (1 << i)) {
68 emit_minmax(conditionalmod, offset(dst, i),
69 offset(src0, i), offset(src1, i));
70 }
71 }
72 }
73
74 void
75 fs_visitor::emit_fp_sop(uint32_t conditional_mod,
76 const struct prog_instruction *fpi,
77 fs_reg dst, fs_reg src0, fs_reg src1,
78 fs_reg one)
79 {
80 for (int i = 0; i < 4; i++) {
81 if (fpi->DstReg.WriteMask & (1 << i)) {
82 fs_inst *inst;
83
84 emit(CMP(reg_null_d, offset(src0, i), offset(src1, i),
85 conditional_mod));
86
87 inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f));
88 inst->predicate = BRW_PREDICATE_NORMAL;
89 }
90 }
91 }
92
93 void
94 fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
95 fs_reg dst, fs_reg src)
96 {
97 for (int i = 0; i < 4; i++) {
98 if (fpi->DstReg.WriteMask & (1 << i))
99 emit(MOV(offset(dst, i), src));
100 }
101 }
102
103 void
104 fs_visitor::emit_fp_scalar_math(enum opcode opcode,
105 const struct prog_instruction *fpi,
106 fs_reg dst, fs_reg src)
107 {
108 fs_reg temp = fs_reg(this, glsl_type::float_type);
109 emit_math(opcode, temp, src);
110 emit_fp_scalar_write(fpi, dst, temp);
111 }
112
113 void
114 fs_visitor::emit_fragment_program_code()
115 {
116 setup_fp_regs();
117
118 fs_reg null = fs_reg(brw_null_reg());
119
120 /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
121 * be:
122 *
123 * sel.f0 dst 1.0 0.0
124 *
125 * instead of
126 *
127 * mov dst 0.0
128 * mov.f0 dst 1.0
129 */
130 fs_reg one = fs_reg(this, glsl_type::float_type);
131 emit(MOV(one, fs_reg(1.0f)));
132
133 for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
134 const struct prog_instruction *fpi = &prog->Instructions[insn];
135 base_ir = fpi;
136
137 //_mesa_print_instruction(fpi);
138
139 fs_reg dst;
140 fs_reg src[3];
141
142 /* We always emit into a temporary destination register to avoid
143 * aliasing issues.
144 */
145 dst = fs_reg(this, glsl_type::vec4_type);
146
147 for (int i = 0; i < 3; i++)
148 src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
149
150 switch (fpi->Opcode) {
151 case OPCODE_ABS:
152 src[0].abs = true;
153 src[0].negate = false;
154 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
155 break;
156
157 case OPCODE_ADD:
158 emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
159 break;
160
161 case OPCODE_CMP:
162 for (int i = 0; i < 4; i++) {
163 if (fpi->DstReg.WriteMask & (1 << i)) {
164 fs_inst *inst;
165
166 emit(CMP(null, offset(src[0], i), fs_reg(0.0f),
167 BRW_CONDITIONAL_L));
168
169 inst = emit(BRW_OPCODE_SEL, offset(dst, i),
170 offset(src[1], i), offset(src[2], i));
171 inst->predicate = BRW_PREDICATE_NORMAL;
172 }
173 }
174 break;
175
176 case OPCODE_COS:
177 emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
178 break;
179
180 case OPCODE_DP2:
181 case OPCODE_DP3:
182 case OPCODE_DP4:
183 case OPCODE_DPH: {
184 fs_reg mul = fs_reg(this, glsl_type::float_type);
185 fs_reg acc = fs_reg(this, glsl_type::float_type);
186 int count;
187
188 switch (fpi->Opcode) {
189 case OPCODE_DP2: count = 2; break;
190 case OPCODE_DP3: count = 3; break;
191 case OPCODE_DP4: count = 4; break;
192 case OPCODE_DPH: count = 3; break;
193 default: assert(!"not reached"); count = 0; break;
194 }
195
196 emit(MUL(acc, offset(src[0], 0), offset(src[1], 0)));
197 for (int i = 1; i < count; i++) {
198 emit(MUL(mul, offset(src[0], i), offset(src[1], i)));
199 emit(ADD(acc, acc, mul));
200 }
201
202 if (fpi->Opcode == OPCODE_DPH)
203 emit(ADD(acc, acc, offset(src[1], 3)));
204
205 emit_fp_scalar_write(fpi, dst, acc);
206 break;
207 }
208
209 case OPCODE_DST:
210 if (fpi->DstReg.WriteMask & WRITEMASK_X)
211 emit(MOV(dst, fs_reg(1.0f)));
212 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
213 emit(MUL(offset(dst, 1),
214 offset(src[0], 1), offset(src[1], 1)));
215 }
216 if (fpi->DstReg.WriteMask & WRITEMASK_Z)
217 emit(MOV(offset(dst, 2), offset(src[0], 2)));
218 if (fpi->DstReg.WriteMask & WRITEMASK_W)
219 emit(MOV(offset(dst, 3), offset(src[1], 3)));
220 break;
221
222 case OPCODE_EX2:
223 emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
224 break;
225
226 case OPCODE_FLR:
227 emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
228 break;
229
230 case OPCODE_FRC:
231 emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
232 break;
233
234 case OPCODE_KIL: {
235 for (int i = 0; i < 4; i++) {
236 /* In most cases the argument to a KIL will be something like
237 * TEMP[0].wwww, so there's no point in checking whether .w is < 0
238 * 4 times in a row.
239 */
240 if (i > 0 &&
241 GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
242 GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
243 ((fpi->SrcReg[0].Negate >> i) & 1) ==
244 ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
245 continue;
246 }
247
248
249 /* Emit an instruction that's predicated on the current
250 * undiscarded pixels, and updates just those pixels to be
251 * turned off.
252 */
253 fs_inst *cmp = emit(CMP(null, offset(src[0], i), fs_reg(0.0f),
254 BRW_CONDITIONAL_GE));
255 cmp->predicate = BRW_PREDICATE_NORMAL;
256 cmp->flag_subreg = 1;
257 }
258 break;
259 }
260
261 case OPCODE_LG2:
262 emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
263 break;
264
265 case OPCODE_LIT:
266 /* From the ARB_fragment_program spec:
267 *
268 * tmp = VectorLoad(op0);
269 * if (tmp.x < 0) tmp.x = 0;
270 * if (tmp.y < 0) tmp.y = 0;
271 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
272 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
273 * result.x = 1.0;
274 * result.y = tmp.x;
275 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
276 * result.w = 1.0;
277 *
278 * Note that we don't do the clamping to +/- 128. We didn't in
279 * brw_wm_emit.c either.
280 */
281 if (fpi->DstReg.WriteMask & WRITEMASK_X)
282 emit(MOV(offset(dst, 0), fs_reg(1.0f)));
283
284 if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
285 fs_inst *inst;
286 emit(CMP(null, offset(src[0], 0), fs_reg(0.0f),
287 BRW_CONDITIONAL_LE));
288
289 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
290 emit(MOV(offset(dst, 1), offset(src[0], 0)));
291 inst = emit(MOV(offset(dst, 1), fs_reg(0.0f)));
292 inst->predicate = BRW_PREDICATE_NORMAL;
293 }
294
295 if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
296 emit_math(SHADER_OPCODE_POW, offset(dst, 2),
297 offset(src[0], 1), offset(src[0], 3));
298
299 inst = emit(MOV(offset(dst, 2), fs_reg(0.0f)));
300 inst->predicate = BRW_PREDICATE_NORMAL;
301 }
302 }
303
304 if (fpi->DstReg.WriteMask & WRITEMASK_W)
305 emit(MOV(offset(dst, 3), fs_reg(1.0f)));
306
307 break;
308
309 case OPCODE_LRP:
310 for (int i = 0; i < 4; i++) {
311 if (fpi->DstReg.WriteMask & (1 << i)) {
312 fs_reg a = offset(src[0], i);
313 fs_reg y = offset(src[1], i);
314 fs_reg x = offset(src[2], i);
315 emit_lrp(offset(dst, i), x, y, a);
316 }
317 }
318 break;
319
320 case OPCODE_MAD:
321 for (int i = 0; i < 4; i++) {
322 if (fpi->DstReg.WriteMask & (1 << i)) {
323 fs_reg temp = fs_reg(this, glsl_type::float_type);
324 emit(MUL(temp, offset(src[0], i), offset(src[1], i)));
325 emit(ADD(offset(dst, i), temp, offset(src[2], i)));
326 }
327 }
328 break;
329
330 case OPCODE_MAX:
331 emit_fp_minmax(fpi, dst, src[0], src[1]);
332 break;
333
334 case OPCODE_MOV:
335 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
336 break;
337
338 case OPCODE_MIN:
339 emit_fp_minmax(fpi, dst, src[0], src[1]);
340 break;
341
342 case OPCODE_MUL:
343 emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
344 break;
345
346 case OPCODE_POW: {
347 fs_reg temp = fs_reg(this, glsl_type::float_type);
348 emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
349 emit_fp_scalar_write(fpi, dst, temp);
350 break;
351 }
352
353 case OPCODE_RCP:
354 emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
355 break;
356
357 case OPCODE_RSQ:
358 emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
359 break;
360
361 case OPCODE_SCS:
362 if (fpi->DstReg.WriteMask & WRITEMASK_X) {
363 emit_math(SHADER_OPCODE_COS, offset(dst, 0),
364 offset(src[0], 0));
365 }
366
367 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
368 emit_math(SHADER_OPCODE_SIN, offset(dst, 1),
369 offset(src[0], 1));
370 }
371 break;
372
373 case OPCODE_SGE:
374 emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
375 break;
376
377 case OPCODE_SIN:
378 emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
379 break;
380
381 case OPCODE_SLT:
382 emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
383 break;
384
385 case OPCODE_SUB: {
386 fs_reg neg_src1 = src[1];
387 neg_src1.negate = !src[1].negate;
388
389 emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
390 break;
391 }
392
393 case OPCODE_TEX:
394 case OPCODE_TXB:
395 case OPCODE_TXP: {
396 /* We piggy-back on the GLSL IR support for texture setup. To do so,
397 * we have to cook up an ir_texture that has the coordinate field
398 * with appropriate type, and shadow_comparitor set or not. All the
399 * other properties of ir_texture are passed in as arguments to the
400 * emit_texture_gen* function.
401 */
402 ir_texture *ir = NULL;
403
404 fs_reg lod;
405 fs_reg dpdy;
406 fs_reg coordinate = src[0];
407 fs_reg shadow_c;
408 fs_reg sample_index;
409
410 switch (fpi->Opcode) {
411 case OPCODE_TEX:
412 ir = new(mem_ctx) ir_texture(ir_tex);
413 break;
414 case OPCODE_TXP: {
415 ir = new(mem_ctx) ir_texture(ir_tex);
416
417 coordinate = fs_reg(this, glsl_type::vec3_type);
418 fs_reg invproj = fs_reg(this, glsl_type::float_type);
419 emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3));
420 for (int i = 0; i < 3; i++) {
421 emit(MUL(offset(coordinate, i),
422 offset(src[0], i), invproj));
423 }
424 break;
425 }
426 case OPCODE_TXB:
427 ir = new(mem_ctx) ir_texture(ir_txb);
428 lod = offset(src[0], 3);
429 break;
430 default:
431 assert(!"not reached");
432 break;
433 }
434
435 ir->type = glsl_type::vec4_type;
436
437 const glsl_type *coordinate_type;
438 switch (fpi->TexSrcTarget) {
439 case TEXTURE_1D_INDEX:
440 coordinate_type = glsl_type::float_type;
441 break;
442
443 case TEXTURE_2D_INDEX:
444 case TEXTURE_1D_ARRAY_INDEX:
445 case TEXTURE_RECT_INDEX:
446 case TEXTURE_EXTERNAL_INDEX:
447 coordinate_type = glsl_type::vec2_type;
448 break;
449
450 case TEXTURE_3D_INDEX:
451 case TEXTURE_2D_ARRAY_INDEX:
452 coordinate_type = glsl_type::vec3_type;
453 break;
454
455 case TEXTURE_CUBE_INDEX: {
456 coordinate_type = glsl_type::vec3_type;
457
458 fs_reg temp = fs_reg(this, glsl_type::float_type);
459 fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
460 fs_reg abscoord = coordinate;
461 abscoord.negate = false;
462 abscoord.abs = true;
463 emit_minmax(BRW_CONDITIONAL_GE, temp,
464 offset(abscoord, 0), offset(abscoord, 1));
465 emit_minmax(BRW_CONDITIONAL_GE, temp,
466 temp, offset(abscoord, 2));
467 emit_math(SHADER_OPCODE_RCP, temp, temp);
468 for (int i = 0; i < 3; i++) {
469 emit(MUL(offset(cubecoord, i),
470 offset(coordinate, i), temp));
471 }
472
473 coordinate = cubecoord;
474 break;
475 }
476
477 default:
478 assert(!"not reached");
479 coordinate_type = glsl_type::vec2_type;
480 break;
481 }
482
483 ir_constant_data junk_data;
484 ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);
485
486 if (fpi->TexShadow) {
487 shadow_c = offset(coordinate, 2);
488 ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
489 }
490
491 coordinate = rescale_texcoord(ir, coordinate,
492 fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
493 fpi->TexSrcUnit, fpi->TexSrcUnit);
494
495 fs_inst *inst;
496 if (brw->gen >= 7) {
497 inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index, fs_reg(0u), fpi->TexSrcUnit);
498 } else if (brw->gen >= 5) {
499 inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy, sample_index);
500 } else {
501 inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
502 }
503
504 inst->sampler = fpi->TexSrcUnit;
505 inst->shadow_compare = fpi->TexShadow;
506
507 /* Reuse the GLSL swizzle_result() handler. */
508 swizzle_result(ir, dst, fpi->TexSrcUnit);
509 dst = this->result;
510
511 break;
512 }
513
514 case OPCODE_SWZ:
515 /* Note that SWZ's extended swizzles are handled in the general
516 * get_src_reg() code.
517 */
518 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
519 break;
520
521 case OPCODE_XPD:
522 for (int i = 0; i < 3; i++) {
523 if (fpi->DstReg.WriteMask & (1 << i)) {
524 int i1 = (i + 1) % 3;
525 int i2 = (i + 2) % 3;
526
527 fs_reg temp = fs_reg(this, glsl_type::float_type);
528 fs_reg neg_src1_1 = offset(src[1], i1);
529 neg_src1_1.negate = !neg_src1_1.negate;
530 emit(MUL(temp, offset(src[0], i2), neg_src1_1));
531 emit(MUL(offset(dst, i),
532 offset(src[0], i1), offset(src[1], i2)));
533 emit(ADD(offset(dst, i), offset(dst, i), temp));
534 }
535 }
536 break;
537
538 case OPCODE_END:
539 break;
540
541 default:
542 _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
543 _mesa_opcode_string(fpi->Opcode));
544 }
545
546 /* To handle saturates, we emit a MOV with a saturate bit, which
547 * optimization should fold into the preceding instructions when safe.
548 */
549 if (fpi->Opcode != OPCODE_END) {
550 fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
551
552 for (int i = 0; i < 4; i++) {
553 if (fpi->DstReg.WriteMask & (1 << i)) {
554 fs_inst *inst = emit(MOV(offset(real_dst, i),
555 offset(dst, i)));
556 inst->saturate = fpi->SaturateMode;
557 }
558 }
559 }
560 }
561
562 /* Epilogue:
563 *
564 * Fragment depth has this strange convention of being the .z component of
565 * a vec4. emit_fb_write() wants to see a float value, instead.
566 */
567 this->current_annotation = "result.depth write";
568 if (frag_depth.file != BAD_FILE) {
569 fs_reg temp = fs_reg(this, glsl_type::float_type);
570 emit(MOV(temp, offset(frag_depth, 2)));
571 frag_depth = temp;
572 }
573 }
574
575 void
576 fs_visitor::setup_fp_regs()
577 {
578 /* PROGRAM_TEMPORARY */
579 int num_temp = prog->NumTemporaries;
580 fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
581 for (int i = 0; i < num_temp; i++)
582 fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);
583
584 /* PROGRAM_STATE_VAR etc. */
585 if (dispatch_width == 8) {
586 for (unsigned p = 0;
587 p < prog->Parameters->NumParameters; p++) {
588 for (unsigned int i = 0; i < 4; i++) {
589 stage_prog_data->param[uniforms++] =
590 &prog->Parameters->ParameterValues[p][i].f;
591 }
592 }
593 }
594
595 fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX);
596 for (int i = 0; i < VARYING_SLOT_MAX; i++) {
597 if (prog->InputsRead & BITFIELD64_BIT(i)) {
598 /* Make up a dummy instruction to reuse code for emitting
599 * interpolation.
600 */
601 ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
602 "fp_input",
603 ir_var_shader_in);
604 ir->data.location = i;
605
606 this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
607 i);
608
609 switch (i) {
610 case VARYING_SLOT_POS:
611 ir->data.pixel_center_integer = fp->PixelCenterInteger;
612 ir->data.origin_upper_left = fp->OriginUpperLeft;
613 fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
614 break;
615 case VARYING_SLOT_FACE:
616 fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
617 break;
618 default:
619 fp_input_regs[i] = *emit_general_interpolation(ir);
620
621 if (i == VARYING_SLOT_FOGC) {
622 emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f)));
623 emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f)));
624 emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f)));
625 }
626
627 break;
628 }
629
630 this->current_annotation = NULL;
631 }
632 }
633 }
634
635 fs_reg
636 fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
637 {
638 switch (dst->File) {
639 case PROGRAM_TEMPORARY:
640 return fp_temp_regs[dst->Index];
641
642 case PROGRAM_OUTPUT:
643 if (dst->Index == FRAG_RESULT_DEPTH) {
644 if (frag_depth.file == BAD_FILE)
645 frag_depth = fs_reg(this, glsl_type::vec4_type);
646 return frag_depth;
647 } else if (dst->Index == FRAG_RESULT_COLOR) {
648 if (outputs[0].file == BAD_FILE) {
649 outputs[0] = fs_reg(this, glsl_type::vec4_type);
650 output_components[0] = 4;
651
652 /* Tell emit_fb_writes() to smear fragment.color across all the
653 * color attachments.
654 */
655 for (int i = 1; i < c->key.nr_color_regions; i++) {
656 outputs[i] = outputs[0];
657 output_components[i] = output_components[0];
658 }
659 }
660 return outputs[0];
661 } else {
662 int output_index = dst->Index - FRAG_RESULT_DATA0;
663 if (outputs[output_index].file == BAD_FILE) {
664 outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
665 }
666 output_components[output_index] = 4;
667 return outputs[output_index];
668 }
669
670 case PROGRAM_UNDEFINED:
671 return fs_reg();
672
673 default:
674 _mesa_problem(ctx, "bad dst register file: %s\n",
675 _mesa_register_file_name((gl_register_file)dst->File));
676 return fs_reg(this, glsl_type::vec4_type);
677 }
678 }
679
680 fs_reg
681 fs_visitor::get_fp_src_reg(const prog_src_register *src)
682 {
683 struct gl_program_parameter_list *plist = prog->Parameters;
684
685 fs_reg result;
686
687 assert(!src->Abs);
688
689 switch (src->File) {
690 case PROGRAM_UNDEFINED:
691 return fs_reg();
692 case PROGRAM_TEMPORARY:
693 result = fp_temp_regs[src->Index];
694 break;
695
696 case PROGRAM_INPUT:
697 result = fp_input_regs[src->Index];
698 break;
699
700 case PROGRAM_STATE_VAR:
701 case PROGRAM_UNIFORM:
702 case PROGRAM_CONSTANT:
703 /* We actually want to look at the type in the Parameters list for this,
704 * because this lets us upload constant builtin uniforms, as actual
705 * constants.
706 */
707 switch (plist->Parameters[src->Index].Type) {
708 case PROGRAM_CONSTANT: {
709 result = fs_reg(this, glsl_type::vec4_type);
710
711 for (int i = 0; i < 4; i++) {
712 emit(MOV(offset(result, i),
713 fs_reg(plist->ParameterValues[src->Index][i].f)));
714 }
715 break;
716 }
717
718 case PROGRAM_STATE_VAR:
719 case PROGRAM_UNIFORM:
720 result = fs_reg(UNIFORM, src->Index * 4);
721 break;
722
723 default:
724 _mesa_problem(ctx, "bad uniform src register file: %s\n",
725 _mesa_register_file_name((gl_register_file)src->File));
726 return fs_reg(this, glsl_type::vec4_type);
727 }
728 break;
729
730 default:
731 _mesa_problem(ctx, "bad src register file: %s\n",
732 _mesa_register_file_name((gl_register_file)src->File));
733 return fs_reg(this, glsl_type::vec4_type);
734 }
735
736 if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
737 fs_reg unswizzled = result;
738 result = fs_reg(this, glsl_type::vec4_type);
739 for (int i = 0; i < 4; i++) {
740 bool negate = src->Negate & (1 << i);
741 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
742 * but it costs us nothing to support it.
743 */
744 int src_swiz = GET_SWZ(src->Swizzle, i);
745 if (src_swiz == SWIZZLE_ZERO) {
746 emit(MOV(offset(result, i), fs_reg(0.0f)));
747 } else if (src_swiz == SWIZZLE_ONE) {
748 emit(MOV(offset(result, i),
749 negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
750 } else {
751 fs_reg src = offset(unswizzled, src_swiz);
752 if (negate)
753 src.negate = !src.negate;
754 emit(MOV(offset(result, i), src));
755 }
756 }
757 }
758
759 return result;
760 }