i965: Move up duplicated fields from stage-specific prog_data to brw_stage_prog_data.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_fp.cpp
/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

/** @file brw_fs_fp.cpp
 *
 * Implementation of the compiler for GL_ARB_fragment_program shaders on top
 * of the GLSL compiler backend.
 */

#include "brw_context.h"
#include "brw_fs.h"

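/* Return a copy of \p reg advanced by \p i scalar components.  Fragment
 * program values are vec4s stored as four consecutive components of a
 * virtual GRF, so this is how individual channels are addressed below.
 */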
static fs_reg
regoffset(fs_reg reg, int i)
{
   reg.reg_offset += i;
   return reg;
}

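/* Emit a one-source ALU instruction for each channel enabled in the
 * destination write mask.
 */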
void
fs_visitor::emit_fp_alu1(enum opcode opcode,
                         const struct prog_instruction *fpi,
                         fs_reg dst, fs_reg src)
{
   for (int i = 0; i < 4; i++) {
      if (fpi->DstReg.WriteMask & (1 << i))
         emit(opcode, regoffset(dst, i), regoffset(src, i));
   }
}

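/* Emit a two-source ALU instruction for each channel enabled in the
 * destination write mask.
 */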
void
fs_visitor::emit_fp_alu2(enum opcode opcode,
                         const struct prog_instruction *fpi,
                         fs_reg dst, fs_reg src0, fs_reg src1)
{
   for (int i = 0; i < 4; i++) {
      if (fpi->DstReg.WriteMask & (1 << i))
         emit(opcode, regoffset(dst, i),
              regoffset(src0, i), regoffset(src1, i));
   }
}

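/* Implement OPCODE_MIN/OPCODE_MAX per enabled channel by handing the
 * appropriate conditional mod to the common emit_minmax() helper.
 */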
void
fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
                           fs_reg dst, fs_reg src0, fs_reg src1)
{
   uint32_t conditionalmod;
   if (fpi->Opcode == OPCODE_MIN)
      conditionalmod = BRW_CONDITIONAL_L;
   else
      conditionalmod = BRW_CONDITIONAL_GE;

   for (int i = 0; i < 4; i++) {
      if (fpi->DstReg.WriteMask & (1 << i)) {
         emit_minmax(conditionalmod, regoffset(dst, i),
                     regoffset(src0, i), regoffset(src1, i));
      }
   }
}

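/* Implement the "set on" opcodes (SGE, SLT): per enabled channel, compare
 * the two sources with the given conditional mod, then use the resulting
 * predicate to SEL between 1.0 (passed in \p one) and 0.0.
 */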
void
fs_visitor::emit_fp_sop(uint32_t conditional_mod,
                        const struct prog_instruction *fpi,
                        fs_reg dst, fs_reg src0, fs_reg src1,
                        fs_reg one)
{
   for (int i = 0; i < 4; i++) {
      if (fpi->DstReg.WriteMask & (1 << i)) {
         fs_inst *inst;

         emit(CMP(reg_null_d, regoffset(src0, i), regoffset(src1, i),
                  conditional_mod));

         inst = emit(BRW_OPCODE_SEL, regoffset(dst, i), one, fs_reg(0.0f));
         inst->predicate = BRW_PREDICATE_NORMAL;
      }
   }
}

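/* Replicate a scalar result into every channel enabled in the destination
 * write mask.
 */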
void
fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
                                 fs_reg dst, fs_reg src)
{
   for (int i = 0; i < 4; i++) {
      if (fpi->DstReg.WriteMask & (1 << i))
         emit(MOV(regoffset(dst, i), src));
   }
}

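/* Emit a scalar math instruction and replicate its result to the enabled
 * destination channels.
 */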
void
fs_visitor::emit_fp_scalar_math(enum opcode opcode,
                                const struct prog_instruction *fpi,
                                fs_reg dst, fs_reg src)
{
   fs_reg temp = fs_reg(this, glsl_type::float_type);
   emit_math(opcode, temp, src);
   emit_fp_scalar_write(fpi, dst, temp);
}

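/* Main entry point: translate each Mesa prog_instruction of the fragment
 * program into FS IR, then fix up the depth output in the epilogue.
 */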
void
fs_visitor::emit_fragment_program_code()
{
   setup_fp_regs();

   fs_reg null = fs_reg(brw_null_reg());

   /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
    * be:
    *
    * sel.f0 dst 1.0 0.0
    *
    * instead of
    *
    * mov dst 0.0
    * mov.f0 dst 1.0
    */
   fs_reg one = fs_reg(this, glsl_type::float_type);
   emit(MOV(one, fs_reg(1.0f)));

   for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
      const struct prog_instruction *fpi = &prog->Instructions[insn];
      base_ir = fpi;

      //_mesa_print_instruction(fpi);

      fs_reg dst;
      fs_reg src[3];

      /* We always emit into a temporary destination register to avoid
       * aliasing issues.
       */
      dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 3; i++)
         src[i] = get_fp_src_reg(&fpi->SrcReg[i]);

      switch (fpi->Opcode) {
      case OPCODE_ABS:
         src[0].abs = true;
         src[0].negate = false;
         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
         break;

      case OPCODE_ADD:
         emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
         break;

      case OPCODE_CMP:
         for (int i = 0; i < 4; i++) {
            if (fpi->DstReg.WriteMask & (1 << i)) {
               fs_inst *inst;

               emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
                        BRW_CONDITIONAL_L));

               inst = emit(BRW_OPCODE_SEL, regoffset(dst, i),
                           regoffset(src[1], i), regoffset(src[2], i));
               inst->predicate = BRW_PREDICATE_NORMAL;
            }
         }
         break;

      case OPCODE_COS:
         emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
         break;

      case OPCODE_DP2:
      case OPCODE_DP3:
      case OPCODE_DP4:
      case OPCODE_DPH: {
         fs_reg mul = fs_reg(this, glsl_type::float_type);
         fs_reg acc = fs_reg(this, glsl_type::float_type);
         int count;

         switch (fpi->Opcode) {
         case OPCODE_DP2: count = 2; break;
         case OPCODE_DP3: count = 3; break;
         case OPCODE_DP4: count = 4; break;
         case OPCODE_DPH: count = 3; break;
         default: assert(!"not reached"); count = 0; break;
         }

         emit(MUL(acc, regoffset(src[0], 0), regoffset(src[1], 0)));
         for (int i = 1; i < count; i++) {
            emit(MUL(mul, regoffset(src[0], i), regoffset(src[1], i)));
            emit(ADD(acc, acc, mul));
         }

         if (fpi->Opcode == OPCODE_DPH)
            emit(ADD(acc, acc, regoffset(src[1], 3)));

         emit_fp_scalar_write(fpi, dst, acc);
         break;
      }

      case OPCODE_DST:
         if (fpi->DstReg.WriteMask & WRITEMASK_X)
            emit(MOV(dst, fs_reg(1.0f)));
         if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
            emit(MUL(regoffset(dst, 1),
                     regoffset(src[0], 1), regoffset(src[1], 1)));
         }
         if (fpi->DstReg.WriteMask & WRITEMASK_Z)
            emit(MOV(regoffset(dst, 2), regoffset(src[0], 2)));
         if (fpi->DstReg.WriteMask & WRITEMASK_W)
            emit(MOV(regoffset(dst, 3), regoffset(src[1], 3)));
         break;

      case OPCODE_EX2:
         emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
         break;

      case OPCODE_FLR:
         emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
         break;

      case OPCODE_FRC:
         emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
         break;

      case OPCODE_KIL: {
         for (int i = 0; i < 4; i++) {
            /* In most cases the argument to a KIL will be something like
             * TEMP[0].wwww, so there's no point in checking whether .w is < 0
             * 4 times in a row.
             */
            if (i > 0 &&
                GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
                GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
                ((fpi->SrcReg[0].Negate >> i) & 1) ==
                ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
               continue;
            }

            /* Emit an instruction that's predicated on the current
             * undiscarded pixels, and updates just those pixels to be
             * turned off.
             */
            fs_inst *cmp = emit(CMP(null, regoffset(src[0], i), fs_reg(0.0f),
                                    BRW_CONDITIONAL_GE));
            cmp->predicate = BRW_PREDICATE_NORMAL;
            cmp->flag_subreg = 1;
         }
         break;
      }

      case OPCODE_LG2:
         emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
         break;

      case OPCODE_LIT:
         /* From the ARB_fragment_program spec:
          *
          *      tmp = VectorLoad(op0);
          *      if (tmp.x < 0) tmp.x = 0;
          *      if (tmp.y < 0) tmp.y = 0;
          *      if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
          *      else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
          *      result.x = 1.0;
          *      result.y = tmp.x;
          *      result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
          *      result.w = 1.0;
          *
          * Note that we don't do the clamping to +/- 128.  We didn't in
          * brw_wm_emit.c either.
          */
         if (fpi->DstReg.WriteMask & WRITEMASK_X)
            emit(MOV(regoffset(dst, 0), fs_reg(1.0f)));

         if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
            fs_inst *inst;
            emit(CMP(null, regoffset(src[0], 0), fs_reg(0.0f),
                     BRW_CONDITIONAL_LE));

            if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
               emit(MOV(regoffset(dst, 1), regoffset(src[0], 0)));
               inst = emit(MOV(regoffset(dst, 1), fs_reg(0.0f)));
               inst->predicate = BRW_PREDICATE_NORMAL;
            }

            if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
               emit_math(SHADER_OPCODE_POW, regoffset(dst, 2),
                         regoffset(src[0], 1), regoffset(src[0], 3));

               inst = emit(MOV(regoffset(dst, 2), fs_reg(0.0f)));
               inst->predicate = BRW_PREDICATE_NORMAL;
            }
         }

         if (fpi->DstReg.WriteMask & WRITEMASK_W)
            emit(MOV(regoffset(dst, 3), fs_reg(1.0f)));

         break;

      case OPCODE_LRP:
         for (int i = 0; i < 4; i++) {
            if (fpi->DstReg.WriteMask & (1 << i)) {
               fs_reg a = regoffset(src[0], i);
               fs_reg y = regoffset(src[1], i);
               fs_reg x = regoffset(src[2], i);
               emit_lrp(regoffset(dst, i), x, y, a);
            }
         }
         break;

      case OPCODE_MAD:
         for (int i = 0; i < 4; i++) {
            if (fpi->DstReg.WriteMask & (1 << i)) {
               fs_reg temp = fs_reg(this, glsl_type::float_type);
               emit(MUL(temp, regoffset(src[0], i), regoffset(src[1], i)));
               emit(ADD(regoffset(dst, i), temp, regoffset(src[2], i)));
            }
         }
         break;

      case OPCODE_MAX:
         emit_fp_minmax(fpi, dst, src[0], src[1]);
         break;

      case OPCODE_MOV:
         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
         break;

      case OPCODE_MIN:
         emit_fp_minmax(fpi, dst, src[0], src[1]);
         break;

      case OPCODE_MUL:
         emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
         break;

      case OPCODE_POW: {
         fs_reg temp = fs_reg(this, glsl_type::float_type);
         emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
         emit_fp_scalar_write(fpi, dst, temp);
         break;
      }

      case OPCODE_RCP:
         emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
         break;

      case OPCODE_RSQ:
         emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
         break;

      case OPCODE_SCS:
         if (fpi->DstReg.WriteMask & WRITEMASK_X) {
            emit_math(SHADER_OPCODE_COS, regoffset(dst, 0),
                      regoffset(src[0], 0));
         }

         if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
            emit_math(SHADER_OPCODE_SIN, regoffset(dst, 1),
                      regoffset(src[0], 1));
         }
         break;

      case OPCODE_SGE:
         emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
         break;

      case OPCODE_SIN:
         emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
         break;

      case OPCODE_SLT:
         emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
         break;

      case OPCODE_SUB: {
         fs_reg neg_src1 = src[1];
         neg_src1.negate = !src[1].negate;

         emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
         break;
      }

      case OPCODE_TEX:
      case OPCODE_TXB:
      case OPCODE_TXP: {
         /* We piggy-back on the GLSL IR support for texture setup.  To do so,
          * we have to cook up an ir_texture that has the coordinate field
          * with appropriate type, and shadow_comparitor set or not.  All the
          * other properties of ir_texture are passed in as arguments to the
          * emit_texture_gen* function.
          */
         ir_texture *ir = NULL;

         fs_reg lod;
         fs_reg dpdy;
         fs_reg coordinate = src[0];
         fs_reg shadow_c;
         fs_reg sample_index;

         switch (fpi->Opcode) {
         case OPCODE_TEX:
            ir = new(mem_ctx) ir_texture(ir_tex);
            break;
         case OPCODE_TXP: {
            ir = new(mem_ctx) ir_texture(ir_tex);

            coordinate = fs_reg(this, glsl_type::vec3_type);
            fs_reg invproj = fs_reg(this, glsl_type::float_type);
            emit_math(SHADER_OPCODE_RCP, invproj, regoffset(src[0], 3));
            for (int i = 0; i < 3; i++) {
               emit(MUL(regoffset(coordinate, i),
                        regoffset(src[0], i), invproj));
            }
            break;
         }
         case OPCODE_TXB:
            ir = new(mem_ctx) ir_texture(ir_txb);
            lod = regoffset(src[0], 3);
            break;
         default:
            assert(!"not reached");
            break;
         }

         ir->type = glsl_type::vec4_type;

         const glsl_type *coordinate_type;
         switch (fpi->TexSrcTarget) {
         case TEXTURE_1D_INDEX:
            coordinate_type = glsl_type::float_type;
            break;

         case TEXTURE_2D_INDEX:
         case TEXTURE_1D_ARRAY_INDEX:
         case TEXTURE_RECT_INDEX:
         case TEXTURE_EXTERNAL_INDEX:
            coordinate_type = glsl_type::vec2_type;
            break;

         case TEXTURE_3D_INDEX:
         case TEXTURE_2D_ARRAY_INDEX:
            coordinate_type = glsl_type::vec3_type;
            break;

         case TEXTURE_CUBE_INDEX: {
            coordinate_type = glsl_type::vec3_type;

            fs_reg temp = fs_reg(this, glsl_type::float_type);
            fs_reg cubecoord = fs_reg(this, glsl_type::vec3_type);
            fs_reg abscoord = coordinate;
            abscoord.negate = false;
            abscoord.abs = true;
            emit_minmax(BRW_CONDITIONAL_GE, temp,
                        regoffset(abscoord, 0), regoffset(abscoord, 1));
            emit_minmax(BRW_CONDITIONAL_GE, temp,
                        temp, regoffset(abscoord, 2));
            emit_math(SHADER_OPCODE_RCP, temp, temp);
            for (int i = 0; i < 3; i++) {
               emit(MUL(regoffset(cubecoord, i),
                        regoffset(coordinate, i), temp));
            }

            coordinate = cubecoord;
            break;
         }

         default:
            assert(!"not reached");
            coordinate_type = glsl_type::vec2_type;
            break;
         }

         ir_constant_data junk_data;
         ir->coordinate = new(mem_ctx) ir_constant(coordinate_type, &junk_data);

         if (fpi->TexShadow) {
            shadow_c = regoffset(coordinate, 2);
            ir->shadow_comparitor = new(mem_ctx) ir_constant(0.0f);
         }

         coordinate = rescale_texcoord(ir, coordinate,
                                       fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
                                       fpi->TexSrcUnit, fpi->TexSrcUnit);

         fs_inst *inst;
         if (brw->gen >= 7) {
            inst = emit_texture_gen7(ir, dst, coordinate, shadow_c, lod, dpdy,
                                     sample_index, fs_reg(0u), fpi->TexSrcUnit);
         } else if (brw->gen >= 5) {
            inst = emit_texture_gen5(ir, dst, coordinate, shadow_c, lod, dpdy,
                                     sample_index);
         } else {
            inst = emit_texture_gen4(ir, dst, coordinate, shadow_c, lod, dpdy);
         }

         inst->sampler = fpi->TexSrcUnit;
         inst->shadow_compare = fpi->TexShadow;

         /* Reuse the GLSL swizzle_result() handler. */
         swizzle_result(ir, dst, fpi->TexSrcUnit);
         dst = this->result;

         break;
      }

      case OPCODE_SWZ:
         /* Note that SWZ's extended swizzles are handled in the general
          * get_src_reg() code.
          */
         emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
         break;

      case OPCODE_XPD:
         for (int i = 0; i < 3; i++) {
            if (fpi->DstReg.WriteMask & (1 << i)) {
               int i1 = (i + 1) % 3;
               int i2 = (i + 2) % 3;

               fs_reg temp = fs_reg(this, glsl_type::float_type);
               fs_reg neg_src1_1 = regoffset(src[1], i1);
               neg_src1_1.negate = !neg_src1_1.negate;
               emit(MUL(temp, regoffset(src[0], i2), neg_src1_1));
               emit(MUL(regoffset(dst, i),
                        regoffset(src[0], i1), regoffset(src[1], i2)));
               emit(ADD(regoffset(dst, i), regoffset(dst, i), temp));
            }
         }
         break;

      case OPCODE_END:
         break;

      default:
         _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
                       _mesa_opcode_string(fpi->Opcode));
      }

      /* To handle saturates, we emit a MOV with a saturate bit, which
       * optimization should fold into the preceding instructions when safe.
       */
      if (fpi->Opcode != OPCODE_END) {
         fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);

         for (int i = 0; i < 4; i++) {
            if (fpi->DstReg.WriteMask & (1 << i)) {
               fs_inst *inst = emit(MOV(regoffset(real_dst, i),
                                        regoffset(dst, i)));
               inst->saturate = fpi->SaturateMode;
            }
         }
      }
   }

   /* Epilogue:
    *
    * Fragment depth has this strange convention of being the .z component of
    * a vec4.  emit_fb_write() wants to see a float value, instead.
    */
   this->current_annotation = "result.depth write";
   if (frag_depth.file != BAD_FILE) {
      fs_reg temp = fs_reg(this, glsl_type::float_type);
      emit(MOV(temp, regoffset(frag_depth, 2)));
      frag_depth = temp;
   }
}

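/* Allocate the virtual registers backing the fragment program's temporaries,
 * upload its parameters as uniforms, and emit interpolation code for each
 * input varying the program reads.
 */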
void
fs_visitor::setup_fp_regs()
{
   /* PROGRAM_TEMPORARY */
   int num_temp = prog->NumTemporaries;
   fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
   for (int i = 0; i < num_temp; i++)
      fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);

   /* PROGRAM_STATE_VAR etc. */
   if (dispatch_width == 8) {
      for (unsigned p = 0;
           p < prog->Parameters->NumParameters; p++) {
         for (unsigned int i = 0; i < 4; i++) {
            stage_prog_data->param[stage_prog_data->nr_params++] =
               &prog->Parameters->ParameterValues[p][i].f;
         }
      }
   }

   fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX);
   for (int i = 0; i < VARYING_SLOT_MAX; i++) {
      if (prog->InputsRead & BITFIELD64_BIT(i)) {
         /* Make up a dummy instruction to reuse code for emitting
          * interpolation.
          */
         ir_variable *ir = new(mem_ctx) ir_variable(glsl_type::vec4_type,
                                                    "fp_input",
                                                    ir_var_shader_in);
         ir->data.location = i;

         this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
                                                    i);

         switch (i) {
         case VARYING_SLOT_POS:
            ir->data.pixel_center_integer = fp->PixelCenterInteger;
            ir->data.origin_upper_left = fp->OriginUpperLeft;
            fp_input_regs[i] = *emit_fragcoord_interpolation(ir);
            break;
         case VARYING_SLOT_FACE:
            fp_input_regs[i] = *emit_frontfacing_interpolation(ir);
            break;
         default:
            fp_input_regs[i] = *emit_general_interpolation(ir);

            if (i == VARYING_SLOT_FOGC) {
               emit(MOV(regoffset(fp_input_regs[i], 1), fs_reg(0.0f)));
               emit(MOV(regoffset(fp_input_regs[i], 2), fs_reg(0.0f)));
               emit(MOV(regoffset(fp_input_regs[i], 3), fs_reg(1.0f)));
            }

            break;
         }

         this->current_annotation = NULL;
      }
   }
}

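/* Map a Mesa prog_dst_register onto an FS IR register, allocating the depth
 * and color output registers on first use.
 */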
fs_reg
fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
{
   switch (dst->File) {
   case PROGRAM_TEMPORARY:
      return fp_temp_regs[dst->Index];

   case PROGRAM_OUTPUT:
      if (dst->Index == FRAG_RESULT_DEPTH) {
         if (frag_depth.file == BAD_FILE)
            frag_depth = fs_reg(this, glsl_type::vec4_type);
         return frag_depth;
      } else if (dst->Index == FRAG_RESULT_COLOR) {
         if (outputs[0].file == BAD_FILE) {
            outputs[0] = fs_reg(this, glsl_type::vec4_type);
            output_components[0] = 4;

            /* Tell emit_fb_writes() to smear fragment.color across all the
             * color attachments.
             */
            for (int i = 1; i < c->key.nr_color_regions; i++) {
               outputs[i] = outputs[0];
               output_components[i] = output_components[0];
            }
         }
         return outputs[0];
      } else {
         int output_index = dst->Index - FRAG_RESULT_DATA0;
         if (outputs[output_index].file == BAD_FILE) {
            outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
         }
         output_components[output_index] = 4;
         return outputs[output_index];
      }

   case PROGRAM_UNDEFINED:
      return fs_reg();

   default:
      _mesa_problem(ctx, "bad dst register file: %s\n",
                    _mesa_register_file_name((gl_register_file)dst->File));
      return fs_reg(this, glsl_type::vec4_type);
   }
}

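/* Map a Mesa prog_src_register onto an FS IR register, loading immediate
 * constants and resolving swizzles and negates into explicit MOVs.
 */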
fs_reg
fs_visitor::get_fp_src_reg(const prog_src_register *src)
{
   struct gl_program_parameter_list *plist = prog->Parameters;

   fs_reg result;

   assert(!src->Abs);

   switch (src->File) {
   case PROGRAM_UNDEFINED:
      return fs_reg();
   case PROGRAM_TEMPORARY:
      result = fp_temp_regs[src->Index];
      break;

   case PROGRAM_INPUT:
      result = fp_input_regs[src->Index];
      break;

   case PROGRAM_STATE_VAR:
   case PROGRAM_UNIFORM:
   case PROGRAM_CONSTANT:
      /* We actually want to look at the type in the Parameters list for this,
       * because this lets us upload constant builtin uniforms, as actual
       * constants.
       */
      switch (plist->Parameters[src->Index].Type) {
      case PROGRAM_CONSTANT: {
         result = fs_reg(this, glsl_type::vec4_type);

         for (int i = 0; i < 4; i++) {
            emit(MOV(regoffset(result, i),
                     fs_reg(plist->ParameterValues[src->Index][i].f)));
         }
         break;
      }

      case PROGRAM_STATE_VAR:
      case PROGRAM_UNIFORM:
         result = fs_reg(UNIFORM, src->Index * 4);
         break;

      default:
         _mesa_problem(ctx, "bad uniform src register file: %s\n",
                       _mesa_register_file_name((gl_register_file)src->File));
         return fs_reg(this, glsl_type::vec4_type);
      }
      break;

   default:
      _mesa_problem(ctx, "bad src register file: %s\n",
                    _mesa_register_file_name((gl_register_file)src->File));
      return fs_reg(this, glsl_type::vec4_type);
   }

   if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
      fs_reg unswizzled = result;
      result = fs_reg(this, glsl_type::vec4_type);
      for (int i = 0; i < 4; i++) {
         bool negate = src->Negate & (1 << i);
         /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
          * but it costs us nothing to support it.
          */
         int src_swiz = GET_SWZ(src->Swizzle, i);
         if (src_swiz == SWIZZLE_ZERO) {
            emit(MOV(regoffset(result, i), fs_reg(0.0f)));
         } else if (src_swiz == SWIZZLE_ONE) {
            emit(MOV(regoffset(result, i),
                     negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
         } else {
            fs_reg src = regoffset(unswizzled, src_swiz);
            if (negate)
               src.negate = !src.negate;
            emit(MOV(regoffset(result, i), src));
         }
      }
   }

   return result;
}