i965: Enable L3 caching of buffer surfaces.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_fp.cpp
1 /*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_fp.cpp
25 *
26 * Implementation of the compiler for GL_ARB_fragment_program shaders on top
27 * of the GLSL compiler backend.
28 */
29
30 #include "brw_context.h"
31 #include "brw_fs.h"
32
33 void
34 fs_visitor::emit_fp_alu1(enum opcode opcode,
35 const struct prog_instruction *fpi,
36 fs_reg dst, fs_reg src)
37 {
38 for (int i = 0; i < 4; i++) {
39 if (fpi->DstReg.WriteMask & (1 << i))
40 emit(opcode, offset(dst, i), offset(src, i));
41 }
42 }
43
44 void
45 fs_visitor::emit_fp_alu2(enum opcode opcode,
46 const struct prog_instruction *fpi,
47 fs_reg dst, fs_reg src0, fs_reg src1)
48 {
49 for (int i = 0; i < 4; i++) {
50 if (fpi->DstReg.WriteMask & (1 << i))
51 emit(opcode, offset(dst, i),
52 offset(src0, i), offset(src1, i));
53 }
54 }
55
56 void
57 fs_visitor::emit_fp_minmax(const prog_instruction *fpi,
58 fs_reg dst, fs_reg src0, fs_reg src1)
59 {
60 enum brw_conditional_mod conditionalmod;
61 if (fpi->Opcode == OPCODE_MIN)
62 conditionalmod = BRW_CONDITIONAL_L;
63 else
64 conditionalmod = BRW_CONDITIONAL_GE;
65
66 for (int i = 0; i < 4; i++) {
67 if (fpi->DstReg.WriteMask & (1 << i)) {
68 emit_minmax(conditionalmod, offset(dst, i),
69 offset(src0, i), offset(src1, i));
70 }
71 }
72 }
73
74 void
75 fs_visitor::emit_fp_sop(enum brw_conditional_mod conditional_mod,
76 const struct prog_instruction *fpi,
77 fs_reg dst, fs_reg src0, fs_reg src1,
78 fs_reg one)
79 {
80 for (int i = 0; i < 4; i++) {
81 if (fpi->DstReg.WriteMask & (1 << i)) {
82 fs_inst *inst;
83
84 emit(CMP(reg_null_d, offset(src0, i), offset(src1, i),
85 conditional_mod));
86
87 inst = emit(BRW_OPCODE_SEL, offset(dst, i), one, fs_reg(0.0f));
88 inst->predicate = BRW_PREDICATE_NORMAL;
89 }
90 }
91 }
92
93 void
94 fs_visitor::emit_fp_scalar_write(const struct prog_instruction *fpi,
95 fs_reg dst, fs_reg src)
96 {
97 for (int i = 0; i < 4; i++) {
98 if (fpi->DstReg.WriteMask & (1 << i))
99 emit(MOV(offset(dst, i), src));
100 }
101 }
102
103 void
104 fs_visitor::emit_fp_scalar_math(enum opcode opcode,
105 const struct prog_instruction *fpi,
106 fs_reg dst, fs_reg src)
107 {
108 fs_reg temp = vgrf(glsl_type::float_type);
109 emit_math(opcode, temp, src);
110 emit_fp_scalar_write(fpi, dst, temp);
111 }
112
113 void
114 fs_visitor::emit_fragment_program_code()
115 {
116 setup_fp_regs();
117
118 /* Keep a reg with 1.0 around, for reuse by emit_fp_sop so that it can just
119 * be:
120 *
121 * sel.f0 dst 1.0 0.0
122 *
123 * instead of
124 *
125 * mov dst 0.0
126 * mov.f0 dst 1.0
127 */
128 fs_reg one = vgrf(glsl_type::float_type);
129 emit(MOV(one, fs_reg(1.0f)));
130
131 for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
132 const struct prog_instruction *fpi = &prog->Instructions[insn];
133 base_ir = fpi;
134
135 fs_reg dst;
136 fs_reg src[3];
137
138 /* We always emit into a temporary destination register to avoid
139 * aliasing issues.
140 */
141 dst = vgrf(glsl_type::vec4_type);
142
143 for (int i = 0; i < 3; i++)
144 src[i] = get_fp_src_reg(&fpi->SrcReg[i]);
145
146 switch (fpi->Opcode) {
147 case OPCODE_ABS:
148 src[0].abs = true;
149 src[0].negate = false;
150 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
151 break;
152
153 case OPCODE_ADD:
154 emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], src[1]);
155 break;
156
157 case OPCODE_CMP:
158 for (int i = 0; i < 4; i++) {
159 if (fpi->DstReg.WriteMask & (1 << i)) {
160 fs_inst *inst;
161
162 emit(CMP(reg_null_f, offset(src[0], i), fs_reg(0.0f),
163 BRW_CONDITIONAL_L));
164
165 inst = emit(BRW_OPCODE_SEL, offset(dst, i),
166 offset(src[1], i), offset(src[2], i));
167 inst->predicate = BRW_PREDICATE_NORMAL;
168 }
169 }
170 break;
171
172 case OPCODE_COS:
173 emit_fp_scalar_math(SHADER_OPCODE_COS, fpi, dst, src[0]);
174 break;
175
176 case OPCODE_DP2:
177 case OPCODE_DP3:
178 case OPCODE_DP4:
179 case OPCODE_DPH: {
180 fs_reg mul = vgrf(glsl_type::float_type);
181 fs_reg acc = vgrf(glsl_type::float_type);
182 int count;
183
184 switch (fpi->Opcode) {
185 case OPCODE_DP2: count = 2; break;
186 case OPCODE_DP3: count = 3; break;
187 case OPCODE_DP4: count = 4; break;
188 case OPCODE_DPH: count = 3; break;
189 default: unreachable("not reached");
190 }
191
192 emit(MUL(acc, offset(src[0], 0), offset(src[1], 0)));
193 for (int i = 1; i < count; i++) {
194 emit(MUL(mul, offset(src[0], i), offset(src[1], i)));
195 emit(ADD(acc, acc, mul));
196 }
197
198 if (fpi->Opcode == OPCODE_DPH)
199 emit(ADD(acc, acc, offset(src[1], 3)));
200
201 emit_fp_scalar_write(fpi, dst, acc);
202 break;
203 }
204
205 case OPCODE_DST:
206 if (fpi->DstReg.WriteMask & WRITEMASK_X)
207 emit(MOV(dst, fs_reg(1.0f)));
208 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
209 emit(MUL(offset(dst, 1),
210 offset(src[0], 1), offset(src[1], 1)));
211 }
212 if (fpi->DstReg.WriteMask & WRITEMASK_Z)
213 emit(MOV(offset(dst, 2), offset(src[0], 2)));
214 if (fpi->DstReg.WriteMask & WRITEMASK_W)
215 emit(MOV(offset(dst, 3), offset(src[1], 3)));
216 break;
217
218 case OPCODE_EX2:
219 emit_fp_scalar_math(SHADER_OPCODE_EXP2, fpi, dst, src[0]);
220 break;
221
222 case OPCODE_FLR:
223 emit_fp_alu1(BRW_OPCODE_RNDD, fpi, dst, src[0]);
224 break;
225
226 case OPCODE_FRC:
227 emit_fp_alu1(BRW_OPCODE_FRC, fpi, dst, src[0]);
228 break;
229
230 case OPCODE_KIL: {
231 for (int i = 0; i < 4; i++) {
232 /* In most cases the argument to a KIL will be something like
233 * TEMP[0].wwww, so there's no point in checking whether .w is < 0
234 * 4 times in a row.
235 */
236 if (i > 0 &&
237 GET_SWZ(fpi->SrcReg[0].Swizzle, i) ==
238 GET_SWZ(fpi->SrcReg[0].Swizzle, i - 1) &&
239 ((fpi->SrcReg[0].Negate >> i) & 1) ==
240 ((fpi->SrcReg[0].Negate >> (i - 1)) & 1)) {
241 continue;
242 }
243
244
245 /* Emit an instruction that's predicated on the current
246 * undiscarded pixels, and updates just those pixels to be
247 * turned off.
248 */
249 fs_inst *cmp = emit(CMP(reg_null_f, offset(src[0], i),
250 fs_reg(0.0f), BRW_CONDITIONAL_GE));
251 cmp->predicate = BRW_PREDICATE_NORMAL;
252 cmp->flag_subreg = 1;
253 }
254 break;
255 }
256
257 case OPCODE_LG2:
258 emit_fp_scalar_math(SHADER_OPCODE_LOG2, fpi, dst, src[0]);
259 break;
260
261 case OPCODE_LIT:
262 /* From the ARB_fragment_program spec:
263 *
264 * tmp = VectorLoad(op0);
265 * if (tmp.x < 0) tmp.x = 0;
266 * if (tmp.y < 0) tmp.y = 0;
267 * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
268 * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
269 * result.x = 1.0;
270 * result.y = tmp.x;
271 * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
272 * result.w = 1.0;
273 *
274 * Note that we don't do the clamping to +/- 128. We didn't in
275 * brw_wm_emit.c either.
276 */
277 if (fpi->DstReg.WriteMask & WRITEMASK_X)
278 emit(MOV(offset(dst, 0), fs_reg(1.0f)));
279
280 if (fpi->DstReg.WriteMask & WRITEMASK_YZ) {
281 fs_inst *inst;
282 emit(CMP(reg_null_f, offset(src[0], 0), fs_reg(0.0f),
283 BRW_CONDITIONAL_LE));
284
285 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
286 emit(MOV(offset(dst, 1), offset(src[0], 0)));
287 inst = emit(MOV(offset(dst, 1), fs_reg(0.0f)));
288 inst->predicate = BRW_PREDICATE_NORMAL;
289 }
290
291 if (fpi->DstReg.WriteMask & WRITEMASK_Z) {
292 emit_math(SHADER_OPCODE_POW, offset(dst, 2),
293 offset(src[0], 1), offset(src[0], 3));
294
295 inst = emit(MOV(offset(dst, 2), fs_reg(0.0f)));
296 inst->predicate = BRW_PREDICATE_NORMAL;
297 }
298 }
299
300 if (fpi->DstReg.WriteMask & WRITEMASK_W)
301 emit(MOV(offset(dst, 3), fs_reg(1.0f)));
302
303 break;
304
305 case OPCODE_LRP:
306 for (int i = 0; i < 4; i++) {
307 if (fpi->DstReg.WriteMask & (1 << i)) {
308 fs_reg a = offset(src[0], i);
309 fs_reg y = offset(src[1], i);
310 fs_reg x = offset(src[2], i);
311 emit_lrp(offset(dst, i), x, y, a);
312 }
313 }
314 break;
315
316 case OPCODE_MAD:
317 for (int i = 0; i < 4; i++) {
318 if (fpi->DstReg.WriteMask & (1 << i)) {
319 fs_reg temp = vgrf(glsl_type::float_type);
320 emit(MUL(temp, offset(src[0], i), offset(src[1], i)));
321 emit(ADD(offset(dst, i), temp, offset(src[2], i)));
322 }
323 }
324 break;
325
326 case OPCODE_MAX:
327 emit_fp_minmax(fpi, dst, src[0], src[1]);
328 break;
329
330 case OPCODE_MOV:
331 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
332 break;
333
334 case OPCODE_MIN:
335 emit_fp_minmax(fpi, dst, src[0], src[1]);
336 break;
337
338 case OPCODE_MUL:
339 emit_fp_alu2(BRW_OPCODE_MUL, fpi, dst, src[0], src[1]);
340 break;
341
342 case OPCODE_POW: {
343 fs_reg temp = vgrf(glsl_type::float_type);
344 emit_math(SHADER_OPCODE_POW, temp, src[0], src[1]);
345 emit_fp_scalar_write(fpi, dst, temp);
346 break;
347 }
348
349 case OPCODE_RCP:
350 emit_fp_scalar_math(SHADER_OPCODE_RCP, fpi, dst, src[0]);
351 break;
352
353 case OPCODE_RSQ:
354 emit_fp_scalar_math(SHADER_OPCODE_RSQ, fpi, dst, src[0]);
355 break;
356
357 case OPCODE_SCS:
358 if (fpi->DstReg.WriteMask & WRITEMASK_X) {
359 emit_math(SHADER_OPCODE_COS, offset(dst, 0),
360 offset(src[0], 0));
361 }
362
363 if (fpi->DstReg.WriteMask & WRITEMASK_Y) {
364 emit_math(SHADER_OPCODE_SIN, offset(dst, 1),
365 offset(src[0], 1));
366 }
367 break;
368
369 case OPCODE_SGE:
370 emit_fp_sop(BRW_CONDITIONAL_GE, fpi, dst, src[0], src[1], one);
371 break;
372
373 case OPCODE_SIN:
374 emit_fp_scalar_math(SHADER_OPCODE_SIN, fpi, dst, src[0]);
375 break;
376
377 case OPCODE_SLT:
378 emit_fp_sop(BRW_CONDITIONAL_L, fpi, dst, src[0], src[1], one);
379 break;
380
381 case OPCODE_SUB: {
382 fs_reg neg_src1 = src[1];
383 neg_src1.negate = !src[1].negate;
384
385 emit_fp_alu2(BRW_OPCODE_ADD, fpi, dst, src[0], neg_src1);
386 break;
387 }
388
389 case OPCODE_TEX:
390 case OPCODE_TXB:
391 case OPCODE_TXP: {
392 ir_texture_opcode op;
393 fs_reg lod;
394 fs_reg dpdy;
395 fs_reg coordinate = src[0];
396 fs_reg shadow_c;
397 fs_reg sample_index;
398 fs_reg texel_offset; /* No offsets; leave as BAD_FILE. */
399
400 switch (fpi->Opcode) {
401 case OPCODE_TEX:
402 op = ir_tex;
403 break;
404 case OPCODE_TXP: {
405 op = ir_tex;
406
407 coordinate = vgrf(glsl_type::vec3_type);
408 fs_reg invproj = vgrf(glsl_type::float_type);
409 emit_math(SHADER_OPCODE_RCP, invproj, offset(src[0], 3));
410 for (int i = 0; i < 3; i++) {
411 emit(MUL(offset(coordinate, i),
412 offset(src[0], i), invproj));
413 }
414 break;
415 }
416 case OPCODE_TXB:
417 op = ir_txb;
418 lod = offset(src[0], 3);
419 break;
420 default:
421 unreachable("not reached");
422 }
423
424 int coord_components;
425 switch (fpi->TexSrcTarget) {
426 case TEXTURE_1D_INDEX:
427 coord_components = 1;
428 break;
429
430 case TEXTURE_2D_INDEX:
431 case TEXTURE_1D_ARRAY_INDEX:
432 case TEXTURE_RECT_INDEX:
433 case TEXTURE_EXTERNAL_INDEX:
434 coord_components = 2;
435 break;
436
437 case TEXTURE_3D_INDEX:
438 case TEXTURE_2D_ARRAY_INDEX:
439 coord_components = 3;
440 break;
441
442 case TEXTURE_CUBE_INDEX: {
443 coord_components = 4;
444
445 fs_reg temp = vgrf(glsl_type::float_type);
446 fs_reg cubecoord = vgrf(glsl_type::vec3_type);
447 fs_reg abscoord = coordinate;
448 abscoord.negate = false;
449 abscoord.abs = true;
450 emit_minmax(BRW_CONDITIONAL_GE, temp,
451 offset(abscoord, 0), offset(abscoord, 1));
452 emit_minmax(BRW_CONDITIONAL_GE, temp,
453 temp, offset(abscoord, 2));
454 emit_math(SHADER_OPCODE_RCP, temp, temp);
455 for (int i = 0; i < 3; i++) {
456 emit(MUL(offset(cubecoord, i),
457 offset(coordinate, i), temp));
458 }
459
460 coordinate = cubecoord;
461 break;
462 }
463
464 default:
465 unreachable("not reached");
466 }
467
468 if (fpi->TexShadow)
469 shadow_c = offset(coordinate, 2);
470
471 emit_texture(op, glsl_type::vec4_type, coordinate, coord_components,
472 shadow_c, lod, dpdy, 0, sample_index,
473 reg_undef, 0, /* offset, components */
474 reg_undef, /* mcs */
475 0, /* gather component */
476 false, /* is cube array */
477 fpi->TexSrcTarget == TEXTURE_RECT_INDEX,
478 fpi->TexSrcUnit, fs_reg(fpi->TexSrcUnit),
479 fpi->TexSrcUnit);
480 dst = this->result;
481
482 break;
483 }
484
485 case OPCODE_SWZ:
486 /* Note that SWZ's extended swizzles are handled in the general
487 * get_src_reg() code.
488 */
489 emit_fp_alu1(BRW_OPCODE_MOV, fpi, dst, src[0]);
490 break;
491
492 case OPCODE_XPD:
493 for (int i = 0; i < 3; i++) {
494 if (fpi->DstReg.WriteMask & (1 << i)) {
495 int i1 = (i + 1) % 3;
496 int i2 = (i + 2) % 3;
497
498 fs_reg temp = vgrf(glsl_type::float_type);
499 fs_reg neg_src1_1 = offset(src[1], i1);
500 neg_src1_1.negate = !neg_src1_1.negate;
501 emit(MUL(temp, offset(src[0], i2), neg_src1_1));
502 emit(MUL(offset(dst, i),
503 offset(src[0], i1), offset(src[1], i2)));
504 emit(ADD(offset(dst, i), offset(dst, i), temp));
505 }
506 }
507 break;
508
509 case OPCODE_END:
510 break;
511
512 default:
513 _mesa_problem(ctx, "Unsupported opcode %s in fragment program\n",
514 _mesa_opcode_string(fpi->Opcode));
515 }
516
517 /* To handle saturates, we emit a MOV with a saturate bit, which
518 * optimization should fold into the preceding instructions when safe.
519 */
520 if (fpi->Opcode != OPCODE_END) {
521 fs_reg real_dst = get_fp_dst_reg(&fpi->DstReg);
522
523 for (int i = 0; i < 4; i++) {
524 if (fpi->DstReg.WriteMask & (1 << i)) {
525 fs_inst *inst = emit(MOV(offset(real_dst, i),
526 offset(dst, i)));
527 inst->saturate = fpi->SaturateMode;
528 }
529 }
530 }
531 }
532
533 /* Epilogue:
534 *
535 * Fragment depth has this strange convention of being the .z component of
536 * a vec4. emit_fb_write() wants to see a float value, instead.
537 */
538 this->current_annotation = "result.depth write";
539 if (frag_depth.file != BAD_FILE) {
540 fs_reg temp = vgrf(glsl_type::float_type);
541 emit(MOV(temp, offset(frag_depth, 2)));
542 frag_depth = temp;
543 }
544 }
545
546 void
547 fs_visitor::setup_fp_regs()
548 {
549 /* PROGRAM_TEMPORARY */
550 int num_temp = prog->NumTemporaries;
551 fp_temp_regs = rzalloc_array(mem_ctx, fs_reg, num_temp);
552 for (int i = 0; i < num_temp; i++)
553 fp_temp_regs[i] = vgrf(glsl_type::vec4_type);
554
555 /* PROGRAM_STATE_VAR etc. */
556 if (dispatch_width == 8) {
557 for (unsigned p = 0;
558 p < prog->Parameters->NumParameters; p++) {
559 for (unsigned int i = 0; i < 4; i++) {
560 stage_prog_data->param[uniforms++] =
561 &prog->Parameters->ParameterValues[p][i];
562 }
563 }
564 }
565
566 fp_input_regs = rzalloc_array(mem_ctx, fs_reg, VARYING_SLOT_MAX);
567 for (int i = 0; i < VARYING_SLOT_MAX; i++) {
568 if (prog->InputsRead & BITFIELD64_BIT(i)) {
569 this->current_annotation = ralloc_asprintf(ctx, "interpolate input %d",
570 i);
571
572 switch (i) {
573 case VARYING_SLOT_POS:
574 {
575 assert(stage == MESA_SHADER_FRAGMENT);
576 gl_fragment_program *fp = (gl_fragment_program*) prog;
577 fp_input_regs[i] =
578 *emit_fragcoord_interpolation(fp->PixelCenterInteger,
579 fp->OriginUpperLeft);
580 }
581 break;
582 case VARYING_SLOT_FACE:
583 fp_input_regs[i] = *emit_frontfacing_interpolation();
584 break;
585 default:
586 fp_input_regs[i] = vgrf(glsl_type::vec4_type);
587 emit_general_interpolation(fp_input_regs[i], "fp_input",
588 glsl_type::vec4_type,
589 INTERP_QUALIFIER_NONE,
590 i, false, false);
591
592 if (i == VARYING_SLOT_FOGC) {
593 emit(MOV(offset(fp_input_regs[i], 1), fs_reg(0.0f)));
594 emit(MOV(offset(fp_input_regs[i], 2), fs_reg(0.0f)));
595 emit(MOV(offset(fp_input_regs[i], 3), fs_reg(1.0f)));
596 }
597
598 break;
599 }
600
601 this->current_annotation = NULL;
602 }
603 }
604 }
605
606 fs_reg
607 fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
608 {
609 assert(stage == MESA_SHADER_FRAGMENT);
610 brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
611
612 switch (dst->File) {
613 case PROGRAM_TEMPORARY:
614 return fp_temp_regs[dst->Index];
615
616 case PROGRAM_OUTPUT:
617 if (dst->Index == FRAG_RESULT_DEPTH) {
618 if (frag_depth.file == BAD_FILE)
619 frag_depth = vgrf(glsl_type::vec4_type);
620 return frag_depth;
621 } else if (dst->Index == FRAG_RESULT_COLOR) {
622 if (outputs[0].file == BAD_FILE) {
623 outputs[0] = vgrf(glsl_type::vec4_type);
624 output_components[0] = 4;
625
626 /* Tell emit_fb_writes() to smear fragment.color across all the
627 * color attachments.
628 */
629 for (int i = 1; i < key->nr_color_regions; i++) {
630 outputs[i] = outputs[0];
631 output_components[i] = output_components[0];
632 }
633 }
634 return outputs[0];
635 } else {
636 int output_index = dst->Index - FRAG_RESULT_DATA0;
637 if (outputs[output_index].file == BAD_FILE) {
638 outputs[output_index] = vgrf(glsl_type::vec4_type);
639 }
640 output_components[output_index] = 4;
641 return outputs[output_index];
642 }
643
644 case PROGRAM_UNDEFINED:
645 return fs_reg();
646
647 default:
648 _mesa_problem(ctx, "bad dst register file: %s\n",
649 _mesa_register_file_name((gl_register_file)dst->File));
650 return vgrf(glsl_type::vec4_type);
651 }
652 }
653
654 fs_reg
655 fs_visitor::get_fp_src_reg(const prog_src_register *src)
656 {
657 struct gl_program_parameter_list *plist = prog->Parameters;
658
659 fs_reg result;
660
661 assert(!src->Abs);
662
663 switch (src->File) {
664 case PROGRAM_UNDEFINED:
665 return fs_reg();
666 case PROGRAM_TEMPORARY:
667 result = fp_temp_regs[src->Index];
668 break;
669
670 case PROGRAM_INPUT:
671 result = fp_input_regs[src->Index];
672 break;
673
674 case PROGRAM_STATE_VAR:
675 case PROGRAM_UNIFORM:
676 case PROGRAM_CONSTANT:
677 /* We actually want to look at the type in the Parameters list for this,
678 * because this lets us upload constant builtin uniforms, as actual
679 * constants.
680 */
681 switch (plist->Parameters[src->Index].Type) {
682 case PROGRAM_CONSTANT: {
683 result = vgrf(glsl_type::vec4_type);
684
685 for (int i = 0; i < 4; i++) {
686 emit(MOV(offset(result, i),
687 fs_reg(plist->ParameterValues[src->Index][i].f)));
688 }
689 break;
690 }
691
692 case PROGRAM_STATE_VAR:
693 case PROGRAM_UNIFORM:
694 result = fs_reg(UNIFORM, src->Index * 4);
695 break;
696
697 default:
698 _mesa_problem(ctx, "bad uniform src register file: %s\n",
699 _mesa_register_file_name((gl_register_file)src->File));
700 return vgrf(glsl_type::vec4_type);
701 }
702 break;
703
704 default:
705 _mesa_problem(ctx, "bad src register file: %s\n",
706 _mesa_register_file_name((gl_register_file)src->File));
707 return vgrf(glsl_type::vec4_type);
708 }
709
710 if (src->Swizzle != SWIZZLE_NOOP || src->Negate) {
711 fs_reg unswizzled = result;
712 result = vgrf(glsl_type::vec4_type);
713 for (int i = 0; i < 4; i++) {
714 bool negate = src->Negate & (1 << i);
715 /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
716 * but it costs us nothing to support it.
717 */
718 int src_swiz = GET_SWZ(src->Swizzle, i);
719 if (src_swiz == SWIZZLE_ZERO) {
720 emit(MOV(offset(result, i), fs_reg(0.0f)));
721 } else if (src_swiz == SWIZZLE_ONE) {
722 emit(MOV(offset(result, i),
723 negate ? fs_reg(-1.0f) : fs_reg(1.0f)));
724 } else {
725 fs_reg src = offset(unswizzled, src_swiz);
726 if (negate)
727 src.negate = !src.negate;
728 emit(MOV(offset(result, i), src));
729 }
730 }
731 }
732
733 return result;
734 }