i965/fs: Rename the existing pull constant load opcode.
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file brw_fs_visitor.cpp
25 *
26 * This file supports generating the FS LIR from the GLSL IR. The LIR
27 * makes it easier to do backend-specific optimizations than doing so
28 * in the GLSL IR or in the native code.
29 */
30 extern "C" {
31
32 #include <sys/types.h>
33
34 #include "main/macros.h"
35 #include "main/shaderobj.h"
36 #include "main/uniforms.h"
37 #include "program/prog_parameter.h"
38 #include "program/prog_print.h"
39 #include "program/prog_optimize.h"
40 #include "program/register_allocate.h"
41 #include "program/sampler.h"
42 #include "program/hash_table.h"
43 #include "brw_context.h"
44 #include "brw_eu.h"
45 #include "brw_wm.h"
46 }
47 #include "brw_fs.h"
48 #include "glsl/glsl_types.h"
49 #include "glsl/ir_optimization.h"
50 #include "glsl/ir_print_visitor.h"
51
52 void
53 fs_visitor::visit(ir_variable *ir)
54 {
55 fs_reg *reg = NULL;
56
57 if (variable_storage(ir))
58 return;
59
60 if (ir->mode == ir_var_in) {
61 if (!strcmp(ir->name, "gl_FragCoord")) {
62 reg = emit_fragcoord_interpolation(ir);
63 } else if (!strcmp(ir->name, "gl_FrontFacing")) {
64 reg = emit_frontfacing_interpolation(ir);
65 } else {
66 reg = emit_general_interpolation(ir);
67 }
68 assert(reg);
69 hash_table_insert(this->variable_ht, reg, ir);
70 return;
71 } else if (ir->mode == ir_var_out) {
72 reg = new(this->mem_ctx) fs_reg(this, ir->type);
73
74 if (ir->index > 0) {
75 assert(ir->location == FRAG_RESULT_DATA0);
76 assert(ir->index == 1);
77 this->dual_src_output = *reg;
78 } else if (ir->location == FRAG_RESULT_COLOR) {
79 /* Writing gl_FragColor outputs to all color regions. */
80 for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
81 this->outputs[i] = *reg;
82 this->output_components[i] = 4;
83 }
84 } else if (ir->location == FRAG_RESULT_DEPTH) {
85 this->frag_depth = *reg;
86 } else {
87 /* gl_FragData or a user-defined FS output */
88 assert(ir->location >= FRAG_RESULT_DATA0 &&
89 ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
90
91 int vector_elements =
92 ir->type->is_array() ? ir->type->fields.array->vector_elements
93 : ir->type->vector_elements;
94
95 /* General color output. */
96 for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
97 int output = ir->location - FRAG_RESULT_DATA0 + i;
98 this->outputs[output] = *reg;
99 this->outputs[output].reg_offset += vector_elements * i;
100 this->output_components[output] = vector_elements;
101 }
102 }
103 } else if (ir->mode == ir_var_uniform) {
104 int param_index = c->prog_data.nr_params;
105
106 /* Thanks to the lower_ubo_reference pass, we will see only
107 * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
108 * variables, so no need for them to be in variable_ht.
109 */
110 if (ir->uniform_block != -1)
111 return;
112
113 if (dispatch_width == 16) {
114 if (!variable_storage(ir)) {
115 fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
116 }
117 return;
118 }
119
120 if (!strncmp(ir->name, "gl_", 3)) {
121 setup_builtin_uniform_values(ir);
122 } else {
123 setup_uniform_values(ir->location, ir->type);
124 }
125
126 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
127 reg->type = brw_type_for_base_type(ir->type);
128 }
129
130 if (!reg)
131 reg = new(this->mem_ctx) fs_reg(this, ir->type);
132
133 hash_table_insert(this->variable_ht, reg, ir);
134 }
135
136 void
137 fs_visitor::visit(ir_dereference_variable *ir)
138 {
139 fs_reg *reg = variable_storage(ir->var);
140 this->result = *reg;
141 }
142
143 void
144 fs_visitor::visit(ir_dereference_record *ir)
145 {
146 const glsl_type *struct_type = ir->record->type;
147
148 ir->record->accept(this);
149
150 unsigned int offset = 0;
151 for (unsigned int i = 0; i < struct_type->length; i++) {
152 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
153 break;
154 offset += type_size(struct_type->fields.structure[i].type);
155 }
156 this->result.reg_offset += offset;
157 this->result.type = brw_type_for_base_type(ir->type);
158 }
159
160 void
161 fs_visitor::visit(ir_dereference_array *ir)
162 {
163 ir_constant *index;
164 int element_size;
165
166 ir->array->accept(this);
167 index = ir->array_index->as_constant();
168
169 element_size = type_size(ir->type);
170 this->result.type = brw_type_for_base_type(ir->type);
171
172 if (index) {
173 assert(this->result.file == UNIFORM || this->result.file == GRF);
174 this->result.reg_offset += index->value.i[0] * element_size;
175 } else {
176 assert(!"FINISHME: non-constant array element");
177 }
178 }
179
180 void
181 fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst,
182 fs_reg src0, fs_reg src1)
183 {
184 fs_inst *inst;
185
186 if (intel->gen >= 6) {
187 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
188 inst->conditional_mod = conditionalmod;
189 } else {
190 emit(CMP(reg_null_d, src0, src1, conditionalmod));
191
192 inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
193 inst->predicate = BRW_PREDICATE_NORMAL;
194 }
195 }
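/* For example, min(a, b) on gen6+ becomes a single
 *
 *    sel.l dst, a, b
 *
 * while the pre-gen6 path is roughly (illustrative assembly, not the
 * exact encoded form):
 *
 *    cmp.l null, a, b
 *    (+f0) sel dst, a, b
 */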
196
197 /* Instruction selection: Produce a MOV.sat instead of
198 * MIN(MAX(val, 0), 1) when possible.
199 */
200 bool
201 fs_visitor::try_emit_saturate(ir_expression *ir)
202 {
203 ir_rvalue *sat_val = ir->as_rvalue_to_saturate();
204
205 if (!sat_val)
206 return false;
207
208 fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();
209
210 sat_val->accept(this);
211 fs_reg src = this->result;
212
213 fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();
214
215 /* If the last instruction from our accept() didn't generate our
216 * src, generate a saturated MOV.
217 */
218 fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
219 if (!modify || modify->regs_written() != 1) {
220 this->result = fs_reg(this, ir->type);
221 fs_inst *inst = emit(MOV(this->result, src));
222 inst->saturate = true;
223 } else {
224 modify->saturate = true;
225 this->result = src;
226 }
227
228
229 return true;
230 }
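/* Illustrative example of the transformation above (not the exact
 * emitted encoding): for
 *
 *    x = clamp(a + b, 0.0, 1.0);
 *
 * as_rvalue_to_saturate() hands back the inner (a + b), and instead of
 * emitting a MIN/MAX pair we either set the .sat destination modifier
 * on the instruction that produced the value:
 *
 *    add.sat x, a, b
 *
 * or, when that instruction can't safely be reused, emit a single
 *
 *    mov.sat x, tmp
 */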
231
232 bool
233 fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg)
234 {
235 /* 3-src instructions were introduced in gen6. */
236 if (intel->gen < 6)
237 return false;
238
239 /* MAD can only handle floating-point data. */
240 if (ir->type != glsl_type::float_type)
241 return false;
242
243 ir_rvalue *nonmul = ir->operands[1 - mul_arg];
244 ir_expression *mul = ir->operands[mul_arg]->as_expression();
245
246 if (!mul || mul->operation != ir_binop_mul)
247 return false;
248
249 if (nonmul->as_constant() ||
250 mul->operands[0]->as_constant() ||
251 mul->operands[1]->as_constant())
252 return false;
253
254 nonmul->accept(this);
255 fs_reg src0 = this->result;
256
257 mul->operands[0]->accept(this);
258 fs_reg src1 = this->result;
259
260 mul->operands[1]->accept(this);
261 fs_reg src2 = this->result;
262
263 this->result = fs_reg(this, ir->type);
264 emit(BRW_OPCODE_MAD, this->result, src0, src1, src2);
265
266 return true;
267 }
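/* Illustrative example: for a pattern like
 *
 *    dst = a * b + c
 *
 * with all three operands non-constant floats, the MUL+ADD pair
 *
 *    mul tmp, a, b
 *    add dst, tmp, c
 *
 * collapses into one 3-source instruction computing dst = c + a * b:
 *
 *    mad dst, c, a, b
 *
 * Constants are rejected above because 3-source instructions can't take
 * immediate operands, so a MAD would just force an extra MOV to a GRF.
 */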
268
269 void
270 fs_visitor::visit(ir_expression *ir)
271 {
272 unsigned int operand;
273 fs_reg op[2], temp;
274 fs_inst *inst;
275
276 assert(ir->get_num_operands() <= 2);
277
278 if (try_emit_saturate(ir))
279 return;
280 if (ir->operation == ir_binop_add) {
281 if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
282 return;
283 }
284
285 for (operand = 0; operand < ir->get_num_operands(); operand++) {
286 ir->operands[operand]->accept(this);
287 if (this->result.file == BAD_FILE) {
288 ir_print_visitor v;
289 fail("Failed to get tree for expression operand:\n");
290 ir->operands[operand]->accept(&v);
291 }
292 op[operand] = this->result;
293
294 /* Matrix expression operands should have been broken down to vector
295 * operations already.
296 */
297 assert(!ir->operands[operand]->type->is_matrix());
298 /* And then those vector operands should have been broken down to scalar.
299 */
300 assert(!ir->operands[operand]->type->is_vector());
301 }
302
303 /* Storage for our result. If our result goes into an assignment, it will
304 * just get copy-propagated out, so no worries.
305 */
306 this->result = fs_reg(this, ir->type);
307
308 switch (ir->operation) {
309 case ir_unop_logic_not:
310 /* Note that BRW_OPCODE_NOT is not appropriate here, since it takes
311 * the one's complement of the whole register, not just bit 0.
312 */
313 emit(XOR(this->result, op[0], fs_reg(1)));
314 break;
315 case ir_unop_neg:
316 op[0].negate = !op[0].negate;
317 this->result = op[0];
318 break;
319 case ir_unop_abs:
320 op[0].abs = true;
321 op[0].negate = false;
322 this->result = op[0];
323 break;
324 case ir_unop_sign:
325 temp = fs_reg(this, ir->type);
326
327 emit(MOV(this->result, fs_reg(0.0f)));
328
329 emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_G));
330 inst = emit(MOV(this->result, fs_reg(1.0f)));
331 inst->predicate = BRW_PREDICATE_NORMAL;
332
333 emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_L));
334 inst = emit(MOV(this->result, fs_reg(-1.0f)));
335 inst->predicate = BRW_PREDICATE_NORMAL;
336
337 break;
338 case ir_unop_rcp:
339 emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
340 break;
341
342 case ir_unop_exp2:
343 emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
344 break;
345 case ir_unop_log2:
346 emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
347 break;
348 case ir_unop_exp:
349 case ir_unop_log:
350 assert(!"not reached: should be handled by ir_explog_to_explog2");
351 break;
352 case ir_unop_sin:
353 case ir_unop_sin_reduced:
354 emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
355 break;
356 case ir_unop_cos:
357 case ir_unop_cos_reduced:
358 emit_math(SHADER_OPCODE_COS, this->result, op[0]);
359 break;
360
361 case ir_unop_dFdx:
362 emit(FS_OPCODE_DDX, this->result, op[0]);
363 break;
364 case ir_unop_dFdy:
365 emit(FS_OPCODE_DDY, this->result, op[0]);
366 break;
367
368 case ir_binop_add:
369 emit(ADD(this->result, op[0], op[1]));
370 break;
371 case ir_binop_sub:
372 assert(!"not reached: should be handled by ir_sub_to_add_neg");
373 break;
374
375 case ir_binop_mul:
376 if (ir->type->is_integer()) {
377 /* For integer multiplication, the MUL uses the low 16 bits
378 * of one of the operands (src0 on gen6, src1 on gen7). The
379 * MACH then accumulates the contribution of the upper 16 bits
380 * of that operand.
381 *
382 * FINISHME: Emit just the MUL if we know an operand is small
383 * enough.
384 */
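/* The sequence emitted below computes the full 32-bit product in the
 * accumulator, roughly (a sketch; the accumulator semantics are more
 * involved than shown):
 *
 *    mul  acc,  op0, op1   // partial product using the low 16 bits
 *    mach null, op0, op1   // folds in the upper-16-bit contribution
 *    mov  dst,  acc        // copy out the low 32 bits of op0 * op1
 */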
385 if (intel->gen >= 7 && dispatch_width == 16)
386 fail("16-wide explicit accumulator operands unsupported\n");
387
388 struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);
389
390 emit(MUL(acc, op[0], op[1]));
391 emit(MACH(reg_null_d, op[0], op[1]));
392 emit(MOV(this->result, fs_reg(acc)));
393 } else {
394 emit(MUL(this->result, op[0], op[1]));
395 }
396 break;
397 case ir_binop_div:
398 if (intel->gen >= 7 && dispatch_width == 16)
399 fail("16-wide INTDIV unsupported\n");
400
401 /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
402 assert(ir->type->is_integer());
403 emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
404 break;
405 case ir_binop_mod:
406 if (intel->gen >= 7 && dispatch_width == 16)
407 fail("16-wide INTDIV unsupported\n");
408
409 /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
410 assert(ir->type->is_integer());
411 emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
412 break;
413
414 case ir_binop_less:
415 case ir_binop_greater:
416 case ir_binop_lequal:
417 case ir_binop_gequal:
418 case ir_binop_equal:
419 case ir_binop_all_equal:
420 case ir_binop_nequal:
421 case ir_binop_any_nequal:
422 resolve_bool_comparison(ir->operands[0], &op[0]);
423 resolve_bool_comparison(ir->operands[1], &op[1]);
424
425 emit(CMP(this->result, op[0], op[1],
426 brw_conditional_for_comparison(ir->operation)));
427 break;
428
429 case ir_binop_logic_xor:
430 emit(XOR(this->result, op[0], op[1]));
431 break;
432
433 case ir_binop_logic_or:
434 emit(OR(this->result, op[0], op[1]));
435 break;
436
437 case ir_binop_logic_and:
438 emit(AND(this->result, op[0], op[1]));
439 break;
440
441 case ir_binop_dot:
442 case ir_unop_any:
443 assert(!"not reached: should be handled by brw_fs_channel_expressions");
444 break;
445
446 case ir_unop_noise:
447 assert(!"not reached: should be handled by lower_noise");
448 break;
449
450 case ir_quadop_vector:
451 assert(!"not reached: should be handled by lower_quadop_vector");
452 break;
453
454 case ir_unop_sqrt:
455 emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
456 break;
457
458 case ir_unop_rsq:
459 emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
460 break;
461
462 case ir_unop_bitcast_i2f:
463 case ir_unop_bitcast_u2f:
464 op[0].type = BRW_REGISTER_TYPE_F;
465 this->result = op[0];
466 break;
467 case ir_unop_i2u:
468 case ir_unop_bitcast_f2u:
469 op[0].type = BRW_REGISTER_TYPE_UD;
470 this->result = op[0];
471 break;
472 case ir_unop_u2i:
473 case ir_unop_bitcast_f2i:
474 op[0].type = BRW_REGISTER_TYPE_D;
475 this->result = op[0];
476 break;
477 case ir_unop_i2f:
478 case ir_unop_u2f:
479 case ir_unop_f2i:
480 case ir_unop_f2u:
481 emit(MOV(this->result, op[0]));
482 break;
483
484 case ir_unop_b2i:
485 inst = emit(AND(this->result, op[0], fs_reg(1)));
486 break;
487 case ir_unop_b2f:
488 temp = fs_reg(this, glsl_type::int_type);
489 emit(AND(temp, op[0], fs_reg(1)));
490 emit(MOV(this->result, temp));
491 break;
492
493 case ir_unop_f2b:
   emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
   break;
494 case ir_unop_i2b:
   /* Compare against an integer zero so we don't mix float and int
    * source types in one instruction, matching emit_bool_to_cond_code().
    */
495 emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
496 break;
497
498 case ir_unop_trunc:
499 emit(RNDZ(this->result, op[0]));
500 break;
501 case ir_unop_ceil:
502 op[0].negate = !op[0].negate;
503 inst = emit(RNDD(this->result, op[0]));
504 this->result.negate = true;
505 break;
506 case ir_unop_floor:
507 inst = emit(RNDD(this->result, op[0]));
508 break;
509 case ir_unop_fract:
510 inst = emit(FRC(this->result, op[0]));
511 break;
512 case ir_unop_round_even:
513 emit(RNDE(this->result, op[0]));
514 break;
515
516 case ir_binop_min:
517 case ir_binop_max:
518 resolve_ud_negate(&op[0]);
519 resolve_ud_negate(&op[1]);
520 emit_minmax(ir->operation == ir_binop_min ?
521 BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
522 this->result, op[0], op[1]);
523 break;
524
525 case ir_binop_pow:
526 emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
527 break;
528
529 case ir_unop_bit_not:
530 inst = emit(NOT(this->result, op[0]));
531 break;
532 case ir_binop_bit_and:
533 inst = emit(AND(this->result, op[0], op[1]));
534 break;
535 case ir_binop_bit_xor:
536 inst = emit(XOR(this->result, op[0], op[1]));
537 break;
538 case ir_binop_bit_or:
539 inst = emit(OR(this->result, op[0], op[1]));
540 break;
541
542 case ir_binop_lshift:
543 inst = emit(SHL(this->result, op[0], op[1]));
544 break;
545
546 case ir_binop_rshift:
547 if (ir->type->base_type == GLSL_TYPE_INT)
548 inst = emit(ASR(this->result, op[0], op[1]));
549 else
550 inst = emit(SHR(this->result, op[0], op[1]));
551 break;
552
553 case ir_binop_ubo_load:
554 ir_constant *uniform_block = ir->operands[0]->as_constant();
555 ir_constant *offset = ir->operands[1]->as_constant();
556
557 fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
558 packed_consts.type = result.type;
559 fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_WM_UBO(uniform_block->value.u[0]));
560 fs_inst *pull = emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
561 packed_consts,
562 surf_index,
563 fs_reg(offset->value.u[0])));
564 pull->base_mrf = 14;
565 pull->mlen = 1;
566
567 packed_consts.smear = offset->value.u[0] % 16 / 4;
568 for (int i = 0; i < ir->type->vector_elements; i++) {
569 /* UBO bools are any nonzero value. We consider bools to be
570 * values with the low bit set to 1. Convert them using CMP.
571 */
572 if (ir->type->base_type == GLSL_TYPE_BOOL) {
573 emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ));
574 } else {
575 emit(MOV(result, packed_consts));
576 }
577
578 packed_consts.smear++;
579 result.reg_offset++;
580
581 /* The std140 packing rules don't allow vectors to cross 16-byte
582 * boundaries, and a reg is 32 bytes.
583 */
584 assert(packed_consts.smear < 8);
585 }
586 result.reg_offset = 0;
587 break;
588 }
589 }
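/* Worked example for the ir_binop_ubo_load case above, assuming std140
 * layout: a vec2 at byte offset 20 within the uniform block pulls in
 * the aligned block of constants containing that offset, and smear
 * starts at 20 % 16 / 4 == 1, so the two components are read from
 * dwords 1 and 2 of packed_consts.  The smear < 8 assertion holds
 * because std140 never lets a vector straddle a 16-byte boundary.
 */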
590
591 void
592 fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
593 const glsl_type *type, bool predicated)
594 {
595 switch (type->base_type) {
596 case GLSL_TYPE_FLOAT:
597 case GLSL_TYPE_UINT:
598 case GLSL_TYPE_INT:
599 case GLSL_TYPE_BOOL:
600 for (unsigned int i = 0; i < type->components(); i++) {
601 l.type = brw_type_for_base_type(type);
602 r.type = brw_type_for_base_type(type);
603
604 if (predicated || !l.equals(r)) {
605 fs_inst *inst = emit(MOV(l, r));
606 inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE;
607 }
608
609 l.reg_offset++;
610 r.reg_offset++;
611 }
612 break;
613 case GLSL_TYPE_ARRAY:
614 for (unsigned int i = 0; i < type->length; i++) {
615 emit_assignment_writes(l, r, type->fields.array, predicated);
616 }
617 break;
618
619 case GLSL_TYPE_STRUCT:
620 for (unsigned int i = 0; i < type->length; i++) {
621 emit_assignment_writes(l, r, type->fields.structure[i].type,
622 predicated);
623 }
624 break;
625
626 case GLSL_TYPE_SAMPLER:
627 break;
628
629 default:
630 assert(!"not reached");
631 break;
632 }
633 }
634
635 /* If the RHS processing resulted in an instruction generating a
636 * temporary value, and it would be easy to rewrite the instruction to
637 * generate its result right into the LHS instead, do so. This ends
638 * up reliably removing instructions where it can be tricky to do so
639 * later without real UD chain information.
640 */
641 bool
642 fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
643 fs_reg dst,
644 fs_reg src,
645 fs_inst *pre_rhs_inst,
646 fs_inst *last_rhs_inst)
647 {
648 /* Only attempt if we're doing a direct assignment. */
649 if (ir->condition ||
650 !(ir->lhs->type->is_scalar() ||
651 (ir->lhs->type->is_vector() &&
652 ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1)))
653 return false;
654
655 /* Make sure the last instruction generated our source reg. */
656 fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst,
657 last_rhs_inst,
658 src);
659 if (!modify)
660 return false;
661
662 /* If last_rhs_inst wrote a different number of components than our LHS,
663 * we can't safely rewrite it.
664 */
665 if (ir->lhs->type->vector_elements != modify->regs_written())
666 return false;
667
668 /* Success! Rewrite the instruction. */
669 modify->dst = dst;
670
671 return true;
672 }
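/* Example of the rewrite: for a simple assignment
 *
 *    x = a + b;
 *
 * visiting the RHS produces "add tmp, a, b", and without this pass the
 * assignment would add "mov x, tmp".  Since the ADD is the instruction
 * that generated tmp and writes the same number of components as x, we
 * retarget it to "add x, a, b" and skip emitting the MOV entirely.
 */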
673
674 void
675 fs_visitor::visit(ir_assignment *ir)
676 {
677 fs_reg l, r;
678 fs_inst *inst;
679
680 /* FINISHME: arrays on the lhs */
681 ir->lhs->accept(this);
682 l = this->result;
683
684 fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail();
685
686 ir->rhs->accept(this);
687 r = this->result;
688
689 fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail();
690
691 assert(l.file != BAD_FILE);
692 assert(r.file != BAD_FILE);
693
694 if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst))
695 return;
696
697 if (ir->condition) {
698 emit_bool_to_cond_code(ir->condition);
699 }
700
701 if (ir->lhs->type->is_scalar() ||
702 ir->lhs->type->is_vector()) {
703 for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
704 if (ir->write_mask & (1 << i)) {
705 inst = emit(MOV(l, r));
706 if (ir->condition)
707 inst->predicate = BRW_PREDICATE_NORMAL;
708 r.reg_offset++;
709 }
710 l.reg_offset++;
711 }
712 } else {
713 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
714 }
715 }
716
717 fs_inst *
718 fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
719 fs_reg shadow_c, fs_reg lod, fs_reg dPdy)
720 {
721 int mlen;
722 int base_mrf = 1;
723 bool simd16 = false;
724 fs_reg orig_dst;
725
726 /* g0 header. */
727 mlen = 1;
728
729 if (ir->shadow_comparitor) {
730 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
731 emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
732 coordinate.reg_offset++;
733 }
734 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
735 mlen += 3;
736
737 if (ir->op == ir_tex) {
738 /* There's no plain shadow compare message, so we use shadow
739 * compare with a bias of 0.0.
740 */
741 emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)));
742 mlen++;
743 } else if (ir->op == ir_txb || ir->op == ir_txl) {
744 emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
745 mlen++;
746 } else {
747 assert(!"Should not get here.");
748 }
749
750 emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
751 mlen++;
752 } else if (ir->op == ir_tex) {
753 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
754 emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
755 coordinate.reg_offset++;
756 }
757 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
758 mlen += 3;
759 } else if (ir->op == ir_txd) {
760 fs_reg &dPdx = lod;
761
762 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
763 emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
764 coordinate.reg_offset++;
765 }
766 /* the slots for u and v are always present, but r is optional */
767 mlen += MAX2(ir->coordinate->type->vector_elements, 2);
768
769 /* P = u, v, r
770 * dPdx = dudx, dvdx, drdx
771 * dPdy = dudy, dvdy, drdy
772 *
773 * 1-arg: Does not exist.
774 *
775 * 2-arg: dudx dvdx dudy dvdy
776 * dPdx.x dPdx.y dPdy.x dPdy.y
777 * m4 m5 m6 m7
778 *
779 * 3-arg: dudx dvdx drdx dudy dvdy drdy
780 * dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
781 * m5 m6 m7 m8 m9 m10
782 */
783 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
784 emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx));
785 dPdx.reg_offset++;
786 }
787 mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);
788
789 for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
790 emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy));
791 dPdy.reg_offset++;
792 }
793 mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
794 } else if (ir->op == ir_txs) {
795 /* There's no SIMD8 resinfo message on Gen4. Use SIMD16 instead. */
796 simd16 = true;
797 emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
798 mlen += 2;
799 } else {
800 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
801 * instructions. We'll need to do SIMD16 here.
802 */
803 simd16 = true;
804 assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf);
805
806 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
807 emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
808 coordinate));
809 coordinate.reg_offset++;
810 }
811
812 /* Initialize the rest of u/v/r with 0.0. Empirically, this seems to
813 * be necessary for TXF (ld), but seems wise to do for all messages.
814 */
815 for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
816 emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)));
817 }
818
819 /* lod/bias appears after u/v/r. */
820 mlen += 6;
821
822 emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod));
823 mlen++;
824
825 /* The unused upper half. */
826 mlen++;
827 }
828
829 if (simd16) {
830 /* Now, since we're doing simd16, the return is 2 interleaved
831 * vec4s where the odd-indexed ones are junk. We'll need to move
832 * this weirdness around to the expected layout.
833 */
834 orig_dst = dst;
835 const glsl_type *vec_type =
836 glsl_type::get_instance(ir->type->base_type, 4, 1);
837 dst = fs_reg(this, glsl_type::get_array_instance(vec_type, 2));
838 dst.type = intel->is_g4x ? brw_type_for_base_type(ir->type)
839 : BRW_REGISTER_TYPE_F;
840 }
841
842 fs_inst *inst = NULL;
843 switch (ir->op) {
844 case ir_tex:
845 inst = emit(SHADER_OPCODE_TEX, dst);
846 break;
847 case ir_txb:
848 inst = emit(FS_OPCODE_TXB, dst);
849 break;
850 case ir_txl:
851 inst = emit(SHADER_OPCODE_TXL, dst);
852 break;
853 case ir_txd:
854 inst = emit(SHADER_OPCODE_TXD, dst);
855 break;
856 case ir_txs:
857 inst = emit(SHADER_OPCODE_TXS, dst);
858 break;
859 case ir_txf:
860 inst = emit(SHADER_OPCODE_TXF, dst);
861 break;
862 }
863 inst->base_mrf = base_mrf;
864 inst->mlen = mlen;
865 inst->header_present = true;
866
867 if (simd16) {
868 for (int i = 0; i < 4; i++) {
869 emit(MOV(orig_dst, dst));
870 orig_dst.reg_offset++;
871 dst.reg_offset += 2;
872 }
873 }
874
875 return inst;
876 }
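/* Layout of the simd16 workaround above: the SIMD16 response arrives as
 * register pairs per channel,
 *
 *    dst+0: x (low 8 pixels)    dst+1: x (unused upper half)
 *    dst+2: y (low 8 pixels)    dst+3: y (unused upper half)
 *    dst+4: z ...               dst+5: ...
 *    dst+6: w ...               dst+7: ...
 *
 * so the final loop copies every other register (dst+0, +2, +4, +6)
 * into four consecutive registers of the original destination.
 */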
877
878 /* gen5's sampler has slots for u, v, r, array index, then optional
879 * parameters like the shadow comparator or LOD bias. If optional
880 * parameters aren't present, those base slots are optional and don't
881 * need to be included in the message.
882 *
883 * Even when later parameters force the base slots to be present, we
884 * don't fill in the unused ones, which may look surprising in the disassembly.
885 */
886 fs_inst *
887 fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
888 fs_reg shadow_c, fs_reg lod, fs_reg lod2)
889 {
890 int mlen = 0;
891 int base_mrf = 2;
892 int reg_width = dispatch_width / 8;
893 bool header_present = false;
894 const int vector_elements =
895 ir->coordinate ? ir->coordinate->type->vector_elements : 0;
896
897 if (ir->offset != NULL && ir->op == ir_txf) {
898 /* It appears that the ld instruction used for txf does its
899 * address bounds check before adding in the offset. To work
900 * around this, just add the integer offset to the integer texel
901 * coordinate, and don't put the offset in the header.
902 */
903 ir_constant *offset = ir->offset->as_constant();
904 for (int i = 0; i < vector_elements; i++) {
905 emit(ADD(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
906 coordinate,
907 offset->value.i[i]));
908 coordinate.reg_offset++;
909 }
910 } else {
911 if (ir->offset) {
912 /* The offsets set up by the ir_texture visitor are in the
913 * m1 header, so we can't go headerless.
914 */
915 header_present = true;
916 mlen++;
917 base_mrf--;
918 }
919
920 for (int i = 0; i < vector_elements; i++) {
921 emit(MOV(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
922 coordinate));
923 coordinate.reg_offset++;
924 }
925 }
926 mlen += vector_elements * reg_width;
927
928 if (ir->shadow_comparitor) {
929 mlen = MAX2(mlen, header_present + 4 * reg_width);
930
931 emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
932 mlen += reg_width;
933 }
934
935 fs_inst *inst = NULL;
936 switch (ir->op) {
937 case ir_tex:
938 inst = emit(SHADER_OPCODE_TEX, dst);
939 break;
940 case ir_txb:
941 mlen = MAX2(mlen, header_present + 4 * reg_width);
942 emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
943 mlen += reg_width;
944
945 inst = emit(FS_OPCODE_TXB, dst);
946 break;
947 case ir_txl:
948 mlen = MAX2(mlen, header_present + 4 * reg_width);
949 emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
950 mlen += reg_width;
951
952 inst = emit(SHADER_OPCODE_TXL, dst);
953 break;
954 case ir_txd: {
955 mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */
956
957 /*
958 * P = u, v, r
959 * dPdx = dudx, dvdx, drdx
960 * dPdy = dudy, dvdy, drdy
961 *
962 * Load up these values:
963 * - dudx dudy dvdx dvdy drdx drdy
964 * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
965 */
966 for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
967 emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
968 lod.reg_offset++;
969 mlen += reg_width;
970
971 emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
972 lod2.reg_offset++;
973 mlen += reg_width;
974 }
975
976 inst = emit(SHADER_OPCODE_TXD, dst);
977 break;
978 }
979 case ir_txs:
980 emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
981 mlen += reg_width;
982 inst = emit(SHADER_OPCODE_TXS, dst);
983 break;
984 case ir_txf:
985 mlen = header_present + 4 * reg_width;
986
987 emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD),
988 lod));
989 inst = emit(SHADER_OPCODE_TXF, dst);
990 break;
991 }
992 inst->base_mrf = base_mrf;
993 inst->mlen = mlen;
994 inst->header_present = header_present;
995
996 if (mlen > 11) {
997 fail("Message length >11 disallowed by hardware\n");
998 }
999
1000 return inst;
1001 }
1002
1003 fs_inst *
1004 fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
1005 fs_reg shadow_c, fs_reg lod, fs_reg lod2)
1006 {
1007 int mlen = 0;
1008 int base_mrf = 2;
1009 int reg_width = dispatch_width / 8;
1010 bool header_present = false;
1011 int offsets[3];
1012
1013 if (ir->offset && ir->op != ir_txf) {
1014 /* The offsets set up by the ir_texture visitor are in the
1015 * m1 header, so we can't go headerless.
1016 */
1017 header_present = true;
1018 mlen++;
1019 base_mrf--;
1020 }
1021
1022 if (ir->shadow_comparitor) {
1023 emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
1024 mlen += reg_width;
1025 }
1026
1027 /* Set up the LOD info */
1028 switch (ir->op) {
1029 case ir_tex:
1030 break;
1031 case ir_txb:
1032 emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
1033 mlen += reg_width;
1034 break;
1035 case ir_txl:
1036 emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
1037 mlen += reg_width;
1038 break;
1039 case ir_txd: {
1040 if (dispatch_width == 16)
1041 fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
1042
1043 /* Load dPdx and the coordinate together:
1044 * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
1045 */
1046 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1047 emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
1048 coordinate.reg_offset++;
1049 mlen += reg_width;
1050
1051 emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
1052 lod.reg_offset++;
1053 mlen += reg_width;
1054
1055 emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
1056 lod2.reg_offset++;
1057 mlen += reg_width;
1058 }
1059 break;
1060 }
1061 case ir_txs:
1062 emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
1063 mlen += reg_width;
1064 break;
1065 case ir_txf:
1066 /* It appears that the ld instruction used for txf does its
1067 * address bounds check before adding in the offset. To work
1068 * around this, just add the integer offset to the integer texel
1069 * coordinate, and don't put the offset in the header.
1070 */
1071 if (ir->offset) {
1072 ir_constant *offset = ir->offset->as_constant();
1073 offsets[0] = offset->value.i[0];
1074 offsets[1] = offset->value.i[1];
1075 offsets[2] = offset->value.i[2];
1076 } else {
1077 memset(offsets, 0, sizeof(offsets));
1078 }
1079
1080 /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
1081 emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
1082 coordinate, offsets[0]));
1083 coordinate.reg_offset++;
1084 mlen += reg_width;
1085
1086 emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod));
1087 mlen += reg_width;
1088
1089 for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
1090 emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
1091 coordinate, offsets[i]));
1092 coordinate.reg_offset++;
1093 mlen += reg_width;
1094 }
1095 break;
1096 }
1097
1098 /* Set up the coordinate (except for cases where it was done above) */
1099 if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf) {
1100 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1101 emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
1102 coordinate.reg_offset++;
1103 mlen += reg_width;
1104 }
1105 }
1106
1107 /* Generate the SEND */
1108 fs_inst *inst = NULL;
1109 switch (ir->op) {
1110 case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
1111 case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
1112 case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
1113 case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
1114 case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
1115 case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
1116 }
1117 inst->base_mrf = base_mrf;
1118 inst->mlen = mlen;
1119 inst->header_present = header_present;
1120
1121 if (mlen > 11) {
1122 fail("Message length >11 disallowed by hardware\n");
1123 }
1124
1125 return inst;
1126 }
1127
1128 fs_reg
1129 fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
1130 bool is_rect, int sampler, int texunit)
1131 {
1132 fs_inst *inst = NULL;
1133 bool needs_gl_clamp = true;
1134 fs_reg scale_x, scale_y;
1135
1136 /* The 965 requires the EU to do the normalization of GL rectangle
1137 * texture coordinates. We use the program parameter state
1138 * tracking to get the scaling factor.
1139 */
1140 if (is_rect &&
1141 (intel->gen < 6 ||
1142 (intel->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) ||
1143 c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) {
1144 struct gl_program_parameter_list *params = fp->Base.Parameters;
1145 int tokens[STATE_LENGTH] = {
1146 STATE_INTERNAL,
1147 STATE_TEXRECT_SCALE,
1148 texunit,
1149 0,
1150 0
1151 };
1152
1153 if (dispatch_width == 16) {
1154 fail("rectangle scale uniform setup not supported on 16-wide\n");
1155 return coordinate;
1156 }
1157
1158 scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
1159 scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);
1160
1161 GLuint index = _mesa_add_state_reference(params,
1162 (gl_state_index *)tokens);
1163
1164 this->param_index[c->prog_data.nr_params] = index;
1165 this->param_offset[c->prog_data.nr_params] = 0;
1166 c->prog_data.nr_params++;
1167 this->param_index[c->prog_data.nr_params] = index;
1168 this->param_offset[c->prog_data.nr_params] = 1;
1169 c->prog_data.nr_params++;
1170 }
1171
1172 /* On pre-gen6, the EU has to perform the rectangle coordinate
1173 * normalization itself, so apply the TEXRECT_SCALE factors set up
1174 * above.
1175 */
1176 if (intel->gen < 6 && is_rect) {
1177 fs_reg dst = fs_reg(this, ir->coordinate->type);
1178 fs_reg src = coordinate;
1179 coordinate = dst;
1180
1181 emit(MUL(dst, src, scale_x));
1182 dst.reg_offset++;
1183 src.reg_offset++;
1184 emit(MUL(dst, src, scale_y));
1185 } else if (is_rect) {
1186 /* On gen6+, the sampler handles the rectangle coordinates
1187 * natively, without needing rescaling. But that means we have
1188 * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
1189 * not [0, 1] like the default case below.
1190 */
1191 needs_gl_clamp = false;
1192
1193 for (int i = 0; i < 2; i++) {
1194 if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
1195 fs_reg chan = coordinate;
1196 chan.reg_offset += i;
1197
1198 inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0));
1199 inst->conditional_mod = BRW_CONDITIONAL_G;
1200
1201 /* Our parameter comes in as 1.0/width or 1.0/height,
1202 * because that's what people normally want for doing
1203 * texture rectangle handling. We need width or height
1204 * for clamping, but we don't care enough to make a new
1205 * parameter type, so just invert back.
1206 */
1207 fs_reg limit = fs_reg(this, glsl_type::float_type);
1208 emit(MOV(limit, i == 0 ? scale_x : scale_y));
1209 emit(SHADER_OPCODE_RCP, limit, limit);
1210
1211 inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
1212 inst->conditional_mod = BRW_CONDITIONAL_L;
1213 }
1214 }
1215 }
1216
1217 if (ir->coordinate && needs_gl_clamp) {
1218 for (unsigned int i = 0;
1219 i < MIN2(ir->coordinate->type->vector_elements, 3); i++) {
1220 if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
1221 fs_reg chan = coordinate;
1222 chan.reg_offset += i;
1223
1224 fs_inst *inst = emit(MOV(chan, chan));
1225 inst->saturate = true;
1226 }
1227 }
1228 }
1229 return coordinate;
1230 }
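/* Sketch of the gen6+ GL_CLAMP handling above for a rectangle texture
 * of width W (illustrative assembly): the valid range is [0, W] rather
 * than [0, 1], so a saturating MOV won't do, and instead we emit
 *
 *    sel.g  u, u, 0.0         // max(u, 0)
 *    rcp    limit, scale_x    // recover W from the 1/W scale uniform
 *    sel.l  u, u, limit       // min(u, W)
 */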
1231
1232 void
1233 fs_visitor::visit(ir_texture *ir)
1234 {
1235 fs_inst *inst = NULL;
1236
1237 int sampler = _mesa_get_sampler_uniform_value(ir->sampler, prog, &fp->Base);
1238 int texunit = fp->Base.SamplerUnits[sampler];
1239
1240 /* Should be lowered by do_lower_texture_projection */
1241 assert(!ir->projector);
1242
1243 /* Generate code to compute all the subexpression trees. This has to be
1244 * done before loading any values into MRFs for the sampler message since
1245 * generating these values may involve SEND messages that need the MRFs.
1246 */
1247 fs_reg coordinate;
1248 if (ir->coordinate) {
1249 ir->coordinate->accept(this);
1250
1251 coordinate = rescale_texcoord(ir, this->result,
1252 ir->sampler->type->sampler_dimensionality ==
1253 GLSL_SAMPLER_DIM_RECT,
1254 sampler, texunit);
1255 }
1256
1257 fs_reg shadow_comparitor;
1258 if (ir->shadow_comparitor) {
1259 ir->shadow_comparitor->accept(this);
1260 shadow_comparitor = this->result;
1261 }
1262
1263 fs_reg lod, lod2;
1264 switch (ir->op) {
1265 case ir_tex:
1266 break;
1267 case ir_txb:
1268 ir->lod_info.bias->accept(this);
1269 lod = this->result;
1270 break;
1271 case ir_txd:
1272 ir->lod_info.grad.dPdx->accept(this);
1273 lod = this->result;
1274
1275 ir->lod_info.grad.dPdy->accept(this);
1276 lod2 = this->result;
1277 break;
1278 case ir_txf:
1279 case ir_txl:
1280 case ir_txs:
1281 ir->lod_info.lod->accept(this);
1282 lod = this->result;
1283 break;
1284 }
1285
1286 /* Writemasking doesn't eliminate channels on SIMD8 texture
1287 * samples, so don't worry about them.
1288 */
1289 fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));
1290
1291 if (intel->gen >= 7) {
1292 inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
1293 lod, lod2);
1294 } else if (intel->gen >= 5) {
1295 inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
1296 lod, lod2);
1297 } else {
1298 inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor,
1299 lod, lod2);
1300 }
1301
1302 /* The header is set up by generate_tex() when necessary. */
1303 inst->src[0] = reg_undef;
1304
1305 if (ir->offset != NULL && ir->op != ir_txf)
1306 inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
1307
1308 inst->sampler = sampler;
1309
1310 if (ir->shadow_comparitor)
1311 inst->shadow_compare = true;
1312
1313 swizzle_result(ir, dst, sampler);
1314 }
1315
1316 /**
1317 * Swizzle the result of a texture lookup. This is necessary for
1318 * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons.
1319 */
1320 void
1321 fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler)
1322 {
1323 this->result = orig_val;
1324
1325 if (ir->op == ir_txs)
1326 return;
1327
1328 if (ir->type == glsl_type::float_type) {
1329 /* Ignore DEPTH_TEXTURE_MODE swizzling. */
1330 assert(ir->sampler->type->sampler_shadow);
1331 } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) {
1332 fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type);
1333
1334 for (int i = 0; i < 4; i++) {
1335 int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i);
1336 fs_reg l = swizzled_result;
1337 l.reg_offset += i;
1338
1339 if (swiz == SWIZZLE_ZERO) {
1340 emit(MOV(l, fs_reg(0.0f)));
1341 } else if (swiz == SWIZZLE_ONE) {
1342 emit(MOV(l, fs_reg(1.0f)));
1343 } else {
1344 fs_reg r = orig_val;
1345 r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i);
1346 emit(MOV(l, r));
1347 }
1348 }
1349 this->result = swizzled_result;
1350 }
1351 }
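/* Example: with a texture swizzle of (GREEN, GREEN, GREEN, ONE) -- a
 * typical luminance-style emulation -- the loop above emits
 *
 *    mov swizzled.x, orig.y
 *    mov swizzled.y, orig.y
 *    mov swizzled.z, orig.y
 *    mov swizzled.w, 1.0f
 *
 * and points this->result at the swizzled copy.
 */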
1352
1353 void
1354 fs_visitor::visit(ir_swizzle *ir)
1355 {
1356 ir->val->accept(this);
1357 fs_reg val = this->result;
1358
1359 if (ir->type->vector_elements == 1) {
1360 this->result.reg_offset += ir->mask.x;
1361 return;
1362 }
1363
1364 fs_reg result = fs_reg(this, ir->type);
1365 this->result = result;
1366
1367 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1368 fs_reg channel = val;
1369 int swiz = 0;
1370
1371 switch (i) {
1372 case 0:
1373 swiz = ir->mask.x;
1374 break;
1375 case 1:
1376 swiz = ir->mask.y;
1377 break;
1378 case 2:
1379 swiz = ir->mask.z;
1380 break;
1381 case 3:
1382 swiz = ir->mask.w;
1383 break;
1384 }
1385
1386 channel.reg_offset += swiz;
1387 emit(MOV(result, channel));
1388 result.reg_offset++;
1389 }
1390 }
1391
1392 void
1393 fs_visitor::visit(ir_discard *ir)
1394 {
1395 assert(ir->condition == NULL); /* FINISHME */
1396
1397 emit(FS_OPCODE_DISCARD);
1398 }
1399
1400 void
1401 fs_visitor::visit(ir_constant *ir)
1402 {
1403 /* Set this->result to reg at the bottom of the function because some code
1404 * paths will cause this visitor to be applied to other fields. This will
1405 * cause the value stored in this->result to be modified.
1406 *
1407 * Make reg constant so that it doesn't get accidentally modified along the
1408 * way. Yes, I actually had this problem. :(
1409 */
1410 const fs_reg reg(this, ir->type);
1411 fs_reg dst_reg = reg;
1412
1413 if (ir->type->is_array()) {
1414 const unsigned size = type_size(ir->type->fields.array);
1415
1416 for (unsigned i = 0; i < ir->type->length; i++) {
1417 ir->array_elements[i]->accept(this);
1418 fs_reg src_reg = this->result;
1419
1420 dst_reg.type = src_reg.type;
1421 for (unsigned j = 0; j < size; j++) {
1422 emit(MOV(dst_reg, src_reg));
1423 src_reg.reg_offset++;
1424 dst_reg.reg_offset++;
1425 }
1426 }
1427 } else if (ir->type->is_record()) {
1428 foreach_list(node, &ir->components) {
1429 ir_constant *const field = (ir_constant *) node;
1430 const unsigned size = type_size(field->type);
1431
1432 field->accept(this);
1433 fs_reg src_reg = this->result;
1434
1435 dst_reg.type = src_reg.type;
1436 for (unsigned j = 0; j < size; j++) {
1437 emit(MOV(dst_reg, src_reg));
1438 src_reg.reg_offset++;
1439 dst_reg.reg_offset++;
1440 }
1441 }
1442 } else {
1443 const unsigned size = type_size(ir->type);
1444
1445 for (unsigned i = 0; i < size; i++) {
1446 switch (ir->type->base_type) {
1447 case GLSL_TYPE_FLOAT:
1448 emit(MOV(dst_reg, fs_reg(ir->value.f[i])));
1449 break;
1450 case GLSL_TYPE_UINT:
1451 emit(MOV(dst_reg, fs_reg(ir->value.u[i])));
1452 break;
1453 case GLSL_TYPE_INT:
1454 emit(MOV(dst_reg, fs_reg(ir->value.i[i])));
1455 break;
1456 case GLSL_TYPE_BOOL:
1457 emit(MOV(dst_reg, fs_reg((int)ir->value.b[i])));
1458 break;
1459 default:
1460 assert(!"Non-float/uint/int/bool constant");
1461 }
1462 dst_reg.reg_offset++;
1463 }
1464 }
1465
1466 this->result = reg;
1467 }
1468
1469 void
1470 fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
1471 {
1472 ir_expression *expr = ir->as_expression();
1473
1474 if (expr) {
1475 fs_reg op[2];
1476 fs_inst *inst;
1477
1478 assert(expr->get_num_operands() <= 2);
1479 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
1480 assert(expr->operands[i]->type->is_scalar());
1481
1482 expr->operands[i]->accept(this);
1483 op[i] = this->result;
1484
1485 resolve_ud_negate(&op[i]);
1486 }
1487
1488 switch (expr->operation) {
1489 case ir_unop_logic_not:
1490 inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
1491 inst->conditional_mod = BRW_CONDITIONAL_Z;
1492 break;
1493
1494 case ir_binop_logic_xor:
1495 case ir_binop_logic_or:
1496 case ir_binop_logic_and:
1497 goto out;
1498
1499 case ir_unop_f2b:
1500 if (intel->gen >= 6) {
1501 emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
1502 } else {
1503 inst = emit(MOV(reg_null_f, op[0]));
1504 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1505 }
1506 break;
1507
1508 case ir_unop_i2b:
1509 if (intel->gen >= 6) {
1510 emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
1511 } else {
1512 inst = emit(MOV(reg_null_d, op[0]));
1513 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1514 }
1515 break;
1516
1517 case ir_binop_greater:
1518 case ir_binop_gequal:
1519 case ir_binop_less:
1520 case ir_binop_lequal:
1521 case ir_binop_equal:
1522 case ir_binop_all_equal:
1523 case ir_binop_nequal:
1524 case ir_binop_any_nequal:
1525 resolve_bool_comparison(expr->operands[0], &op[0]);
1526 resolve_bool_comparison(expr->operands[1], &op[1]);
1527
1528 emit(CMP(reg_null_d, op[0], op[1],
1529 brw_conditional_for_comparison(expr->operation)));
1530 break;
1531
1532 default:
1533 assert(!"not reached");
1534 fail("bad cond code\n");
1535 break;
1536 }
1537 return;
1538 }
1539
1540 out:
1541 ir->accept(this);
1542
1543 fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1)));
1544 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1545 }
1546
1547 /**
1548 * Emit a gen6 IF statement with the comparison folded into the IF
1549 * instruction.
1550 */
1551 void
1552 fs_visitor::emit_if_gen6(ir_if *ir)
1553 {
1554 ir_expression *expr = ir->condition->as_expression();
1555
1556 if (expr) {
1557 fs_reg op[2];
1558 fs_inst *inst;
1559 fs_reg temp;
1560
1561 assert(expr->get_num_operands() <= 2);
1562 for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
1563 assert(expr->operands[i]->type->is_scalar());
1564
1565 expr->operands[i]->accept(this);
1566 op[i] = this->result;
1567 }
1568
1569 switch (expr->operation) {
1570 case ir_unop_logic_not:
1571 case ir_binop_logic_xor:
1572 case ir_binop_logic_or:
1573 case ir_binop_logic_and:
1574 /* For operations on bool arguments, only the low bit of the bool is
1575 * valid, and the others are undefined. Fall back to the condition
1576 * code path.
1577 */
1578 break;
1579
1580 case ir_unop_f2b:
1581 inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0));
1582 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1583 return;
1584
1585 case ir_unop_i2b:
1586 emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
1587 return;
1588
1589 case ir_binop_greater:
1590 case ir_binop_gequal:
1591 case ir_binop_less:
1592 case ir_binop_lequal:
1593 case ir_binop_equal:
1594 case ir_binop_all_equal:
1595 case ir_binop_nequal:
1596 case ir_binop_any_nequal:
1597 resolve_bool_comparison(expr->operands[0], &op[0]);
1598 resolve_bool_comparison(expr->operands[1], &op[1]);
1599
1600 emit(IF(op[0], op[1],
1601 brw_conditional_for_comparison(expr->operation)));
1602 return;
1603 default:
1604 assert(!"not reached");
1605 emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
1606 fail("bad condition\n");
1607 return;
1608 }
1609 }
1610
1611 emit_bool_to_cond_code(ir->condition);
1612 fs_inst *inst = emit(BRW_OPCODE_IF);
1613 inst->predicate = BRW_PREDICATE_NORMAL;
1614 }
1615
1616 void
1617 fs_visitor::visit(ir_if *ir)
1618 {
1619 if (intel->gen < 6 && dispatch_width == 16) {
1620 fail("Can't support (non-uniform) control flow on 16-wide\n");
1621 }
1622
1623 /* Don't point the annotation at the if statement, because then it plus
1624 * the then and else blocks get printed.
1625 */
1626 this->base_ir = ir->condition;
1627
1628 if (intel->gen == 6) {
1629 emit_if_gen6(ir);
1630 } else {
1631 emit_bool_to_cond_code(ir->condition);
1632
1633 emit(IF(BRW_PREDICATE_NORMAL));
1634 }
1635
1636 foreach_list(node, &ir->then_instructions) {
1637 ir_instruction *ir = (ir_instruction *)node;
1638 this->base_ir = ir;
1639
1640 ir->accept(this);
1641 }
1642
1643 if (!ir->else_instructions.is_empty()) {
1644 emit(BRW_OPCODE_ELSE);
1645
1646 foreach_list(node, &ir->else_instructions) {
1647 ir_instruction *ir = (ir_instruction *)node;
1648 this->base_ir = ir;
1649
1650 ir->accept(this);
1651 }
1652 }
1653
1654 emit(BRW_OPCODE_ENDIF);
1655 }
1656
1657 void
1658 fs_visitor::visit(ir_loop *ir)
1659 {
1660 fs_reg counter = reg_undef;
1661
1662 if (intel->gen < 6 && dispatch_width == 16) {
1663 fail("Can't support (non-uniform) control flow on 16-wide\n");
1664 }
1665
1666 if (ir->counter) {
1667 this->base_ir = ir->counter;
1668 ir->counter->accept(this);
1669 counter = *(variable_storage(ir->counter));
1670
1671 if (ir->from) {
1672 this->base_ir = ir->from;
1673 ir->from->accept(this);
1674
1675 emit(MOV(counter, this->result));
1676 }
1677 }
1678
1679 this->base_ir = NULL;
1680 emit(BRW_OPCODE_DO);
1681
1682 if (ir->to) {
1683 this->base_ir = ir->to;
1684 ir->to->accept(this);
1685
1686 emit(CMP(reg_null_d, counter, this->result,
1687 brw_conditional_for_comparison(ir->cmp)));
1688
1689 fs_inst *inst = emit(BRW_OPCODE_BREAK);
1690 inst->predicate = BRW_PREDICATE_NORMAL;
1691 }
1692
1693 foreach_list(node, &ir->body_instructions) {
1694 ir_instruction *ir = (ir_instruction *)node;
1695
1696 this->base_ir = ir;
1697 ir->accept(this);
1698 }
1699
1700 if (ir->increment) {
1701 this->base_ir = ir->increment;
1702 ir->increment->accept(this);
1703 emit(ADD(counter, counter, this->result));
1704 }
1705
1706 this->base_ir = NULL;
1707 emit(BRW_OPCODE_WHILE);
1708 }
1709
1710 void
1711 fs_visitor::visit(ir_loop_jump *ir)
1712 {
1713 switch (ir->mode) {
1714 case ir_loop_jump::jump_break:
1715 emit(BRW_OPCODE_BREAK);
1716 break;
1717 case ir_loop_jump::jump_continue:
1718 emit(BRW_OPCODE_CONTINUE);
1719 break;
1720 }
1721 }
1722
1723 void
1724 fs_visitor::visit(ir_call *ir)
1725 {
1726 assert(!"FINISHME");
1727 }
1728
1729 void
1730 fs_visitor::visit(ir_return *ir)
1731 {
1732 assert(!"FINISHME");
1733 }
1734
1735 void
1736 fs_visitor::visit(ir_function *ir)
1737 {
1738 /* Ignore function bodies other than main() -- we shouldn't see calls to
1739 * them since they should all be inlined by the time we get here.
1740 */
1741 if (strcmp(ir->name, "main") == 0) {
1742 const ir_function_signature *sig;
1743 exec_list empty;
1744
1745 sig = ir->matching_signature(&empty);
1746
1747 assert(sig);
1748
1749 foreach_list(node, &sig->body) {
1750 ir_instruction *ir = (ir_instruction *)node;
1751 this->base_ir = ir;
1752
1753 ir->accept(this);
1754 }
1755 }
1756 }
1757
1758 void
1759 fs_visitor::visit(ir_function_signature *ir)
1760 {
1761 assert(!"not reached");
1762 (void)ir;
1763 }
1764
1765 fs_inst *
1766 fs_visitor::emit(fs_inst inst)
1767 {
1768 fs_inst *list_inst = new(mem_ctx) fs_inst;
1769 *list_inst = inst;
1770 emit(list_inst);
1771 return list_inst;
1772 }
1773
1774 fs_inst *
1775 fs_visitor::emit(fs_inst *inst)
1776 {
1777 if (force_uncompressed_stack > 0)
1778 inst->force_uncompressed = true;
1779 else if (force_sechalf_stack > 0)
1780 inst->force_sechalf = true;
1781
1782 inst->annotation = this->current_annotation;
1783 inst->ir = this->base_ir;
1784
1785 this->instructions.push_tail(inst);
1786
1787 return inst;
1788 }
1789
1790 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1791 void
1792 fs_visitor::emit_dummy_fs()
1793 {
1794 int reg_width = dispatch_width / 8;
1795
1796 /* Everyone's favorite color. */
1797 emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)));
1798 emit(MOV(fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f)));
1799 emit(MOV(fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f)));
1800 emit(MOV(fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f)));
1801
1802 fs_inst *write;
1803 write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0));
1804 write->base_mrf = 2;
1805 write->mlen = 4 * reg_width;
1806 write->eot = true;
1807 }
1808
1809 /* The register location here is relative to the start of the URB
1810 * data. It will get adjusted to be a real location before
1811 * generate_code() time.
1812 */
1813 struct brw_reg
1814 fs_visitor::interp_reg(int location, int channel)
1815 {
1816 int regnr = urb_setup[location] * 2 + channel / 2;
1817 int stride = (channel & 1) * 4;
1818
1819 assert(urb_setup[location] != -1);
1820
1821 return brw_vec1_grf(regnr, stride);
1822 }
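/* Worked example: each attribute's setup data takes two registers, with
 * two channels packed per register.  For channel 3 of an attribute with
 * urb_setup[location] == 5, regnr = 5 * 2 + 3 / 2 = 11 and the
 * suboffset is (3 & 1) * 4 = 4, i.e. the second half of the attribute's
 * second setup register.
 */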
1823
1824 /** Emits the interpolation for the varying inputs. */
1825 void
1826 fs_visitor::emit_interpolation_setup_gen4()
1827 {
1828 this->current_annotation = "compute pixel centers";
1829 this->pixel_x = fs_reg(this, glsl_type::uint_type);
1830 this->pixel_y = fs_reg(this, glsl_type::uint_type);
1831 this->pixel_x.type = BRW_REGISTER_TYPE_UW;
1832 this->pixel_y.type = BRW_REGISTER_TYPE_UW;
1833
1834 emit(FS_OPCODE_PIXEL_X, this->pixel_x);
1835 emit(FS_OPCODE_PIXEL_Y, this->pixel_y);
1836
1837 this->current_annotation = "compute pixel deltas from v0";
1838 if (brw->has_pln) {
1839 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
1840 fs_reg(this, glsl_type::vec2_type);
1841 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
1842 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
1843 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++;
1844 } else {
1845 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
1846 fs_reg(this, glsl_type::float_type);
1847 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
1848 fs_reg(this, glsl_type::float_type);
1849 }
1850 emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
1851 this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))));
1852 emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
1853 this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))));
1854
1855 this->current_annotation = "compute pos.w and 1/pos.w";
1856 /* Compute wpos.w. It's always in our setup, since it's needed to
1857 * interpolate the other attributes.
1858 */
1859 this->wpos_w = fs_reg(this, glsl_type::float_type);
1860 emit(FS_OPCODE_LINTERP, wpos_w,
1861 this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
1862 this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
1863 interp_reg(FRAG_ATTRIB_WPOS, 3));
1864 /* Compute the pixel 1/W value from wpos.w. */
1865 this->pixel_w = fs_reg(this, glsl_type::float_type);
1866 emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
1867 this->current_annotation = NULL;
1868 }
1869
1870 /** Emits the interpolation for the varying inputs. */
1871 void
1872 fs_visitor::emit_interpolation_setup_gen6()
1873 {
1874 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1875
1876 /* If the pixel centers end up being used, the setup is the same as for gen4. */
1877 this->current_annotation = "compute pixel centers";
1878 fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
1879 fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
1880 int_pixel_x.type = BRW_REGISTER_TYPE_UW;
1881 int_pixel_y.type = BRW_REGISTER_TYPE_UW;
1882 emit(ADD(int_pixel_x,
1883 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1884 fs_reg(brw_imm_v(0x10101010))));
1885 emit(ADD(int_pixel_y,
1886 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1887 fs_reg(brw_imm_v(0x11001100))));
1888
1889 /* As of gen6, we can no longer mix float and int sources. We have
1890 * to turn the integer pixel centers into floats for their actual
1891 * use.
1892 */
1893 this->pixel_x = fs_reg(this, glsl_type::float_type);
1894 this->pixel_y = fs_reg(this, glsl_type::float_type);
1895 emit(MOV(this->pixel_x, int_pixel_x));
1896 emit(MOV(this->pixel_y, int_pixel_y));
1897
1898 this->current_annotation = "compute pos.w";
1899 this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
1900 this->wpos_w = fs_reg(this, glsl_type::float_type);
1901 emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);
1902
1903 for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
1904 uint8_t reg = c->barycentric_coord_reg[i];
1905 this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
1906 this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
1907 }
1908
1909 this->current_annotation = NULL;
1910 }
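/* The brw_imm_v() arguments above are packed vectors of eight 4-bit
 * values, lowest nibble first: 0x10101010 decodes to <0,1,0,1,0,1,0,1>
 * and 0x11001100 to <0,0,1,1,0,0,1,1>.  Adding those to the subspan
 * origin coordinates replicated from g1 yields the integer x/y pixel
 * positions of two 2x2 subspans at once.
 */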
1911
1912 void
1913 fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
1914 {
1915 int reg_width = dispatch_width / 8;
1916 fs_inst *inst;
1917 fs_reg color = outputs[target];
1918 fs_reg mrf;
1919
1920 /* If there's no color data to be written, skip it. */
1921 if (color.file == BAD_FILE)
1922 return;
1923
1924 color.reg_offset += index;
1925
1926 if (dispatch_width == 8 || intel->gen >= 6) {
1927 /* SIMD8 write looks like:
1928 * m + 0: r0
1929 * m + 1: r1
1930 * m + 2: g0
1931 * m + 3: g1
1932 *
1933 * gen6 SIMD16 DP write looks like:
1934 * m + 0: r0
1935 * m + 1: r1
1936 * m + 2: g0
1937 * m + 3: g1
1938 * m + 4: b0
1939 * m + 5: b1
1940 * m + 6: a0
1941 * m + 7: a1
1942 */
1943 inst = emit(MOV(fs_reg(MRF, first_color_mrf + index * reg_width,
1944 color.type),
1945 color));
1946 inst->saturate = c->key.clamp_fragment_color;
1947 } else {
1948 /* pre-gen6 SIMD16 single source DP write looks like:
1949 * m + 0: r0
1950 * m + 1: g0
1951 * m + 2: b0
1952 * m + 3: a0
1953 * m + 4: r1
1954 * m + 5: g1
1955 * m + 6: b1
1956 * m + 7: a1
1957 */
1958 if (brw->has_compr4) {
1959 /* By setting the high bit of the MRF register number, we
1960 * indicate that we want COMPR4 mode - instead of doing the
1961 * usual destination + 1 for the second half we get
1962 * destination + 4.
1963 */
1964 inst = emit(MOV(fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
1965 color.type),
1966 color));
1967 inst->saturate = c->key.clamp_fragment_color;
1968 } else {
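         /* Without COMPR4, emit the two SIMD8 halves separately: the first
          * half uncompressed into m + index, then the second half (sechalf)
          * into m + index + 4.
          */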
1969 push_force_uncompressed();
1970 inst = emit(MOV(fs_reg(MRF, first_color_mrf + index, color.type),
1971 color));
1972 inst->saturate = c->key.clamp_fragment_color;
1973 pop_force_uncompressed();
1974
1975 push_force_sechalf();
1976 color.sechalf = true;
1977 inst = emit(MOV(fs_reg(MRF, first_color_mrf + index + 4, color.type),
1978 color));
1979 inst->saturate = c->key.clamp_fragment_color;
1980 pop_force_sechalf();
1981 color.sechalf = false;
1982 }
1983 }
1984 }
1985
1986 void
1987 fs_visitor::emit_fb_writes()
1988 {
1989 this->current_annotation = "FB write header";
1990 bool header_present = true;
1991 /* We can potentially have a message length of up to 15, so we have to set
1992 * base_mrf to either 0 or 1 in order to fit in m0..m15.
1993 */
1994 int base_mrf = 1;
1995 int nr = base_mrf;
1996 int reg_width = dispatch_width / 8;
1997 bool do_dual_src = this->dual_src_output.file != BAD_FILE;
1998 bool src0_alpha_to_render_target = false;
1999
2000 if (dispatch_width == 16 && do_dual_src) {
2001 fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");
2002 do_dual_src = false;
2003 }
2004
2005 /* From the Sandy Bridge PRM, volume 4, page 198:
2006 *
2007 * "Dispatched Pixel Enables. One bit per pixel indicating
2008 * which pixels were originally enabled when the thread was
2009 * dispatched. This field is only required for the end-of-
2010 * thread message and on all dual-source messages."
2011 */
2012 if (intel->gen >= 6 &&
2013 !this->fp->UsesKill &&
2014 !do_dual_src &&
2015 c->key.nr_color_regions == 1) {
2016 header_present = false;
2017 }
2018
2019 if (header_present) {
2020 src0_alpha_to_render_target = intel->gen >= 6 &&
2021 !do_dual_src &&
2022 c->key.nr_color_regions > 1 &&
2023 c->key.sample_alpha_to_coverage;
2024       /* The message header occupies two MRFs (m1 and m2, given base_mrf == 1). */
2025 nr += 2;
2026 }
2027
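   /* Assemble the rest of the message payload in order: AA destination
    * stencil (if present), the color data, then optional source and
    * destination depth, bumping nr along the way so the final message
    * length comes out right.
    */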
2028 if (c->aa_dest_stencil_reg) {
2029 push_force_uncompressed();
2030 emit(MOV(fs_reg(MRF, nr++),
2031 fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0))));
2032 pop_force_uncompressed();
2033 }
2034
2035 /* Reserve space for color. It'll be filled in per MRT below. */
2036 int color_mrf = nr;
2037 nr += 4 * reg_width;
2038 if (do_dual_src)
2039 nr += 4;
2040 if (src0_alpha_to_render_target)
2041 nr += reg_width;
2042
2043 if (c->source_depth_to_render_target) {
2044 if (intel->gen == 6 && dispatch_width == 16) {
2045 /* For outputting oDepth on gen6, SIMD8 writes have to be
2046 * used. This would require 8-wide moves of each half to
2047 * message regs, kind of like pre-gen5 SIMD16 FB writes.
2048 * Just bail on doing so for now.
2049 */
2050 fail("Missing support for simd16 depth writes on gen6\n");
2051 }
2052
2053 if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
2054 /* Hand over gl_FragDepth. */
2055 assert(this->frag_depth.file != BAD_FILE);
2056 emit(MOV(fs_reg(MRF, nr), this->frag_depth));
2057 } else {
2058 /* Pass through the payload depth. */
2059 emit(MOV(fs_reg(MRF, nr),
2060 fs_reg(brw_vec8_grf(c->source_depth_reg, 0))));
2061 }
2062 nr += reg_width;
2063 }
2064
2065 if (c->dest_depth_reg) {
2066 emit(MOV(fs_reg(MRF, nr),
2067 fs_reg(brw_vec8_grf(c->dest_depth_reg, 0))));
2068 nr += reg_width;
2069 }
2070
2071 if (do_dual_src) {
2072 fs_reg src0 = this->outputs[0];
2073 fs_reg src1 = this->dual_src_output;
2074
2075 this->current_annotation = ralloc_asprintf(this->mem_ctx,
2076 "FB write src0");
2077 for (int i = 0; i < 4; i++) {
2078 fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0));
2079 src0.reg_offset++;
2080 inst->saturate = c->key.clamp_fragment_color;
2081 }
2082
2083 this->current_annotation = ralloc_asprintf(this->mem_ctx,
2084 "FB write src1");
2085 for (int i = 0; i < 4; i++) {
2086 fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type),
2087 src1));
2088 src1.reg_offset++;
2089 inst->saturate = c->key.clamp_fragment_color;
2090 }
2091
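      /* Dual-source blend uses a single render target write carrying both
       * colors; it also serves as the end-of-thread message.
       */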
2092 fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
2093 inst->target = 0;
2094 inst->base_mrf = base_mrf;
2095 inst->mlen = nr - base_mrf;
2096 inst->eot = true;
2097 inst->header_present = header_present;
2098
2099 c->prog_data.dual_src_blend = true;
2100 this->current_annotation = NULL;
2101 return;
2102 }
2103
2104 for (int target = 0; target < c->key.nr_color_regions; target++) {
2105 this->current_annotation = ralloc_asprintf(this->mem_ctx,
2106 "FB write target %d",
2107 target);
2108 /* If src0_alpha_to_render_target is true, include source zero alpha
2109        * data in the RenderTargetWrite message for targets > 0.
2110 */
2111 int write_color_mrf = color_mrf;
2112 if (src0_alpha_to_render_target && target != 0) {
2113 fs_inst *inst;
2114 fs_reg color = outputs[0];
2115 color.reg_offset += 3;
2116
2117 inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type),
2118 color));
2119 inst->saturate = c->key.clamp_fragment_color;
2120 write_color_mrf = color_mrf + reg_width;
2121 }
2122
2123 for (unsigned i = 0; i < this->output_components[target]; i++)
2124 emit_color_write(target, i, write_color_mrf);
2125
2126 fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
2127 inst->target = target;
2128 inst->base_mrf = base_mrf;
2129 if (src0_alpha_to_render_target && target == 0)
2130 inst->mlen = nr - base_mrf - reg_width;
2131 else
2132 inst->mlen = nr - base_mrf;
2133 if (target == c->key.nr_color_regions - 1)
2134 inst->eot = true;
2135 inst->header_present = header_present;
2136 }
2137
2138 if (c->key.nr_color_regions == 0) {
2139       /* Even if there are no color buffers enabled, we still need to send
2140        * alpha down the pipeline to our null renderbuffer to support
2141 * alpha-testing, alpha-to-coverage, and so on.
2142 */
2143 emit_color_write(0, 3, color_mrf);
2144
2145 fs_inst *inst = emit(FS_OPCODE_FB_WRITE);
2146 inst->base_mrf = base_mrf;
2147 inst->mlen = nr - base_mrf;
2148 inst->eot = true;
2149 inst->header_present = header_present;
2150 }
2151
2152 this->current_annotation = NULL;
2153 }
2154
2155 void
2156 fs_visitor::resolve_ud_negate(fs_reg *reg)
2157 {
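   /* Resolve a pending negate modifier on an unsigned source by applying
    * it through a MOV into a temporary, so that later instructions see a
    * plain UD value.
    */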
2158 if (reg->type != BRW_REGISTER_TYPE_UD ||
2159 !reg->negate)
2160 return;
2161
2162 fs_reg temp = fs_reg(this, glsl_type::uint_type);
2163 emit(MOV(temp, *reg));
2164 *reg = temp;
2165 }
2166
2167 void
2168 fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)
2169 {
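   /* A comparison result may have more than just its low bit set; AND
    * with 1 canonicalizes the boolean to 0 or 1 before it is consumed as
    * a value.
    */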
2170 if (rvalue->type != glsl_type::bool_type)
2171 return;
2172
2173 fs_reg temp = fs_reg(this, glsl_type::bool_type);
2174 emit(AND(temp, *reg, fs_reg(1)));
2175 *reg = temp;
2176 }
2177
2178 fs_visitor::fs_visitor(struct brw_context *brw,
2179 struct brw_wm_compile *c,
2180 struct gl_shader_program *prog,
2181 struct gl_fragment_program *fp,
2182 unsigned dispatch_width)
2183 : dispatch_width(dispatch_width)
2184 {
2185 this->c = c;
2186 this->brw = brw;
2187 this->fp = fp;
2188 this->prog = prog;
2189 this->intel = &brw->intel;
2190 this->ctx = &intel->ctx;
2191 this->mem_ctx = ralloc_context(NULL);
2192 if (prog)
2193 shader = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
2194 else
2195 shader = NULL;
2196 this->failed = false;
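   /* variable_ht maps each ir_variable to the fs_reg storage backing it. */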
2197 this->variable_ht = hash_table_ctor(0,
2198 hash_table_pointer_hash,
2199 hash_table_pointer_compare);
2200
2201 memset(this->outputs, 0, sizeof(this->outputs));
2202 memset(this->output_components, 0, sizeof(this->output_components));
2203 this->first_non_payload_grf = 0;
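   /* Gen7 has no MRF file; message payloads are built in GRFs instead, so
    * stop the allocator short of the GRF range used as stand-in MRFs.
    */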
2204 this->max_grf = intel->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
2205
2206 this->current_annotation = NULL;
2207 this->base_ir = NULL;
2208
2209 this->virtual_grf_sizes = NULL;
2210 this->virtual_grf_count = 0;
2211 this->virtual_grf_array_size = 0;
2212 this->virtual_grf_def = NULL;
2213 this->virtual_grf_use = NULL;
2214 this->live_intervals_valid = false;
2215
2216 this->force_uncompressed_stack = 0;
2217 this->force_sechalf_stack = 0;
2218 }
2219
2220 fs_visitor::~fs_visitor()
2221 {
2222 ralloc_free(this->mem_ctx);
2223 hash_table_dtor(this->variable_ht);
2224 }