i965: Don't compute-to-MRF in gen6 math instructions.
[mesa.git] src/mesa/drivers/dri/i965/brw_fs.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 extern "C" {
29
30 #include <sys/types.h>
31
32 #include "main/macros.h"
33 #include "main/shaderobj.h"
34 #include "main/uniforms.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_print.h"
37 #include "program/prog_optimize.h"
38 #include "program/register_allocate.h"
39 #include "program/sampler.h"
40 #include "program/hash_table.h"
41 #include "brw_context.h"
42 #include "brw_eu.h"
43 #include "brw_wm.h"
44 #include "talloc.h"
45 }
46 #include "brw_fs.h"
47 #include "../glsl/glsl_types.h"
48 #include "../glsl/ir_optimization.h"
49 #include "../glsl/ir_print_visitor.h"
50
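/* The new scalar FS backend is opt-in during bringup: it is only used for
 * fragment shaders when the INTEL_NEW_FS environment variable is set (see
 * brw_link_shader() below).
 */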
51 static int using_new_fs = -1;
52 static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
53
54 struct gl_shader *
55 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
56 {
57 struct brw_shader *shader;
58
59 shader = talloc_zero(NULL, struct brw_shader);
60 if (shader) {
61 shader->base.Type = type;
62 shader->base.Name = name;
63 _mesa_init_shader(ctx, &shader->base);
64 }
65
66 return &shader->base;
67 }
68
69 struct gl_shader_program *
70 brw_new_shader_program(GLcontext *ctx, GLuint name)
71 {
72 struct brw_shader_program *prog;
73 prog = talloc_zero(NULL, struct brw_shader_program);
74 if (prog) {
75 prog->base.Name = name;
76 _mesa_init_shader_program(ctx, &prog->base);
77 }
78 return &prog->base;
79 }
80
81 GLboolean
82 brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
83 {
84 if (!_mesa_ir_compile_shader(ctx, shader))
85 return GL_FALSE;
86
87 return GL_TRUE;
88 }
89
90 GLboolean
91 brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
92 {
93 struct intel_context *intel = intel_context(ctx);
94 if (using_new_fs == -1)
95 using_new_fs = getenv("INTEL_NEW_FS") != NULL;
96
97 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
98 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
99
100 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
101 void *mem_ctx = talloc_new(NULL);
102 bool progress;
103
104 if (shader->ir)
105 talloc_free(shader->ir);
106 shader->ir = new(shader) exec_list;
107 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
108
109 do_mat_op_to_vec(shader->ir);
110 do_mod_to_fract(shader->ir);
111 do_div_to_mul_rcp(shader->ir);
112 do_sub_to_add_neg(shader->ir);
113 do_explog_to_explog2(shader->ir);
114 do_lower_texture_projection(shader->ir);
115 brw_do_cubemap_normalize(shader->ir);
116
117 do {
118 progress = false;
119
120 brw_do_channel_expressions(shader->ir);
121 brw_do_vector_splitting(shader->ir);
122
123 progress = do_lower_jumps(shader->ir, true, true,
124 true, /* main return */
125 false, /* continue */
126 false /* loops */
127 ) || progress;
128
129 progress = do_common_optimization(shader->ir, true, 32) || progress;
130
131 progress = lower_noise(shader->ir) || progress;
132 progress =
133 lower_variable_index_to_cond_assign(shader->ir,
134 GL_TRUE, /* input */
135 GL_TRUE, /* output */
136 GL_TRUE, /* temp */
137 GL_TRUE /* uniform */
138 ) || progress;
139 if (intel->gen == 6) {
140 progress = do_if_to_cond_assign(shader->ir) || progress;
141 }
142 } while (progress);
143
144 validate_ir_tree(shader->ir);
145
146 reparent_ir(shader->ir, shader->ir);
147 talloc_free(mem_ctx);
148 }
149 }
150
151 if (!_mesa_ir_link_shader(ctx, prog))
152 return GL_FALSE;
153
154 return GL_TRUE;
155 }
156
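/* Returns the number of scalar slots occupied by a value of the given
 * type: one slot per component, with arrays and structs laid out
 * contiguously, and samplers taking no space since they are baked in at
 * link time.
 */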
157 static int
158 type_size(const struct glsl_type *type)
159 {
160 unsigned int size, i;
161
162 switch (type->base_type) {
163 case GLSL_TYPE_UINT:
164 case GLSL_TYPE_INT:
165 case GLSL_TYPE_FLOAT:
166 case GLSL_TYPE_BOOL:
167 return type->components();
168 case GLSL_TYPE_ARRAY:
169 return type_size(type->fields.array) * type->length;
170 case GLSL_TYPE_STRUCT:
171 size = 0;
172 for (i = 0; i < type->length; i++) {
173 size += type_size(type->fields.structure[i].type);
174 }
175 return size;
176 case GLSL_TYPE_SAMPLER:
177 /* Samplers take up no register space, since they're baked in at
178 * link time.
179 */
180 return 0;
181 default:
182 assert(!"not reached");
183 return 0;
184 }
185 }
186
187 static const fs_reg reg_undef;
188 static const fs_reg reg_null(ARF, BRW_ARF_NULL);
189
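/* Allocates a virtual GRF of the given size (in scalar slots) and returns
 * its index, growing the sizes array as needed. Index 0 is treated as the
 * "unallocated" sentinel, so real allocations start at 1.
 */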
190 int
191 fs_visitor::virtual_grf_alloc(int size)
192 {
193 if (virtual_grf_array_size <= virtual_grf_next) {
194 if (virtual_grf_array_size == 0)
195 virtual_grf_array_size = 16;
196 else
197 virtual_grf_array_size *= 2;
198 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
199 int, virtual_grf_array_size);
200
201 /* This slot is always unused. */
202 virtual_grf_sizes[0] = 0;
203 }
204 virtual_grf_sizes[virtual_grf_next] = size;
205 return virtual_grf_next++;
206 }
207
208 /** Fixed HW reg constructor. */
209 fs_reg::fs_reg(enum register_file file, int hw_reg)
210 {
211 init();
212 this->file = file;
213 this->hw_reg = hw_reg;
214 this->type = BRW_REGISTER_TYPE_F;
215 }
216
217 int
218 brw_type_for_base_type(const struct glsl_type *type)
219 {
220 switch (type->base_type) {
221 case GLSL_TYPE_FLOAT:
222 return BRW_REGISTER_TYPE_F;
223 case GLSL_TYPE_INT:
224 case GLSL_TYPE_BOOL:
225 return BRW_REGISTER_TYPE_D;
226 case GLSL_TYPE_UINT:
227 return BRW_REGISTER_TYPE_UD;
228 case GLSL_TYPE_ARRAY:
229 case GLSL_TYPE_STRUCT:
230 /* These should be overridden with the type of the member when
231 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
232 * way to trip up if we don't.
233 */
234 return BRW_REGISTER_TYPE_UD;
235 default:
236 assert(!"not reached");
237 return BRW_REGISTER_TYPE_F;
238 }
239 }
240
241 /** Automatic reg constructor. */
242 fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
243 {
244 init();
245
246 this->file = GRF;
247 this->reg = v->virtual_grf_alloc(type_size(type));
248 this->reg_offset = 0;
249 this->type = brw_type_for_base_type(type);
250 }
251
252 fs_reg *
253 fs_visitor::variable_storage(ir_variable *var)
254 {
255 return (fs_reg *)hash_table_find(this->variable_ht, var);
256 }
257
258 /* Our support for uniforms is piggy-backed on the struct
259 * gl_fragment_program, because that's where the values actually
260 * get stored, rather than in some global gl_shader_program uniform
261 * store.
262 */
263 int
264 fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
265 {
266 unsigned int offset = 0;
267 float *vec_values;
268
269 if (type->is_matrix()) {
270 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
271 type->vector_elements,
272 1);
273
274 for (unsigned int i = 0; i < type->matrix_columns; i++) {
275 offset += setup_uniform_values(loc + offset, column);
276 }
277
278 return offset;
279 }
280
281 switch (type->base_type) {
282 case GLSL_TYPE_FLOAT:
283 case GLSL_TYPE_UINT:
284 case GLSL_TYPE_INT:
285 case GLSL_TYPE_BOOL:
286 vec_values = fp->Base.Parameters->ParameterValues[loc];
287 for (unsigned int i = 0; i < type->vector_elements; i++) {
288 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
289 }
290 return 1;
291
292 case GLSL_TYPE_STRUCT:
293 for (unsigned int i = 0; i < type->length; i++) {
294 offset += setup_uniform_values(loc + offset,
295 type->fields.structure[i].type);
296 }
297 return offset;
298
299 case GLSL_TYPE_ARRAY:
300 for (unsigned int i = 0; i < type->length; i++) {
301 offset += setup_uniform_values(loc + offset, type->fields.array);
302 }
303 return offset;
304
305 case GLSL_TYPE_SAMPLER:
306 /* The sampler takes up a slot, but we don't use any values from it. */
307 return 1;
308
309 default:
310 assert(!"not reached");
311 return 0;
312 }
313 }
314
315
316 /* Our support for builtin uniforms is even scarier than non-builtin.
317 * It sits on top of the PROG_STATE_VAR parameters that are
318 * automatically updated from GL context state.
319 */
320 void
321 fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
322 {
323 const struct gl_builtin_uniform_desc *statevar = NULL;
324
325 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
326 statevar = &_mesa_builtin_uniform_desc[i];
327 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0)
328 break;
329 }
330
331     if (!statevar || strcmp(ir->name, statevar->name) != 0) {
332 this->fail = true;
333 printf("Failed to find builtin uniform `%s'\n", ir->name);
334 return;
335 }
336
337 int array_count;
338 if (ir->type->is_array()) {
339 array_count = ir->type->length;
340 } else {
341 array_count = 1;
342 }
343
344 for (int a = 0; a < array_count; a++) {
345 for (unsigned int i = 0; i < statevar->num_elements; i++) {
346 struct gl_builtin_uniform_element *element = &statevar->elements[i];
347 int tokens[STATE_LENGTH];
348
349 memcpy(tokens, element->tokens, sizeof(element->tokens));
350 if (ir->type->is_array()) {
351 tokens[1] = a;
352 }
353
354 /* This state reference has already been setup by ir_to_mesa,
355 * but we'll get the same index back here.
356 */
357 int index = _mesa_add_state_reference(this->fp->Base.Parameters,
358 (gl_state_index *)tokens);
359 float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
360
361 /* Add each of the unique swizzles of the element as a
362 * parameter. This'll end up matching the expected layout of
363 * the array/matrix/structure we're trying to fill in.
364 */
365 int last_swiz = -1;
366 for (unsigned int i = 0; i < 4; i++) {
367 int swiz = GET_SWZ(element->swizzle, i);
368 if (swiz == last_swiz)
369 break;
370 last_swiz = swiz;
371
372 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
373 }
374 }
375 }
376 }
377
378 fs_reg *
379 fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
380 {
381 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
382 fs_reg wpos = *reg;
383 fs_reg neg_y = this->pixel_y;
384 neg_y.negate = true;
385
386 /* gl_FragCoord.x */
387 if (ir->pixel_center_integer) {
388 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
389 } else {
390 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
391 }
392 wpos.reg_offset++;
393
394 /* gl_FragCoord.y */
395 if (ir->origin_upper_left && ir->pixel_center_integer) {
396 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
397 } else {
398 fs_reg pixel_y = this->pixel_y;
399 float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
400
401 if (!ir->origin_upper_left) {
402 pixel_y.negate = true;
403 offset += c->key.drawable_height - 1.0;
404 }
405
406 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
407 }
408 wpos.reg_offset++;
409
410 /* gl_FragCoord.z */
411 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
412 interp_reg(FRAG_ATTRIB_WPOS, 2)));
413 wpos.reg_offset++;
414
415 /* gl_FragCoord.w: Already set up in emit_interpolation */
416 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));
417
418 return reg;
419 }
420
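/* Emits per-channel LINTERP instructions for a varying input, walking any
 * array or matrix structure and skipping slots with no incoming setup
 * data. On pre-gen6 hardware each interpolated channel is then multiplied
 * by pixel_w to apply the perspective correction.
 */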
421 fs_reg *
422 fs_visitor::emit_general_interpolation(ir_variable *ir)
423 {
424 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
425 /* Interpolation is always in floating point regs. */
426 reg->type = BRW_REGISTER_TYPE_F;
427 fs_reg attr = *reg;
428
429 unsigned int array_elements;
430 const glsl_type *type;
431
432 if (ir->type->is_array()) {
433 array_elements = ir->type->length;
434 if (array_elements == 0) {
435 this->fail = true;
436 }
437 type = ir->type->fields.array;
438 } else {
439 array_elements = 1;
440 type = ir->type;
441 }
442
443 int location = ir->location;
444 for (unsigned int i = 0; i < array_elements; i++) {
445 for (unsigned int j = 0; j < type->matrix_columns; j++) {
446 if (urb_setup[location] == -1) {
447 /* If there's no incoming setup data for this slot, don't
448 * emit interpolation for it.
449 */
450 attr.reg_offset += type->vector_elements;
451 location++;
452 continue;
453 }
454
455 for (unsigned int c = 0; c < type->vector_elements; c++) {
456 struct brw_reg interp = interp_reg(location, c);
457 emit(fs_inst(FS_OPCODE_LINTERP,
458 attr,
459 this->delta_x,
460 this->delta_y,
461 fs_reg(interp)));
462 attr.reg_offset++;
463 }
464
465 if (intel->gen < 6) {
466 attr.reg_offset -= type->vector_elements;
467 for (unsigned int c = 0; c < type->vector_elements; c++) {
468 emit(fs_inst(BRW_OPCODE_MUL,
469 attr,
470 attr,
471 this->pixel_w));
472 attr.reg_offset++;
473 }
474 }
475 location++;
476 }
477 }
478
479 return reg;
480 }
481
482 fs_reg *
483 fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
484 {
485 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
486
487 /* The frontfacing comes in as a bit in the thread payload. */
488 if (intel->gen >= 6) {
489 emit(fs_inst(BRW_OPCODE_ASR,
490 *reg,
491 fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
492 fs_reg(15)));
493 emit(fs_inst(BRW_OPCODE_NOT,
494 *reg,
495 *reg));
496 emit(fs_inst(BRW_OPCODE_AND,
497 *reg,
498 *reg,
499 fs_reg(1)));
500 } else {
502 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
503 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
504 * us front face
505 */
506 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
507 *reg,
508 fs_reg(r1_6ud),
509 fs_reg(1u << 31)));
510 inst->conditional_mod = BRW_CONDITIONAL_L;
511 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
512 }
513
514 return reg;
515 }
516
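/* Emits a single-operand math instruction. On gen6+ the math unit reads
 * its operand directly from the GRF; on earlier generations math is a send
 * from an MRF, so base_mrf/mlen are set up for the message.
 */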
517 fs_inst *
518 fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src)
519 {
520 switch (opcode) {
521 case FS_OPCODE_RCP:
522 case FS_OPCODE_RSQ:
523 case FS_OPCODE_SQRT:
524 case FS_OPCODE_EXP2:
525 case FS_OPCODE_LOG2:
526 case FS_OPCODE_SIN:
527 case FS_OPCODE_COS:
528 break;
529 default:
530 assert(!"not reached: bad math opcode");
531 return NULL;
532 }
533 fs_inst *inst = emit(fs_inst(opcode, dst, src));
534
535 if (intel->gen < 6) {
536 inst->base_mrf = 2;
537 inst->mlen = 1;
538 }
539
540 return inst;
541 }
542
543 fs_inst *
544 fs_visitor::emit_math(fs_opcodes opcode, fs_reg dst, fs_reg src0, fs_reg src1)
545 {
546 int base_mrf = 2;
547 fs_inst *inst;
548
549 assert(opcode == FS_OPCODE_POW);
550
551 if (intel->gen >= 6) {
552 inst = emit(fs_inst(opcode, dst, src0, src1));
553 } else {
554 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + 1), src1));
555 inst = emit(fs_inst(opcode, dst, src0, reg_null));
556
557 inst->base_mrf = base_mrf;
558 inst->mlen = 2;
559 }
560 return inst;
561 }
562
563 void
564 fs_visitor::visit(ir_variable *ir)
565 {
566 fs_reg *reg = NULL;
567
568 if (variable_storage(ir))
569 return;
570
571 if (strcmp(ir->name, "gl_FragColor") == 0) {
572 this->frag_color = ir;
573 } else if (strcmp(ir->name, "gl_FragData") == 0) {
574 this->frag_data = ir;
575 } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
576 this->frag_depth = ir;
577 }
578
579 if (ir->mode == ir_var_in) {
580 if (!strcmp(ir->name, "gl_FragCoord")) {
581 reg = emit_fragcoord_interpolation(ir);
582 } else if (!strcmp(ir->name, "gl_FrontFacing")) {
583 reg = emit_frontfacing_interpolation(ir);
584 } else {
585 reg = emit_general_interpolation(ir);
586 }
587 assert(reg);
588 hash_table_insert(this->variable_ht, reg, ir);
589 return;
590 }
591
592 if (ir->mode == ir_var_uniform) {
593 int param_index = c->prog_data.nr_params;
594
595 if (!strncmp(ir->name, "gl_", 3)) {
596 setup_builtin_uniform_values(ir);
597 } else {
598 setup_uniform_values(ir->location, ir->type);
599 }
600
601 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
602 }
603
604 if (!reg)
605 reg = new(this->mem_ctx) fs_reg(this, ir->type);
606
607 hash_table_insert(this->variable_ht, reg, ir);
608 }
609
610 void
611 fs_visitor::visit(ir_dereference_variable *ir)
612 {
613 fs_reg *reg = variable_storage(ir->var);
614 this->result = *reg;
615 }
616
617 void
618 fs_visitor::visit(ir_dereference_record *ir)
619 {
620 const glsl_type *struct_type = ir->record->type;
621
622 ir->record->accept(this);
623
624 unsigned int offset = 0;
625 for (unsigned int i = 0; i < struct_type->length; i++) {
626 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
627 break;
628 offset += type_size(struct_type->fields.structure[i].type);
629 }
630 this->result.reg_offset += offset;
631 this->result.type = brw_type_for_base_type(ir->type);
632 }
633
634 void
635 fs_visitor::visit(ir_dereference_array *ir)
636 {
637 ir_constant *index;
638 int element_size;
639
640 ir->array->accept(this);
641 index = ir->array_index->as_constant();
642
643 element_size = type_size(ir->type);
644 this->result.type = brw_type_for_base_type(ir->type);
645
646 if (index) {
647 assert(this->result.file == UNIFORM ||
648 (this->result.file == GRF &&
649 this->result.reg != 0));
650 this->result.reg_offset += index->value.i[0] * element_size;
651 } else {
652 assert(!"FINISHME: non-constant array element");
653 }
654 }
655
656 void
657 fs_visitor::visit(ir_expression *ir)
658 {
659 unsigned int operand;
660 fs_reg op[2], temp;
661 fs_reg result;
662 fs_inst *inst;
663
664 for (operand = 0; operand < ir->get_num_operands(); operand++) {
665 ir->operands[operand]->accept(this);
666 if (this->result.file == BAD_FILE) {
667 ir_print_visitor v;
668 printf("Failed to get tree for expression operand:\n");
669 ir->operands[operand]->accept(&v);
670 this->fail = true;
671 }
672 op[operand] = this->result;
673
674 /* Matrix expression operands should have been broken down to vector
675 * operations already.
676 */
677 assert(!ir->operands[operand]->type->is_matrix());
678 /* And then those vector operands should have been broken down to scalar.
679 */
680 assert(!ir->operands[operand]->type->is_vector());
681 }
682
683 /* Storage for our result. If our result goes into an assignment, it will
684 * just get copy-propagated out, so no worries.
685 */
686 this->result = fs_reg(this, ir->type);
687
688 switch (ir->operation) {
689 case ir_unop_logic_not:
690 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
691 break;
692 case ir_unop_neg:
693 op[0].negate = !op[0].negate;
694 this->result = op[0];
695 break;
696 case ir_unop_abs:
697 op[0].abs = true;
698 this->result = op[0];
699 break;
700 case ir_unop_sign:
701 temp = fs_reg(this, ir->type);
702
703 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
704
705 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
706 inst->conditional_mod = BRW_CONDITIONAL_G;
707 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
708 inst->predicated = true;
709
710 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
711 inst->conditional_mod = BRW_CONDITIONAL_L;
712 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
713 inst->predicated = true;
714
715 break;
716 case ir_unop_rcp:
717 emit_math(FS_OPCODE_RCP, this->result, op[0]);
718 break;
719
720 case ir_unop_exp2:
721 emit_math(FS_OPCODE_EXP2, this->result, op[0]);
722 break;
723 case ir_unop_log2:
724 emit_math(FS_OPCODE_LOG2, this->result, op[0]);
725 break;
726 case ir_unop_exp:
727 case ir_unop_log:
728 assert(!"not reached: should be handled by ir_explog_to_explog2");
729 break;
730 case ir_unop_sin:
731 emit_math(FS_OPCODE_SIN, this->result, op[0]);
732 break;
733 case ir_unop_cos:
734 emit_math(FS_OPCODE_COS, this->result, op[0]);
735 break;
736
737 case ir_unop_dFdx:
738 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
739 break;
740 case ir_unop_dFdy:
741 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
742 break;
743
744 case ir_binop_add:
745 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
746 break;
747 case ir_binop_sub:
748 assert(!"not reached: should be handled by ir_sub_to_add_neg");
749 break;
750
751 case ir_binop_mul:
752 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
753 break;
754 case ir_binop_div:
755 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
756 break;
757 case ir_binop_mod:
758 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
759 break;
760
761 case ir_binop_less:
762 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
763 inst->conditional_mod = BRW_CONDITIONAL_L;
764 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
765 break;
766 case ir_binop_greater:
767 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
768 inst->conditional_mod = BRW_CONDITIONAL_G;
769 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
770 break;
771 case ir_binop_lequal:
772 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
773 inst->conditional_mod = BRW_CONDITIONAL_LE;
774 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
775 break;
776 case ir_binop_gequal:
777 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
778 inst->conditional_mod = BRW_CONDITIONAL_GE;
779 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
780 break;
781 case ir_binop_equal:
782     case ir_binop_all_equal: /* same as equal for scalars */
783 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
784 inst->conditional_mod = BRW_CONDITIONAL_Z;
785 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
786 break;
787 case ir_binop_nequal:
788 case ir_binop_any_nequal: /* same as nequal for scalars */
789 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
790 inst->conditional_mod = BRW_CONDITIONAL_NZ;
791 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
792 break;
793
794 case ir_binop_logic_xor:
795 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
796 break;
797
798 case ir_binop_logic_or:
799 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
800 break;
801
802 case ir_binop_logic_and:
803 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
804 break;
805
806 case ir_binop_dot:
807 case ir_binop_cross:
808 case ir_unop_any:
809 assert(!"not reached: should be handled by brw_fs_channel_expressions");
810 break;
811
812 case ir_unop_noise:
813 assert(!"not reached: should be handled by lower_noise");
814 break;
815
816 case ir_unop_sqrt:
817 emit_math(FS_OPCODE_SQRT, this->result, op[0]);
818 break;
819
820 case ir_unop_rsq:
821 emit_math(FS_OPCODE_RSQ, this->result, op[0]);
822 break;
823
824 case ir_unop_i2f:
825 case ir_unop_b2f:
826 case ir_unop_b2i:
827 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
828 break;
829 case ir_unop_f2i:
830 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
831 break;
832 case ir_unop_f2b:
833 case ir_unop_i2b:
834 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
835 inst->conditional_mod = BRW_CONDITIONAL_NZ;
836        break;
837 case ir_unop_trunc:
838 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
839 break;
840 case ir_unop_ceil:
841        op[0].negate = !op[0].negate;
842 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
843 this->result.negate = true;
844 break;
845 case ir_unop_floor:
846 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
847 break;
848 case ir_unop_fract:
849 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
850 break;
851
852 case ir_binop_min:
853 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
854 inst->conditional_mod = BRW_CONDITIONAL_L;
855
856 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
857 inst->predicated = true;
858 break;
859 case ir_binop_max:
860 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
861 inst->conditional_mod = BRW_CONDITIONAL_G;
862
863 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
864 inst->predicated = true;
865 break;
866
867 case ir_binop_pow:
868 emit_math(FS_OPCODE_POW, this->result, op[0], op[1]);
869 break;
870
871 case ir_unop_bit_not:
872 case ir_unop_u2f:
873 case ir_binop_lshift:
874 case ir_binop_rshift:
875 case ir_binop_bit_and:
876 case ir_binop_bit_xor:
877 case ir_binop_bit_or:
878 assert(!"GLSL 1.30 features unsupported");
879 break;
880 }
881 }
882
883 void
884 fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
885 const glsl_type *type, bool predicated)
886 {
887 switch (type->base_type) {
888 case GLSL_TYPE_FLOAT:
889 case GLSL_TYPE_UINT:
890 case GLSL_TYPE_INT:
891 case GLSL_TYPE_BOOL:
892 for (unsigned int i = 0; i < type->components(); i++) {
893 l.type = brw_type_for_base_type(type);
894 r.type = brw_type_for_base_type(type);
895
896 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
897 inst->predicated = predicated;
898
899 l.reg_offset++;
900 r.reg_offset++;
901 }
902 break;
903 case GLSL_TYPE_ARRAY:
904 for (unsigned int i = 0; i < type->length; i++) {
905 emit_assignment_writes(l, r, type->fields.array, predicated);
906 }
907        break;
908 case GLSL_TYPE_STRUCT:
909 for (unsigned int i = 0; i < type->length; i++) {
910 emit_assignment_writes(l, r, type->fields.structure[i].type,
911 predicated);
912 }
913 break;
914
915 case GLSL_TYPE_SAMPLER:
916 break;
917
918 default:
919 assert(!"not reached");
920 break;
921 }
922 }
923
924 void
925 fs_visitor::visit(ir_assignment *ir)
926 {
927 struct fs_reg l, r;
928 fs_inst *inst;
929
930 /* FINISHME: arrays on the lhs */
931 ir->lhs->accept(this);
932 l = this->result;
933
934 ir->rhs->accept(this);
935 r = this->result;
936
937 assert(l.file != BAD_FILE);
938 assert(r.file != BAD_FILE);
939
940 if (ir->condition) {
941 /* Get the condition bool into the predicate. */
942 ir->condition->accept(this);
943 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
944 inst->conditional_mod = BRW_CONDITIONAL_NZ;
945 }
946
947 if (ir->lhs->type->is_scalar() ||
948 ir->lhs->type->is_vector()) {
949 for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
950 if (ir->write_mask & (1 << i)) {
951 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
952 if (ir->condition)
953 inst->predicated = true;
954 r.reg_offset++;
955 }
956 l.reg_offset++;
957 }
958 } else {
959 emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
960 }
961 }
962
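/* Builds the gen4 sampler message: a g0 header, then the u/v/r coordinate
 * slots, followed by any shadow comparitor and lod/bias. Since gen4 has no
 * SIMD8 non-shadow bias/lod messages, those cases use the SIMD16 message
 * and copy the even-indexed results back out.
 */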
963 fs_inst *
964 fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
965 {
966 int mlen;
967 int base_mrf = 1;
968 bool simd16 = false;
969 fs_reg orig_dst;
970
971 /* g0 header. */
972 mlen = 1;
973
974 if (ir->shadow_comparitor) {
975 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
976 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
977 coordinate));
978 coordinate.reg_offset++;
979 }
980 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
981 mlen += 3;
982
983 if (ir->op == ir_tex) {
984 /* There's no plain shadow compare message, so we use shadow
985 * compare with a bias of 0.0.
986 */
987 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
988 fs_reg(0.0f)));
989 mlen++;
990 } else if (ir->op == ir_txb) {
991 ir->lod_info.bias->accept(this);
992 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
993 this->result));
994 mlen++;
995 } else {
996 assert(ir->op == ir_txl);
997 ir->lod_info.lod->accept(this);
998 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
999 this->result));
1000 mlen++;
1001 }
1002
1003 ir->shadow_comparitor->accept(this);
1004 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1005 mlen++;
1006 } else if (ir->op == ir_tex) {
1007 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1008 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
1009 coordinate));
1010 coordinate.reg_offset++;
1011 }
1012 /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
1013 mlen += 3;
1014 } else {
1015 /* Oh joy. gen4 doesn't have SIMD8 non-shadow-compare bias/lod
1016 * instructions. We'll need to do SIMD16 here.
1017 */
1018 assert(ir->op == ir_txb || ir->op == ir_txl);
1019
1020        for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1021 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i * 2),
1022 coordinate));
1023 coordinate.reg_offset++;
1024 }
1025
1026 /* lod/bias appears after u/v/r. */
1027 mlen += 6;
1028
1029 if (ir->op == ir_txb) {
1030 ir->lod_info.bias->accept(this);
1031 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
1032 this->result));
1033 mlen++;
1034 } else {
1035 ir->lod_info.lod->accept(this);
1036 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
1037 this->result));
1038 mlen++;
1039 }
1040
1041 /* The unused upper half. */
1042 mlen++;
1043
1044 /* Now, since we're doing simd16, the return is 2 interleaved
1045 * vec4s where the odd-indexed ones are junk. We'll need to move
1046 * this weirdness around to the expected layout.
1047 */
1048 simd16 = true;
1049 orig_dst = dst;
1050 dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
1051 2));
1052 dst.type = BRW_REGISTER_TYPE_F;
1053 }
1054
1055 fs_inst *inst = NULL;
1056 switch (ir->op) {
1057 case ir_tex:
1058 inst = emit(fs_inst(FS_OPCODE_TEX, dst));
1059 break;
1060 case ir_txb:
1061 inst = emit(fs_inst(FS_OPCODE_TXB, dst));
1062 break;
1063 case ir_txl:
1064 inst = emit(fs_inst(FS_OPCODE_TXL, dst));
1065 break;
1066 case ir_txd:
1067 case ir_txf:
1068 assert(!"GLSL 1.30 features unsupported");
1069 break;
1070 }
1071 inst->base_mrf = base_mrf;
1072 inst->mlen = mlen;
1073
1074 if (simd16) {
1075 for (int i = 0; i < 4; i++) {
1076 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
1077 orig_dst.reg_offset++;
1078 dst.reg_offset += 2;
1079 }
1080 }
1081
1082 return inst;
1083 }
1084
1085 fs_inst *
1086 fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
1087 {
1088 /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
1089 * optional parameters like shadow comparitor or LOD bias. If
1090 * optional parameters aren't present, those base slots are
1091 * optional and don't need to be included in the message.
1092 *
1093 * We don't fill in the unnecessary slots regardless, which may
1094 * look surprising in the disassembly.
1095 */
1096 int mlen = 1; /* g0 header always present. */
1097 int base_mrf = 1;
1098
1099 for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
1100 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen + i),
1101 coordinate));
1102 coordinate.reg_offset++;
1103 }
1104 mlen += ir->coordinate->type->vector_elements;
1105
1106 if (ir->shadow_comparitor) {
1107 mlen = MAX2(mlen, 5);
1108
1109 ir->shadow_comparitor->accept(this);
1110 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1111 mlen++;
1112 }
1113
1114 fs_inst *inst = NULL;
1115 switch (ir->op) {
1116 case ir_tex:
1117 inst = emit(fs_inst(FS_OPCODE_TEX, dst));
1118 break;
1119 case ir_txb:
1120 ir->lod_info.bias->accept(this);
1121 mlen = MAX2(mlen, 5);
1122 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1123 mlen++;
1124
1125 inst = emit(fs_inst(FS_OPCODE_TXB, dst));
1126 break;
1127 case ir_txl:
1128 ir->lod_info.lod->accept(this);
1129 mlen = MAX2(mlen, 5);
1130 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
1131 mlen++;
1132
1133 inst = emit(fs_inst(FS_OPCODE_TXL, dst));
1134 break;
1135 case ir_txd:
1136 case ir_txf:
1137 assert(!"GLSL 1.30 features unsupported");
1138 break;
1139 }
1140 inst->base_mrf = base_mrf;
1141 inst->mlen = mlen;
1142
1143 return inst;
1144 }
1145
1146 void
1147 fs_visitor::visit(ir_texture *ir)
1148 {
1149 fs_inst *inst = NULL;
1150
1151 ir->coordinate->accept(this);
1152 fs_reg coordinate = this->result;
1153
1154 /* Should be lowered by do_lower_texture_projection */
1155 assert(!ir->projector);
1156
1157 /* Writemasking doesn't eliminate channels on SIMD8 texture
1158 * samples, so don't worry about them.
1159 */
1160 fs_reg dst = fs_reg(this, glsl_type::vec4_type);
1161
1162 if (intel->gen < 5) {
1163 inst = emit_texture_gen4(ir, dst, coordinate);
1164 } else {
1165 inst = emit_texture_gen5(ir, dst, coordinate);
1166 }
1167
1168 inst->sampler =
1169 _mesa_get_sampler_uniform_value(ir->sampler,
1170 ctx->Shader.CurrentProgram,
1171 &brw->fragment_program->Base);
1172 inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];
1173
1174 this->result = dst;
1175
1176 if (ir->shadow_comparitor)
1177 inst->shadow_compare = true;
1178
1179 if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
1180 fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);
1181
1182 for (int i = 0; i < 4; i++) {
1183 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
1184 fs_reg l = swizzle_dst;
1185 l.reg_offset += i;
1186
1187 if (swiz == SWIZZLE_ZERO) {
1188 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
1189 } else if (swiz == SWIZZLE_ONE) {
1190 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
1191 } else {
1192 fs_reg r = dst;
1193 r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
1194 emit(fs_inst(BRW_OPCODE_MOV, l, r));
1195 }
1196 }
1197 this->result = swizzle_dst;
1198 }
1199 }
1200
1201 void
1202 fs_visitor::visit(ir_swizzle *ir)
1203 {
1204 ir->val->accept(this);
1205 fs_reg val = this->result;
1206
1207 if (ir->type->vector_elements == 1) {
1208 this->result.reg_offset += ir->mask.x;
1209 return;
1210 }
1211
1212 fs_reg result = fs_reg(this, ir->type);
1213 this->result = result;
1214
1215 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1216 fs_reg channel = val;
1217 int swiz = 0;
1218
1219 switch (i) {
1220 case 0:
1221 swiz = ir->mask.x;
1222 break;
1223 case 1:
1224 swiz = ir->mask.y;
1225 break;
1226 case 2:
1227 swiz = ir->mask.z;
1228 break;
1229 case 3:
1230 swiz = ir->mask.w;
1231 break;
1232 }
1233
1234 channel.reg_offset += swiz;
1235 emit(fs_inst(BRW_OPCODE_MOV, result, channel));
1236 result.reg_offset++;
1237 }
1238 }
1239
1240 void
1241 fs_visitor::visit(ir_discard *ir)
1242 {
1243 fs_reg temp = fs_reg(this, glsl_type::uint_type);
1244
1245 assert(ir->condition == NULL); /* FINISHME */
1246
1247 emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null));
1248 emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null, temp));
1249 kill_emitted = true;
1250 }
1251
1252 void
1253 fs_visitor::visit(ir_constant *ir)
1254 {
1255 fs_reg reg(this, ir->type);
1256 this->result = reg;
1257
1258 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1259 switch (ir->type->base_type) {
1260 case GLSL_TYPE_FLOAT:
1261 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
1262 break;
1263 case GLSL_TYPE_UINT:
1264 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
1265 break;
1266 case GLSL_TYPE_INT:
1267 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
1268 break;
1269 case GLSL_TYPE_BOOL:
1270 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
1271 break;
1272 default:
1273 assert(!"Non-float/uint/int/bool constant");
1274 }
1275 reg.reg_offset++;
1276 }
1277 }
1278
1279 void
1280 fs_visitor::visit(ir_if *ir)
1281 {
1282 fs_inst *inst;
1283
1284 /* Don't point the annotation at the if statement, because then it plus
1285 * the then and else blocks get printed.
1286 */
1287 this->base_ir = ir->condition;
1288
1289 /* Generate the condition into the condition code. */
1290 ir->condition->accept(this);
1291 inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
1292 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1293
1294 inst = emit(fs_inst(BRW_OPCODE_IF));
1295 inst->predicated = true;
1296
1297 foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
1298 ir_instruction *ir = (ir_instruction *)iter.get();
1299 this->base_ir = ir;
1300
1301 ir->accept(this);
1302 }
1303
1304 if (!ir->else_instructions.is_empty()) {
1305 emit(fs_inst(BRW_OPCODE_ELSE));
1306
1307 foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
1308 ir_instruction *ir = (ir_instruction *)iter.get();
1309 this->base_ir = ir;
1310
1311 ir->accept(this);
1312 }
1313 }
1314
1315 emit(fs_inst(BRW_OPCODE_ENDIF));
1316 }
1317
1318 void
1319 fs_visitor::visit(ir_loop *ir)
1320 {
1321 fs_reg counter = reg_undef;
1322
1323 if (ir->counter) {
1324 this->base_ir = ir->counter;
1325 ir->counter->accept(this);
1326 counter = *(variable_storage(ir->counter));
1327
1328 if (ir->from) {
1329 this->base_ir = ir->from;
1330 ir->from->accept(this);
1331
1332 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
1333 }
1334 }
1335
1336 emit(fs_inst(BRW_OPCODE_DO));
1337
1338 if (ir->to) {
1339 this->base_ir = ir->to;
1340 ir->to->accept(this);
1341
1342 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
1343 counter, this->result));
1344 switch (ir->cmp) {
1345 case ir_binop_equal:
1346 inst->conditional_mod = BRW_CONDITIONAL_Z;
1347 break;
1348 case ir_binop_nequal:
1349 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1350 break;
1351 case ir_binop_gequal:
1352 inst->conditional_mod = BRW_CONDITIONAL_GE;
1353 break;
1354 case ir_binop_lequal:
1355 inst->conditional_mod = BRW_CONDITIONAL_LE;
1356 break;
1357 case ir_binop_greater:
1358 inst->conditional_mod = BRW_CONDITIONAL_G;
1359 break;
1360 case ir_binop_less:
1361 inst->conditional_mod = BRW_CONDITIONAL_L;
1362 break;
1363 default:
1364 assert(!"not reached: unknown loop condition");
1365 this->fail = true;
1366 break;
1367 }
1368
1369 inst = emit(fs_inst(BRW_OPCODE_BREAK));
1370 inst->predicated = true;
1371 }
1372
1373 foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
1374 ir_instruction *ir = (ir_instruction *)iter.get();
1375
1376 this->base_ir = ir;
1377 ir->accept(this);
1378 }
1379
1380 if (ir->increment) {
1381 this->base_ir = ir->increment;
1382 ir->increment->accept(this);
1383 emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
1384 }
1385
1386 emit(fs_inst(BRW_OPCODE_WHILE));
1387 }
1388
1389 void
1390 fs_visitor::visit(ir_loop_jump *ir)
1391 {
1392 switch (ir->mode) {
1393 case ir_loop_jump::jump_break:
1394 emit(fs_inst(BRW_OPCODE_BREAK));
1395 break;
1396 case ir_loop_jump::jump_continue:
1397 emit(fs_inst(BRW_OPCODE_CONTINUE));
1398 break;
1399 }
1400 }
1401
1402 void
1403 fs_visitor::visit(ir_call *ir)
1404 {
1405 assert(!"FINISHME");
1406 }
1407
1408 void
1409 fs_visitor::visit(ir_return *ir)
1410 {
1411 assert(!"FINISHME");
1412 }
1413
1414 void
1415 fs_visitor::visit(ir_function *ir)
1416 {
1417 /* Ignore function bodies other than main() -- we shouldn't see calls to
1418 * them since they should all be inlined before we get to ir_to_mesa.
1419 */
1420 if (strcmp(ir->name, "main") == 0) {
1421 const ir_function_signature *sig;
1422 exec_list empty;
1423
1424 sig = ir->matching_signature(&empty);
1425
1426 assert(sig);
1427
1428 foreach_iter(exec_list_iterator, iter, sig->body) {
1429 ir_instruction *ir = (ir_instruction *)iter.get();
1430 this->base_ir = ir;
1431
1432 ir->accept(this);
1433 }
1434 }
1435 }
1436
1437 void
1438 fs_visitor::visit(ir_function_signature *ir)
1439 {
1440 assert(!"not reached");
1441 (void)ir;
1442 }
1443
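/* Copies the template instruction into talloc'd storage, tags it with the
 * current annotation and source IR for debugging, and appends it to the
 * instruction list.
 */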
1444 fs_inst *
1445 fs_visitor::emit(fs_inst inst)
1446 {
1447 fs_inst *list_inst = new(mem_ctx) fs_inst;
1448 *list_inst = inst;
1449
1450 list_inst->annotation = this->current_annotation;
1451 list_inst->ir = this->base_ir;
1452
1453 this->instructions.push_tail(list_inst);
1454
1455 return list_inst;
1456 }
1457
1458 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1459 void
1460 fs_visitor::emit_dummy_fs()
1461 {
1462 /* Everyone's favorite color. */
1463 emit(fs_inst(BRW_OPCODE_MOV,
1464 fs_reg(MRF, 2),
1465 fs_reg(1.0f)));
1466 emit(fs_inst(BRW_OPCODE_MOV,
1467 fs_reg(MRF, 3),
1468 fs_reg(0.0f)));
1469 emit(fs_inst(BRW_OPCODE_MOV,
1470 fs_reg(MRF, 4),
1471 fs_reg(1.0f)));
1472 emit(fs_inst(BRW_OPCODE_MOV,
1473 fs_reg(MRF, 5),
1474 fs_reg(0.0f)));
1475
1476 fs_inst *write;
1477 write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1478 fs_reg(0),
1479 fs_reg(0)));
1480 write->base_mrf = 0;
1481 }
1482
1483 /* The register location here is relative to the start of the URB
1484 * data. It will get adjusted to be a real location before
1485 * generate_code() time.
1486 */
1487 struct brw_reg
1488 fs_visitor::interp_reg(int location, int channel)
1489 {
1490 int regnr = urb_setup[location] * 2 + channel / 2;
1491 int stride = (channel & 1) * 4;
1492
1493 assert(urb_setup[location] != -1);
1494
1495 return brw_vec1_grf(regnr, stride);
1496 }
1497
1498 /** Emits the interpolation for the varying inputs. */
1499 void
1500 fs_visitor::emit_interpolation_setup_gen4()
1501 {
1502 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1503
1504 this->current_annotation = "compute pixel centers";
1505 this->pixel_x = fs_reg(this, glsl_type::uint_type);
1506 this->pixel_y = fs_reg(this, glsl_type::uint_type);
1507 this->pixel_x.type = BRW_REGISTER_TYPE_UW;
1508 this->pixel_y.type = BRW_REGISTER_TYPE_UW;
1509 emit(fs_inst(BRW_OPCODE_ADD,
1510 this->pixel_x,
1511 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1512 fs_reg(brw_imm_v(0x10101010))));
1513 emit(fs_inst(BRW_OPCODE_ADD,
1514 this->pixel_y,
1515 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1516 fs_reg(brw_imm_v(0x11001100))));
1517
1518 this->current_annotation = "compute pixel deltas from v0";
1519 if (brw->has_pln) {
1520 this->delta_x = fs_reg(this, glsl_type::vec2_type);
1521 this->delta_y = this->delta_x;
1522 this->delta_y.reg_offset++;
1523 } else {
1524 this->delta_x = fs_reg(this, glsl_type::float_type);
1525 this->delta_y = fs_reg(this, glsl_type::float_type);
1526 }
1527 emit(fs_inst(BRW_OPCODE_ADD,
1528 this->delta_x,
1529 this->pixel_x,
1530 fs_reg(negate(brw_vec1_grf(1, 0)))));
1531 emit(fs_inst(BRW_OPCODE_ADD,
1532 this->delta_y,
1533 this->pixel_y,
1534 fs_reg(negate(brw_vec1_grf(1, 1)))));
1535
1536 this->current_annotation = "compute pos.w and 1/pos.w";
1537 /* Compute wpos.w. It's always in our setup, since it's needed to
1538 * interpolate the other attributes.
1539 */
1540 this->wpos_w = fs_reg(this, glsl_type::float_type);
1541 emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
1542 interp_reg(FRAG_ATTRIB_WPOS, 3)));
1543 /* Compute the pixel 1/W value from wpos.w. */
1544 this->pixel_w = fs_reg(this, glsl_type::float_type);
1545 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
1546 this->current_annotation = NULL;
1547 }
1548
1549 /** Emits the interpolation for the varying inputs. */
1550 void
1551 fs_visitor::emit_interpolation_setup_gen6()
1552 {
1553 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1554
1555 /* If the pixel centers end up used, the setup is the same as for gen4. */
1556 this->current_annotation = "compute pixel centers";
1557 this->pixel_x = fs_reg(this, glsl_type::uint_type);
1558 this->pixel_y = fs_reg(this, glsl_type::uint_type);
1559 this->pixel_x.type = BRW_REGISTER_TYPE_UW;
1560 this->pixel_y.type = BRW_REGISTER_TYPE_UW;
1561 emit(fs_inst(BRW_OPCODE_ADD,
1562 this->pixel_x,
1563 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1564 fs_reg(brw_imm_v(0x10101010))));
1565 emit(fs_inst(BRW_OPCODE_ADD,
1566 this->pixel_y,
1567 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1568 fs_reg(brw_imm_v(0x11001100))));
1569
1570 this->current_annotation = "compute 1/pos.w";
1571 this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
1572 this->pixel_w = fs_reg(this, glsl_type::float_type);
1573 emit_math(FS_OPCODE_RCP, this->pixel_w, wpos_w);
1574
1575 this->delta_x = fs_reg(brw_vec8_grf(2, 0));
1576 this->delta_y = fs_reg(brw_vec8_grf(3, 0));
1577
1578 this->current_annotation = NULL;
1579 }
1580
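/* Assembles the framebuffer write messages: optional two-register header,
 * AA/stencil and depth payload when needed, then four color channels per
 * render target, with EOT set on the final message.
 */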
1581 void
1582 fs_visitor::emit_fb_writes()
1583 {
1584 this->current_annotation = "FB write header";
1585 GLboolean header_present = GL_TRUE;
1586 int nr = 0;
1587
1588 if (intel->gen >= 6 &&
1589 !this->kill_emitted &&
1590 c->key.nr_color_regions == 1) {
1591 header_present = false;
1592 }
1593
1594 if (header_present) {
1595 /* m0, m1 header */
1596 nr += 2;
1597 }
1598
1599 if (c->key.aa_dest_stencil_reg) {
1600 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1601 fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
1602 }
1603
1604 /* Reserve space for color. It'll be filled in per MRT below. */
1605 int color_mrf = nr;
1606 nr += 4;
1607
1608 if (c->key.source_depth_to_render_target) {
1609 if (c->key.computes_depth) {
1610 /* Hand over gl_FragDepth. */
1611 assert(this->frag_depth);
1612 fs_reg depth = *(variable_storage(this->frag_depth));
1613
1614 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
1615 } else {
1616 /* Pass through the payload depth. */
1617 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1618 fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
1619 }
1620 }
1621
1622 if (c->key.dest_depth_reg) {
1623 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1624 fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
1625 }
1626
1627 fs_reg color = reg_undef;
1628 if (this->frag_color)
1629 color = *(variable_storage(this->frag_color));
1630 else if (this->frag_data)
1631 color = *(variable_storage(this->frag_data));
1632
1633 for (int target = 0; target < c->key.nr_color_regions; target++) {
1634 this->current_annotation = talloc_asprintf(this->mem_ctx,
1635 "FB write target %d",
1636 target);
1637 if (this->frag_color || this->frag_data) {
1638 for (int i = 0; i < 4; i++) {
1639 emit(fs_inst(BRW_OPCODE_MOV,
1640 fs_reg(MRF, color_mrf + i),
1641 color));
1642 color.reg_offset++;
1643 }
1644 }
1645
1646 if (this->frag_color)
1647 color.reg_offset -= 4;
1648
1649 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
1650 reg_undef, reg_undef));
1651 inst->target = target;
1652 inst->base_mrf = 0;
1653 inst->mlen = nr;
1654 if (target == c->key.nr_color_regions - 1)
1655 inst->eot = true;
1656 inst->header_present = header_present;
1657 }
1658
1659 if (c->key.nr_color_regions == 0) {
1660 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
1661 reg_undef, reg_undef));
1662 inst->base_mrf = 0;
1663 inst->mlen = nr;
1664 inst->eot = true;
1665 inst->header_present = header_present;
1666 }
1667
1668 this->current_annotation = NULL;
1669 }
1670
1671 void
1672 fs_visitor::generate_fb_write(fs_inst *inst)
1673 {
1674 GLboolean eot = inst->eot;
1675 struct brw_reg implied_header;
1676
1677 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
1678 * move, here's g1.
1679 */
1680 brw_push_insn_state(p);
1681 brw_set_mask_control(p, BRW_MASK_DISABLE);
1682 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1683
1684 if (inst->header_present) {
1685 if (intel->gen >= 6) {
1686 brw_MOV(p,
1687 brw_message_reg(inst->base_mrf),
1688 brw_vec8_grf(0, 0));
1689 implied_header = brw_null_reg();
1690 } else {
1691 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
1692 }
1693
1694 brw_MOV(p,
1695 brw_message_reg(inst->base_mrf + 1),
1696 brw_vec8_grf(1, 0));
1697 } else {
1698 implied_header = brw_null_reg();
1699 }
1700
1701 brw_pop_insn_state(p);
1702
1703 brw_fb_WRITE(p,
1704 8, /* dispatch_width */
1705 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1706 inst->base_mrf,
1707 implied_header,
1708 inst->target,
1709 inst->mlen,
1710 0,
1711 eot);
1712 }
1713
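/* Emits the interpolation as a single PLN when the hardware supports it
 * and the delta registers are adjacent (and aligned to an even register
 * pre-gen6), otherwise as the two-instruction LINE+MAC sequence.
 */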
1714 void
1715 fs_visitor::generate_linterp(fs_inst *inst,
1716 struct brw_reg dst, struct brw_reg *src)
1717 {
1718 struct brw_reg delta_x = src[0];
1719 struct brw_reg delta_y = src[1];
1720 struct brw_reg interp = src[2];
1721
1722 if (brw->has_pln &&
1723 delta_y.nr == delta_x.nr + 1 &&
1724 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1725 brw_PLN(p, dst, interp, delta_x);
1726 } else {
1727 brw_LINE(p, brw_null_reg(), interp, delta_x);
1728 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1729 }
1730 }
1731
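/* Generates the hardware math instruction. On gen6 math operates directly
 * on GRF sources with no message (mlen == 0), which is why these
 * instructions must not be compute-to-MRF'd; on earlier generations math
 * is a send from the instruction's base_mrf.
 */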
1732 void
1733 fs_visitor::generate_math(fs_inst *inst,
1734 struct brw_reg dst, struct brw_reg *src)
1735 {
1736 int op;
1737
1738 switch (inst->opcode) {
1739 case FS_OPCODE_RCP:
1740 op = BRW_MATH_FUNCTION_INV;
1741 break;
1742 case FS_OPCODE_RSQ:
1743 op = BRW_MATH_FUNCTION_RSQ;
1744 break;
1745 case FS_OPCODE_SQRT:
1746 op = BRW_MATH_FUNCTION_SQRT;
1747 break;
1748 case FS_OPCODE_EXP2:
1749 op = BRW_MATH_FUNCTION_EXP;
1750 break;
1751 case FS_OPCODE_LOG2:
1752 op = BRW_MATH_FUNCTION_LOG;
1753 break;
1754 case FS_OPCODE_POW:
1755 op = BRW_MATH_FUNCTION_POW;
1756 break;
1757 case FS_OPCODE_SIN:
1758 op = BRW_MATH_FUNCTION_SIN;
1759 break;
1760 case FS_OPCODE_COS:
1761 op = BRW_MATH_FUNCTION_COS;
1762 break;
1763 default:
1764 assert(!"not reached: unknown math function");
1765 op = 0;
1766 break;
1767 }
1768
1769 if (intel->gen >= 6) {
1770 assert(inst->mlen == 0);
1771
1772 if (inst->opcode == FS_OPCODE_POW) {
1773 brw_math2(p, dst, op, src[0], src[1]);
1774 } else {
1775 brw_math(p, dst,
1776 op,
1777 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1778 BRW_MATH_SATURATE_NONE,
1779 0, src[0],
1780 BRW_MATH_DATA_VECTOR,
1781 BRW_MATH_PRECISION_FULL);
1782 }
1783 } else {
1784 assert(inst->mlen >= 1);
1785
1786 brw_math(p, dst,
1787 op,
1788 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1789 BRW_MATH_SATURATE_NONE,
1790 inst->base_mrf, src[0],
1791 BRW_MATH_DATA_VECTOR,
1792 BRW_MATH_PRECISION_FULL);
1793 }
1794 }
1795
1796 void
1797 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst)
1798 {
1799 int msg_type = -1;
1800 int rlen = 4;
1801 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
1802
1803 if (intel->gen >= 5) {
1804 switch (inst->opcode) {
1805 case FS_OPCODE_TEX:
1806 if (inst->shadow_compare) {
1807 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1808 } else {
1809 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1810 }
1811 break;
1812 case FS_OPCODE_TXB:
1813 if (inst->shadow_compare) {
1814 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1815 } else {
1816 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1817 }
1818 break;
1819 }
1820 } else {
1821 switch (inst->opcode) {
1822 case FS_OPCODE_TEX:
1823 /* Note that G45 and older determines shadow compare and dispatch width
1824        /* Note that G45 and older determine shadow compare and dispatch width
1825 */
1826 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
1827 if (inst->shadow_compare) {
1828 assert(inst->mlen == 5);
1829 } else {
1830 assert(inst->mlen <= 6);
1831 }
1832 break;
1833 case FS_OPCODE_TXB:
1834 if (inst->shadow_compare) {
1835 assert(inst->mlen == 5);
1836 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
1837 } else {
1838 assert(inst->mlen == 8);
1839 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1840 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
1841 }
1842 break;
1843 }
1844 }
1845 assert(msg_type != -1);
1846
1847 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
1848 rlen = 8;
1849 dst = vec16(dst);
1850 }
1851
1852 brw_SAMPLE(p,
1853 retype(dst, BRW_REGISTER_TYPE_UW),
1854 inst->base_mrf,
1855 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1856 SURF_INDEX_TEXTURE(inst->sampler),
1857 inst->sampler,
1858 WRITEMASK_XYZW,
1859 msg_type,
1860 rlen,
1861 inst->mlen,
1862 0,
1863 1,
1864 simd_mode);
1865 }
1866
1867
1868 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
1869 * looking like:
1870 *
1871 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
1872 *
1873 * and we're trying to produce:
1874 *
1875 * DDX DDY
1876 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
1877 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
1878 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
1879 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
1880 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
1881 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
1882 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
1883 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
1884 *
1885 * and add another set of two more subspans if in 16-pixel dispatch mode.
1886 *
1887 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
1888 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
1889 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
1890 * between each other. We could probably do it like ddx and swizzle the right
1891 * order later, but bail for now and just produce
1892 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
1893 */
1894 void
1895 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1896 {
1897 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
1898 BRW_REGISTER_TYPE_F,
1899 BRW_VERTICAL_STRIDE_2,
1900 BRW_WIDTH_2,
1901 BRW_HORIZONTAL_STRIDE_0,
1902 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1903 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
1904 BRW_REGISTER_TYPE_F,
1905 BRW_VERTICAL_STRIDE_2,
1906 BRW_WIDTH_2,
1907 BRW_HORIZONTAL_STRIDE_0,
1908 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1909 brw_ADD(p, dst, src0, negate(src1));
1910 }
1911
1912 void
1913 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1914 {
1915 struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
1916 BRW_REGISTER_TYPE_F,
1917 BRW_VERTICAL_STRIDE_4,
1918 BRW_WIDTH_4,
1919 BRW_HORIZONTAL_STRIDE_0,
1920 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1921 struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
1922 BRW_REGISTER_TYPE_F,
1923 BRW_VERTICAL_STRIDE_4,
1924 BRW_WIDTH_4,
1925 BRW_HORIZONTAL_STRIDE_0,
1926 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
1927 brw_ADD(p, dst, src0, negate(src1));
1928 }
1929
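/* Discard is emitted as a pair: DISCARD_NOT inverts the instruction mask
 * (IMASK) into a temporary, and DISCARD_AND then ANDs that into the g0
 * header so subsequent FB writes skip the killed pixels.
 */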
1930 void
1931 fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
1932 {
1933 brw_push_insn_state(p);
1934 brw_set_mask_control(p, BRW_MASK_DISABLE);
1935 brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
1936 brw_pop_insn_state(p);
1937 }
1938
1939 void
1940 fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
1941 {
1942 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
1943 mask = brw_uw1_reg(mask.file, mask.nr, 0);
1944
1945 brw_push_insn_state(p);
1946 brw_set_mask_control(p, BRW_MASK_DISABLE);
1947 brw_AND(p, g0, mask, g0);
1948 brw_pop_insn_state(p);
1949 }
1950
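/* Maps UNIFORM-file registers to the fixed hardware registers where the
 * CURBE (push constant) data is loaded, starting after the payload
 * registers.
 */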
1951 void
1952 fs_visitor::assign_curb_setup()
1953 {
1954 c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
1955 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
1956
1957 /* Map the offsets in the UNIFORM file to fixed HW regs. */
1958 foreach_iter(exec_list_iterator, iter, this->instructions) {
1959 fs_inst *inst = (fs_inst *)iter.get();
1960
1961 for (unsigned int i = 0; i < 3; i++) {
1962 if (inst->src[i].file == UNIFORM) {
1963 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
1964 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
1965 constant_nr / 8,
1966 constant_nr % 8);
1967
1968 inst->src[i].file = FIXED_HW_REG;
1969 inst->src[i].fixed_hw_reg = brw_reg;
1970 }
1971 }
1972 }
1973 }
1974
1975 void
1976 fs_visitor::calculate_urb_setup()
1977 {
1978 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
1979 urb_setup[i] = -1;
1980 }
1981
1982 int urb_next = 0;
1983 /* Figure out where each of the incoming setup attributes lands. */
1984 if (intel->gen >= 6) {
1985 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
1986 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) {
1987 urb_setup[i] = urb_next++;
1988 }
1989 }
1990 } else {
1991 /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
1992 for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
1993 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
1994 int fp_index;
1995
1996 if (i >= VERT_RESULT_VAR0)
1997 fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
1998 else if (i <= VERT_RESULT_TEX7)
1999 fp_index = i;
2000 else
2001 fp_index = -1;
2002
2003 if (fp_index >= 0)
2004 urb_setup[fp_index] = urb_next++;
2005 }
2006 }
2007 }
2008
2009 /* Each attribute is 4 setup channels, each of which is half a reg. */
2010 c->prog_data.urb_read_length = urb_next * 2;
2011 }
2012
2013 void
2014 fs_visitor::assign_urb_setup()
2015 {
2016 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
2017
2018 /* Offset all the urb_setup[] index by the actual position of the
2019 * setup regs, now that the location of the constants has been chosen.
2020 */
2021 foreach_iter(exec_list_iterator, iter, this->instructions) {
2022 fs_inst *inst = (fs_inst *)iter.get();
2023
2024 if (inst->opcode != FS_OPCODE_LINTERP)
2025 continue;
2026
2027 assert(inst->src[2].file == FIXED_HW_REG);
2028
2029 inst->src[2].fixed_hw_reg.nr += urb_start;
2030 }
2031
2032 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
2033 }
2034
2035 static void
2036 assign_reg(int *reg_hw_locations, fs_reg *reg)
2037 {
2038 if (reg->file == GRF && reg->reg != 0) {
2039 reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
2040 reg->reg = 0;
2041 }
2042 }
2043
2044 void
2045 fs_visitor::assign_regs_trivial()
2046 {
2047 int last_grf = 0;
2048 int hw_reg_mapping[this->virtual_grf_next];
2049 int i;
2050
2051 hw_reg_mapping[0] = 0;
2052 hw_reg_mapping[1] = this->first_non_payload_grf;
2053 for (i = 2; i < this->virtual_grf_next; i++) {
2054 hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
2055 this->virtual_grf_sizes[i - 1]);
2056 }
2057 last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
2058
2059 foreach_iter(exec_list_iterator, iter, this->instructions) {
2060 fs_inst *inst = (fs_inst *)iter.get();
2061
2062 assign_reg(hw_reg_mapping, &inst->dst);
2063 assign_reg(hw_reg_mapping, &inst->src[0]);
2064 assign_reg(hw_reg_mapping, &inst->src[1]);
2065 }
2066
2067 this->grf_used = last_grf + 1;
2068 }
2069
2070 void
2071 fs_visitor::assign_regs()
2072 {
2073 int last_grf = 0;
2074 int hw_reg_mapping[this->virtual_grf_next + 1];
2075 int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
2076 int class_sizes[base_reg_count];
2077 int class_count = 0;
2078 int aligned_pair_class = -1;
2079
2080 /* Set up the register classes.
2081 *
2082 * The base registers store a scalar value. For texture samples,
2083 * we get virtual GRFs composed of 4 contiguous hw registers. For
2084 * structures and arrays, we store them as contiguous larger things
2085 * than that, though we should be able to do better most of the
2086 * time.
2087 */
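/* Illustrative example: a program that only does texturing ends up
 * with class_sizes = {1, 4} here (plus 2 on gen5 for the PLN pairs);
 * the actual set is whatever sizes this program's virtual GRFs used.
 */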
2088 class_sizes[class_count++] = 1;
2089 if (brw->has_pln && intel->gen < 6) {
2090 /* Always set up the (unaligned) pairs for gen5, so we can find
2091 * them for making the aligned pair class.
2092 */
2093 class_sizes[class_count++] = 2;
2094 }
2095 for (int r = 1; r < this->virtual_grf_next; r++) {
2096 int i;
2097
2098 for (i = 0; i < class_count; i++) {
2099 if (class_sizes[i] == this->virtual_grf_sizes[r])
2100 break;
2101 }
2102 if (i == class_count) {
2103 if (this->virtual_grf_sizes[r] >= base_reg_count) {
2104 fprintf(stderr, "Object too large to register allocate.\n");
2105 this->fail = true;
2106 }
2107
2108 class_sizes[class_count++] = this->virtual_grf_sizes[r];
2109 }
2110 }
2111
2112 int ra_reg_count = 0;
2113 int class_base_reg[class_count];
2114 int class_reg_count[class_count];
2115 int classes[class_count + 1];
2116
2117 for (int i = 0; i < class_count; i++) {
2118 class_base_reg[i] = ra_reg_count;
2119 class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
2120 ra_reg_count += class_reg_count[i];
2121 }
2122
2123 struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
2124 for (int i = 0; i < class_count; i++) {
2125 classes[i] = ra_alloc_reg_class(regs);
2126
2127 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2128 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
2129 }
2130
2131 /* Add conflicts between our contiguous registers aliasing
2132 * base regs and other register classes' contiguous registers
2133 * that alias base regs, or the base regs themselves for classes[0].
2134 */
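/* For example, with class sizes 1 and 2, the pair based at hw reg 10
 * conflicts with the scalars at 10 and 11, and with the pairs based
 * at 9, 10, and 11.
 */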
2135 for (int c = 0; c <= i; c++) {
2136 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2137 for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
2138 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
2139 c_r++) {
2140
2141 if (0) {
2142 printf("%d/%d conflicts %d/%d\n",
2143 class_sizes[i], this->first_non_payload_grf + i_r,
2144 class_sizes[c], this->first_non_payload_grf + c_r);
2145 }
2146
2147 ra_add_reg_conflict(regs,
2148 class_base_reg[i] + i_r,
2149 class_base_reg[c] + c_r);
2150 }
2151 }
2152 }
2153 }
2154
2155 /* Add a special class for aligned pairs, which we'll put delta_x/y
2156 * in on gen5 so that we can do PLN.
2157 */
2158 if (brw->has_pln && intel->gen < 6) {
2159 int reg_count = (base_reg_count - 1) / 2;
2160 int unaligned_pair_class = 1;
2161 assert(class_sizes[unaligned_pair_class] == 2);
2162
2163 aligned_pair_class = class_count;
2164 classes[aligned_pair_class] = ra_alloc_reg_class(regs);
2165 class_base_reg[aligned_pair_class] = 0;
2166 class_reg_count[aligned_pair_class] = 0;
2167 int start = (this->first_non_payload_grf & 1) ? 1 : 0;
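/* Register k of the pair class sits at absolute GRF
 * (first_non_payload_grf + k), so when first_non_payload_grf is odd we
 * start at k = 1 to enumerate the even-aligned pairs PLN needs.
 */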
2168
2169 for (int i = 0; i < reg_count; i++) {
2170 ra_class_add_reg(regs, classes[aligned_pair_class],
2171 class_base_reg[unaligned_pair_class] + i * 2 + start);
2172 }
2173 class_count++;
2174 }
2175
2176 ra_set_finalize(regs);
2177
2178 struct ra_graph *g = ra_alloc_interference_graph(regs,
2179 this->virtual_grf_next);
2180 /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
2181 * with nodes.
2182 */
2183 ra_set_node_class(g, 0, classes[0]);
2184
2185 for (int i = 1; i < this->virtual_grf_next; i++) {
2186 for (int c = 0; c < class_count; c++) {
2187 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
2188 if (aligned_pair_class >= 0 &&
2189 this->delta_x.reg == i) {
2190 ra_set_node_class(g, i, classes[aligned_pair_class]);
2191 } else {
2192 ra_set_node_class(g, i, classes[c]);
2193 }
2194 break;
2195 }
2196 }
2197
2198 for (int j = 1; j < i; j++) {
2199 if (virtual_grf_interferes(i, j)) {
2200 ra_add_node_interference(g, i, j);
2201 }
2202 }
2203 }
2204
2205 /* FINISHME: Handle spilling */
2206 if (!ra_allocate_no_spills(g)) {
2207 fprintf(stderr, "Failed to allocate registers.\n");
2208 this->fail = true;
2209 return;
2210 }
2211
2212 /* Get the chosen virtual registers for each node, and map virtual
2213 * regs in the register classes back down to real hardware reg
2214 * numbers.
2215 */
2216 hw_reg_mapping[0] = 0; /* unused */
2217 for (int i = 1; i < this->virtual_grf_next; i++) {
2218 int reg = ra_get_node_reg(g, i);
2219 int hw_reg = -1;
2220
2221 for (int c = 0; c < class_count; c++) {
2222 if (reg >= class_base_reg[c] &&
2223 reg < class_base_reg[c] + class_reg_count[c]) {
2224 hw_reg = reg - class_base_reg[c];
2225 break;
2226 }
2227 }
2228
2229 assert(hw_reg != -1);
2230 hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
2231 last_grf = MAX2(last_grf,
2232 hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
2233 }
2234
2235 foreach_iter(exec_list_iterator, iter, this->instructions) {
2236 fs_inst *inst = (fs_inst *)iter.get();
2237
2238 assign_reg(hw_reg_mapping, &inst->dst);
2239 assign_reg(hw_reg_mapping, &inst->src[0]);
2240 assign_reg(hw_reg_mapping, &inst->src[1]);
2241 }
2242
2243 this->grf_used = last_grf + 1;
2244
2245 talloc_free(g);
2246 talloc_free(regs);
2247 }
2248
2249 void
2250 fs_visitor::calculate_live_intervals()
2251 {
2252 int num_vars = this->virtual_grf_next;
2253 int *def = talloc_array(mem_ctx, int, num_vars);
2254 int *use = talloc_array(mem_ctx, int, num_vars);
2255 int loop_depth = 0;
2256 int loop_start = 0;
2257
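/* Start each def at effectively +infinity and each use at "never";
 * the walk below MIN2s/MAX2s real instruction positions into them.
 */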
2258 for (int i = 0; i < num_vars; i++) {
2259 def[i] = 1 << 30;
2260 use[i] = -1;
2261 }
2262
2263 int ip = 0;
2264 foreach_iter(exec_list_iterator, iter, this->instructions) {
2265 fs_inst *inst = (fs_inst *)iter.get();
2266
2267 if (inst->opcode == BRW_OPCODE_DO) {
2268 if (loop_depth++ == 0)
2269 loop_start = ip;
2270 } else if (inst->opcode == BRW_OPCODE_WHILE) {
2271 loop_depth--;
2272
2273 if (loop_depth == 0) {
2274 /* FINISHME:
2275 *
2276 * Patches up any vars marked for use within the loop as
2277 * live until the end. This is conservative, as there
2278 * will often be variables defined and used inside the
2279 * loop but dead at the end of the loop body.
2280 */
2281 for (int i = 0; i < num_vars; i++) {
2282 if (use[i] == loop_start) {
2283 use[i] = ip;
2284 }
2285 }
2286 }
2287 } else {
2288 int eip = ip;
2289
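/* Treat any access inside a loop as happening at the top of the loop,
 * since a back edge can re-execute a use before its def.
 */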
2290 if (loop_depth)
2291 eip = loop_start;
2292
2293 for (unsigned int i = 0; i < 3; i++) {
2294 if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
2295 use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip);
2296 }
2297 }
2298 if (inst->dst.file == GRF && inst->dst.reg != 0) {
2299 def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip);
2300 }
2301 }
2302
2303 ip++;
2304 }
2305
2306 talloc_free(this->virtual_grf_def);
2307 talloc_free(this->virtual_grf_use);
2308 this->virtual_grf_def = def;
2309 this->virtual_grf_use = use;
2310 }
2311
2312 /**
2313 * Attempts to move immediate constants into the immediate
2314 * constant slot of following instructions.
2315 *
2316 * Immediate constants are a bit tricky -- they have to be in the last
2317 * operand slot, and you can't do abs/negate on them.
2318 */
2319
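/* For example,
 *    mov  vgrf1, 2.0f
 *    mul  vgrf2, vgrf1, vgrf0
 * becomes
 *    mov  vgrf1, 2.0f
 *    mul  vgrf2, vgrf0, 2.0f
 * (operands commuted to put the immediate in src1), after which the
 * mov can be dead-code eliminated if nothing else reads vgrf1.
 */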
2320 bool
2321 fs_visitor::propagate_constants()
2322 {
2323 bool progress = false;
2324
2325 foreach_iter(exec_list_iterator, iter, this->instructions) {
2326 fs_inst *inst = (fs_inst *)iter.get();
2327
2328 if (inst->opcode != BRW_OPCODE_MOV ||
2329 inst->predicated ||
2330 inst->dst.file != GRF || inst->src[0].file != IMM ||
2331 inst->dst.type != inst->src[0].type)
2332 continue;
2333
2334 /* Don't bother with cases where we should have had the
2335 * operation on the constant folded in GLSL already.
2336 */
2337 if (inst->saturate)
2338 continue;
2339
2340 /* Found a move of a constant to a GRF. Find anything else using the GRF
2341 * before it's written, and replace it with the constant if we can.
2342 */
2343 exec_list_iterator scan_iter = iter;
2344 scan_iter.next();
2345 for (; scan_iter.has_next(); scan_iter.next()) {
2346 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2347
2348 if (scan_inst->opcode == BRW_OPCODE_DO ||
2349 scan_inst->opcode == BRW_OPCODE_WHILE ||
2350 scan_inst->opcode == BRW_OPCODE_ELSE ||
2351 scan_inst->opcode == BRW_OPCODE_ENDIF) {
2352 break;
2353 }
2354
2355 for (int i = 2; i >= 0; i--) {
2356 if (scan_inst->src[i].file != GRF ||
2357 scan_inst->src[i].reg != inst->dst.reg ||
2358 scan_inst->src[i].reg_offset != inst->dst.reg_offset)
2359 continue;
2360
2361 /* Don't bother with cases where we should have had the
2362 * operation on the constant folded in GLSL already.
2363 */
2364 if (scan_inst->src[i].negate || scan_inst->src[i].abs)
2365 continue;
2366
2367 switch (scan_inst->opcode) {
2368 case BRW_OPCODE_MOV:
2369 scan_inst->src[i] = inst->src[0];
2370 progress = true;
2371 break;
2372
2373 case BRW_OPCODE_MUL:
2374 case BRW_OPCODE_ADD:
2375 if (i == 1) {
2376 scan_inst->src[i] = inst->src[0];
2377 progress = true;
2378 } else if (i == 0 && scan_inst->src[1].file != IMM) {
2379 /* Fit this constant in by commuting the operands */
2380 scan_inst->src[0] = scan_inst->src[1];
2381 scan_inst->src[1] = inst->src[0];
progress = true;
2382 }
2383 break;
2384 case BRW_OPCODE_CMP:
2385 if (i == 1) {
2386 scan_inst->src[i] = inst->src[0];
2387 progress = true;
2388 }
break;
2389 }
2390 }
2391
2392 if (scan_inst->dst.file == GRF &&
2393 scan_inst->dst.reg == inst->dst.reg &&
2394 (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
2395 scan_inst->opcode == FS_OPCODE_TEX)) {
2396 break;
2397 }
2398 }
2399 }
2400
2401 return progress;
2402 }

2403 /**
2404 * Must be called after calculate_live_intervals() to remove unused
2405 * writes to registers -- register allocation will fail otherwise
2406 * because something defined but not used won't be considered to
2407 * interfere with other regs.
2408 */
2409 bool
2410 fs_visitor::dead_code_eliminate()
2411 {
2412 bool progress = false;
2413 int num_vars = this->virtual_grf_next;
2414 bool dead[num_vars];
2415
2416 for (int i = 0; i < num_vars; i++) {
2417 dead[i] = this->virtual_grf_def[i] >= this->virtual_grf_use[i];
2418
2419 if (dead[i]) {
2420 /* Mark off its interval so it won't interfere with anything. */
2421 this->virtual_grf_def[i] = -1;
2422 this->virtual_grf_use[i] = -1;
2423 }
2424 }
2425
2426 foreach_iter(exec_list_iterator, iter, this->instructions) {
2427 fs_inst *inst = (fs_inst *)iter.get();
2428
2429 if (inst->dst.file == GRF && dead[inst->dst.reg]) {
2430 inst->remove();
2431 progress = true;
2432 }
2433 }
2434
2435 return progress;
2436 }
2437
2438 bool
2439 fs_visitor::register_coalesce()
2440 {
2441 bool progress = false;
2442
2443 foreach_iter(exec_list_iterator, iter, this->instructions) {
2444 fs_inst *inst = (fs_inst *)iter.get();
2445
2446 if (inst->opcode != BRW_OPCODE_MOV ||
2447 inst->predicated ||
2448 inst->saturate ||
2449 inst->dst.file != GRF || inst->src[0].file != GRF ||
2450 inst->dst.type != inst->src[0].type)
2451 continue;
2452
2453 /* Found a move of a GRF to a GRF. Let's see if we can coalesce
2454 * them: check for no writes to either one until the exit of the
2455 * program.
2456 */
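/* (This is conservative -- a write after the last read of either
 * register would also be safe -- but we don't track reads here.)
 */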
2457 bool interfered = false;
2458 exec_list_iterator scan_iter = iter;
2459 scan_iter.next();
2460 for (; scan_iter.has_next(); scan_iter.next()) {
2461 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2462
2463 if (scan_inst->opcode == BRW_OPCODE_DO ||
2464 scan_inst->opcode == BRW_OPCODE_WHILE ||
2465 scan_inst->opcode == BRW_OPCODE_ENDIF) {
2466 interfered = true;
2467 iter = scan_iter;
2468 break;
2469 }
2470
2471 if (scan_inst->dst.file == GRF) {
2472 if (scan_inst->dst.reg == inst->dst.reg &&
2473 (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
2474 scan_inst->opcode == FS_OPCODE_TEX)) {
2475 interfered = true;
2476 break;
2477 }
2478 if (scan_inst->dst.reg == inst->src[0].reg &&
2479 (scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
2480 scan_inst->opcode == FS_OPCODE_TEX)) {
2481 interfered = true;
2482 break;
2483 }
2484 }
2485 }
2486 if (interfered) {
2487 continue;
2488 }
2489
2490 /* Rewrite the later usage to point at the source of the move to
2491 * be removed.
2492 */
2493 for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
2494 scan_iter.next()) {
2495 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2496
2497 for (int i = 0; i < 3; i++) {
2498 if (scan_inst->src[i].file == GRF &&
2499 scan_inst->src[i].reg == inst->dst.reg &&
2500 scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
2501 scan_inst->src[i].reg = inst->src[0].reg;
2502 scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
2503 scan_inst->src[i].abs |= inst->src[0].abs;
2504 scan_inst->src[i].negate ^= inst->src[0].negate;
2505 }
2506 }
2507 }
2508
2509 inst->remove();
2510 progress = true;
2511 }
2512
2513 return progress;
2514 }
2515
2516
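/**
 * Turns, for example,
 *    add  vgrf3, vgrf1, vgrf2
 *    mov  m4, vgrf3
 * into
 *    add  m4, vgrf1, vgrf2
 * when vgrf3 isn't read afterwards, saving a register and a MOV.
 */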
2517 bool
2518 fs_visitor::compute_to_mrf()
2519 {
2520 bool progress = false;
2521 int next_ip = 0;
2522
2523 foreach_iter(exec_list_iterator, iter, this->instructions) {
2524 fs_inst *inst = (fs_inst *)iter.get();
2525
2526 int ip = next_ip;
2527 next_ip++;
2528
2529 if (inst->opcode != BRW_OPCODE_MOV ||
2530 inst->predicated ||
2531 inst->dst.file != MRF || inst->src[0].file != GRF ||
2532 inst->dst.type != inst->src[0].type ||
2533 inst->src[0].abs || inst->src[0].negate)
2534 continue;
2535
2536 /* Can't compute-to-MRF this GRF if someone else was going to
2537 * read it later.
2538 */
2539 if (this->virtual_grf_use[inst->src[0].reg] > ip)
2540 continue;
2541
2542 /* Found a move of a GRF to a MRF. Let's see if we can go
2543 * rewrite the thing that made this GRF to write into the MRF.
2544 */
2545 bool found = false;
2546 fs_inst *scan_inst;
2547 for (scan_inst = (fs_inst *)inst->prev;
2548 scan_inst->prev != NULL;
2549 scan_inst = (fs_inst *)scan_inst->prev) {
2550 /* We don't handle flow control here. Most values that end
2551 * up in MRFs are computed shortly before the MRF write
2552 * anyway.
2553 */
2554 if (scan_inst->opcode == BRW_OPCODE_DO ||
2555 scan_inst->opcode == BRW_OPCODE_WHILE ||
2556 scan_inst->opcode == BRW_OPCODE_ENDIF) {
2557 break;
2558 }
2559
2560 /* You can't read from an MRF, so if someone else reads our
2561 * MRF's source GRF that we wanted to rewrite, that stops us.
2562 */
2563 bool interfered = false;
2564 for (int i = 0; i < 3; i++) {
2565 if (scan_inst->src[i].file == GRF &&
2566 scan_inst->src[i].reg == inst->src[0].reg &&
2567 scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
2568 interfered = true;
2569 }
2570 }
2571 if (interfered)
2572 break;
2573
2574 if (scan_inst->dst.file == MRF &&
2575 scan_inst->dst.hw_reg == inst->dst.hw_reg) {
2576 /* Somebody else wrote our MRF here, so we can't
2577 * compute-to-MRF before that.
2578 */
2579 break;
2580 }
2581
2582 if (scan_inst->mlen > 0) {
2583 /* Found a SEND instruction, which does some amount of
2584 * implied write that may overwrite the MRF that we were
2585 * hoping to compute-to-MRF somewhere above it. Nothing
2586 * we emit implied-writes more than 2 MRFs from base_mrf,
2587 * though.
2588 */
2589 int implied_write_len = MIN2(scan_inst->mlen, 2);
2590 if (inst->dst.hw_reg >= scan_inst->base_mrf &&
2591 inst->dst.hw_reg < scan_inst->base_mrf + implied_write_len) {
2592 break;
2593 }
2594 }
2595
2596 if (scan_inst->dst.file == GRF &&
2597 scan_inst->dst.reg == inst->src[0].reg) {
2598 /* Found the last instruction to write the GRF we want to
2599 * turn into a compute-to-MRF.
2600 */
2601
2602 if (scan_inst->opcode == FS_OPCODE_TEX) {
2603 /* texturing writes several contiguous regs, so we can't
2604 * compute-to-mrf that.
2605 */
2606 break;
2607 }
2608
2609 /* If it's predicated, it (probably) didn't populate all
2610 * the channels.
2611 */
2612 if (scan_inst->predicated)
2613 break;
2614
2615 /* SEND instructions can't have MRF as a destination. */
2616 if (scan_inst->mlen)
2617 break;
2618
2619 if (intel->gen >= 6) {
2620 /* gen6 math instructions must have the destination be
2621 * GRF, so no compute-to-MRF for them.
2622 */
2623 if (scan_inst->opcode == FS_OPCODE_RCP ||
2624 scan_inst->opcode == FS_OPCODE_RSQ ||
2625 scan_inst->opcode == FS_OPCODE_SQRT ||
2626 scan_inst->opcode == FS_OPCODE_EXP2 ||
2627 scan_inst->opcode == FS_OPCODE_LOG2 ||
2628 scan_inst->opcode == FS_OPCODE_SIN ||
2629 scan_inst->opcode == FS_OPCODE_COS ||
2630 scan_inst->opcode == FS_OPCODE_POW) {
2631 break;
2632 }
2633 }
2634
2635 if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
2636 /* Found the creator of our MRF's source value. */
2637 found = true;
2638 break;
2639 }
2640 }
2641 }
2642 if (found) {
2643 scan_inst->dst.file = MRF;
2644 scan_inst->dst.hw_reg = inst->dst.hw_reg;
2645 scan_inst->saturate |= inst->saturate;
2646 inst->remove();
2647 progress = true;
2648 }
2649 }
2650
2651 return progress;
2652 }
2653
2654 bool
2655 fs_visitor::virtual_grf_interferes(int a, int b)
2656 {
2657 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
2658 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
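/* The ranges [def, use) overlap iff the later def comes before the
 * earlier use.
 */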
2659
2660 /* For dead code, just check if the def interferes with the other range. */
2661 if (this->virtual_grf_use[a] == -1) {
2662 return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
2663 this->virtual_grf_def[a] < this->virtual_grf_use[b]);
2664 }
2665 if (this->virtual_grf_use[b] == -1) {
2666 return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
2667 this->virtual_grf_def[b] < this->virtual_grf_use[a]);
2668 }
2669
2670 return start < end;
2671 }
2672
2673 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
2674 {
2675 struct brw_reg brw_reg;
2676
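/* Register operands all become 8-wide regions, matching the 8-wide
 * dispatch mode we always compile for.
 */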
2677 switch (reg->file) {
2678 case GRF:
2679 case ARF:
2680 case MRF:
2681 brw_reg = brw_vec8_reg(reg->file,
2682 reg->hw_reg, 0);
2683 brw_reg = retype(brw_reg, reg->type);
2684 break;
2685 case IMM:
2686 switch (reg->type) {
2687 case BRW_REGISTER_TYPE_F:
2688 brw_reg = brw_imm_f(reg->imm.f);
2689 break;
2690 case BRW_REGISTER_TYPE_D:
2691 brw_reg = brw_imm_d(reg->imm.i);
2692 break;
2693 case BRW_REGISTER_TYPE_UD:
2694 brw_reg = brw_imm_ud(reg->imm.u);
2695 break;
2696 default:
2697 assert(!"not reached");
2698 break;
2699 }
2700 break;
2701 case FIXED_HW_REG:
2702 brw_reg = reg->fixed_hw_reg;
2703 break;
2704 case BAD_FILE:
2705 /* Probably unused. */
2706 brw_reg = brw_null_reg();
2707 break;
2708 case UNIFORM:
2709 assert(!"not reached");
2710 brw_reg = brw_null_reg();
2711 break;
2712 }
2713 if (reg->abs)
2714 brw_reg = brw_abs(brw_reg);
2715 if (reg->negate)
2716 brw_reg = negate(brw_reg);
2717
2718 return brw_reg;
2719 }
2720
2721 void
2722 fs_visitor::generate_code()
2723 {
2724 unsigned int annotation_len = 0;
2725 int last_native_inst = 0;
2726 struct brw_instruction *if_stack[16], *loop_stack[16];
2727 int if_stack_depth = 0, loop_stack_depth = 0;
2728 int if_depth_in_loop[16];
2729
2730 if_depth_in_loop[loop_stack_depth] = 0;
2731
2732 memset(if_stack, 0, sizeof(if_stack));
2733 foreach_iter(exec_list_iterator, iter, this->instructions) {
2734 fs_inst *inst = (fs_inst *)iter.get();
2735 struct brw_reg src[3], dst;
2736
2737 for (unsigned int i = 0; i < 3; i++) {
2738 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
2739 }
2740 dst = brw_reg_from_fs_reg(&inst->dst);
2741
2742 brw_set_conditionalmod(p, inst->conditional_mod);
2743 brw_set_predicate_control(p, inst->predicated);
2744
2745 switch (inst->opcode) {
2746 case BRW_OPCODE_MOV:
2747 brw_MOV(p, dst, src[0]);
2748 break;
2749 case BRW_OPCODE_ADD:
2750 brw_ADD(p, dst, src[0], src[1]);
2751 break;
2752 case BRW_OPCODE_MUL:
2753 brw_MUL(p, dst, src[0], src[1]);
2754 break;
2755
2756 case BRW_OPCODE_FRC:
2757 brw_FRC(p, dst, src[0]);
2758 break;
2759 case BRW_OPCODE_RNDD:
2760 brw_RNDD(p, dst, src[0]);
2761 break;
2762 case BRW_OPCODE_RNDZ:
2763 brw_RNDZ(p, dst, src[0]);
2764 break;
2765
2766 case BRW_OPCODE_AND:
2767 brw_AND(p, dst, src[0], src[1]);
2768 break;
2769 case BRW_OPCODE_OR:
2770 brw_OR(p, dst, src[0], src[1]);
2771 break;
2772 case BRW_OPCODE_XOR:
2773 brw_XOR(p, dst, src[0], src[1]);
2774 break;
2775 case BRW_OPCODE_NOT:
2776 brw_NOT(p, dst, src[0]);
2777 break;
2778 case BRW_OPCODE_ASR:
2779 brw_ASR(p, dst, src[0], src[1]);
2780 break;
2781 case BRW_OPCODE_SHR:
2782 brw_SHR(p, dst, src[0], src[1]);
2783 break;
2784 case BRW_OPCODE_SHL:
2785 brw_SHL(p, dst, src[0], src[1]);
2786 break;
2787
2788 case BRW_OPCODE_CMP:
2789 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2790 break;
2791 case BRW_OPCODE_SEL:
2792 brw_SEL(p, dst, src[0], src[1]);
2793 break;
2794
2795 case BRW_OPCODE_IF:
2796 assert(if_stack_depth < 16);
2797 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2798 if_depth_in_loop[loop_stack_depth]++;
2799 if_stack_depth++;
2800 break;
2801 case BRW_OPCODE_ELSE:
2802 if_stack[if_stack_depth - 1] =
2803 brw_ELSE(p, if_stack[if_stack_depth - 1]);
2804 break;
2805 case BRW_OPCODE_ENDIF:
2806 if_stack_depth--;
2807 brw_ENDIF(p, if_stack[if_stack_depth]);
2808 if_depth_in_loop[loop_stack_depth]--;
2809 break;
2810
2811 case BRW_OPCODE_DO:
2812 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
2813 if_depth_in_loop[loop_stack_depth] = 0;
2814 break;
2815
2816 case BRW_OPCODE_BREAK:
2817 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
2818 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2819 break;
2820 case BRW_OPCODE_CONTINUE:
2821 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
2822 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2823 break;
2824
2825 case BRW_OPCODE_WHILE: {
2826 struct brw_instruction *inst0, *inst1;
2827 GLuint br = 1;
2828
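/* Jump distances are counted in whole (128-bit) instructions on gen4,
 * but in 64-bit halves from gen5 on, hence the scale factor.
 */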
2829 if (intel->gen >= 5)
2830 br = 2;
2831
2832 assert(loop_stack_depth > 0);
2833 loop_stack_depth--;
2834 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
2835 /* patch all the BREAK/CONT instructions from the last DO */
2836 while (inst0 > loop_stack[loop_stack_depth]) {
2837 inst0--;
2838 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
2839 inst0->bits3.if_else.jump_count == 0) {
2840 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
2841 }
2842 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
2843 inst0->bits3.if_else.jump_count == 0) {
2844 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
2845 }
2846 }
2847 }
2848 break;
2849
2850 case FS_OPCODE_RCP:
2851 case FS_OPCODE_RSQ:
2852 case FS_OPCODE_SQRT:
2853 case FS_OPCODE_EXP2:
2854 case FS_OPCODE_LOG2:
2855 case FS_OPCODE_POW:
2856 case FS_OPCODE_SIN:
2857 case FS_OPCODE_COS:
2858 generate_math(inst, dst, src);
2859 break;
2860 case FS_OPCODE_LINTERP:
2861 generate_linterp(inst, dst, src);
2862 break;
2863 case FS_OPCODE_TEX:
2864 case FS_OPCODE_TXB:
2865 case FS_OPCODE_TXL:
2866 generate_tex(inst, dst);
2867 break;
2868 case FS_OPCODE_DISCARD_NOT:
2869 generate_discard_not(inst, dst);
2870 break;
2871 case FS_OPCODE_DISCARD_AND:
2872 generate_discard_and(inst, src[0]);
2873 break;
2874 case FS_OPCODE_DDX:
2875 generate_ddx(inst, dst, src[0]);
2876 break;
2877 case FS_OPCODE_DDY:
2878 generate_ddy(inst, dst, src[0]);
2879 break;
2880 case FS_OPCODE_FB_WRITE:
2881 generate_fb_write(inst);
2882 break;
2883 default:
2884 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2885 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2886 brw_opcodes[inst->opcode].name);
2887 } else {
2888 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2889 }
2890 this->fail = true;
2891 }
2892
2893 while (annotation_len < p->nr_insn) {
2894 annotation_len *= 2;
2895 if (annotation_len < 16)
2896 annotation_len = 16;
2897
2898 this->annotation_string = talloc_realloc(this->mem_ctx,
2899 annotation_string,
2900 const char *,
2901 annotation_len);
2902 this->annotation_ir = talloc_realloc(this->mem_ctx,
2903 annotation_ir,
2904 ir_instruction *,
2905 annotation_len);
2906 }
2907
2908 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2909 this->annotation_string[i] = inst->annotation;
2910 this->annotation_ir[i] = inst->ir;
2911 }
2912 last_native_inst = p->nr_insn;
2913 }
2914 }
2915
2916 GLboolean
2917 brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
2918 {
2919 struct brw_compile *p = &c->func;
2920 struct intel_context *intel = &brw->intel;
2921 GLcontext *ctx = &intel->ctx;
2922 struct brw_shader *shader = NULL;
2923 struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
2924
2925 if (!prog)
2926 return GL_FALSE;
2927
2928 if (!using_new_fs)
2929 return GL_FALSE;
2930
2931 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
2932 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
2933 shader = (struct brw_shader *)prog->_LinkedShaders[i];
2934 break;
2935 }
2936 }
2937 if (!shader)
2938 return GL_FALSE;
2939
2940 /* We always use 8-wide mode, at least for now. For one, flow
2941 * control only works in 8-wide. Also, when we're fragment shader
2942 * bound, we're almost always under register pressure as well, so
2943 * 8-wide would save us from the performance cliff of spilling
2944 * regs.
2945 */
2946 c->dispatch_width = 8;
2947
2948 if (INTEL_DEBUG & DEBUG_WM) {
2949 printf("GLSL IR for native fragment shader %d:\n", prog->Name);
2950 _mesa_print_ir(shader->ir, NULL);
2951 printf("\n");
2952 }
2953
2954 /* Now the main event: Visit the shader IR and generate our FS IR for it.
2955 */
2956 fs_visitor v(c, shader);
2957
2958 if (0) {
2959 v.emit_dummy_fs();
2960 } else {
2961 v.calculate_urb_setup();
2962 if (intel->gen < 6)
2963 v.emit_interpolation_setup_gen4();
2964 else
2965 v.emit_interpolation_setup_gen6();
2966
2967 /* Generate FS IR for main(). (the visitor only descends into
2968 * functions called "main").
2969 */
2970 foreach_iter(exec_list_iterator, iter, *shader->ir) {
2971 ir_instruction *ir = (ir_instruction *)iter.get();
2972 v.base_ir = ir;
2973 ir->accept(&v);
2974 }
2975
2976 v.emit_fb_writes();
2977 v.assign_curb_setup();
2978 v.assign_urb_setup();
2979
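/* Iterate the optimization passes to a fixed point; each pass can
 * expose work for the others, e.g. constant propagation leaves dead
 * MOVs behind for dead_code_eliminate to remove.
 */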
2980 bool progress;
2981 do {
2982 progress = false;
2983
2984 v.calculate_live_intervals();
2985 progress = v.propagate_constants() || progress;
2986 progress = v.register_coalesce() || progress;
2987 progress = v.compute_to_mrf() || progress;
2988 progress = v.dead_code_eliminate() || progress;
2989 } while (progress);
2990
2991 if (0)
2992 v.assign_regs_trivial();
2993 else
2994 v.assign_regs();
2995 }
2996
2997 if (!v.fail)
2998 v.generate_code();
2999
3000 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */
3001
3002 if (v.fail)
3003 return GL_FALSE;
3004
3005 if (INTEL_DEBUG & DEBUG_WM) {
3006 const char *last_annotation_string = NULL;
3007 ir_instruction *last_annotation_ir = NULL;
3008
3009 printf("Native code for fragment shader %d:\n", prog->Name);
3010 for (unsigned int i = 0; i < p->nr_insn; i++) {
3011 if (last_annotation_ir != v.annotation_ir[i]) {
3012 last_annotation_ir = v.annotation_ir[i];
3013 if (last_annotation_ir) {
3014 printf(" ");
3015 last_annotation_ir->print();
3016 printf("\n");
3017 }
3018 }
3019 if (last_annotation_string != v.annotation_string[i]) {
3020 last_annotation_string = v.annotation_string[i];
3021 if (last_annotation_string)
3022 printf(" %s\n", last_annotation_string);
3023 }
3024 brw_disasm(stdout, &p->store[i], intel->gen);
3025 }
3026 printf("\n");
3027 }
3028
3029 c->prog_data.total_grf = v.grf_used;
3030 c->prog_data.total_scratch = 0;
3031
3032 return GL_TRUE;
3033 }