i965: Add back gen6 headerless FB writes to the new FS backend.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 extern "C" {
29
30 #include <sys/types.h>
31
32 #include "main/macros.h"
33 #include "main/shaderobj.h"
34 #include "main/uniforms.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_print.h"
37 #include "program/prog_optimize.h"
38 #include "program/register_allocate.h"
39 #include "program/sampler.h"
40 #include "program/hash_table.h"
41 #include "brw_context.h"
42 #include "brw_eu.h"
43 #include "brw_wm.h"
44 #include "talloc.h"
45 }
46 #include "../glsl/glsl_types.h"
47 #include "../glsl/ir_optimization.h"
48 #include "../glsl/ir_print_visitor.h"
49
/* Register files an fs_reg can live in.  The first four take their values
 * directly from the hardware register-file encodings (BRW_*) so they can be
 * handed to brw_reg construction unchanged; the rest are virtual files that
 * exist only inside this backend until register allocation.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE /* sentinel: register not set up yet (see fs_reg()) */
};
59
/* Backend-specific virtual opcodes, lowered to real instructions in
 * generate_code().  Numbering starts at 256, presumably to stay clear of the
 * BRW_OPCODE_* hardware opcode space — confirm against brw_defines.h.
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP, /* linear interpolation from barycentric deltas */
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};
78
79 static int using_new_fs = -1;
80 static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
81
82 struct gl_shader *
83 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
84 {
85 struct brw_shader *shader;
86
87 shader = talloc_zero(NULL, struct brw_shader);
88 if (shader) {
89 shader->base.Type = type;
90 shader->base.Name = name;
91 _mesa_init_shader(ctx, &shader->base);
92 }
93
94 return &shader->base;
95 }
96
97 struct gl_shader_program *
98 brw_new_shader_program(GLcontext *ctx, GLuint name)
99 {
100 struct brw_shader_program *prog;
101 prog = talloc_zero(NULL, struct brw_shader_program);
102 if (prog) {
103 prog->base.Name = name;
104 _mesa_init_shader_program(ctx, &prog->base);
105 }
106 return &prog->base;
107 }
108
109 GLboolean
110 brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
111 {
112 if (!_mesa_ir_compile_shader(ctx, shader))
113 return GL_FALSE;
114
115 return GL_TRUE;
116 }
117
/**
 * Link the program, lowering fragment-shader IR into the restricted form
 * this backend consumes when the INTEL_NEW_FS environment variable is set.
 *
 * The shader keeps a private clone of the linked IR (shader->ir) so the
 * lowering passes don't mutate the shared gl_shader IR.
 */
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   /* Lazily latch the env-var switch once per process. */
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
	 void *mem_ctx = talloc_new(NULL);
	 bool progress;

	 /* Replace any IR clone left over from a previous link. */
	 if (shader->ir)
	    talloc_free(shader->ir);
	 shader->ir = new(shader) exec_list;
	 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

	 /* One-shot lowering passes: break matrices, mod, div, sub, and
	  * exp/log into forms the scalar backend can emit directly.
	  */
	 do_mat_op_to_vec(shader->ir);
	 do_mod_to_fract(shader->ir);
	 do_div_to_mul_rcp(shader->ir);
	 do_sub_to_add_neg(shader->ir);
	 do_explog_to_explog2(shader->ir);
	 do_lower_texture_projection(shader->ir);

	 /* Iterate lowering + optimization to a fixed point, since each
	  * pass can expose work for the others.
	  */
	 do {
	    progress = false;

	    brw_do_channel_expressions(shader->ir);
	    brw_do_vector_splitting(shader->ir);

	    progress = do_lower_jumps(shader->ir, true, true,
				      true, /* main return */
				      false, /* continue */
				      false /* loops */
				      ) || progress;

	    progress = do_common_optimization(shader->ir, true, 32) || progress;

	    progress = lower_noise(shader->ir) || progress;
	    progress =
	       lower_variable_index_to_cond_assign(shader->ir,
						   GL_TRUE, /* input */
						   GL_TRUE, /* output */
						   GL_TRUE, /* temp */
						   GL_TRUE /* uniform */
						   ) || progress;
	 } while (progress);

	 validate_ir_tree(shader->ir);

	 /* Move the surviving IR out of the scratch context before it dies. */
	 reparent_ir(shader->ir, shader->ir);
	 talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}
179
180 static int
181 type_size(const struct glsl_type *type)
182 {
183 unsigned int size, i;
184
185 switch (type->base_type) {
186 case GLSL_TYPE_UINT:
187 case GLSL_TYPE_INT:
188 case GLSL_TYPE_FLOAT:
189 case GLSL_TYPE_BOOL:
190 return type->components();
191 case GLSL_TYPE_ARRAY:
192 return type_size(type->fields.array) * type->length;
193 case GLSL_TYPE_STRUCT:
194 size = 0;
195 for (i = 0; i < type->length; i++) {
196 size += type_size(type->fields.structure[i].type);
197 }
198 return size;
199 case GLSL_TYPE_SAMPLER:
200 /* Samplers take up no register space, since they're baked in at
201 * link time.
202 */
203 return 0;
204 default:
205 assert(!"not reached");
206 return 0;
207 }
208 }
209
210 class fs_reg {
211 public:
212 /* Callers of this talloc-based new need not call delete. It's
213 * easier to just talloc_free 'ctx' (or any of its ancestors). */
214 static void* operator new(size_t size, void *ctx)
215 {
216 void *node;
217
218 node = talloc_size(ctx, size);
219 assert(node != NULL);
220
221 return node;
222 }
223
224 void init()
225 {
226 this->reg = 0;
227 this->reg_offset = 0;
228 this->negate = 0;
229 this->abs = 0;
230 this->hw_reg = -1;
231 }
232
233 /** Generic unset register constructor. */
234 fs_reg()
235 {
236 init();
237 this->file = BAD_FILE;
238 }
239
240 /** Immediate value constructor. */
241 fs_reg(float f)
242 {
243 init();
244 this->file = IMM;
245 this->type = BRW_REGISTER_TYPE_F;
246 this->imm.f = f;
247 }
248
249 /** Immediate value constructor. */
250 fs_reg(int32_t i)
251 {
252 init();
253 this->file = IMM;
254 this->type = BRW_REGISTER_TYPE_D;
255 this->imm.i = i;
256 }
257
258 /** Immediate value constructor. */
259 fs_reg(uint32_t u)
260 {
261 init();
262 this->file = IMM;
263 this->type = BRW_REGISTER_TYPE_UD;
264 this->imm.u = u;
265 }
266
267 /** Fixed brw_reg Immediate value constructor. */
268 fs_reg(struct brw_reg fixed_hw_reg)
269 {
270 init();
271 this->file = FIXED_HW_REG;
272 this->fixed_hw_reg = fixed_hw_reg;
273 this->type = fixed_hw_reg.type;
274 }
275
276 fs_reg(enum register_file file, int hw_reg);
277 fs_reg(class fs_visitor *v, const struct glsl_type *type);
278
279 /** Register file: ARF, GRF, MRF, IMM. */
280 enum register_file file;
281 /** virtual register number. 0 = fixed hw reg */
282 int reg;
283 /** Offset within the virtual register. */
284 int reg_offset;
285 /** HW register number. Generally unset until register allocation. */
286 int hw_reg;
287 /** Register type. BRW_REGISTER_TYPE_* */
288 int type;
289 bool negate;
290 bool abs;
291 struct brw_reg fixed_hw_reg;
292
293 /** Value for file == BRW_IMMMEDIATE_FILE */
294 union {
295 int32_t i;
296 uint32_t u;
297 float f;
298 } imm;
299 };
300
301 static const fs_reg reg_undef;
302 static const fs_reg reg_null(ARF, BRW_ARF_NULL);
303
/**
 * A single backend instruction: a BRW_OPCODE_* or FS_OPCODE_* with up to
 * three fs_reg sources, one destination, and message/annotation metadata.
 */
class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      /* talloc_zero_size: the whole object starts zero-filled, so members
       * init() doesn't touch (mlen, ir, annotation) are 0/NULL.
       */
      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->header_present = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot; /* end-of-thread: set on the final FB write */
   bool header_present;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};
390
/**
 * The fragment-shader code generator: walks the lowered GLSL IR, building a
 * list of fs_inst, then allocates registers and emits native code via
 * brw_compile (this->p).
 */
class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      this->variable_ht = hash_table_ctor(0,
					  hash_table_pointer_hash,
					  hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;

      this->virtual_grf_sizes = NULL;
      /* Virtual GRF numbering starts at 1; 0 means "fixed hw reg". */
      this->virtual_grf_next = 1;
      this->virtual_grf_array_size = 0;
      this->virtual_grf_def = NULL;
      this->virtual_grf_use = NULL;

      this->kill_emitted = false;
   }

   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   fs_reg *variable_storage(ir_variable *var);
   int virtual_grf_alloc(int size);

   /* ir_visitor callbacks: each leaves its value (if any) in this->result. */
   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   /* Setup / analysis / optimization over the fs_inst list. */
   void assign_curb_setup();
   void calculate_urb_setup();
   void assign_urb_setup();
   void assign_regs();
   void assign_regs_trivial();
   void calculate_live_intervals();
   bool propagate_constants();
   bool dead_code_eliminate();
   bool virtual_grf_interferes(int a, int b);
   /* Native code emission. */
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
			 struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst, struct brw_reg temp);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_fragcoord_interpolation(ir_variable *ir);
   void emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup_gen4();
   void emit_interpolation_setup_gen6();
   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
   void emit_fb_writes();
   void emit_assignment_writes(fs_reg &l, fs_reg &r,
			       const glsl_type *type, bool predicated);

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);
   void setup_builtin_uniform_values(ir_variable *ir);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;

   /* Per-virtual-GRF bookkeeping, indexed by fs_reg::reg. */
   int *virtual_grf_sizes;
   int virtual_grf_next;
   int virtual_grf_array_size;
   int *virtual_grf_def;
   int *virtual_grf_use;

   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;
   int urb_setup[FRAG_ATTRIB_MAX];
   bool kill_emitted;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   /* Interpolation inputs produced by emit_interpolation_setup_*(). */
   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;

   int grf_used;

};
535
536 int
537 fs_visitor::virtual_grf_alloc(int size)
538 {
539 if (virtual_grf_array_size <= virtual_grf_next) {
540 if (virtual_grf_array_size == 0)
541 virtual_grf_array_size = 16;
542 else
543 virtual_grf_array_size *= 2;
544 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
545 int, virtual_grf_array_size);
546
547 /* This slot is always unused. */
548 virtual_grf_sizes[0] = 0;
549 }
550 virtual_grf_sizes[virtual_grf_next] = size;
551 return virtual_grf_next++;
552 }
553
554 /** Fixed HW reg constructor. */
555 fs_reg::fs_reg(enum register_file file, int hw_reg)
556 {
557 init();
558 this->file = file;
559 this->hw_reg = hw_reg;
560 this->type = BRW_REGISTER_TYPE_F;
561 }
562
563 int
564 brw_type_for_base_type(const struct glsl_type *type)
565 {
566 switch (type->base_type) {
567 case GLSL_TYPE_FLOAT:
568 return BRW_REGISTER_TYPE_F;
569 case GLSL_TYPE_INT:
570 case GLSL_TYPE_BOOL:
571 return BRW_REGISTER_TYPE_D;
572 case GLSL_TYPE_UINT:
573 return BRW_REGISTER_TYPE_UD;
574 case GLSL_TYPE_ARRAY:
575 case GLSL_TYPE_STRUCT:
576 /* These should be overridden with the type of the member when
577 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
578 * way to trip up if we don't.
579 */
580 return BRW_REGISTER_TYPE_UD;
581 default:
582 assert(!"not reached");
583 return BRW_REGISTER_TYPE_F;
584 }
585 }
586
587 /** Automatic reg constructor. */
588 fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
589 {
590 init();
591
592 this->file = GRF;
593 this->reg = v->virtual_grf_alloc(type_size(type));
594 this->reg_offset = 0;
595 this->type = brw_type_for_base_type(type);
596 }
597
598 fs_reg *
599 fs_visitor::variable_storage(ir_variable *var)
600 {
601 return (fs_reg *)hash_table_find(this->variable_ht, var);
602 }
603
604 /* Our support for uniforms is piggy-backed on the struct
605 * gl_fragment_program, because that's where the values actually
606 * get stored, rather than in some global gl_shader_program uniform
607 * store.
608 */
609 int
610 fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
611 {
612 unsigned int offset = 0;
613 float *vec_values;
614
615 if (type->is_matrix()) {
616 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
617 type->vector_elements,
618 1);
619
620 for (unsigned int i = 0; i < type->matrix_columns; i++) {
621 offset += setup_uniform_values(loc + offset, column);
622 }
623
624 return offset;
625 }
626
627 switch (type->base_type) {
628 case GLSL_TYPE_FLOAT:
629 case GLSL_TYPE_UINT:
630 case GLSL_TYPE_INT:
631 case GLSL_TYPE_BOOL:
632 vec_values = fp->Base.Parameters->ParameterValues[loc];
633 for (unsigned int i = 0; i < type->vector_elements; i++) {
634 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
635 }
636 return 1;
637
638 case GLSL_TYPE_STRUCT:
639 for (unsigned int i = 0; i < type->length; i++) {
640 offset += setup_uniform_values(loc + offset,
641 type->fields.structure[i].type);
642 }
643 return offset;
644
645 case GLSL_TYPE_ARRAY:
646 for (unsigned int i = 0; i < type->length; i++) {
647 offset += setup_uniform_values(loc + offset, type->fields.array);
648 }
649 return offset;
650
651 case GLSL_TYPE_SAMPLER:
652 /* The sampler takes up a slot, but we don't use any values from it. */
653 return 1;
654
655 default:
656 assert(!"not reached");
657 return 0;
658 }
659 }
660
661
662 /* Our support for builtin uniforms is even scarier than non-builtin.
663 * It sits on top of the PROG_STATE_VAR parameters that are
664 * automatically updated from GL context state.
665 */
666 void
667 fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
668 {
669 const struct gl_builtin_uniform_desc *statevar = NULL;
670
671 for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
672 statevar = &_mesa_builtin_uniform_desc[i];
673 if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0)
674 break;
675 }
676
677 if (!statevar->name) {
678 this->fail = true;
679 printf("Failed to find builtin uniform `%s'\n", ir->name);
680 return;
681 }
682
683 int array_count;
684 if (ir->type->is_array()) {
685 array_count = ir->type->length;
686 } else {
687 array_count = 1;
688 }
689
690 for (int a = 0; a < array_count; a++) {
691 for (unsigned int i = 0; i < statevar->num_elements; i++) {
692 struct gl_builtin_uniform_element *element = &statevar->elements[i];
693 int tokens[STATE_LENGTH];
694
695 memcpy(tokens, element->tokens, sizeof(element->tokens));
696 if (ir->type->is_array()) {
697 tokens[1] = a;
698 }
699
700 /* This state reference has already been setup by ir_to_mesa,
701 * but we'll get the same index back here.
702 */
703 int index = _mesa_add_state_reference(this->fp->Base.Parameters,
704 (gl_state_index *)tokens);
705 float *vec_values = this->fp->Base.Parameters->ParameterValues[index];
706
707 /* Add each of the unique swizzles of the element as a
708 * parameter. This'll end up matching the expected layout of
709 * the array/matrix/structure we're trying to fill in.
710 */
711 int last_swiz = -1;
712 for (unsigned int i = 0; i < 4; i++) {
713 int swiz = GET_SWZ(element->swizzle, i);
714 if (swiz == last_swiz)
715 break;
716 last_swiz = swiz;
717
718 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
719 }
720 }
721 }
722 }
723
724 void
725 fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
726 {
727 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
728 fs_reg wpos = *reg;
729 fs_reg neg_y = this->pixel_y;
730 neg_y.negate = true;
731
732 /* gl_FragCoord.x */
733 if (ir->pixel_center_integer) {
734 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
735 } else {
736 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
737 }
738 wpos.reg_offset++;
739
740 /* gl_FragCoord.y */
741 if (ir->origin_upper_left && ir->pixel_center_integer) {
742 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
743 } else {
744 fs_reg pixel_y = this->pixel_y;
745 float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
746
747 if (!ir->origin_upper_left) {
748 pixel_y.negate = true;
749 offset += c->key.drawable_height - 1.0;
750 }
751
752 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
753 }
754 wpos.reg_offset++;
755
756 /* gl_FragCoord.z */
757 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
758 interp_reg(FRAG_ATTRIB_WPOS, 2)));
759 wpos.reg_offset++;
760
761 /* gl_FragCoord.w: Already set up in emit_interpolation */
762 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));
763
764 hash_table_insert(this->variable_ht, reg, ir);
765 }
766
/**
 * Emit per-channel linear interpolation (LINTERP then perspective divide by
 * multiplying with pixel_w) for a generic varying input, and bind the result
 * register to the variable.  Handles arrays and matrices by iterating
 * array_elements x matrix_columns vec4 slots.
 */
void
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   /* Interpolation is always in floating point regs. */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
	 this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
	 if (urb_setup[location] == -1) {
	    /* If there's no incoming setup data for this slot, don't
	     * emit interpolation for it.
	     */
	    attr.reg_offset += type->vector_elements;
	    location++;
	    continue;
	 }

	 /* First pass: LINTERP each channel of this vec4 slot. */
	 for (unsigned int c = 0; c < type->vector_elements; c++) {
	    struct brw_reg interp = interp_reg(location, c);
	    emit(fs_inst(FS_OPCODE_LINTERP,
			 attr,
			 this->delta_x,
			 this->delta_y,
			 fs_reg(interp)));
	    attr.reg_offset++;
	 }
	 /* Rewind and multiply by pixel_w for the perspective divide. */
	 attr.reg_offset -= type->vector_elements;

	 for (unsigned int c = 0; c < type->vector_elements; c++) {
	    emit(fs_inst(BRW_OPCODE_MUL,
			 attr,
			 attr,
			 this->pixel_w));
	    attr.reg_offset++;
	 }
	 location++;
      }
   }

   hash_table_insert(this->variable_ht, reg, ir);
}
826
/**
 * Set up storage for a declared variable: note the builtin FS outputs,
 * emit interpolation for inputs, register uniform parameters, and otherwise
 * allocate a fresh virtual GRF.  The chosen fs_reg is bound in variable_ht.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   /* Already set up (e.g. visited via an earlier dereference). */
   if (variable_storage(ir))
      return;

   /* Remember the builtin output variables for emit_fb_writes(). */
   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
	 emit_fragcoord_interpolation(ir);
	 return;
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
	 reg = new(this->mem_ctx) fs_reg(this, ir->type);
	 /* R1.6 of the thread payload, read as UD. */
	 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
	 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
	  * us front face
	  */
	 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
				      *reg,
				      fs_reg(r1_6ud),
				      fs_reg(1u << 31)));
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 /* Reduce the CMP result to a 0/1 boolean. */
	 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
	 emit_general_interpolation(ir);
	 return;
      }
   }

   if (ir->mode == ir_var_uniform) {
      /* The variable's register points at the first param slot we add. */
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
	 setup_builtin_uniform_values(ir);
      } else {
	 setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   /* Plain temporaries/outputs get a fresh virtual GRF. */
   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}
882
883 void
884 fs_visitor::visit(ir_dereference_variable *ir)
885 {
886 fs_reg *reg = variable_storage(ir->var);
887 this->result = *reg;
888 }
889
890 void
891 fs_visitor::visit(ir_dereference_record *ir)
892 {
893 const glsl_type *struct_type = ir->record->type;
894
895 ir->record->accept(this);
896
897 unsigned int offset = 0;
898 for (unsigned int i = 0; i < struct_type->length; i++) {
899 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
900 break;
901 offset += type_size(struct_type->fields.structure[i].type);
902 }
903 this->result.reg_offset += offset;
904 this->result.type = brw_type_for_base_type(ir->type);
905 }
906
907 void
908 fs_visitor::visit(ir_dereference_array *ir)
909 {
910 ir_constant *index;
911 int element_size;
912
913 ir->array->accept(this);
914 index = ir->array_index->as_constant();
915
916 element_size = type_size(ir->type);
917 this->result.type = brw_type_for_base_type(ir->type);
918
919 if (index) {
920 assert(this->result.file == UNIFORM ||
921 (this->result.file == GRF &&
922 this->result.reg != 0));
923 this->result.reg_offset += index->value.i[0] * element_size;
924 } else {
925 assert(!"FINISHME: non-constant array element");
926 }
927 }
928
929 void
930 fs_visitor::visit(ir_expression *ir)
931 {
932 unsigned int operand;
933 fs_reg op[2], temp;
934 fs_reg result;
935 fs_inst *inst;
936
937 for (operand = 0; operand < ir->get_num_operands(); operand++) {
938 ir->operands[operand]->accept(this);
939 if (this->result.file == BAD_FILE) {
940 ir_print_visitor v;
941 printf("Failed to get tree for expression operand:\n");
942 ir->operands[operand]->accept(&v);
943 this->fail = true;
944 }
945 op[operand] = this->result;
946
947 /* Matrix expression operands should have been broken down to vector
948 * operations already.
949 */
950 assert(!ir->operands[operand]->type->is_matrix());
951 /* And then those vector operands should have been broken down to scalar.
952 */
953 assert(!ir->operands[operand]->type->is_vector());
954 }
955
956 /* Storage for our result. If our result goes into an assignment, it will
957 * just get copy-propagated out, so no worries.
958 */
959 this->result = fs_reg(this, ir->type);
960
961 switch (ir->operation) {
962 case ir_unop_logic_not:
963 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
964 break;
965 case ir_unop_neg:
966 op[0].negate = !op[0].negate;
967 this->result = op[0];
968 break;
969 case ir_unop_abs:
970 op[0].abs = true;
971 this->result = op[0];
972 break;
973 case ir_unop_sign:
974 temp = fs_reg(this, ir->type);
975
976 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
977
978 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
979 inst->conditional_mod = BRW_CONDITIONAL_G;
980 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
981 inst->predicated = true;
982
983 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
984 inst->conditional_mod = BRW_CONDITIONAL_L;
985 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
986 inst->predicated = true;
987
988 break;
989 case ir_unop_rcp:
990 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
991 break;
992
993 case ir_unop_exp2:
994 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
995 break;
996 case ir_unop_log2:
997 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
998 break;
999 case ir_unop_exp:
1000 case ir_unop_log:
1001 assert(!"not reached: should be handled by ir_explog_to_explog2");
1002 break;
1003 case ir_unop_sin:
1004 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
1005 break;
1006 case ir_unop_cos:
1007 emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
1008 break;
1009
1010 case ir_unop_dFdx:
1011 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
1012 break;
1013 case ir_unop_dFdy:
1014 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
1015 break;
1016
1017 case ir_binop_add:
1018 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
1019 break;
1020 case ir_binop_sub:
1021 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1022 break;
1023
1024 case ir_binop_mul:
1025 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
1026 break;
1027 case ir_binop_div:
1028 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1029 break;
1030 case ir_binop_mod:
1031 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1032 break;
1033
1034 case ir_binop_less:
1035 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1036 inst->conditional_mod = BRW_CONDITIONAL_L;
1037 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1038 break;
1039 case ir_binop_greater:
1040 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1041 inst->conditional_mod = BRW_CONDITIONAL_G;
1042 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1043 break;
1044 case ir_binop_lequal:
1045 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1046 inst->conditional_mod = BRW_CONDITIONAL_LE;
1047 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1048 break;
1049 case ir_binop_gequal:
1050 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1051 inst->conditional_mod = BRW_CONDITIONAL_GE;
1052 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1053 break;
1054 case ir_binop_equal:
1055 case ir_binop_all_equal: /* same as nequal for scalars */
1056 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1057 inst->conditional_mod = BRW_CONDITIONAL_Z;
1058 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1059 break;
1060 case ir_binop_nequal:
1061 case ir_binop_any_nequal: /* same as nequal for scalars */
1062 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1063 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1064 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1065 break;
1066
1067 case ir_binop_logic_xor:
1068 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
1069 break;
1070
1071 case ir_binop_logic_or:
1072 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
1073 break;
1074
1075 case ir_binop_logic_and:
1076 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
1077 break;
1078
1079 case ir_binop_dot:
1080 case ir_binop_cross:
1081 case ir_unop_any:
1082 assert(!"not reached: should be handled by brw_fs_channel_expressions");
1083 break;
1084
1085 case ir_unop_noise:
1086 assert(!"not reached: should be handled by lower_noise");
1087 break;
1088
1089 case ir_unop_sqrt:
1090 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
1091 break;
1092
1093 case ir_unop_rsq:
1094 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
1095 break;
1096
1097 case ir_unop_i2f:
1098 case ir_unop_b2f:
1099 case ir_unop_b2i:
1100 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
1101 break;
1102 case ir_unop_f2i:
1103 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
1104 break;
1105 case ir_unop_f2b:
1106 case ir_unop_i2b:
1107 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
1108 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1109
1110 case ir_unop_trunc:
1111 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1112 break;
1113 case ir_unop_ceil:
1114 op[0].negate = ~op[0].negate;
1115 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1116 this->result.negate = true;
1117 break;
1118 case ir_unop_floor:
1119 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1120 break;
1121 case ir_unop_fract:
1122 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
1123 break;
1124
1125 case ir_binop_min:
1126 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1127 inst->conditional_mod = BRW_CONDITIONAL_L;
1128
1129 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
1130 inst->predicated = true;
1131 break;
1132 case ir_binop_max:
1133 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1134 inst->conditional_mod = BRW_CONDITIONAL_G;
1135
1136 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
1137 inst->predicated = true;
1138 break;
1139
1140 case ir_binop_pow:
1141 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
1142 break;
1143
1144 case ir_unop_bit_not:
1145 case ir_unop_u2f:
1146 case ir_binop_lshift:
1147 case ir_binop_rshift:
1148 case ir_binop_bit_and:
1149 case ir_binop_bit_xor:
1150 case ir_binop_bit_or:
1151 assert(!"GLSL 1.30 features unsupported");
1152 break;
1153 }
1154 }
1155
1156 void
1157 fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
1158 const glsl_type *type, bool predicated)
1159 {
1160 switch (type->base_type) {
1161 case GLSL_TYPE_FLOAT:
1162 case GLSL_TYPE_UINT:
1163 case GLSL_TYPE_INT:
1164 case GLSL_TYPE_BOOL:
1165 for (unsigned int i = 0; i < type->components(); i++) {
1166 l.type = brw_type_for_base_type(type);
1167 r.type = brw_type_for_base_type(type);
1168
1169 fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
1170 inst->predicated = predicated;
1171
1172 l.reg_offset++;
1173 r.reg_offset++;
1174 }
1175 break;
1176 case GLSL_TYPE_ARRAY:
1177 for (unsigned int i = 0; i < type->length; i++) {
1178 emit_assignment_writes(l, r, type->fields.array, predicated);
1179 }
1180
1181 case GLSL_TYPE_STRUCT:
1182 for (unsigned int i = 0; i < type->length; i++) {
1183 emit_assignment_writes(l, r, type->fields.structure[i].type,
1184 predicated);
1185 }
1186 break;
1187
1188 case GLSL_TYPE_SAMPLER:
1189 break;
1190
1191 default:
1192 assert(!"not reached");
1193 break;
1194 }
1195 }
1196
/**
 * Visits an assignment: evaluates both sides, then emits MOVs from the
 * RHS temporary into the LHS storage, optionally predicated on
 * ir->condition.
 */
void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   if (ir->lhs->type->is_scalar() ||
       ir->lhs->type->is_vector()) {
      /* Honor the write mask per channel.  Note the RHS offset only
       * advances for channels actually written, while the LHS offset
       * advances every iteration.
       */
      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
	 if (ir->write_mask & (1 << i)) {
	    inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
	    if (ir->condition)
	       inst->predicated = true;
	    r.reg_offset++;
	 }
	 l.reg_offset++;
      }
   } else {
      /* Aggregate LHS (struct/array): copy component by component. */
      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
   }
}
1235
/**
 * Marshals the texture coordinate and parameters into MRFs for gen4's
 * sampler message layouts and emits the sample instruction.
 *
 * gen4's SIMD8 messages have fixed u/v/r slots.  Non-shadow bias/lod
 * messages only exist in SIMD16 form, which forces an interleaved
 * payload and a SIMD16 response that is unpacked back to SIMD8 at the
 * end.  Returns the emitted sample instruction so the caller can fill
 * in sampler/shadow state.
 */
fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   int mlen;
   int base_mrf = 2;
   bool simd16 = false;
   fs_reg orig_dst;

   if (ir->shadow_comparitor) {
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      coordinate));
	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen = 3;

      if (ir->op == ir_tex) {
	 /* There's no plain shadow compare message, so we use shadow
	  * compare with a bias of 0.0.
	  */
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      fs_reg(0.0f)));
	 mlen++;
      } else if (ir->op == ir_txb) {
	 ir->lod_info.bias->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      } else {
	 assert(ir->op == ir_txl);
	 ir->lod_info.lod->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      }

      /* The reference value goes in the slot after bias/lod. */
      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   } else if (ir->op == ir_tex) {
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      coordinate));
	 coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen = 3;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      assert(ir->op == ir_txb || ir->op == ir_txl);

      /* Each coordinate component occupies a pair of MRFs; the second
       * (upper) half of each pair is left unwritten.
       */
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements * 2;) {
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      coordinate));
	 coordinate.reg_offset++;
	 mlen++;

	 /* The unused upper half. */
	 mlen++;
      }

      /* lod/bias appears after u/v/r. */
      mlen = 6;

      if (ir->op == ir_txb) {
	 ir->lod_info.bias->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      } else {
	 ir->lod_info.lod->accept(this);
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
		      this->result));
	 mlen++;
      }

      /* The unused upper half. */
      mlen++;

      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk. We'll need to move
       * this weirdness around to the expected layout.
       */
      simd16 = true;
      orig_dst = dst;
      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
						       2));
      dst.type = BRW_REGISTER_TYPE_F;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->mlen = mlen;

   if (simd16) {
      /* Unpack the SIMD16 response: keep every even vec4, skip the junk. */
      for (int i = 0; i < 4; i++) {
	 emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
	 orig_dst.reg_offset++;
	 dst.reg_offset += 2;
      }
   }

   return inst;
}
1357
/**
 * Marshals coordinate/parameters for gen5+ sampler messages and emits
 * the sample instruction.  Returns it for the caller to finish setting
 * up (sampler index, shadow flag).
 */
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
    * optional parameters like shadow comparitor or LOD bias.  If
    * optional parameters aren't present, those base slots are
    * optional and don't need to be included in the message.
    *
    * We don't fill in the unnecessary slots regardless, which may
    * look surprising in the disassembly.
    */
   int mlen;
   int base_mrf = 2;

   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   if (ir->shadow_comparitor) {
      /* Optional parameters start after the 4 base (u,v,r,ai) slots. */
      mlen = MAX2(mlen, 4);

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      /* Bias goes in the slot after any shadow comparitor. */
      ir->lod_info.bias->accept(this);
      mlen = MAX2(mlen, 4);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      mlen = MAX2(mlen, 4);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->mlen = mlen;

   return inst;
}
1415
1416 void
1417 fs_visitor::visit(ir_texture *ir)
1418 {
1419 fs_inst *inst = NULL;
1420
1421 ir->coordinate->accept(this);
1422 fs_reg coordinate = this->result;
1423
1424 /* Should be lowered by do_lower_texture_projection */
1425 assert(!ir->projector);
1426
1427 /* Writemasking doesn't eliminate channels on SIMD8 texture
1428 * samples, so don't worry about them.
1429 */
1430 fs_reg dst = fs_reg(this, glsl_type::vec4_type);
1431
1432 if (intel->gen < 5) {
1433 inst = emit_texture_gen4(ir, dst, coordinate);
1434 } else {
1435 inst = emit_texture_gen5(ir, dst, coordinate);
1436 }
1437
1438 inst->sampler =
1439 _mesa_get_sampler_uniform_value(ir->sampler,
1440 ctx->Shader.CurrentProgram,
1441 &brw->fragment_program->Base);
1442 inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];
1443
1444 this->result = dst;
1445
1446 if (ir->shadow_comparitor)
1447 inst->shadow_compare = true;
1448
1449 if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
1450 fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);
1451
1452 for (int i = 0; i < 4; i++) {
1453 int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
1454 fs_reg l = swizzle_dst;
1455 l.reg_offset += i;
1456
1457 if (swiz == SWIZZLE_ZERO) {
1458 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
1459 } else if (swiz == SWIZZLE_ONE) {
1460 emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
1461 } else {
1462 fs_reg r = dst;
1463 r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
1464 emit(fs_inst(BRW_OPCODE_MOV, l, r));
1465 }
1466 }
1467 this->result = swizzle_dst;
1468 }
1469 }
1470
1471 void
1472 fs_visitor::visit(ir_swizzle *ir)
1473 {
1474 ir->val->accept(this);
1475 fs_reg val = this->result;
1476
1477 if (ir->type->vector_elements == 1) {
1478 this->result.reg_offset += ir->mask.x;
1479 return;
1480 }
1481
1482 fs_reg result = fs_reg(this, ir->type);
1483 this->result = result;
1484
1485 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1486 fs_reg channel = val;
1487 int swiz = 0;
1488
1489 switch (i) {
1490 case 0:
1491 swiz = ir->mask.x;
1492 break;
1493 case 1:
1494 swiz = ir->mask.y;
1495 break;
1496 case 2:
1497 swiz = ir->mask.z;
1498 break;
1499 case 3:
1500 swiz = ir->mask.w;
1501 break;
1502 }
1503
1504 channel.reg_offset += swiz;
1505 emit(fs_inst(BRW_OPCODE_MOV, result, channel));
1506 result.reg_offset++;
1507 }
1508 }
1509
1510 void
1511 fs_visitor::visit(ir_discard *ir)
1512 {
1513 fs_reg temp = fs_reg(this, glsl_type::uint_type);
1514
1515 assert(ir->condition == NULL); /* FINISHME */
1516
1517 emit(fs_inst(FS_OPCODE_DISCARD, temp, temp));
1518 kill_emitted = true;
1519 }
1520
1521 void
1522 fs_visitor::visit(ir_constant *ir)
1523 {
1524 fs_reg reg(this, ir->type);
1525 this->result = reg;
1526
1527 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1528 switch (ir->type->base_type) {
1529 case GLSL_TYPE_FLOAT:
1530 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
1531 break;
1532 case GLSL_TYPE_UINT:
1533 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
1534 break;
1535 case GLSL_TYPE_INT:
1536 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
1537 break;
1538 case GLSL_TYPE_BOOL:
1539 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
1540 break;
1541 default:
1542 assert(!"Non-float/uint/int/bool constant");
1543 }
1544 reg.reg_offset++;
1545 }
1546 }
1547
/**
 * Visits an if statement, generating predicated IF/ELSE/ENDIF flow
 * control from the condition value.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}
1586
/**
 * Visits a loop, generating DO/WHILE flow control plus the optional
 * counter initialization, bound test (as a predicated BREAK), and
 * increment that the GLSL IR loop analysis provides.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
	 /* Initialize the counter before entering the loop. */
	 this->base_ir = ir->from;
	 ir->from->accept(this);

	 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      /* Compare the counter against the bound and break out when the
       * loop condition is met.
       */
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
				   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;
      case ir_binop_nequal:
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;
      case ir_binop_gequal:
	 inst->conditional_mod = BRW_CONDITIONAL_GE;
	 break;
      case ir_binop_lequal:
	 inst->conditional_mod = BRW_CONDITIONAL_LE;
	 break;
      case ir_binop_greater:
	 inst->conditional_mod = BRW_CONDITIONAL_G;
	 break;
      case ir_binop_less:
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 break;
      default:
	 assert(!"not reached: unknown loop condition");
	 this->fail = true;
	 break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      /* Step the counter at the bottom of the loop body. */
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}
1657
1658 void
1659 fs_visitor::visit(ir_loop_jump *ir)
1660 {
1661 switch (ir->mode) {
1662 case ir_loop_jump::jump_break:
1663 emit(fs_inst(BRW_OPCODE_BREAK));
1664 break;
1665 case ir_loop_jump::jump_continue:
1666 emit(fs_inst(BRW_OPCODE_CONTINUE));
1667 break;
1668 }
1669 }
1670
1671 void
1672 fs_visitor::visit(ir_call *ir)
1673 {
1674 assert(!"FINISHME");
1675 }
1676
1677 void
1678 fs_visitor::visit(ir_return *ir)
1679 {
1680 assert(!"FINISHME");
1681 }
1682
/**
 * Visits a function definition, emitting code only for main()'s body.
 */
void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      /* main() takes no parameters, so match against an empty list. */
      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }
}
1705
1706 void
1707 fs_visitor::visit(ir_function_signature *ir)
1708 {
1709 assert(!"not reached");
1710 (void)ir;
1711 }
1712
1713 fs_inst *
1714 fs_visitor::emit(fs_inst inst)
1715 {
1716 fs_inst *list_inst = new(mem_ctx) fs_inst;
1717 *list_inst = inst;
1718
1719 list_inst->annotation = this->current_annotation;
1720 list_inst->ir = this->base_ir;
1721
1722 this->instructions.push_tail(list_inst);
1723
1724 return list_inst;
1725 }
1726
1727 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1728 void
1729 fs_visitor::emit_dummy_fs()
1730 {
1731 /* Everyone's favorite color. */
1732 emit(fs_inst(BRW_OPCODE_MOV,
1733 fs_reg(MRF, 2),
1734 fs_reg(1.0f)));
1735 emit(fs_inst(BRW_OPCODE_MOV,
1736 fs_reg(MRF, 3),
1737 fs_reg(0.0f)));
1738 emit(fs_inst(BRW_OPCODE_MOV,
1739 fs_reg(MRF, 4),
1740 fs_reg(1.0f)));
1741 emit(fs_inst(BRW_OPCODE_MOV,
1742 fs_reg(MRF, 5),
1743 fs_reg(0.0f)));
1744
1745 fs_inst *write;
1746 write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1747 fs_reg(0),
1748 fs_reg(0)));
1749 }
1750
1751 /* The register location here is relative to the start of the URB
1752 * data. It will get adjusted to be a real location before
1753 * generate_code() time.
1754 */
1755 struct brw_reg
1756 fs_visitor::interp_reg(int location, int channel)
1757 {
1758 int regnr = urb_setup[location] * 2 + channel / 2;
1759 int stride = (channel & 1) * 4;
1760
1761 assert(urb_setup[location] != -1);
1762
1763 return brw_vec1_grf(regnr, stride);
1764 }
1765
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* Derive per-pixel x/y from the subspan coordinates in g1 plus the
    * per-channel offset vectors.
    */
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      /* PLN wants delta_x/delta_y in an aligned register pair. */
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_x,
		this->pixel_x,
		fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_y,
		this->pixel_y,
		fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
		interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
   this->current_annotation = NULL;
}
1816
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute 1/pos.w";
   /* gen6 delivers interpolated W in the payload; we only need 1/W. */
   this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));

   /* The barycentric deltas come pre-computed in the thread payload. */
   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
   this->delta_y = fs_reg(brw_vec8_grf(3, 0));

   this->current_annotation = NULL;
}
1848
1849 void
1850 fs_visitor::emit_fb_writes()
1851 {
1852 this->current_annotation = "FB write header";
1853 GLboolean header_present = GL_TRUE;
1854 int nr = 0;
1855
1856 if (intel->gen >= 6 &&
1857 !this->kill_emitted &&
1858 c->key.nr_color_regions == 1) {
1859 header_present = false;
1860 }
1861
1862 if (header_present) {
1863 /* m0, m1 header */
1864 nr += 2;
1865 }
1866
1867 if (c->key.aa_dest_stencil_reg) {
1868 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1869 fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
1870 }
1871
1872 /* Reserve space for color. It'll be filled in per MRT below. */
1873 int color_mrf = nr;
1874 nr += 4;
1875
1876 if (c->key.source_depth_to_render_target) {
1877 if (c->key.computes_depth) {
1878 /* Hand over gl_FragDepth. */
1879 assert(this->frag_depth);
1880 fs_reg depth = *(variable_storage(this->frag_depth));
1881
1882 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
1883 } else {
1884 /* Pass through the payload depth. */
1885 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1886 fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
1887 }
1888 }
1889
1890 if (c->key.dest_depth_reg) {
1891 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
1892 fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
1893 }
1894
1895 fs_reg color = reg_undef;
1896 if (this->frag_color)
1897 color = *(variable_storage(this->frag_color));
1898 else if (this->frag_data)
1899 color = *(variable_storage(this->frag_data));
1900
1901 for (int target = 0; target < c->key.nr_color_regions; target++) {
1902 this->current_annotation = talloc_asprintf(this->mem_ctx,
1903 "FB write target %d",
1904 target);
1905 if (this->frag_color || this->frag_data) {
1906 for (int i = 0; i < 4; i++) {
1907 emit(fs_inst(BRW_OPCODE_MOV,
1908 fs_reg(MRF, color_mrf + i),
1909 color));
1910 color.reg_offset++;
1911 }
1912 }
1913
1914 if (this->frag_color)
1915 color.reg_offset -= 4;
1916
1917 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
1918 reg_undef, reg_undef));
1919 inst->target = target;
1920 inst->mlen = nr;
1921 if (target == c->key.nr_color_regions - 1)
1922 inst->eot = true;
1923 }
1924
1925 if (c->key.nr_color_regions == 0) {
1926 fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
1927 reg_undef, reg_undef));
1928 inst->mlen = nr;
1929 inst->eot = true;
1930 inst->header_present = header_present;
1931 }
1932
1933 this->current_annotation = NULL;
1934 }
1935
/**
 * Generates native code for one FS_OPCODE_FB_WRITE.
 *
 * When the message has a header, g0 provides m0 (implied move pre-gen6,
 * explicit MOV on gen6) and g1's pixel mask is copied to m1.
 */
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;
   struct brw_reg implied_header;

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
	 /* gen6 has no implied header move; copy g0 to m0 explicitly. */
	 brw_MOV(p,
		 brw_message_reg(0),
		 brw_vec8_grf(0, 0));
	 implied_header = brw_null_reg();
      } else {
	 implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }

      brw_MOV(p,
	      brw_message_reg(1),
	      brw_vec8_grf(1, 0));
   } else {
      /* Headerless message: nothing to set up. */
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
		8, /* dispatch_width */
		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
		0, /* base MRF */
		implied_header,
		inst->target,
		inst->mlen,
		0,
		eot);
}
1978
/**
 * Generates linear interpolation of an attribute from the barycentric
 * deltas, using PLN when the hardware and register alignment allow,
 * falling back to LINE+MAC otherwise.
 */
void
fs_visitor::generate_linterp(fs_inst *inst,
			     struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   /* PLN reads delta_x/delta_y as an adjacent pair; pre-gen6 it also
    * requires the pair to start on an even register number.
    */
   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}
1996
/**
 * Generates a send to the extended math unit for the transcendental
 * opcodes.  POW's second operand travels in message register m3.
 */
void
fs_visitor::generate_math(fs_inst *inst,
			  struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   if (inst->opcode == FS_OPCODE_POW) {
      /* The exponent goes in the message payload after the base. */
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   brw_math(p, dst,
	    op,
	    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
	    BRW_MATH_SATURATE_NONE,
	    2, src[0],
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);
}
2046
/**
 * Generates the sampler send for a texture instruction, choosing the
 * per-generation message type and SIMD mode that emit_texture_gen4/5
 * set the payload up for.
 */
void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

   if (intel->gen == 5) {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
	 } else {
	    msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
	 }
	 break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
	 /* Note that G45 and older determines shadow compare and dispatch width
	  * from message length for most messages.
	  */
	 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 5);
	 } else {
	    assert(inst->mlen <= 6);
	 }
	 break;
      case FS_OPCODE_TXB:
	 if (inst->shadow_compare) {
	    assert(inst->mlen == 5);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
	 } else {
	    /* Non-shadow bias only exists as SIMD16 on gen4. */
	    assert(inst->mlen == 8);
	    msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
	    simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
	 }
	 break;
      }
   }
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      /* SIMD16 returns 8 regs (2 interleaved vec4s). */
      rlen = 8;
      dst = vec16(dst);
   }

   /* g0 header. */
   src.nr--;

   brw_SAMPLE(p,
	      retype(dst, BRW_REGISTER_TYPE_UW),
	      src.nr,
	      retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
	      SURF_INDEX_TEXTURE(inst->sampler),
	      inst->sampler,
	      WRITEMASK_XYZW,
	      msg_type,
	      rlen,
	      inst->mlen + 1,
	      0,
	      1,
	      simd_mode);
}
2120
2121
2122 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
2123 * looking like:
2124 *
2125 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
2126 *
2127 * and we're trying to produce:
2128 *
2129 * DDX DDY
2130 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
2131 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
2132 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
2133 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
2134 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
2135 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
2136 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
2137 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
2138 *
2139 * and add another set of two more subspans if in 16-pixel dispatch mode.
2140 *
2141 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
2142 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
2143 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
2144 * between each other. We could probably do it like ddx and swizzle the right
2145 * order later, but bail for now and just produce
2146 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
2147 */
/**
 * Generates dFdx: per the subspan layout described above, subtract the
 * left pixel of each pair from the right, broadcasting the result to
 * both pixels of the pair (width 2, horiz stride 0, vert stride 2).
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Right pixels of each pair (subnr 1). */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   /* Left pixels of each pair (subnr 0). */
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
2165
/**
 * Generates dFdy: subtract each subspan's bottom-left from its top-left,
 * broadcasting across all four pixels of the subspan (the simplified
 * approximation described in the comment above).
 */
void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Top-left pixel of each subspan (subnr 0). */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   /* Bottom-left pixel of each subspan (subnr 2). */
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
2183
/**
 * Generates a discard: clears the active channels' bits out of g0's
 * pixel mask so subsequent FB writes skip the killed pixels.
 */
void
fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   temp = brw_uw1_reg(temp.file, temp.nr, 0);

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, temp, g0);
   brw_pop_insn_state(p);
}
2196
/**
 * Maps UNIFORM-file operands onto the fixed hardware registers of the
 * CURBE (push constant) space, and records the CURBE layout in
 * prog_data.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   /* CURBE is allocated in whole 8-float registers. */
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
	 if (inst->src[i].file == UNIFORM) {
	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
						  constant_nr / 8,
						  constant_nr % 8);

	    inst->src[i].file = FIXED_HW_REG;
	    inst->src[i].fixed_hw_reg = brw_reg;
	 }
      }
   }
}
2220
/**
 * Decides which URB setup slot each fragment input attribute lands in,
 * filling urb_setup[] (-1 for absent attributes) and recording the
 * resulting read length in prog_data.
 */
void
fs_visitor::calculate_urb_setup()
{
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      urb_setup[i] = -1;
   }

   int urb_next = 0;
   /* Figure out where each of the incoming setup attributes lands. */
   if (intel->gen >= 6) {
      /* gen6 SF hands us exactly the attributes the FS reads (plus WPOS). */
      for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
	 if (i == FRAG_ATTRIB_WPOS ||
	     (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i))) {
	    urb_setup[i] = urb_next++;
	 }
      }
   } else {
      /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
      for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
	 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
	    int fp_index;

	    /* Translate each written VS output slot to its FS input slot. */
	    if (i >= VERT_RESULT_VAR0)
	       fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
	    else if (i <= VERT_RESULT_TEX7)
	       fp_index = i;
	    else
	       fp_index = -1;

	    if (fp_index >= 0)
	       urb_setup[fp_index] = urb_next++;
	 }
      }
   }

   /* Each attribute is 4 setup channels, each of which is half a reg. */
   c->prog_data.urb_read_length = urb_next * 2;
}
2259
/**
 * Rewrites LINTERP setup-register operands from URB-relative slots to
 * actual GRF numbers now that the CURBE location is known, and records
 * where non-payload GRFs begin.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;

   /* Offset all the urb_setup[] index by the actual position of the
    * setup regs, now that the location of the constants has been chosen.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
	 continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      inst->src[2].fixed_hw_reg.nr += urb_start;
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}
2281
2282 static void
2283 assign_reg(int *reg_hw_locations, fs_reg *reg)
2284 {
2285 if (reg->file == GRF && reg->reg != 0) {
2286 reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
2287 reg->reg = 0;
2288 }
2289 }
2290
2291 void
2292 fs_visitor::assign_regs_trivial()
2293 {
2294 int last_grf = 0;
2295 int hw_reg_mapping[this->virtual_grf_next];
2296 int i;
2297
2298 hw_reg_mapping[0] = 0;
2299 hw_reg_mapping[1] = this->first_non_payload_grf;
2300 for (i = 2; i < this->virtual_grf_next; i++) {
2301 hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
2302 this->virtual_grf_sizes[i - 1]);
2303 }
2304 last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
2305
2306 foreach_iter(exec_list_iterator, iter, this->instructions) {
2307 fs_inst *inst = (fs_inst *)iter.get();
2308
2309 assign_reg(hw_reg_mapping, &inst->dst);
2310 assign_reg(hw_reg_mapping, &inst->src[0]);
2311 assign_reg(hw_reg_mapping, &inst->src[1]);
2312 }
2313
2314 this->grf_used = last_grf + 1;
2315 }
2316
2317 void
2318 fs_visitor::assign_regs()
2319 {
2320 int last_grf = 0;
2321 int hw_reg_mapping[this->virtual_grf_next + 1];
2322 int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
2323 int class_sizes[base_reg_count];
2324 int class_count = 0;
2325 int aligned_pair_class = -1;
2326
2327 /* Set up the register classes.
2328 *
2329 * The base registers store a scalar value. For texture samples,
2330 * we get virtual GRFs composed of 4 contiguous hw register. For
2331 * structures and arrays, we store them as contiguous larger things
2332 * than that, though we should be able to do better most of the
2333 * time.
2334 */
2335 class_sizes[class_count++] = 1;
2336 if (brw->has_pln && intel->gen < 6) {
2337 /* Always set up the (unaligned) pairs for gen5, so we can find
2338 * them for making the aligned pair class.
2339 */
2340 class_sizes[class_count++] = 2;
2341 }
2342 for (int r = 1; r < this->virtual_grf_next; r++) {
2343 int i;
2344
2345 for (i = 0; i < class_count; i++) {
2346 if (class_sizes[i] == this->virtual_grf_sizes[r])
2347 break;
2348 }
2349 if (i == class_count) {
2350 if (this->virtual_grf_sizes[r] >= base_reg_count) {
2351 fprintf(stderr, "Object too large to register allocate.\n");
2352 this->fail = true;
2353 }
2354
2355 class_sizes[class_count++] = this->virtual_grf_sizes[r];
2356 }
2357 }
2358
2359 int ra_reg_count = 0;
2360 int class_base_reg[class_count];
2361 int class_reg_count[class_count];
2362 int classes[class_count + 1];
2363
2364 for (int i = 0; i < class_count; i++) {
2365 class_base_reg[i] = ra_reg_count;
2366 class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
2367 ra_reg_count += class_reg_count[i];
2368 }
2369
2370 struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
2371 for (int i = 0; i < class_count; i++) {
2372 classes[i] = ra_alloc_reg_class(regs);
2373
2374 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2375 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
2376 }
2377
2378 /* Add conflicts between our contiguous registers aliasing
2379 * base regs and other register classes' contiguous registers
2380 * that alias base regs, or the base regs themselves for classes[0].
2381 */
2382 for (int c = 0; c <= i; c++) {
2383 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2384 for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
2385 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
2386 c_r++) {
2387
2388 if (0) {
2389 printf("%d/%d conflicts %d/%d\n",
2390 class_sizes[i], this->first_non_payload_grf + i_r,
2391 class_sizes[c], this->first_non_payload_grf + c_r);
2392 }
2393
2394 ra_add_reg_conflict(regs,
2395 class_base_reg[i] + i_r,
2396 class_base_reg[c] + c_r);
2397 }
2398 }
2399 }
2400 }
2401
2402 /* Add a special class for aligned pairs, which we'll put delta_x/y
2403 * in on gen5 so that we can do PLN.
2404 */
2405 if (brw->has_pln && intel->gen < 6) {
2406 int reg_count = (base_reg_count - 1) / 2;
2407 int unaligned_pair_class = 1;
2408 assert(class_sizes[unaligned_pair_class] == 2);
2409
2410 aligned_pair_class = class_count;
2411 classes[aligned_pair_class] = ra_alloc_reg_class(regs);
2412 class_base_reg[aligned_pair_class] = 0;
2413 class_reg_count[aligned_pair_class] = 0;
2414 int start = (this->first_non_payload_grf & 1) ? 1 : 0;
2415
2416 for (int i = 0; i < reg_count; i++) {
2417 ra_class_add_reg(regs, classes[aligned_pair_class],
2418 class_base_reg[unaligned_pair_class] + i * 2 + start);
2419 }
2420 class_count++;
2421 }
2422
2423 ra_set_finalize(regs);
2424
2425 struct ra_graph *g = ra_alloc_interference_graph(regs,
2426 this->virtual_grf_next);
2427 /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
2428 * with nodes.
2429 */
2430 ra_set_node_class(g, 0, classes[0]);
2431
2432 for (int i = 1; i < this->virtual_grf_next; i++) {
2433 for (int c = 0; c < class_count; c++) {
2434 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
2435 if (aligned_pair_class >= 0 &&
2436 this->delta_x.reg == i) {
2437 ra_set_node_class(g, i, classes[aligned_pair_class]);
2438 } else {
2439 ra_set_node_class(g, i, classes[c]);
2440 }
2441 break;
2442 }
2443 }
2444
2445 for (int j = 1; j < i; j++) {
2446 if (virtual_grf_interferes(i, j)) {
2447 ra_add_node_interference(g, i, j);
2448 }
2449 }
2450 }
2451
2452 /* FINISHME: Handle spilling */
2453 if (!ra_allocate_no_spills(g)) {
2454 fprintf(stderr, "Failed to allocate registers.\n");
2455 this->fail = true;
2456 return;
2457 }
2458
2459 /* Get the chosen virtual registers for each node, and map virtual
2460 * regs in the register classes back down to real hardware reg
2461 * numbers.
2462 */
2463 hw_reg_mapping[0] = 0; /* unused */
2464 for (int i = 1; i < this->virtual_grf_next; i++) {
2465 int reg = ra_get_node_reg(g, i);
2466 int hw_reg = -1;
2467
2468 for (int c = 0; c < class_count; c++) {
2469 if (reg >= class_base_reg[c] &&
2470 reg < class_base_reg[c] + class_reg_count[c]) {
2471 hw_reg = reg - class_base_reg[c];
2472 break;
2473 }
2474 }
2475
2476 assert(hw_reg != -1);
2477 hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
2478 last_grf = MAX2(last_grf,
2479 hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
2480 }
2481
2482 foreach_iter(exec_list_iterator, iter, this->instructions) {
2483 fs_inst *inst = (fs_inst *)iter.get();
2484
2485 assign_reg(hw_reg_mapping, &inst->dst);
2486 assign_reg(hw_reg_mapping, &inst->src[0]);
2487 assign_reg(hw_reg_mapping, &inst->src[1]);
2488 }
2489
2490 this->grf_used = last_grf + 1;
2491
2492 talloc_free(g);
2493 talloc_free(regs);
2494 }
2495
/* Compute, for each virtual GRF, the instruction index (ip) of its first
 * definition and its last use.  Results replace this->virtual_grf_def /
 * this->virtual_grf_use.
 *
 * Loops are handled conservatively: anything accessed inside a loop is
 * credited to the top of the outermost loop, and uses are extended to the
 * loop's bottom, since a value written on one iteration may be read on
 * the next.
 */
void
fs_visitor::calculate_live_intervals()
{
   int num_vars = this->virtual_grf_next;
   int *def = talloc_array(mem_ctx, int, num_vars);
   int *use = talloc_array(mem_ctx, int, num_vars);
   int loop_depth = 0;
   int loop_start = 0;

   /* Start with an empty interval: def after any real ip, use before any. */
   for (int i = 0; i < num_vars; i++) {
      def[i] = 1 << 30;
      use[i] = -1;
   }

   int ip = 0;
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode == BRW_OPCODE_DO) {
	 /* Remember where the outermost loop begins. */
	 if (loop_depth++ == 0)
	    loop_start = ip;
      } else if (inst->opcode == BRW_OPCODE_WHILE) {
	 loop_depth--;

	 if (loop_depth == 0) {
	    /* FINISHME:
	     *
	     * Patches up any vars marked for use within the loop as
	     * live until the end.  This is conservative, as there
	     * will often be variables defined and used inside the
	     * loop but dead at the end of the loop body.
	     */
	    for (int i = 0; i < num_vars; i++) {
	       if (use[i] == loop_start) {
		  use[i] = ip;
	       }
	    }
	 }
      } else {
	 /* Inside a loop, credit accesses to the loop head; the WHILE
	  * patch-up above then stretches uses to the loop's end.
	  */
	 int eip = ip;

	 if (loop_depth)
	    eip = loop_start;

	 /* reg 0 is the "no register" placeholder and is skipped. */
	 for (unsigned int i = 0; i < 3; i++) {
	    if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
	       use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip);
	    }
	 }
	 if (inst->dst.file == GRF && inst->dst.reg != 0) {
	    def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip);
	 }
      }

      ip++;
   }

   /* Discard any previous results and publish the new arrays. */
   talloc_free(this->virtual_grf_def);
   talloc_free(this->virtual_grf_use);
   this->virtual_grf_def = def;
   this->virtual_grf_use = use;
}
2558
2559 /**
2560 * Attempts to move immediate constants into the immediate
2561 * constant slot of following instructions.
2562 *
2563 * Immediate constants are a bit tricky -- they have to be in the last
2564 * operand slot, you can't do abs/negate on them,
2565 */
2566
2567 bool
2568 fs_visitor::propagate_constants()
2569 {
2570 bool progress = false;
2571
2572 foreach_iter(exec_list_iterator, iter, this->instructions) {
2573 fs_inst *inst = (fs_inst *)iter.get();
2574
2575 if (inst->opcode != BRW_OPCODE_MOV ||
2576 inst->predicated ||
2577 inst->dst.file != GRF || inst->src[0].file != IMM ||
2578 inst->dst.type != inst->src[0].type)
2579 continue;
2580
2581 /* Don't bother with cases where we should have had the
2582 * operation on the constant folded in GLSL already.
2583 */
2584 if (inst->saturate)
2585 continue;
2586
2587 /* Found a move of a constant to a GRF. Find anything else using the GRF
2588 * before it's written, and replace it with the constant if we can.
2589 */
2590 exec_list_iterator scan_iter = iter;
2591 scan_iter.next();
2592 for (; scan_iter.has_next(); scan_iter.next()) {
2593 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2594
2595 if (scan_inst->opcode == BRW_OPCODE_DO ||
2596 scan_inst->opcode == BRW_OPCODE_WHILE ||
2597 scan_inst->opcode == BRW_OPCODE_ELSE ||
2598 scan_inst->opcode == BRW_OPCODE_ENDIF) {
2599 break;
2600 }
2601
2602 for (int i = 2; i >= 0; i--) {
2603 if (scan_inst->src[i].file != GRF ||
2604 scan_inst->src[i].reg != inst->dst.reg ||
2605 scan_inst->src[i].reg_offset != inst->dst.reg_offset)
2606 continue;
2607
2608 /* Don't bother with cases where we should have had the
2609 * operation on the constant folded in GLSL already.
2610 */
2611 if (scan_inst->src[i].negate || scan_inst->src[i].abs)
2612 continue;
2613
2614 switch (scan_inst->opcode) {
2615 case BRW_OPCODE_MOV:
2616 scan_inst->src[i] = inst->src[0];
2617 progress = true;
2618 break;
2619
2620 case BRW_OPCODE_MUL:
2621 case BRW_OPCODE_ADD:
2622 if (i == 1) {
2623 scan_inst->src[i] = inst->src[0];
2624 progress = true;
2625 } else if (i == 0 && scan_inst->src[1].file != IMM) {
2626 /* Fit this constant in by commuting the operands */
2627 scan_inst->src[0] = scan_inst->src[1];
2628 scan_inst->src[1] = inst->src[0];
2629 }
2630 break;
2631 case BRW_OPCODE_CMP:
2632 if (i == 1) {
2633 scan_inst->src[i] = inst->src[0];
2634 progress = true;
2635 }
2636 }
2637 }
2638
2639 if (scan_inst->dst.file == GRF &&
2640 scan_inst->dst.reg == inst->dst.reg &&
2641 (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
2642 scan_inst->opcode == FS_OPCODE_TEX)) {
2643 break;
2644 }
2645 }
2646 }
2647
2648 return progress;
2649 }
2650 /**
2651 * Must be called after calculate_live_intervales() to remove unused
2652 * writes to registers -- register allocation will fail otherwise
2653 * because something deffed but not used won't be considered to
2654 * interfere with other regs.
2655 */
2656 bool
2657 fs_visitor::dead_code_eliminate()
2658 {
2659 bool progress = false;
2660 int num_vars = this->virtual_grf_next;
2661 bool dead[num_vars];
2662
2663 for (int i = 0; i < num_vars; i++) {
2664 /* This would be ">=", but FS_OPCODE_DISCARD has a src == dst where
2665 * it writes dst then reads it as src.
2666 */
2667 dead[i] = this->virtual_grf_def[i] > this->virtual_grf_use[i];
2668
2669 if (dead[i]) {
2670 /* Mark off its interval so it won't interfere with anything. */
2671 this->virtual_grf_def[i] = -1;
2672 this->virtual_grf_use[i] = -1;
2673 }
2674 }
2675
2676 foreach_iter(exec_list_iterator, iter, this->instructions) {
2677 fs_inst *inst = (fs_inst *)iter.get();
2678
2679 if (inst->dst.file == GRF && dead[inst->dst.reg]) {
2680 inst->remove();
2681 progress = true;
2682 }
2683 }
2684
2685 return progress;
2686 }
2687
2688 bool
2689 fs_visitor::virtual_grf_interferes(int a, int b)
2690 {
2691 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
2692 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
2693
2694 /* For dead code, just check if the def interferes with the other range. */
2695 if (this->virtual_grf_use[a] == -1) {
2696 return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
2697 this->virtual_grf_def[a] < this->virtual_grf_use[b]);
2698 }
2699 if (this->virtual_grf_use[b] == -1) {
2700 return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
2701 this->virtual_grf_def[b] < this->virtual_grf_use[a]);
2702 }
2703
2704 return start <= end;
2705 }
2706
2707 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
2708 {
2709 struct brw_reg brw_reg;
2710
2711 switch (reg->file) {
2712 case GRF:
2713 case ARF:
2714 case MRF:
2715 brw_reg = brw_vec8_reg(reg->file,
2716 reg->hw_reg, 0);
2717 brw_reg = retype(brw_reg, reg->type);
2718 break;
2719 case IMM:
2720 switch (reg->type) {
2721 case BRW_REGISTER_TYPE_F:
2722 brw_reg = brw_imm_f(reg->imm.f);
2723 break;
2724 case BRW_REGISTER_TYPE_D:
2725 brw_reg = brw_imm_d(reg->imm.i);
2726 break;
2727 case BRW_REGISTER_TYPE_UD:
2728 brw_reg = brw_imm_ud(reg->imm.u);
2729 break;
2730 default:
2731 assert(!"not reached");
2732 break;
2733 }
2734 break;
2735 case FIXED_HW_REG:
2736 brw_reg = reg->fixed_hw_reg;
2737 break;
2738 case BAD_FILE:
2739 /* Probably unused. */
2740 brw_reg = brw_null_reg();
2741 break;
2742 case UNIFORM:
2743 assert(!"not reached");
2744 brw_reg = brw_null_reg();
2745 break;
2746 }
2747 if (reg->abs)
2748 brw_reg = brw_abs(brw_reg);
2749 if (reg->negate)
2750 brw_reg = negate(brw_reg);
2751
2752 return brw_reg;
2753 }
2754
2755 void
2756 fs_visitor::generate_code()
2757 {
2758 unsigned int annotation_len = 0;
2759 int last_native_inst = 0;
2760 struct brw_instruction *if_stack[16], *loop_stack[16];
2761 int if_stack_depth = 0, loop_stack_depth = 0;
2762 int if_depth_in_loop[16];
2763
2764 if_depth_in_loop[loop_stack_depth] = 0;
2765
2766 memset(&if_stack, 0, sizeof(if_stack));
2767 foreach_iter(exec_list_iterator, iter, this->instructions) {
2768 fs_inst *inst = (fs_inst *)iter.get();
2769 struct brw_reg src[3], dst;
2770
2771 for (unsigned int i = 0; i < 3; i++) {
2772 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
2773 }
2774 dst = brw_reg_from_fs_reg(&inst->dst);
2775
2776 brw_set_conditionalmod(p, inst->conditional_mod);
2777 brw_set_predicate_control(p, inst->predicated);
2778
2779 switch (inst->opcode) {
2780 case BRW_OPCODE_MOV:
2781 brw_MOV(p, dst, src[0]);
2782 break;
2783 case BRW_OPCODE_ADD:
2784 brw_ADD(p, dst, src[0], src[1]);
2785 break;
2786 case BRW_OPCODE_MUL:
2787 brw_MUL(p, dst, src[0], src[1]);
2788 break;
2789
2790 case BRW_OPCODE_FRC:
2791 brw_FRC(p, dst, src[0]);
2792 break;
2793 case BRW_OPCODE_RNDD:
2794 brw_RNDD(p, dst, src[0]);
2795 break;
2796 case BRW_OPCODE_RNDZ:
2797 brw_RNDZ(p, dst, src[0]);
2798 break;
2799
2800 case BRW_OPCODE_AND:
2801 brw_AND(p, dst, src[0], src[1]);
2802 break;
2803 case BRW_OPCODE_OR:
2804 brw_OR(p, dst, src[0], src[1]);
2805 break;
2806 case BRW_OPCODE_XOR:
2807 brw_XOR(p, dst, src[0], src[1]);
2808 break;
2809
2810 case BRW_OPCODE_CMP:
2811 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2812 break;
2813 case BRW_OPCODE_SEL:
2814 brw_SEL(p, dst, src[0], src[1]);
2815 break;
2816
2817 case BRW_OPCODE_IF:
2818 assert(if_stack_depth < 16);
2819 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2820 if_depth_in_loop[loop_stack_depth]++;
2821 if_stack_depth++;
2822 break;
2823 case BRW_OPCODE_ELSE:
2824 if_stack[if_stack_depth - 1] =
2825 brw_ELSE(p, if_stack[if_stack_depth - 1]);
2826 break;
2827 case BRW_OPCODE_ENDIF:
2828 if_stack_depth--;
2829 brw_ENDIF(p , if_stack[if_stack_depth]);
2830 if_depth_in_loop[loop_stack_depth]--;
2831 break;
2832
2833 case BRW_OPCODE_DO:
2834 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
2835 if_depth_in_loop[loop_stack_depth] = 0;
2836 break;
2837
2838 case BRW_OPCODE_BREAK:
2839 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
2840 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2841 break;
2842 case BRW_OPCODE_CONTINUE:
2843 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
2844 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2845 break;
2846
2847 case BRW_OPCODE_WHILE: {
2848 struct brw_instruction *inst0, *inst1;
2849 GLuint br = 1;
2850
2851 if (intel->gen >= 5)
2852 br = 2;
2853
2854 assert(loop_stack_depth > 0);
2855 loop_stack_depth--;
2856 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
2857 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2858 while (inst0 > loop_stack[loop_stack_depth]) {
2859 inst0--;
2860 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
2861 inst0->bits3.if_else.jump_count == 0) {
2862 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
2863 }
2864 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
2865 inst0->bits3.if_else.jump_count == 0) {
2866 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
2867 }
2868 }
2869 }
2870 break;
2871
2872 case FS_OPCODE_RCP:
2873 case FS_OPCODE_RSQ:
2874 case FS_OPCODE_SQRT:
2875 case FS_OPCODE_EXP2:
2876 case FS_OPCODE_LOG2:
2877 case FS_OPCODE_POW:
2878 case FS_OPCODE_SIN:
2879 case FS_OPCODE_COS:
2880 generate_math(inst, dst, src);
2881 break;
2882 case FS_OPCODE_LINTERP:
2883 generate_linterp(inst, dst, src);
2884 break;
2885 case FS_OPCODE_TEX:
2886 case FS_OPCODE_TXB:
2887 case FS_OPCODE_TXL:
2888 generate_tex(inst, dst, src[0]);
2889 break;
2890 case FS_OPCODE_DISCARD:
2891 generate_discard(inst, dst /* src0 == dst */);
2892 break;
2893 case FS_OPCODE_DDX:
2894 generate_ddx(inst, dst, src[0]);
2895 break;
2896 case FS_OPCODE_DDY:
2897 generate_ddy(inst, dst, src[0]);
2898 break;
2899 case FS_OPCODE_FB_WRITE:
2900 generate_fb_write(inst);
2901 break;
2902 default:
2903 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2904 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2905 brw_opcodes[inst->opcode].name);
2906 } else {
2907 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2908 }
2909 this->fail = true;
2910 }
2911
2912 if (annotation_len < p->nr_insn) {
2913 annotation_len *= 2;
2914 if (annotation_len < 16)
2915 annotation_len = 16;
2916
2917 this->annotation_string = talloc_realloc(this->mem_ctx,
2918 annotation_string,
2919 const char *,
2920 annotation_len);
2921 this->annotation_ir = talloc_realloc(this->mem_ctx,
2922 annotation_ir,
2923 ir_instruction *,
2924 annotation_len);
2925 }
2926
2927 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2928 this->annotation_string[i] = inst->annotation;
2929 this->annotation_ir[i] = inst->ir;
2930 }
2931 last_native_inst = p->nr_insn;
2932 }
2933 }
2934
/* Top-level entry point for the new scalar FS backend: translate the
 * currently-bound GLSL fragment shader into native code in c->func.
 * Returns GL_FALSE to fall back to the old brw_wm codegen path.
 */
GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   struct brw_shader *shader = NULL;
   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;

   /* No GLSL program bound: fixed-function / ARB program path instead. */
   if (!prog)
      return GL_FALSE;

   /* NOTE(review): using_new_fs is defined elsewhere in this file --
    * presumably a debug switch gating this backend; confirm.
    */
   if (!using_new_fs)
      return GL_FALSE;

   /* Find the linked fragment shader stage, if any. */
   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
	 break;
      }
   }
   if (!shader)
      return GL_FALSE;

   /* We always use 8-wide mode, at least for now.  For one, flow
    * control only works in 8-wide.  Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      v.emit_dummy_fs();
   } else {
      v.calculate_urb_setup();
      if (intel->gen < 6)
	 v.emit_interpolation_setup_gen4();
      else
	 v.emit_interpolation_setup_gen6();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 v.base_ir = ir;
	 ir->accept(&v);
      }

      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();

      /* Iterate the cheap optimizations to a fixed point.  Live intervals
       * must be recomputed each round for dead_code_eliminate().
       */
      bool progress;
      do {
	 progress = false;

	 v.calculate_live_intervals();
	 progress = v.propagate_constants() || progress;
	 progress = v.dead_code_eliminate() || progress;
      } while (progress);

      if (0)
	 v.assign_regs_trivial();
      else
	 v.assign_regs();
   }

   if (!v.fail)
      v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   /* Debug dump: disassemble the native code, interleaved with the IR and
    * annotation strings recorded by generate_code().
    */
   if (INTEL_DEBUG & DEBUG_WM) {
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
	 if (last_annotation_ir != v.annotation_ir[i]) {
	    last_annotation_ir = v.annotation_ir[i];
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != v.annotation_string[i]) {
	    last_annotation_string = v.annotation_string[i];
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
	 brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}