i965: Add live interval analysis and hook it up to the register allocator.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "talloc.h"
}
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"

enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE
};

enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};

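/* The new scalar FS backend is opt-in through the INTEL_NEW_FS environment
 * variable while it is being brought up; see brw_link_shader() below.
 */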
static int using_new_fs = -1;
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);

struct gl_shader *
brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = talloc_zero(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   return &shader->base;
}

struct gl_shader_program *
brw_new_shader_program(GLcontext *ctx, GLuint name)
{
   struct brw_shader_program *prog;
   prog = talloc_zero(NULL, struct brw_shader_program);
   if (prog) {
      prog->base.Name = name;
      _mesa_init_shader_program(ctx, &prog->base);
   }
   return &prog->base;
}

GLboolean
brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
{
   if (!_mesa_ir_compile_shader(ctx, shader))
      return GL_FALSE;

   return GL_TRUE;
}

GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
         void *mem_ctx = talloc_new(NULL);
         bool progress;

         if (shader->ir)
            talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

         do_mat_op_to_vec(shader->ir);
         do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);

         do {
            progress = false;

            brw_do_channel_expressions(shader->ir);
            brw_do_vector_splitting(shader->ir);

            progress = do_lower_jumps(shader->ir, true, true,
                                      true, /* main return */
                                      false, /* continue */
                                      false /* loops */
                                      ) || progress;

            progress = do_common_optimization(shader->ir, true, 32) || progress;

            progress = lower_noise(shader->ir) || progress;
            progress =
               lower_variable_index_to_cond_assign(shader->ir,
                                                   GL_TRUE, /* input */
                                                   GL_TRUE, /* output */
                                                   GL_TRUE, /* temp */
                                                   GL_TRUE /* uniform */
                                                   ) || progress;
         } while (progress);

         validate_ir_tree(shader->ir);

         reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}

static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return type->components();
   case GLSL_TYPE_ARRAY:
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up no register space, since they're baked in at
       * link time.
       */
      return 0;
   default:
      assert(!"not reached");
      return 0;
   }
}

class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed hardware register constructor (from a struct brw_reg). */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** Virtual register number.  0 = fixed hw reg. */
   int reg;
   /** Offset within the virtual register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   bool negate;
   bool abs;
   struct brw_reg fixed_hw_reg;

   /** Value for file == IMM. */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};

static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);

class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};

class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      this->variable_ht = hash_table_ctor(0,
                                          hash_table_pointer_hash,
                                          hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;

      this->virtual_grf_sizes = NULL;
      this->virtual_grf_next = 1;
      this->virtual_grf_array_size = 0;
      this->virtual_grf_def = NULL;
      this->virtual_grf_use = NULL;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   fs_reg *variable_storage(ir_variable *var);
   int virtual_grf_alloc(int size);

   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void assign_urb_setup();
   void assign_regs();
   void assign_regs_trivial();
   void calculate_live_intervals();
   bool virtual_grf_interferes(int a, int b);
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst, struct brw_reg temp);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_fragcoord_interpolation(ir_variable *ir);
   void emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup();
   void emit_fb_writes();

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);
   void setup_builtin_uniform_values(ir_variable *ir);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;

   int *virtual_grf_sizes;
   int virtual_grf_next;
   int virtual_grf_array_size;
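   /* Live interval bounds for each virtual GRF: the instruction index of
    * its first definition and of its last use, filled in by
    * calculate_live_intervals() for the register allocator.
    */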
   int *virtual_grf_def;
   int *virtual_grf_use;

   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;

   int grf_used;

};

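/** Allocates a new virtual GRF of the given size (in hardware registers).
 *
 * Returns the virtual GRF number.  Number 0 is reserved, so that an fs_reg
 * with reg == 0 can be recognized as unset.
 */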
int
fs_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_next) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
                                         int, virtual_grf_array_size);

      /* This slot is always unused. */
      virtual_grf_sizes[0] = 0;
   }
   virtual_grf_sizes[virtual_grf_next] = size;
   return virtual_grf_next++;
}

/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   this->type = BRW_REGISTER_TYPE_F;
}

int
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   default:
      assert(!"not reached");
      return BRW_REGISTER_TYPE_F;
   }
}

/** Automatic reg constructor. */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));
   this->reg_offset = 0;
   this->type = brw_type_for_base_type(type);
}

fs_reg *
fs_visitor::variable_storage(ir_variable *var)
{
   return (fs_reg *)hash_table_find(this->variable_ht, var);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}


/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const struct gl_builtin_uniform_desc *statevar = NULL;

   for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
      if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) {
         statevar = &_mesa_builtin_uniform_desc[i];
         break;
      }
   }

   if (statevar == NULL) {
      this->fail = true;
      printf("Failed to find builtin uniform `%s'\n", ir->name);
      return;
   }

   int array_count;
   if (ir->type->is_array()) {
      array_count = ir->type->length;
   } else {
      array_count = 1;
   }

   for (int a = 0; a < array_count; a++) {
      for (unsigned int i = 0; i < statevar->num_elements; i++) {
         struct gl_builtin_uniform_element *element = &statevar->elements[i];
         int tokens[STATE_LENGTH];

         memcpy(tokens, element->tokens, sizeof(element->tokens));
         if (ir->type->is_array()) {
            tokens[1] = a;
         }

         /* This state reference has already been setup by ir_to_mesa,
          * but we'll get the same index back here.
          */
         int index = _mesa_add_state_reference(this->fp->Base.Parameters,
                                               (gl_state_index *)tokens);
         float *vec_values = this->fp->Base.Parameters->ParameterValues[index];

         /* Add each of the unique swizzles of the element as a
          * parameter.  This'll end up matching the expected layout of
          * the array/matrix/structure we're trying to fill in.
          */
         int last_swiz = -1;
         for (unsigned int j = 0; j < 4; j++) {
            int this_swiz = GET_SWZ(element->swizzle, j);
            if (this_swiz == last_swiz)
               break;
            last_swiz = this_swiz;

            c->prog_data.param[c->prog_data.nr_params++] = &vec_values[j];
         }
      }
   }
}

void
fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   fs_reg wpos = *reg;
   fs_reg neg_y = this->pixel_y;
   neg_y.negate = true;

   /* gl_FragCoord.x */
   if (ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
   } else {
      emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.y */
   if (ir->origin_upper_left && ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
   } else {
      fs_reg pixel_y = this->pixel_y;
      float offset = (ir->pixel_center_integer ? 0.0 : 0.5);

      if (!ir->origin_upper_left) {
         pixel_y.negate = true;
         offset += c->key.drawable_height - 1.0;
      }

      emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.z */
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 2)));
   wpos.reg_offset++;

   /* gl_FragCoord.w: Already set up in emit_interpolation_setup(). */
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));

   hash_table_insert(this->variable_ht, reg, ir);
}


void
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   /* Interpolation is always in floating point regs. */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
         this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
         if (!(fp->Base.InputsRead & BITFIELD64_BIT(location))) {
            /* If there's no incoming setup data for this slot, don't
             * emit interpolation for it (since it's not used, and we'd
             * fall over later trying to find the setup data).
             */
            attr.reg_offset += type->vector_elements;
            continue;
         }

         for (unsigned int c = 0; c < type->vector_elements; c++) {
            struct brw_reg interp = interp_reg(location, c);
            emit(fs_inst(FS_OPCODE_LINTERP,
                         attr,
                         this->delta_x,
                         this->delta_y,
                         fs_reg(interp)));
            attr.reg_offset++;
         }
         attr.reg_offset -= type->vector_elements;

         for (unsigned int c = 0; c < type->vector_elements; c++) {
            emit(fs_inst(BRW_OPCODE_MUL,
                         attr,
                         attr,
                         this->pixel_w));
            attr.reg_offset++;
         }
         location++;
      }
   }

   hash_table_insert(this->variable_ht, reg, ir);
}

void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
         emit_fragcoord_interpolation(ir);
         return;
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
         reg = new(this->mem_ctx) fs_reg(this, ir->type);
         struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
         /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
          * us front face
          */
         fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
                                      *reg,
                                      fs_reg(r1_6ud),
                                      fs_reg(1u << 31)));
         inst->conditional_mod = BRW_CONDITIONAL_L;
         emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
         emit_general_interpolation(ir);
         return;
      }
   }

   if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}

void
fs_visitor::visit(ir_dereference_variable *ir)
{
   fs_reg *reg = variable_storage(ir->var);
   this->result = *reg;
}

void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}

void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      assert(this->result.file == UNIFORM ||
             (this->result.file == GRF &&
              this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}

void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[2], temp;
   fs_reg result;
   fs_inst *inst;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         ir_print_visitor v;
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
         this->fail = true;
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

   switch (ir->operation) {
   case ir_unop_logic_not:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      this->result = op[0];
      break;
   case ir_unop_sign:
      temp = fs_reg(this, ir->type);

      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
      inst->predicated = true;

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
      inst->predicated = true;

      break;
   case ir_unop_rcp:
      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
      break;

   case ir_unop_exp2:
      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
      break;
   case ir_unop_log2:
      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
      break;
   case ir_unop_cos:
      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
      break;

   case ir_unop_dFdx:
      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
      break;
   case ir_unop_dFdy:
      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
      break;

   case ir_binop_add:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_greater:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_lequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_LE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_gequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_GE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_equal:
   case ir_binop_all_equal: /* same as equal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_Z;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_nequal:
   case ir_binop_any_nequal: /* same as nequal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;

   case ir_binop_logic_xor:
      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_or:
      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_and:
      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
      break;

   case ir_binop_dot:
   case ir_binop_cross:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_unop_sqrt:
      emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
      break;

   case ir_unop_rsq:
      emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
      break;

   case ir_unop_i2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
      break;
   case ir_unop_f2i:
      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      break;

   case ir_unop_trunc:
      emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      break;
   case ir_unop_fract:
      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
      break;

   case ir_binop_min:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;
   case ir_binop_max:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;

   case ir_binop_pow:
      inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
      break;

   case ir_unop_bit_not:
   case ir_unop_u2f:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
}

void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   int i;
   int write_mask;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).  This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      write_mask = WRITEMASK_XYZW;
   } else {
      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
      write_mask = ir->write_mask;
   }

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      if (i >= 4 || (write_mask & (1 << i))) {
         inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
         if (ir->condition)
            inst->predicated = true;
         r.reg_offset++;
      }
      l.reg_offset++;
   }
}

void
fs_visitor::visit(ir_texture *ir)
{
   int base_mrf = 2;
   fs_inst *inst = NULL;
   unsigned int mlen = 0;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   if (ir->projector) {
      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);

      ir->projector->accept(this);
      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));

      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
         coordinate.reg_offset++;
         proj_coordinate.reg_offset++;
      }
      proj_coordinate.reg_offset = 0;

      coordinate = proj_coordinate;
   }

   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5)
      mlen = 3;

   if (ir->shadow_comparitor) {
      /* For shadow comparisons, we have to supply u,v,r. */
      mlen = 3;

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   /* Do we ever want to handle writemasking on texture samples?  Is it
    * performance relevant?
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
                                      ctx->Shader.CurrentProgram,
                                      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;
   inst->mlen = mlen;
}

void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

void
fs_visitor::visit(ir_discard *ir)
{
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD, temp, temp));
}

void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_INT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_BOOL:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   /* Start a safety counter.  If the user messed up their loop
    * counting, we don't want to hang the GPU.
    */
   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
                                   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;
      case ir_binop_nequal:
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      case ir_binop_gequal:
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         break;
      case ir_binop_lequal:
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         break;
      case ir_binop_greater:
         inst->conditional_mod = BRW_CONDITIONAL_G;
         break;
      case ir_binop_less:
         inst->conditional_mod = BRW_CONDITIONAL_L;
         break;
      default:
         assert(!"not reached: unknown loop condition");
         this->fail = true;
         break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      fs_inst *inst;

      this->base_ir = ir;
      ir->accept(this);

      /* Check the maximum loop iters counter. */
      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
      inst->conditional_mod = BRW_CONDITIONAL_Z;

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

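/** Copies the given instruction into the IR list, tagging it with the
 * current annotation state for later debug dumps.
 */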
fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color. */
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 2),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 3),
                fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 4),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 5),
                fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
                        fs_reg(0),
                        fs_reg(0)));
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   int regnr = location * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
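   /* g1 carries the subspan X/Y origins.  brw_imm_v packs eight 4-bit
    * immediates, so adding 0x10101010 gives the per-pixel X offsets
    * {0,1,0,1,...} and 0x11001100 the Y offsets {0,0,1,1,...} across the
    * 2x2 subspans.
    */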
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   this->delta_x = fs_reg(this, glsl_type::float_type);
   this->delta_y = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
   this->current_annotation = NULL;
}

void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   int nr = 0;

   /* m0, m1 header */
   nr += 2;

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color.  It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
         /* Pass through the payload depth. */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      if (this->frag_color || this->frag_data) {
         for (int i = 0; i < 4; i++) {
            emit(fs_inst(BRW_OPCODE_MOV,
                         fs_reg(MRF, color_mrf + i),
                         color));
            color.reg_offset++;
         }
      }

      if (this->frag_color)
         color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
   }

   if (c->key.nr_color_regions == 0) {
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
   }

   this->current_annotation = NULL;
}

void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;

   /* The message header is 2 regs, with g0 and g1 as the contents: g0 is
    * taken care of by the SEND's implied move, so copy g1 into m1 here.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
           brw_message_reg(1),
           brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                0, /* base MRF */
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
                inst->target,
                inst->mlen,
                0,
                eot);
}

void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

void
fs_visitor::generate_math(fs_inst *inst,
                          struct brw_reg dst, struct brw_reg *src)
{
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   if (inst->opcode == FS_OPCODE_POW) {
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
            BRW_MATH_SATURATE_NONE,
            2, src[0],
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}

void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;

   if (intel->gen == 5) {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
         }
         break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         /* Note that G45 and older determine shadow compare and dispatch width
          * from message length for most messages.
          */
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(!"FINISHME: shadow compare with bias.");
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            rlen = 8;
         }
         break;
      }
   }
   assert(msg_type != -1);

   /* g0 header. */
   src.nr--;

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              src.nr,
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(inst->sampler),
              inst->sampler,
              WRITEMASK_XYZW,
              msg_type,
              rlen,
              inst->mlen + 1,
              0,
              1,
              BRW_SAMPLER_SIMD_MODE_SIMD8);
}


/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *             DDX                    DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But for DDY, it's harder, as we want to produce the pairs swizzled
 * between each other.  We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}

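/* Discard is implemented by ANDing the inverse of the current execution
 * mask (IMASK) into the pixel mask in g0, so discarded pixels are dropped
 * from the eventual framebuffer write.
 */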
void
fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   temp = brw_uw1_reg(temp.file, temp.nr, 0);

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, temp, g0);
   brw_pop_insn_state(p);
}

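/* Assigns locations to the push-constant (CURBE) uniforms and rewrites
 * UNIFORM-file sources into the fixed hardware registers the constants
 * are uploaded to.
 */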
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
                           c->prog_data.curb_read_length) & 1) {
      /* Align the start of the interpolation coefficients so that we can use
       * the PLN instruction.
       */
      c->prog_data.first_curbe_grf++;
   }

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
            int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
            struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
                                                  constant_nr / 8,
                                                  constant_nr % 8);

            inst->src[i].file = FIXED_HW_REG;
            inst->src[i].fixed_hw_reg = brw_reg;
         }
      }
   }
}

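/* Lays out where each varying's setup (plane coefficient) data lands in
 * the thread payload, and points FS_OPCODE_LINTERP's third source at it.
 */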
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      if (i != FRAG_ATTRIB_WPOS &&
          !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
         continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
         continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
                                      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}

static void
assign_reg(int *reg_hw_locations, fs_reg *reg)
{
   if (reg->file == GRF && reg->reg != 0) {
      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
      reg->reg = 0;
   }
}

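/* Fallback allocator: gives each virtual GRF its own block of consecutive
 * hardware registers, with no packing based on live intervals.
 */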
2016 void
2017 fs_visitor::assign_regs_trivial()
2018 {
2019 int last_grf = 0;
2020 int hw_reg_mapping[this->virtual_grf_next];
2021 int i;
2022
2023 hw_reg_mapping[0] = 0;
2024 hw_reg_mapping[1] = this->first_non_payload_grf;
2025 for (i = 2; i < this->virtual_grf_next; i++) {
2026 hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
2027 this->virtual_grf_sizes[i - 1]);
2028 }
2029 last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
2030
2031 foreach_iter(exec_list_iterator, iter, this->instructions) {
2032 fs_inst *inst = (fs_inst *)iter.get();
2033
2034       assign_reg(hw_reg_mapping, &inst->dst);
2035       for (int s = 0; s < 3; s++)
2036          assign_reg(hw_reg_mapping, &inst->src[s]);
2037 }
2038
2039 this->grf_used = last_grf + 1;
2040 }
2041
2042 void
2043 fs_visitor::assign_regs()
2044 {
2045 int last_grf = 0;
2046 int hw_reg_mapping[this->virtual_grf_next + 1];
2047 int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
2048 int class_sizes[base_reg_count];
2049 int class_count = 0;
2050
2051 calculate_live_intervals();
2052
2053 /* Set up the register classes.
2054 *
2055     * The base registers store a scalar value.  For texture samples,
2056     * we get virtual GRFs composed of 4 contiguous hw registers.  For
2057     * structures and arrays, we store them as contiguous runs of
2058     * registers as well, though we should be able to do better most
2059     * of the time.
2060 */
2061 class_sizes[class_count++] = 1;
2062 for (int r = 1; r < this->virtual_grf_next; r++) {
2063 int i;
2064
2065 for (i = 0; i < class_count; i++) {
2066 if (class_sizes[i] == this->virtual_grf_sizes[r])
2067 break;
2068 }
2069 if (i == class_count) {
2070 class_sizes[class_count++] = this->virtual_grf_sizes[r];
2071 }
2072 }
2073
2074 int ra_reg_count = 0;
2075 int class_base_reg[class_count];
2076 int class_reg_count[class_count];
2077 int classes[class_count];
2078
2079 for (int i = 0; i < class_count; i++) {
2080 class_base_reg[i] = ra_reg_count;
2081 class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
2082 ra_reg_count += class_reg_count[i];
2083 }
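   /* For instance, with base_reg_count = 120 and class_sizes = {1, 4},
    * class 0 covers ra registers 0..119 (one per base reg) and class 1
    * covers ra registers 120..236, each standing for a run of 4
    * contiguous base regs.
    */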
2084
2085 struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
2086 for (int i = 0; i < class_count; i++) {
2087 classes[i] = ra_alloc_reg_class(regs);
2088
2089 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2090 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
2091 }
2092
2093 /* Add conflicts between our contiguous registers aliasing
2094 * base regs and other register classes' contiguous registers
2095 * that alias base regs, or the base regs themselves for classes[0].
2096 */
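      /* Concretely, with class_sizes = {1, 2}, the size-2 register
       * starting at base reg r aliases base regs r and r + 1, so it
       * conflicts with size-1 registers r and r + 1 and with size-2
       * registers r - 1, r, and r + 1: exactly the window
       * [MAX2(0, i_r - (class_sizes[c] - 1)), i_r + class_sizes[i] - 1]
       * walked below.
       */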
2097 for (int c = 0; c <= i; c++) {
2098       for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2099 for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
2100 c_r <= MIN2(class_reg_count[c] - 1, i_r + class_sizes[i] - 1);
2101 c_r++) {
2102
2103 if (0) {
2104 printf("%d/%d conflicts %d/%d\n",
2105 class_sizes[i], i_r,
2106 class_sizes[c], c_r);
2107 }
2108
2109 ra_add_reg_conflict(regs,
2110 class_base_reg[i] + i_r,
2111 class_base_reg[c] + c_r);
2112 }
2113 }
2114 }
2115 }
2116
2117 ra_set_finalize(regs);
2118
2119 struct ra_graph *g = ra_alloc_interference_graph(regs,
2120 this->virtual_grf_next);
2121 /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
2122 * with nodes.
2123 */
2124 ra_set_node_class(g, 0, classes[0]);
2125
2126 for (int i = 1; i < this->virtual_grf_next; i++) {
2127 for (int c = 0; c < class_count; c++) {
2128 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
2129 ra_set_node_class(g, i, classes[c]);
2130 break;
2131 }
2132 }
2133
2134 for (int j = 1; j < i; j++) {
2135 if (virtual_grf_interferes(i, j)) {
2136 ra_add_node_interference(g, i, j);
2137 }
2138 }
2139 }
2140
2141 /* FINISHME: Handle spilling */
2142 if (!ra_allocate_no_spills(g)) {
2143 fprintf(stderr, "Failed to allocate registers.\n");
2144 this->fail = true;
2145 return;
2146 }
2147
2148    /* Get the ra register chosen for each node, and map it from the
2149     * class-relative numbering back down to a real hardware reg
2150     * number.
2151     */
2152 hw_reg_mapping[0] = 0; /* unused */
2153 for (int i = 1; i < this->virtual_grf_next; i++) {
2154 int reg = ra_get_node_reg(g, i);
2155 int hw_reg = -1;
2156
2157 for (int c = 0; c < class_count; c++) {
2158 if (reg >= class_base_reg[c] &&
2159             reg < class_base_reg[c] + class_reg_count[c]) {
2160 hw_reg = reg - class_base_reg[c];
2161 break;
2162 }
2163 }
2164
2165 assert(hw_reg != -1);
2166 hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
2167 last_grf = MAX2(last_grf,
2168 hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
2169 }
2170
2171 foreach_iter(exec_list_iterator, iter, this->instructions) {
2172 fs_inst *inst = (fs_inst *)iter.get();
2173
2174       assign_reg(hw_reg_mapping, &inst->dst);
2175       for (int s = 0; s < 3; s++)
2176          assign_reg(hw_reg_mapping, &inst->src[s]);
2177 }
2178
2179 this->grf_used = last_grf + 1;
2180
2181 talloc_free(g);
2182 talloc_free(regs);
2183 }
2184
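/* Compute, for each virtual GRF, the instruction numbers of its first
 * definition (def) and last use (use).  For example, a value written
 * at ip 4 and last read at ip 9 gets the interval [4, 9].  Anything
 * touched inside a loop is treated as live across the whole loop,
 * since it may be carried around the back edge.
 */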
2185 void
2186 fs_visitor::calculate_live_intervals()
2187 {
2188 int num_vars = this->virtual_grf_next;
2189 int *def = talloc_array(mem_ctx, int, num_vars);
2190 int *use = talloc_array(mem_ctx, int, num_vars);
2191 int loop_depth = 0;
2192 int loop_start = 0;
2193
2194 for (int i = 0; i < num_vars; i++) {
2195 def[i] = 1 << 30;
2196 use[i] = 0;
2197 }
2198
2199 int ip = 0;
2200 foreach_iter(exec_list_iterator, iter, this->instructions) {
2201 fs_inst *inst = (fs_inst *)iter.get();
2202
2203 if (inst->opcode == BRW_OPCODE_DO) {
2204 if (loop_depth++ == 0)
2205 loop_start = ip;
2206 } else if (inst->opcode == BRW_OPCODE_WHILE) {
2207 loop_depth--;
2208
2209 if (loop_depth == 0) {
2210 /* FINISHME:
2211 *
2212             * Patch up any vars marked for use within the loop to stay
2213             * live until the end of it.  This is conservative, as there
2214 * will often be variables defined and used inside the
2215 * loop but dead at the end of the loop body.
2216 */
2217 for (int i = 0; i < num_vars; i++) {
2218 if (use[i] == loop_start) {
2219 use[i] = ip;
2220 }
2221 }
2222 }
2223 } else {
2224 int eip = ip;
2225
2226 if (loop_depth)
2227 eip = loop_start;
2228
2229 for (unsigned int i = 0; i < 3; i++) {
2230 if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
2231 def[inst->src[i].reg] = MIN2(def[inst->src[i].reg], eip);
2232 use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip);
2233 }
2234 }
2235 if (inst->dst.file == GRF && inst->dst.reg != 0) {
2236 def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip);
2237 use[inst->dst.reg] = MAX2(use[inst->dst.reg], eip);
2238 }
2239 }
2240
2241 ip++;
2242 }
2243
2244 this->virtual_grf_def = def;
2245 this->virtual_grf_use = use;
2246 }
2247
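/* Two live intervals interfere when they overlap: [2, 7] and [7, 9]
 * interfere (MAX2 of the defs is 7, MIN2 of the uses is 7), while
 * [2, 6] and [7, 9] do not.
 */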
2248 bool
2249 fs_visitor::virtual_grf_interferes(int a, int b)
2250 {
2251 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
2252 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
2253
2254 return start <= end;
2255 }
2256
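/* Lower an fs_reg to the brw_reg form the EU code emitter consumes.
 * GRF/ARF/MRF registers become 8-wide vectors, matching our fixed
 * 8-wide dispatch; UNIFORM must already have been lowered to
 * FIXED_HW_REG by assign_curb_setup().
 */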
2257 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
2258 {
2259 struct brw_reg brw_reg;
2260
2261 switch (reg->file) {
2262 case GRF:
2263 case ARF:
2264 case MRF:
2265 brw_reg = brw_vec8_reg(reg->file,
2266 reg->hw_reg, 0);
2267 brw_reg = retype(brw_reg, reg->type);
2268 break;
2269 case IMM:
2270 switch (reg->type) {
2271 case BRW_REGISTER_TYPE_F:
2272 brw_reg = brw_imm_f(reg->imm.f);
2273 break;
2274 case BRW_REGISTER_TYPE_D:
2275 brw_reg = brw_imm_d(reg->imm.i);
2276 break;
2277 case BRW_REGISTER_TYPE_UD:
2278 brw_reg = brw_imm_ud(reg->imm.u);
2279 break;
2280 default:
2281 assert(!"not reached");
2282 break;
2283 }
2284 break;
2285 case FIXED_HW_REG:
2286 brw_reg = reg->fixed_hw_reg;
2287 break;
2288 case BAD_FILE:
2289 /* Probably unused. */
2290 brw_reg = brw_null_reg();
2291 break;
2292 case UNIFORM:
2293 assert(!"not reached");
2294 brw_reg = brw_null_reg();
2295 break;
2296 }
2297 if (reg->abs)
2298 brw_reg = brw_abs(brw_reg);
2299 if (reg->negate)
2300 brw_reg = negate(brw_reg);
2301
2302 return brw_reg;
2303 }
2304
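/* Walk the FS IR list and emit native code for each instruction.
 * IF/ELSE/ENDIF and DO/WHILE keep small fixed-depth stacks of the
 * emitted instructions so jump targets can be patched once they are
 * known, and if_depth_in_loop[] tracks how many IFs enclose each loop
 * level, which BREAK/CONT pass to the EU emitter.
 */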
2305 void
2306 fs_visitor::generate_code()
2307 {
2308 unsigned int annotation_len = 0;
2309 int last_native_inst = 0;
2310 struct brw_instruction *if_stack[16], *loop_stack[16];
2311 int if_stack_depth = 0, loop_stack_depth = 0;
2312 int if_depth_in_loop[16];
2313
2314 if_depth_in_loop[loop_stack_depth] = 0;
2315
2316 memset(&if_stack, 0, sizeof(if_stack));
2317 foreach_iter(exec_list_iterator, iter, this->instructions) {
2318 fs_inst *inst = (fs_inst *)iter.get();
2319 struct brw_reg src[3], dst;
2320
2321 for (unsigned int i = 0; i < 3; i++) {
2322 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
2323 }
2324 dst = brw_reg_from_fs_reg(&inst->dst);
2325
2326 brw_set_conditionalmod(p, inst->conditional_mod);
2327 brw_set_predicate_control(p, inst->predicated);
2328
2329 switch (inst->opcode) {
2330 case BRW_OPCODE_MOV:
2331 brw_MOV(p, dst, src[0]);
2332 break;
2333 case BRW_OPCODE_ADD:
2334 brw_ADD(p, dst, src[0], src[1]);
2335 break;
2336 case BRW_OPCODE_MUL:
2337 brw_MUL(p, dst, src[0], src[1]);
2338 break;
2339
2340 case BRW_OPCODE_FRC:
2341 brw_FRC(p, dst, src[0]);
2342 break;
2343 case BRW_OPCODE_RNDD:
2344 brw_RNDD(p, dst, src[0]);
2345 break;
2346 case BRW_OPCODE_RNDZ:
2347 brw_RNDZ(p, dst, src[0]);
2348 break;
2349
2350 case BRW_OPCODE_AND:
2351 brw_AND(p, dst, src[0], src[1]);
2352 break;
2353 case BRW_OPCODE_OR:
2354 brw_OR(p, dst, src[0], src[1]);
2355 break;
2356 case BRW_OPCODE_XOR:
2357 brw_XOR(p, dst, src[0], src[1]);
2358 break;
2359
2360 case BRW_OPCODE_CMP:
2361 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2362 break;
2363 case BRW_OPCODE_SEL:
2364 brw_SEL(p, dst, src[0], src[1]);
2365 break;
2366
2367 case BRW_OPCODE_IF:
2368 assert(if_stack_depth < 16);
2369 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2370 if_depth_in_loop[loop_stack_depth]++;
2371 if_stack_depth++;
2372 break;
2373 case BRW_OPCODE_ELSE:
2374 if_stack[if_stack_depth - 1] =
2375 brw_ELSE(p, if_stack[if_stack_depth - 1]);
2376 break;
2377 case BRW_OPCODE_ENDIF:
2378 if_stack_depth--;
2379          brw_ENDIF(p, if_stack[if_stack_depth]);
2380 if_depth_in_loop[loop_stack_depth]--;
2381 break;
2382
2383       case BRW_OPCODE_DO:
         /* Leave room for if_depth_in_loop[loop_stack_depth + 1] below. */
         assert(loop_stack_depth + 1 < 16);
2384          loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
2385          if_depth_in_loop[loop_stack_depth] = 0;
2386 break;
2387
2388 case BRW_OPCODE_BREAK:
2389 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
2390 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2391 break;
2392 case BRW_OPCODE_CONTINUE:
2393 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
2394 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2395 break;
2396
2397 case BRW_OPCODE_WHILE: {
2398 struct brw_instruction *inst0, *inst1;
2399 GLuint br = 1;
2400
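         /* Jump counts on gen5 are in units of 64 bits, while an EU
          * instruction is 128 bits, so distances get scaled by br = 2.
          */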
2401 if (intel->gen == 5)
2402 br = 2;
2403
2404 assert(loop_stack_depth > 0);
2405 loop_stack_depth--;
2406 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
2407 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2408 while (inst0 > loop_stack[loop_stack_depth]) {
2409 inst0--;
2410 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
2411 inst0->bits3.if_else.jump_count == 0) {
2412 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
2413 }
2414 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
2415 inst0->bits3.if_else.jump_count == 0) {
2416 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
2417 }
2418 }
2419 }
2420 break;
2421
2422 case FS_OPCODE_RCP:
2423 case FS_OPCODE_RSQ:
2424 case FS_OPCODE_SQRT:
2425 case FS_OPCODE_EXP2:
2426 case FS_OPCODE_LOG2:
2427 case FS_OPCODE_POW:
2428 case FS_OPCODE_SIN:
2429 case FS_OPCODE_COS:
2430 generate_math(inst, dst, src);
2431 break;
2432 case FS_OPCODE_LINTERP:
2433 generate_linterp(inst, dst, src);
2434 break;
2435 case FS_OPCODE_TEX:
2436 case FS_OPCODE_TXB:
2437 case FS_OPCODE_TXL:
2438 generate_tex(inst, dst, src[0]);
2439 break;
2440 case FS_OPCODE_DISCARD:
2441 generate_discard(inst, dst /* src0 == dst */);
2442 break;
2443 case FS_OPCODE_DDX:
2444 generate_ddx(inst, dst, src[0]);
2445 break;
2446 case FS_OPCODE_DDY:
2447 generate_ddy(inst, dst, src[0]);
2448 break;
2449 case FS_OPCODE_FB_WRITE:
2450 generate_fb_write(inst);
2451 break;
2452 default:
2453 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2454 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2455 brw_opcodes[inst->opcode].name);
2456 } else {
2457 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2458 }
2459 this->fail = true;
2460 }
2461
2462       while (annotation_len < p->nr_insn) {
2463 annotation_len *= 2;
2464 if (annotation_len < 16)
2465 annotation_len = 16;
2466
2467 this->annotation_string = talloc_realloc(this->mem_ctx,
2468 annotation_string,
2469 const char *,
2470 annotation_len);
2471 this->annotation_ir = talloc_realloc(this->mem_ctx,
2472 annotation_ir,
2473 ir_instruction *,
2474 annotation_len);
2475 }
2476
2477 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2478 this->annotation_string[i] = inst->annotation;
2479 this->annotation_ir[i] = inst->ir;
2480 }
2481 last_native_inst = p->nr_insn;
2482 }
2483 }
2484
2485 GLboolean
2486 brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
2487 {
2488 struct brw_compile *p = &c->func;
2489 struct intel_context *intel = &brw->intel;
2490 GLcontext *ctx = &intel->ctx;
2491 struct brw_shader *shader = NULL;
2492 struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
2493
2494 if (!prog)
2495 return GL_FALSE;
2496
2497 if (!using_new_fs)
2498 return GL_FALSE;
2499
2500 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
2501 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
2502 shader = (struct brw_shader *)prog->_LinkedShaders[i];
2503 break;
2504 }
2505 }
2506 if (!shader)
2507 return GL_FALSE;
2508
2509 /* We always use 8-wide mode, at least for now. For one, flow
2510     * control only works in 8-wide.  Also, when we're fragment-shader
2511     * bound, we're almost always under register pressure as well, so
2512     * 8-wide saves us from the performance cliff of spilling
2513     * registers.
2514 */
2515 c->dispatch_width = 8;
2516
2517 if (INTEL_DEBUG & DEBUG_WM) {
2518 printf("GLSL IR for native fragment shader %d:\n", prog->Name);
2519 _mesa_print_ir(shader->ir, NULL);
2520 printf("\n");
2521 }
2522
2523 /* Now the main event: Visit the shader IR and generate our FS IR for it.
2524 */
2525 fs_visitor v(c, shader);
2526
2527 if (0) {
2528 v.emit_dummy_fs();
2529 } else {
2530 v.emit_interpolation_setup();
2531
2532       /* Generate FS IR for main().  (The visitor only descends into
2533        * functions called "main".)
2534 */
2535 foreach_iter(exec_list_iterator, iter, *shader->ir) {
2536 ir_instruction *ir = (ir_instruction *)iter.get();
2537 v.base_ir = ir;
2538 ir->accept(&v);
2539 }
2540
2541 v.emit_fb_writes();
2542 v.assign_curb_setup();
2543 v.assign_urb_setup();
2544 if (0)
2545 v.assign_regs_trivial();
2546 else
2547 v.assign_regs();
2548 }
2549
2550 v.generate_code();
2551
2552 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */
2553
2554 if (v.fail)
2555 return GL_FALSE;
2556
2557 if (INTEL_DEBUG & DEBUG_WM) {
2558 const char *last_annotation_string = NULL;
2559 ir_instruction *last_annotation_ir = NULL;
2560
2561 printf("Native code for fragment shader %d:\n", prog->Name);
2562 for (unsigned int i = 0; i < p->nr_insn; i++) {
2563 if (last_annotation_ir != v.annotation_ir[i]) {
2564 last_annotation_ir = v.annotation_ir[i];
2565 if (last_annotation_ir) {
2566 printf(" ");
2567 last_annotation_ir->print();
2568 printf("\n");
2569 }
2570 }
2571 if (last_annotation_string != v.annotation_string[i]) {
2572 last_annotation_string = v.annotation_string[i];
2573 if (last_annotation_string)
2574 printf(" %s\n", last_annotation_string);
2575 }
2576 brw_disasm(stdout, &p->store[i], intel->gen);
2577 }
2578 printf("\n");
2579 }
2580
2581 c->prog_data.total_grf = v.grf_used;
2582 c->prog_data.total_scratch = 0;
2583
2584 return GL_TRUE;
2585 }