i965: Split FS_OPCODE_DISCARD into two steps.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "talloc.h"
}
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"

enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE
};

enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
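   /* Discard is split into two opcodes: the first computes the inverse of
    * the current live-pixel mask into a temporary GRF, and the second ANDs
    * that temporary into the mask controlling which pixels get written
    * (see fs_visitor::visit(ir_discard *)).
    */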
   FS_OPCODE_DISCARD_NOT,
   FS_OPCODE_DISCARD_AND,
};

static int using_new_fs = -1;
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);

struct gl_shader *
brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = talloc_zero(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   return &shader->base;
}

struct gl_shader_program *
brw_new_shader_program(GLcontext *ctx, GLuint name)
{
   struct brw_shader_program *prog;
   prog = talloc_zero(NULL, struct brw_shader_program);
   if (prog) {
      prog->base.Name = name;
      _mesa_init_shader_program(ctx, &prog->base);
   }
   return &prog->base;
}

GLboolean
brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
{
   if (!_mesa_ir_compile_shader(ctx, shader))
      return GL_FALSE;

   return GL_TRUE;
}

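/* Shader linking entry point.  When the INTEL_NEW_FS environment variable
 * is set, each linked fragment shader additionally gets a private, lowered
 * clone of its IR (matrix ops scalarized, expressions and flow control
 * simplified) for the new FS backend below to consume.
 */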
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   struct intel_context *intel = intel_context(ctx);
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
         void *mem_ctx = talloc_new(NULL);
         bool progress;

         if (shader->ir)
            talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

         do_mat_op_to_vec(shader->ir);
         do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);
         do_lower_texture_projection(shader->ir);
         brw_do_cubemap_normalize(shader->ir);

         do {
            progress = false;

            brw_do_channel_expressions(shader->ir);
            brw_do_vector_splitting(shader->ir);

            progress = do_lower_jumps(shader->ir, true, true,
                                      true, /* main return */
                                      false, /* continue */
                                      false /* loops */
                                      ) || progress;

            progress = do_common_optimization(shader->ir, true, 32) || progress;

            progress = lower_noise(shader->ir) || progress;
            progress =
               lower_variable_index_to_cond_assign(shader->ir,
                                                   GL_TRUE, /* input */
                                                   GL_TRUE, /* output */
                                                   GL_TRUE, /* temp */
                                                   GL_TRUE /* uniform */
                                                   ) || progress;
            if (intel->gen == 6) {
               progress = do_if_to_cond_assign(shader->ir) || progress;
            }
         } while (progress);

         validate_ir_tree(shader->ir);

         reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}

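/* Returns the number of scalar components a value of the given GLSL type
 * occupies; this is the unit in which virtual GRF sizes and reg_offsets
 * are measured below.
 */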
static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return type->components();
   case GLSL_TYPE_ARRAY:
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up no register space, since they're baked in at
       * link time.
       */
      return 0;
   default:
      assert(!"not reached");
      return 0;
   }
}

class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM, FIXED_HW_REG, UNIFORM, BAD_FILE. */
   enum register_file file;
   /** virtual register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the virtual register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   bool negate;
   bool abs;
   struct brw_reg fixed_hw_reg;

   /** Value for file == IMM. */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};

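/* reg_undef is the default-constructed BAD_FILE register, used when an
 * instruction has no source in that slot; reg_null is the hardware null
 * register, used as a destination when only the condition flags matter.
 */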
static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);

class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->header_present = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool header_present;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};

class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      this->variable_ht = hash_table_ctor(0,
                                          hash_table_pointer_hash,
                                          hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;

      this->virtual_grf_sizes = NULL;
      this->virtual_grf_next = 1;
      this->virtual_grf_array_size = 0;
      this->virtual_grf_def = NULL;
      this->virtual_grf_use = NULL;

      this->kill_emitted = false;
   }

   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   fs_reg *variable_storage(ir_variable *var);
   int virtual_grf_alloc(int size);

   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void calculate_urb_setup();
   void assign_urb_setup();
   void assign_regs();
   void assign_regs_trivial();
   void calculate_live_intervals();
   bool propagate_constants();
   bool register_coalesce();
   bool dead_code_eliminate();
   bool virtual_grf_interferes(int a, int b);
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard_not(fs_inst *inst, struct brw_reg temp);
   void generate_discard_and(fs_inst *inst, struct brw_reg temp);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
   fs_reg *emit_frontfacing_interpolation(ir_variable *ir);
   fs_reg *emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup_gen4();
   void emit_interpolation_setup_gen6();
   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
   void emit_fb_writes();
   void emit_assignment_writes(fs_reg &l, fs_reg &r,
                               const glsl_type *type, bool predicated);

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);
   void setup_builtin_uniform_values(ir_variable *ir);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;

   int *virtual_grf_sizes;
   int virtual_grf_next;
   int virtual_grf_array_size;
   int *virtual_grf_def;
   int *virtual_grf_use;

   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;
   int urb_setup[FRAG_ATTRIB_MAX];
   bool kill_emitted;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;

   int grf_used;
};

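/* Allocates a new virtual GRF of the given size (in units of scalar
 * components, as computed by type_size()) and returns its number.
 * Register number 0 is left unused so that reg == 0 can denote a fixed
 * hardware register in fs_reg.
 */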
int
fs_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_next) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
                                         int, virtual_grf_array_size);

      /* This slot is always unused. */
      virtual_grf_sizes[0] = 0;
   }
   virtual_grf_sizes[virtual_grf_next] = size;
   return virtual_grf_next++;
}

/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   this->type = BRW_REGISTER_TYPE_F;
}

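/* Maps a GLSL base type to the BRW register type used to operate on it. */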
int
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   default:
      assert(!"not reached");
      return BRW_REGISTER_TYPE_F;
   }
}

/** Automatic reg constructor. */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));
   this->reg_offset = 0;
   this->type = brw_type_for_base_type(type);
}

fs_reg *
fs_visitor::variable_storage(ir_variable *var)
{
   return (fs_reg *)hash_table_find(this->variable_ht, var);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
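 *
 * For example, a mat3 uniform is handled as three vec3 columns, each
 * occupying one ParameterValues[] slot, so setup_uniform_values() returns
 * 3 and appends nine float pointers to c->prog_data.param.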
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}


/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const struct gl_builtin_uniform_desc *statevar = NULL;

   for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
      if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) {
         statevar = &_mesa_builtin_uniform_desc[i];
         break;
      }
   }

   if (statevar == NULL) {
      this->fail = true;
      printf("Failed to find builtin uniform `%s'\n", ir->name);
      return;
   }

   int array_count;
   if (ir->type->is_array()) {
      array_count = ir->type->length;
   } else {
      array_count = 1;
   }

   for (int a = 0; a < array_count; a++) {
      for (unsigned int i = 0; i < statevar->num_elements; i++) {
         struct gl_builtin_uniform_element *element = &statevar->elements[i];
         int tokens[STATE_LENGTH];

         memcpy(tokens, element->tokens, sizeof(element->tokens));
         if (ir->type->is_array()) {
            tokens[1] = a;
         }

         /* This state reference has already been set up by ir_to_mesa,
          * but we'll get the same index back here.
          */
         int index = _mesa_add_state_reference(this->fp->Base.Parameters,
                                               (gl_state_index *)tokens);
         float *vec_values = this->fp->Base.Parameters->ParameterValues[index];

         /* Add each of the unique swizzles of the element as a
          * parameter.  This'll end up matching the expected layout of
          * the array/matrix/structure we're trying to fill in.
          */
         int last_swiz = -1;
         for (unsigned int i = 0; i < 4; i++) {
            int swiz = GET_SWZ(element->swizzle, i);
            if (swiz == last_swiz)
               break;
            last_swiz = swiz;

            c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
         }
      }
   }
}

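/* Sets up gl_FragCoord from the computed pixel centers, honoring the
 * fragment shader's layout qualifiers: centers sit at half-integer
 * coordinates unless pixel_center_integer is set, and Y is flipped
 * against the drawable height when the origin is lower-left.
 */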
fs_reg *
fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   fs_reg wpos = *reg;
   fs_reg neg_y = this->pixel_y;
   neg_y.negate = true;

   /* gl_FragCoord.x */
   if (ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
   } else {
      emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.y */
   if (ir->origin_upper_left && ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
   } else {
      fs_reg pixel_y = this->pixel_y;
      float offset = (ir->pixel_center_integer ? 0.0 : 0.5);

      if (!ir->origin_upper_left) {
         pixel_y.negate = true;
         offset += c->key.drawable_height - 1.0;
      }

      emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.z */
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 2)));
   wpos.reg_offset++;

   /* gl_FragCoord.w: Already set up in emit_interpolation */
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));

   return reg;
}

fs_reg *
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   /* Interpolation is always in floating point regs. */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
         this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
         if (urb_setup[location] == -1) {
            /* If there's no incoming setup data for this slot, don't
             * emit interpolation for it.
             */
            attr.reg_offset += type->vector_elements;
            location++;
            continue;
         }

         for (unsigned int c = 0; c < type->vector_elements; c++) {
            struct brw_reg interp = interp_reg(location, c);
            emit(fs_inst(FS_OPCODE_LINTERP,
                         attr,
                         this->delta_x,
                         this->delta_y,
                         fs_reg(interp)));
            attr.reg_offset++;
         }

         if (intel->gen < 6) {
            attr.reg_offset -= type->vector_elements;
            for (unsigned int c = 0; c < type->vector_elements; c++) {
               emit(fs_inst(BRW_OPCODE_MUL,
                            attr,
                            attr,
                            this->pixel_w));
               attr.reg_offset++;
            }
         }
         location++;
      }
   }

   return reg;
}

fs_reg *
fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);

   /* The frontfacing comes in as a bit in the thread payload. */
   if (intel->gen >= 6) {
      emit(fs_inst(BRW_OPCODE_ASR,
                   *reg,
                   fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
                   fs_reg(15)));
      emit(fs_inst(BRW_OPCODE_NOT,
                   *reg,
                   *reg));
      emit(fs_inst(BRW_OPCODE_AND,
                   *reg,
                   *reg,
                   fs_reg(1)));
   } else {
      struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
      /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
       * us front face
       */
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
                                   *reg,
                                   fs_reg(r1_6ud),
                                   fs_reg(1u << 31)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
   }

   return reg;
}

void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
         reg = emit_fragcoord_interpolation(ir);
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
         reg = emit_frontfacing_interpolation(ir);
      } else {
         reg = emit_general_interpolation(ir);
      }
      assert(reg);
      hash_table_insert(this->variable_ht, reg, ir);
      return;
   }

   if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}

void
fs_visitor::visit(ir_dereference_variable *ir)
{
   fs_reg *reg = variable_storage(ir->var);
   this->result = *reg;
}

void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}

void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      assert(this->result.file == UNIFORM ||
             (this->result.file == GRF &&
              this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}

void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[2], temp;
   fs_reg result;
   fs_inst *inst;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         ir_print_visitor v;
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
         this->fail = true;
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

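   /* CMP writes all-ones (0xffffffff) per channel on pass and 0 on fail,
    * so the comparison cases below AND the result with 1 to produce the
    * 0/1 boolean value the rest of the IR expects.
    */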
   switch (ir->operation) {
   case ir_unop_logic_not:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      this->result = op[0];
      break;
   case ir_unop_sign:
      temp = fs_reg(this, ir->type);

      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
      inst->predicated = true;

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
      inst->predicated = true;

      break;
   case ir_unop_rcp:
      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
      break;

   case ir_unop_exp2:
      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
      break;
   case ir_unop_log2:
      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
      break;
   case ir_unop_cos:
      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
      break;

   case ir_unop_dFdx:
      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
      break;
   case ir_unop_dFdy:
      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
      break;

   case ir_binop_add:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_greater:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_lequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_LE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_gequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_GE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_equal:
   case ir_binop_all_equal: /* same as equal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_Z;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_nequal:
   case ir_binop_any_nequal: /* same as nequal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;

   case ir_binop_logic_xor:
      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_or:
      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_and:
      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
      break;

   case ir_binop_dot:
   case ir_binop_cross:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_unop_sqrt:
      emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
      break;

   case ir_unop_rsq:
      emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
      break;

   case ir_unop_i2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
      break;
   case ir_unop_f2i:
      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      break;

   case ir_unop_trunc:
      emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      break;
   case ir_unop_fract:
      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
      break;

   case ir_binop_min:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;
   case ir_binop_max:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;

   case ir_binop_pow:
      inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
      break;

   case ir_unop_bit_not:
   case ir_unop_u2f:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
}

void
fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
                                   const glsl_type *type, bool predicated)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->components(); i++) {
         l.type = brw_type_for_base_type(type);
         r.type = brw_type_for_base_type(type);

         fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
         inst->predicated = predicated;

         l.reg_offset++;
         r.reg_offset++;
      }
      break;
   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.array, predicated);
      }
      break;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.structure[i].type,
                                predicated);
      }
      break;

   case GLSL_TYPE_SAMPLER:
      break;

   default:
      assert(!"not reached");
      break;
   }
}

void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   if (ir->lhs->type->is_scalar() ||
       ir->lhs->type->is_vector()) {
      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
         if (ir->write_mask & (1 << i)) {
            inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
            if (ir->condition)
               inst->predicated = true;
            r.reg_offset++;
         }
         l.reg_offset++;
      }
   } else {
      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
   }
}

fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   int mlen;
   int base_mrf = 2;
   bool simd16 = false;
   fs_reg orig_dst;

   if (ir->shadow_comparitor) {
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      coordinate));
         coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen = 3;

      if (ir->op == ir_tex) {
         /* There's no plain shadow compare message, so we use shadow
          * compare with a bias of 0.0.
          */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      fs_reg(0.0f)));
         mlen++;
      } else if (ir->op == ir_txb) {
         ir->lod_info.bias->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      } else {
         assert(ir->op == ir_txl);
         ir->lod_info.lod->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      }

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   } else if (ir->op == ir_tex) {
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      coordinate));
         coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen = 3;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      assert(ir->op == ir_txb || ir->op == ir_txl);

      for (mlen = 0; mlen < ir->coordinate->type->vector_elements * 2;) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      coordinate));
         coordinate.reg_offset++;
         mlen++;

         /* The unused upper half. */
         mlen++;
      }

      /* lod/bias appears after u/v/r. */
      mlen = 6;

      if (ir->op == ir_txb) {
         ir->lod_info.bias->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      } else {
         ir->lod_info.lod->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      }

      /* The unused upper half. */
      mlen++;

      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk.  We'll need to move
       * this weirdness around to the expected layout.
       */
      simd16 = true;
      orig_dst = dst;
      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
                                                       2));
      dst.type = BRW_REGISTER_TYPE_F;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->mlen = mlen;

   if (simd16) {
      for (int i = 0; i < 4; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
         orig_dst.reg_offset++;
         dst.reg_offset += 2;
      }
   }

   return inst;
}

fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   /* gen5's SIMD8 sampler has slots for u, v, r, array index, then
    * optional parameters like the shadow comparator or LOD bias.  If
    * optional parameters aren't present, those base slots are
    * optional and don't need to be included in the message.
    *
    * We don't fill in the unnecessary slots regardless, which may
    * look surprising in the disassembly.
    */
   int mlen;
   int base_mrf = 2;

   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   if (ir->shadow_comparitor) {
      mlen = MAX2(mlen, 4);

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      mlen = MAX2(mlen, 4);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      mlen = MAX2(mlen, 4);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->mlen = mlen;

   return inst;
}

void
fs_visitor::visit(ir_texture *ir)
{
   fs_inst *inst = NULL;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   if (intel->gen < 5) {
      inst = emit_texture_gen4(ir, dst, coordinate);
   } else {
      inst = emit_texture_gen5(ir, dst, coordinate);
   }

   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
                                      ctx->Shader.CurrentProgram,
                                      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
      fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
         int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
         fs_reg l = swizzle_dst;
         l.reg_offset += i;

         if (swiz == SWIZZLE_ZERO) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
         } else if (swiz == SWIZZLE_ONE) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
         } else {
            fs_reg r = dst;
            r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
            emit(fs_inst(BRW_OPCODE_MOV, l, r));
         }
      }
      this->result = swizzle_dst;
   }
}

void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

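/* A discard is emitted as a two-step sequence: FS_OPCODE_DISCARD_NOT
 * inverts the current pixel mask into a temporary, and
 * FS_OPCODE_DISCARD_AND folds that temporary into the kill mask.
 * Presumably the split gives the intermediate mask an ordinary virtual
 * GRF that register allocation is free to place.
 */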
void
fs_visitor::visit(ir_discard *ir)
{
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD_NOT, temp, reg_null));
   emit(fs_inst(FS_OPCODE_DISCARD_AND, reg_null, temp));
   kill_emitted = true;
}

void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_INT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_BOOL:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
                                   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;
      case ir_binop_nequal:
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      case ir_binop_gequal:
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         break;
      case ir_binop_lequal:
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         break;
      case ir_binop_greater:
         inst->conditional_mod = BRW_CONDITIONAL_G;
         break;
      case ir_binop_less:
         inst->conditional_mod = BRW_CONDITIONAL_L;
         break;
      default:
         assert(!"not reached: unknown loop condition");
         this->fail = true;
         break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined before we get to ir_to_mesa.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color. */
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 2),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 3),
                fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 4),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 5),
                fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
                        fs_reg(0),
                        fs_reg(0)));
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
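   /* g1 of the thread payload holds the screen X/Y origins of the four
    * 2x2 subspans.  brw_imm_v packs eight 4-bit values, so adding
    * 0x10101010 = {0,1,0,1,0,1,0,1} and 0x11001100 = {0,0,1,1,0,0,1,1}
    * produces the per-pixel X and Y coordinates.
    */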
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute 1/pos.w";
   this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));

   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
   this->delta_y = fs_reg(brw_vec8_grf(3, 0));

   this->current_annotation = NULL;
}

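/* Assembles the SIMD8 FB write message payload: an optional two-register
 * header, optional antialias/stencil and depth values according to the
 * compile key, and four MRFs of color per render target.
 */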
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   GLboolean header_present = GL_TRUE;
   int nr = 0;

   if (intel->gen >= 6 &&
       !this->kill_emitted &&
       c->key.nr_color_regions == 1) {
      header_present = false;
   }

   if (header_present) {
      /* m0, m1 header */
      nr += 2;
   }

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color.  It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
         /* Pass through the payload depth. */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      if (this->frag_color || this->frag_data) {
         for (int i = 0; i < 4; i++) {
            emit(fs_inst(BRW_OPCODE_MOV,
                         fs_reg(MRF, color_mrf + i),
                         color));
            color.reg_offset++;
         }
      }

      if (this->frag_color)
         color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
      inst->header_present = header_present;
   }

   if (c->key.nr_color_regions == 0) {
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
      inst->header_present = header_present;
   }

   this->current_annotation = NULL;
}

void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;
   struct brw_reg implied_header;

   /* The header is 2 regs, with g0 and g1 as the contents.  g0 gets loaded
    * by the implied header move; g1 is copied by hand here.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
         brw_MOV(p,
                 brw_message_reg(0),
                 brw_vec8_grf(0, 0));
         implied_header = brw_null_reg();
      } else {
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }

      brw_MOV(p,
              brw_message_reg(1),
              brw_vec8_grf(1, 0));
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                0, /* base MRF */
                implied_header,
                inst->target,
                inst->mlen,
                0,
                eot);
}

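/* PLN evaluates the plane equation in a single instruction, but requires
 * delta_x/delta_y to sit in consecutive registers (and, before gen6, on an
 * even register boundary); otherwise we fall back to a LINE+MAC pair.
 */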
void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}

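/* Extended math is a send to the shared math unit: the operand is passed
 * through the message registers (the `2' argument to brw_math() below is
 * the message register number), and POW's second operand has to be loaded
 * into m3 by hand.
 */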
2035 void
2036 fs_visitor::generate_math(fs_inst *inst,
2037 struct brw_reg dst, struct brw_reg *src)
2038 {
2039 int op;
2040
2041 switch (inst->opcode) {
2042 case FS_OPCODE_RCP:
2043 op = BRW_MATH_FUNCTION_INV;
2044 break;
2045 case FS_OPCODE_RSQ:
2046 op = BRW_MATH_FUNCTION_RSQ;
2047 break;
2048 case FS_OPCODE_SQRT:
2049 op = BRW_MATH_FUNCTION_SQRT;
2050 break;
2051 case FS_OPCODE_EXP2:
2052 op = BRW_MATH_FUNCTION_EXP;
2053 break;
2054 case FS_OPCODE_LOG2:
2055 op = BRW_MATH_FUNCTION_LOG;
2056 break;
2057 case FS_OPCODE_POW:
2058 op = BRW_MATH_FUNCTION_POW;
2059 break;
2060 case FS_OPCODE_SIN:
2061 op = BRW_MATH_FUNCTION_SIN;
2062 break;
2063 case FS_OPCODE_COS:
2064 op = BRW_MATH_FUNCTION_COS;
2065 break;
2066 default:
2067 assert(!"not reached: unknown math function");
2068 op = 0;
2069 break;
2070 }
2071
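   /* brw_math() sends src[0] in message register m2; POW is the only
    * two-operand math function here, and its second operand has to be
    * staged in the following message register, m3.
    */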
   if (inst->opcode == FS_OPCODE_POW) {
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
            BRW_MATH_SATURATE_NONE,
            2, src[0],
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}

void
fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   int msg_type = -1;
   int rlen = 4;
   uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;

   if (intel->gen >= 5) {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
         } else {
            msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
         }
         break;
      }
   } else {
      switch (inst->opcode) {
      case FS_OPCODE_TEX:
         /* Note that G45 and older determine shadow compare and dispatch
          * width from the message length for most messages.
          */
         msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         if (inst->shadow_compare) {
            assert(inst->mlen == 5);
         } else {
            assert(inst->mlen <= 6);
         }
         break;
      case FS_OPCODE_TXB:
         if (inst->shadow_compare) {
            assert(inst->mlen == 5);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
         } else {
            assert(inst->mlen == 8);
            msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
            simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
         }
         break;
      }
   }
   assert(msg_type != -1);

   if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
      rlen = 8;
      dst = vec16(dst);
   }

   /* The coordinate payload was set up one register past where the g0
    * header goes; back src up to cover the header, which brw_SAMPLE()
    * fills in (hence inst->mlen + 1 below).
    */
   src.nr--;

   brw_SAMPLE(p,
              retype(dst, BRW_REGISTER_TYPE_UW),
              src.nr,
              retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
              SURF_INDEX_TEXTURE(inst->sampler),
              inst->sampler,
              WRITEMASK_XYZW,
              msg_type,
              rlen,
              inst->mlen + 1,
              0,
              1,
              simd_mode);
}

/* For FS_OPCODE_DDX and FS_OPCODE_DDY, per channel of output we've got
 * input looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *             DDX                    DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * with another set of two subspans appended in 16-pixel dispatch mode.
 *
 * For DDX it ends up being easy: width = 2 with horizontal stride = 0
 * gets the same result for each pair, and vertical stride = 2 jumps us
 * two elements after processing a pair.  DDY is harder, since we want
 * the pairs swizzled between each other.  We could probably do it like
 * DDX and swizzle into the right order later, but bail for now and just
 * produce ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4).
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
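   /* With width 2 / horizontal stride 0 / vertical stride 2, src0
    * (subreg offset 1) walks tr, tr, br, br within each subspan while
    * src1 (offset 0) walks tl, tl, bl, bl, so the subtraction below
    * yields the DDX column described above.
    */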
   brw_ADD(p, dst, src0, negate(src1));
}

void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
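   /* Width 4 / horizontal stride 0 / vertical stride 4 replicates one
    * element per subspan: src0 reads tl four times and src1 (subreg
    * offset 2) reads bl four times, giving the coarse
    * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) result noted above.
    */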
   brw_ADD(p, dst, src0, negate(src1));
}

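/* First half of the discard sequence: with execution masking disabled,
 * take the inverse of the IMASK mask register, producing a vector of
 * channels that did *not* execute the discard.
 */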
void
fs_visitor::generate_discard_not(fs_inst *inst, struct brw_reg mask)
{
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, mask, brw_mask_reg(1)); /* IMASK */
   brw_pop_insn_state(p);
}

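/* Second half of the discard sequence: AND the computed "keep" mask
 * into the pixel enable field in g0, so that later FB writes (whose
 * header is copied from g0) drop the discarded pixels.
 */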
void
fs_visitor::generate_discard_and(fs_inst *inst, struct brw_reg mask)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   mask = brw_uw1_reg(mask.file, mask.nr, 0);

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_AND(p, g0, mask, g0);
   brw_pop_insn_state(p);
}

void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

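   /* Each CURBE register holds eight float constants, so constant_nr N
    * lands in GRF first_curbe_grf + N / 8 at subregister N % 8 (e.g.
    * constant 11 is subreg 3 of first_curbe_grf + 1).
    */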
   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
            int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
            struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
                                                  constant_nr / 8,
                                                  constant_nr % 8);

            inst->src[i].file = FIXED_HW_REG;
            inst->src[i].fixed_hw_reg = brw_reg;
         }
      }
   }
}

void
fs_visitor::calculate_urb_setup()
{
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      urb_setup[i] = -1;
   }

   int urb_next = 0;
   /* Figure out where each of the incoming setup attributes lands. */
   if (intel->gen >= 6) {
      for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
         if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) {
            urb_setup[i] = urb_next++;
         }
      }
   } else {
      /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
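      /* VERT_RESULT_HPOS through VERT_RESULT_TEX7 map 1:1 onto the
       * corresponding FRAG_ATTRIB slots; generic varyings are re-based
       * from VERT_RESULT_VAR0 to FRAG_ATTRIB_VAR0; everything else
       * (e.g. point size) has no FS input and is skipped.
       */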
      for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
         if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
            int fp_index;

            if (i >= VERT_RESULT_VAR0)
               fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
            else if (i <= VERT_RESULT_TEX7)
               fp_index = i;
            else
               fp_index = -1;

            if (fp_index >= 0)
               urb_setup[fp_index] = urb_next++;
         }
      }
   }

   /* Each attribute is 4 setup channels, each of which is half a reg. */
   c->prog_data.urb_read_length = urb_next * 2;
}

void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;

   /* Offset all the urb_setup[] indices by the actual position of the
    * setup regs, now that the location of the constants has been chosen.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
         continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      inst->src[2].fixed_hw_reg.nr += urb_start;
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}

static void
assign_reg(int *reg_hw_locations, fs_reg *reg)
{
   if (reg->file == GRF && reg->reg != 0) {
      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
      reg->reg = 0;
   }
}

void
fs_visitor::assign_regs_trivial()
{
   int last_grf = 0;
   int hw_reg_mapping[this->virtual_grf_next];
   int i;

   hw_reg_mapping[0] = 0;
   hw_reg_mapping[1] = this->first_non_payload_grf;
   for (i = 2; i < this->virtual_grf_next; i++) {
      hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
                           this->virtual_grf_sizes[i - 1]);
   }
   last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      assign_reg(hw_reg_mapping, &inst->dst);
      assign_reg(hw_reg_mapping, &inst->src[0]);
      assign_reg(hw_reg_mapping, &inst->src[1]);
      /* Cover src[2] as well so three-source instructions get remapped. */
      assign_reg(hw_reg_mapping, &inst->src[2]);
   }

   this->grf_used = last_grf + 1;
}

void
fs_visitor::assign_regs()
{
   int last_grf = 0;
   int hw_reg_mapping[this->virtual_grf_next + 1];
   int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
   int class_sizes[base_reg_count];
   int class_count = 0;
   int aligned_pair_class = -1;

   /* Set up the register classes.
    *
    * The base registers store a scalar value.  For texture samples,
    * we get virtual GRFs composed of 4 contiguous hw registers.  For
    * structures and arrays, we store them as contiguous larger things
    * than that, though we should be able to do better most of the
    * time.
    */
   class_sizes[class_count++] = 1;
   if (brw->has_pln && intel->gen < 6) {
      /* Always set up the (unaligned) pairs for gen5, so we can find
       * them for making the aligned pair class.
       */
      class_sizes[class_count++] = 2;
   }
   for (int r = 1; r < this->virtual_grf_next; r++) {
      int i;

      for (i = 0; i < class_count; i++) {
         if (class_sizes[i] == this->virtual_grf_sizes[r])
            break;
      }
      if (i == class_count) {
         if (this->virtual_grf_sizes[r] >= base_reg_count) {
            fprintf(stderr, "Object too large to register allocate.\n");
            this->fail = true;
         }

         class_sizes[class_count++] = this->virtual_grf_sizes[r];
      }
   }

   int ra_reg_count = 0;
   int class_base_reg[class_count];
   int class_reg_count[class_count];
   int classes[class_count + 1];

   for (int i = 0; i < class_count; i++) {
      class_base_reg[i] = ra_reg_count;
      class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
      ra_reg_count += class_reg_count[i];
   }

   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
   for (int i = 0; i < class_count; i++) {
      classes[i] = ra_alloc_reg_class(regs);

      for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
         ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
      }

      /* Add conflicts between our contiguous registers aliasing
       * base regs and other register classes' contiguous registers
       * that alias base regs, or the base regs themselves for classes[0].
       */
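      /* A size class_sizes[i] register based at i_r overlaps a size
       * class_sizes[c] register based at c_r exactly when
       * c_r > i_r - class_sizes[c] and c_r < i_r + class_sizes[i],
       * which is the range the inner loop below walks.
       */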
      for (int c = 0; c <= i; c++) {
         for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
            for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
                 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
                 c_r++) {

               if (0) {
                  printf("%d/%d conflicts %d/%d\n",
                         class_sizes[i], this->first_non_payload_grf + i_r,
                         class_sizes[c], this->first_non_payload_grf + c_r);
               }

               ra_add_reg_conflict(regs,
                                   class_base_reg[i] + i_r,
                                   class_base_reg[c] + c_r);
            }
         }
      }
   }

   /* Add a special class for aligned pairs, which we'll put delta_x/y
    * in on gen5 so that we can do PLN.
    */
   if (brw->has_pln && intel->gen < 6) {
      int reg_count = (base_reg_count - 1) / 2;
      int unaligned_pair_class = 1;
      assert(class_sizes[unaligned_pair_class] == 2);

      aligned_pair_class = class_count;
      classes[aligned_pair_class] = ra_alloc_reg_class(regs);
      class_base_reg[aligned_pair_class] = 0;
      class_reg_count[aligned_pair_class] = 0;
      int start = (this->first_non_payload_grf & 1) ? 1 : 0;

      for (int i = 0; i < reg_count; i++) {
         ra_class_add_reg(regs, classes[aligned_pair_class],
                          class_base_reg[unaligned_pair_class] + i * 2 + start);
      }
      class_count++;
   }

   ra_set_finalize(regs);

   struct ra_graph *g = ra_alloc_interference_graph(regs,
                                                    this->virtual_grf_next);
   /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
    * with nodes.
    */
   ra_set_node_class(g, 0, classes[0]);

   for (int i = 1; i < this->virtual_grf_next; i++) {
      for (int c = 0; c < class_count; c++) {
         if (class_sizes[c] == this->virtual_grf_sizes[i]) {
            if (aligned_pair_class >= 0 &&
                this->delta_x.reg == i) {
               ra_set_node_class(g, i, classes[aligned_pair_class]);
            } else {
               ra_set_node_class(g, i, classes[c]);
            }
            break;
         }
      }

      for (int j = 1; j < i; j++) {
         if (virtual_grf_interferes(i, j)) {
            ra_add_node_interference(g, i, j);
         }
      }
   }

   /* FINISHME: Handle spilling */
   if (!ra_allocate_no_spills(g)) {
      fprintf(stderr, "Failed to allocate registers.\n");
      this->fail = true;
      return;
   }

   /* Get the chosen virtual registers for each node, and map virtual
    * regs in the register classes back down to real hardware reg
    * numbers.
    */
   hw_reg_mapping[0] = 0; /* unused */
   for (int i = 1; i < this->virtual_grf_next; i++) {
      int reg = ra_get_node_reg(g, i);
      int hw_reg = -1;

      for (int c = 0; c < class_count; c++) {
         if (reg >= class_base_reg[c] &&
             reg < class_base_reg[c] + class_reg_count[c]) {
            hw_reg = reg - class_base_reg[c];
            break;
         }
      }

      assert(hw_reg != -1);
      hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
      last_grf = MAX2(last_grf,
                      hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      assign_reg(hw_reg_mapping, &inst->dst);
      assign_reg(hw_reg_mapping, &inst->src[0]);
      assign_reg(hw_reg_mapping, &inst->src[1]);
      /* Cover src[2] as well so three-source instructions get remapped. */
      assign_reg(hw_reg_mapping, &inst->src[2]);
   }

   this->grf_used = last_grf + 1;

   talloc_free(g);
   talloc_free(regs);
}

void
fs_visitor::calculate_live_intervals()
{
   int num_vars = this->virtual_grf_next;
   int *def = talloc_array(mem_ctx, int, num_vars);
   int *use = talloc_array(mem_ctx, int, num_vars);
   int loop_depth = 0;
   int loop_start = 0;

   for (int i = 0; i < num_vars; i++) {
      def[i] = 1 << 30;
      use[i] = -1;
   }

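   /* Walk the instructions once, narrowing def (MIN2) and widening use
    * (MAX2) per virtual GRF.  Accesses inside a loop are credited to
    * loop_start, and the WHILE patch-up below then stretches such uses
    * to the end of the loop, so the whole loop body is covered.
    */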
   int ip = 0;
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode == BRW_OPCODE_DO) {
         if (loop_depth++ == 0)
            loop_start = ip;
      } else if (inst->opcode == BRW_OPCODE_WHILE) {
         loop_depth--;

         if (loop_depth == 0) {
            /* FINISHME:
             *
             * Patches up any vars marked for use within the loop as
             * live until the end.  This is conservative, as there
             * will often be variables defined and used inside the
             * loop but dead at the end of the loop body.
             */
            for (int i = 0; i < num_vars; i++) {
               if (use[i] == loop_start) {
                  use[i] = ip;
               }
            }
         }
      } else {
         int eip = ip;

         if (loop_depth)
            eip = loop_start;

         for (unsigned int i = 0; i < 3; i++) {
            if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
               use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip);
            }
         }
         if (inst->dst.file == GRF && inst->dst.reg != 0) {
            def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip);
         }
      }

      ip++;
   }

   talloc_free(this->virtual_grf_def);
   talloc_free(this->virtual_grf_use);
   this->virtual_grf_def = def;
   this->virtual_grf_use = use;
}

/**
 * Attempts to move immediate constants into the immediate
 * constant slot of following instructions.
 *
 * Immediate constants are a bit tricky -- they have to be in the last
 * operand slot, and you can't apply abs/negate to them.
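 *
 * For example:
 *
 *    MOV tmp, 2.0F
 *    MUL dst, src, tmp
 *
 * can become
 *
 *    MOV tmp, 2.0F
 *    MUL dst, src, 2.0F
 *
 * after which dead code elimination can clean up the MOV.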
 */

bool
fs_visitor::propagate_constants()
{
   bool progress = false;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicated ||
          inst->dst.file != GRF || inst->src[0].file != IMM ||
          inst->dst.type != inst->src[0].type)
         continue;

      /* Don't bother with cases where we should have had the
       * operation on the constant folded in GLSL already.
       */
      if (inst->saturate)
         continue;

      /* Found a move of a constant to a GRF.  Find anything else using
       * the GRF before it's written, and replace it with the constant
       * if we can.
       */
      exec_list_iterator scan_iter = iter;
      scan_iter.next();
      for (; scan_iter.has_next(); scan_iter.next()) {
         fs_inst *scan_inst = (fs_inst *)scan_iter.get();

         if (scan_inst->opcode == BRW_OPCODE_DO ||
             scan_inst->opcode == BRW_OPCODE_WHILE ||
             scan_inst->opcode == BRW_OPCODE_ELSE ||
             scan_inst->opcode == BRW_OPCODE_ENDIF) {
            break;
         }

         for (int i = 2; i >= 0; i--) {
            if (scan_inst->src[i].file != GRF ||
                scan_inst->src[i].reg != inst->dst.reg ||
                scan_inst->src[i].reg_offset != inst->dst.reg_offset)
               continue;

            /* Don't bother with cases where we should have had the
             * operation on the constant folded in GLSL already.
             */
            if (scan_inst->src[i].negate || scan_inst->src[i].abs)
               continue;

            switch (scan_inst->opcode) {
            case BRW_OPCODE_MOV:
               scan_inst->src[i] = inst->src[0];
               progress = true;
               break;

            case BRW_OPCODE_MUL:
            case BRW_OPCODE_ADD:
               if (i == 1) {
                  scan_inst->src[i] = inst->src[0];
                  progress = true;
               } else if (i == 0 && scan_inst->src[1].file != IMM) {
                  /* Fit this constant in by commuting the operands;
                   * MUL and ADD commute, and the immediate has to be
                   * the second operand.
                   */
                  scan_inst->src[0] = scan_inst->src[1];
                  scan_inst->src[1] = inst->src[0];
                  progress = true;
               }
               break;
            case BRW_OPCODE_CMP:
               if (i == 1) {
                  scan_inst->src[i] = inst->src[0];
                  progress = true;
               }
               break;
            }
         }

         if (scan_inst->dst.file == GRF &&
             scan_inst->dst.reg == inst->dst.reg &&
             (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
              scan_inst->opcode == FS_OPCODE_TEX)) {
            break;
         }
      }
   }

   return progress;
}

/**
 * Must be called after calculate_live_intervals() to remove unused
 * writes to registers -- register allocation will fail otherwise
 * because something defined but not used won't be considered to
 * interfere with other regs.
 */
bool
fs_visitor::dead_code_eliminate()
{
   bool progress = false;
   int num_vars = this->virtual_grf_next;
   bool dead[num_vars];

   for (int i = 0; i < num_vars; i++) {
      /* This would be ">=", but the discard instructions use src == dst,
       * writing dst and then reading it back as src.
       */
      dead[i] = this->virtual_grf_def[i] > this->virtual_grf_use[i];

      if (dead[i]) {
         /* Mark off its interval so it won't interfere with anything. */
         this->virtual_grf_def[i] = -1;
         this->virtual_grf_use[i] = -1;
      }
   }

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->dst.file == GRF && dead[inst->dst.reg]) {
         inst->remove();
         progress = true;
      }
   }

   return progress;
}

bool
fs_visitor::register_coalesce()
{
   bool progress = false;

   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != BRW_OPCODE_MOV ||
          inst->predicated ||
          inst->saturate ||
          inst->dst.file != GRF || inst->src[0].file != GRF ||
          inst->dst.type != inst->src[0].type)
         continue;

      /* Found a move of a GRF to a GRF.  Let's see if we can coalesce
       * them: check for no writes to either one until the exit of the
       * program.
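       *
       * For example, "MOV b, a" followed by "ADD d, b, c" becomes
       * "ADD d, a, c" once the MOV is removed.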
       */
      bool interfered = false;
      exec_list_iterator scan_iter = iter;
      scan_iter.next();
      for (; scan_iter.has_next(); scan_iter.next()) {
         fs_inst *scan_inst = (fs_inst *)scan_iter.get();

         if (scan_inst->opcode == BRW_OPCODE_DO ||
             scan_inst->opcode == BRW_OPCODE_WHILE ||
             scan_inst->opcode == BRW_OPCODE_ENDIF) {
            interfered = true;
            iter = scan_iter;
            break;
         }

         if (scan_inst->dst.file == GRF) {
            if (scan_inst->dst.reg == inst->dst.reg &&
                (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
                 scan_inst->opcode == FS_OPCODE_TEX)) {
               interfered = true;
               break;
            }
            if (scan_inst->dst.reg == inst->src[0].reg &&
                (scan_inst->dst.reg_offset == inst->src[0].reg_offset ||
                 scan_inst->opcode == FS_OPCODE_TEX)) {
               interfered = true;
               break;
            }
         }
      }
      if (interfered) {
         continue;
      }

      /* Rewrite the later usage to point at the source of the move to
       * be removed.
       */
      for (exec_list_iterator scan_iter = iter; scan_iter.has_next();
           scan_iter.next()) {
         fs_inst *scan_inst = (fs_inst *)scan_iter.get();

         for (int i = 0; i < 3; i++) {
            if (scan_inst->src[i].file == GRF &&
                scan_inst->src[i].reg == inst->dst.reg &&
                scan_inst->src[i].reg_offset == inst->dst.reg_offset) {
               scan_inst->src[i].reg = inst->src[0].reg;
               scan_inst->src[i].reg_offset = inst->src[0].reg_offset;
               scan_inst->src[i].abs |= inst->src[0].abs;
               scan_inst->src[i].negate ^= inst->src[0].negate;
            }
         }
      }

      inst->remove();
      progress = true;
   }

   return progress;
}

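/* Two virtual GRFs interfere when their live intervals [def, use]
 * overlap, i.e. when the later def comes at or before the earlier use.
 */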
bool
fs_visitor::virtual_grf_interferes(int a, int b)
{
   int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
   int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);

   /* For dead code, just check if the def interferes with the other range. */
   if (this->virtual_grf_use[a] == -1) {
      return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
              this->virtual_grf_def[a] < this->virtual_grf_use[b]);
   }
   if (this->virtual_grf_use[b] == -1) {
      return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
              this->virtual_grf_def[b] < this->virtual_grf_use[a]);
   }

   return start <= end;
}

static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      brw_reg = brw_vec8_reg(reg->file,
                             reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(reg->imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(reg->imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(reg->imm.u);
         break;
      default:
         assert(!"not reached");
         break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}

void
fs_visitor::generate_code()
{
   unsigned int annotation_len = 0;
   int last_native_inst = 0;
   struct brw_instruction *if_stack[16], *loop_stack[16];
   int if_stack_depth = 0, loop_stack_depth = 0;
   int if_depth_in_loop[16];

   if_depth_in_loop[loop_stack_depth] = 0;

   memset(&if_stack, 0, sizeof(if_stack));
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();
      struct brw_reg src[3], dst;

      for (unsigned int i = 0; i < 3; i++) {
         src[i] = brw_reg_from_fs_reg(&inst->src[i]);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
         brw_MOV(p, dst, src[0]);
         break;
      case BRW_OPCODE_ADD:
         brw_ADD(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_MUL:
         brw_MUL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_FRC:
         brw_FRC(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDD:
         brw_RNDD(p, dst, src[0]);
         break;
      case BRW_OPCODE_RNDZ:
         brw_RNDZ(p, dst, src[0]);
         break;

      case BRW_OPCODE_AND:
         brw_AND(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_OR:
         brw_OR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_XOR:
         brw_XOR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_NOT:
         brw_NOT(p, dst, src[0]);
         break;
      case BRW_OPCODE_ASR:
         brw_ASR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHR:
         brw_SHR(p, dst, src[0], src[1]);
         break;
      case BRW_OPCODE_SHL:
         brw_SHL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_CMP:
         brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
         break;
      case BRW_OPCODE_SEL:
         brw_SEL(p, dst, src[0], src[1]);
         break;

      case BRW_OPCODE_IF:
         assert(if_stack_depth < 16);
         if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
         if_depth_in_loop[loop_stack_depth]++;
         if_stack_depth++;
         break;
      case BRW_OPCODE_ELSE:
         if_stack[if_stack_depth - 1] =
            brw_ELSE(p, if_stack[if_stack_depth - 1]);
         break;
      case BRW_OPCODE_ENDIF:
         if_stack_depth--;
         brw_ENDIF(p, if_stack[if_stack_depth]);
         if_depth_in_loop[loop_stack_depth]--;
         break;

      case BRW_OPCODE_DO:
         loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
         if_depth_in_loop[loop_stack_depth] = 0;
         break;

      case BRW_OPCODE_BREAK:
         brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;
      case BRW_OPCODE_CONTINUE:
         brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
         break;

      case BRW_OPCODE_WHILE: {
         struct brw_instruction *inst0, *inst1;
         GLuint br = 1;

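         /* Gen5 counts jump distances in 64-bit units rather than whole
          * 128-bit instructions, so the jump counts patched in below get
          * scaled by two there.
          */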
         if (intel->gen >= 5)
            br = 2;

         assert(loop_stack_depth > 0);
         loop_stack_depth--;
         inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
         /* patch all the BREAK/CONT instructions from last BGNLOOP */
         while (inst0 > loop_stack[loop_stack_depth]) {
            inst0--;
            if (inst0->header.opcode == BRW_OPCODE_BREAK &&
                inst0->bits3.if_else.jump_count == 0) {
               inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
            } else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
                       inst0->bits3.if_else.jump_count == 0) {
               inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
            }
         }
         break;
      }

      case FS_OPCODE_RCP:
      case FS_OPCODE_RSQ:
      case FS_OPCODE_SQRT:
      case FS_OPCODE_EXP2:
      case FS_OPCODE_LOG2:
      case FS_OPCODE_POW:
      case FS_OPCODE_SIN:
      case FS_OPCODE_COS:
         generate_math(inst, dst, src);
         break;
      case FS_OPCODE_LINTERP:
         generate_linterp(inst, dst, src);
         break;
      case FS_OPCODE_TEX:
      case FS_OPCODE_TXB:
      case FS_OPCODE_TXL:
         generate_tex(inst, dst, src[0]);
         break;
      case FS_OPCODE_DISCARD_NOT:
         generate_discard_not(inst, dst);
         break;
      case FS_OPCODE_DISCARD_AND:
         generate_discard_and(inst, src[0]);
         break;
      case FS_OPCODE_DDX:
         generate_ddx(inst, dst, src[0]);
         break;
      case FS_OPCODE_DDY:
         generate_ddy(inst, dst, src[0]);
         break;
      case FS_OPCODE_FB_WRITE:
         generate_fb_write(inst);
         break;
      default:
         if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
            _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
                          brw_opcodes[inst->opcode].name);
         } else {
            _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
         }
         this->fail = true;
      }

      if (annotation_len < p->nr_insn) {
         if (annotation_len < 16)
            annotation_len = 16;
         while (annotation_len < p->nr_insn)
            annotation_len *= 2;

         this->annotation_string = talloc_realloc(this->mem_ctx,
                                                  annotation_string,
                                                  const char *,
                                                  annotation_len);
         this->annotation_ir = talloc_realloc(this->mem_ctx,
                                              annotation_ir,
                                              ir_instruction *,
                                              annotation_len);
      }

      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
         this->annotation_string[i] = inst->annotation;
         this->annotation_ir[i] = inst->ir;
      }
      last_native_inst = p->nr_insn;
   }
}

GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   struct brw_shader *shader = NULL;
   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;

   if (!prog)
      return GL_FALSE;

   if (!using_new_fs)
      return GL_FALSE;

   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
         shader = (struct brw_shader *)prog->_LinkedShaders[i];
         break;
      }
   }
   if (!shader)
      return GL_FALSE;

   /* We always use 8-wide mode, at least for now.  For one thing, flow
    * control only works in 8-wide.  Also, when we're fragment-shader
    * bound we're almost always under register pressure as well, so
    * 8-wide keeps us away from the performance cliff of spilling
    * registers.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      v.emit_dummy_fs();
   } else {
      v.calculate_urb_setup();
      if (intel->gen < 6)
         v.emit_interpolation_setup_gen4();
      else
         v.emit_interpolation_setup_gen6();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         v.base_ir = ir;
         ir->accept(&v);
      }

      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();

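      /* Run the optimization passes to a fixed point; each pass can
       * open up opportunities for the others (e.g. constant propagation
       * leaves MOVs for dead code elimination to clean up).
       */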
      bool progress;
      do {
         progress = false;

         v.calculate_live_intervals();
         progress = v.propagate_constants() || progress;
         progress = v.register_coalesce() || progress;
         progress = v.dead_code_eliminate() || progress;
      } while (progress);

      if (0)
         v.assign_regs_trivial();
      else
         v.assign_regs();
   }

   if (!v.fail)
      v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   if (INTEL_DEBUG & DEBUG_WM) {
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
         if (last_annotation_ir != v.annotation_ir[i]) {
            last_annotation_ir = v.annotation_ir[i];
            if (last_annotation_ir) {
               printf(" ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != v.annotation_string[i]) {
            last_annotation_string = v.annotation_string[i];
            if (last_annotation_string)
               printf(" %s\n", last_annotation_string);
         }
         brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}