/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

extern "C" {

#include <sys/types.h>

#include "main/macros.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_optimize.h"
#include "program/register_allocate.h"
#include "program/sampler.h"
#include "program/hash_table.h"
#include "brw_context.h"
#include "brw_eu.h"
#include "brw_wm.h"
#include "talloc.h"
}
#include "../glsl/glsl_types.h"
#include "../glsl/ir_optimization.h"
#include "../glsl/ir_print_visitor.h"

enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE
};

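/* Virtual opcodes for the IR generated by this backend.  They start at
 * 256 to stay clear of the native BRW_OPCODE_* hardware opcodes (which
 * fit in a byte); each of these is lowered to real instructions by the
 * generate_*() methods below.
 */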
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};

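/* The new FS backend is opt-in while it's being brought up: it's only
 * used for fragment shaders, and only when INTEL_NEW_FS is set in the
 * environment (checked once in brw_link_shader() below).
 */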
static int using_new_fs = -1;
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);

struct gl_shader *
brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
{
   struct brw_shader *shader;

   shader = talloc_zero(NULL, struct brw_shader);
   if (shader) {
      shader->base.Type = type;
      shader->base.Name = name;
      _mesa_init_shader(ctx, &shader->base);
   }

   return &shader->base;
}

struct gl_shader_program *
brw_new_shader_program(GLcontext *ctx, GLuint name)
{
   struct brw_shader_program *prog;
   prog = talloc_zero(NULL, struct brw_shader_program);
   if (prog) {
      prog->base.Name = name;
      _mesa_init_shader_program(ctx, &prog->base);
   }
   return &prog->base;
}

GLboolean
brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
{
   if (!_mesa_ir_compile_shader(ctx, shader))
      return GL_FALSE;

   return GL_TRUE;
}

GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   struct intel_context *intel = intel_context(ctx);
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
         void *mem_ctx = talloc_new(NULL);
         bool progress;

         if (shader->ir)
            talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

         do_mat_op_to_vec(shader->ir);
         do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);
         do_lower_texture_projection(shader->ir);

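         /* Run the backend-specific lowering passes and the generic
          * optimizations to a fixed point, since each pass can expose
          * new opportunities for the others.
          */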
         do {
            progress = false;

            brw_do_channel_expressions(shader->ir);
            brw_do_vector_splitting(shader->ir);

            progress = do_lower_jumps(shader->ir, true, true,
                                      true, /* main return */
                                      false, /* continue */
                                      false /* loops */
                                      ) || progress;

            progress = do_common_optimization(shader->ir, true, 32) || progress;

            progress = lower_noise(shader->ir) || progress;
            progress =
               lower_variable_index_to_cond_assign(shader->ir,
                                                   GL_TRUE, /* input */
                                                   GL_TRUE, /* output */
                                                   GL_TRUE, /* temp */
                                                   GL_TRUE /* uniform */
                                                   ) || progress;
            if (intel->gen == 6) {
               progress = do_if_to_cond_assign(shader->ir) || progress;
            }
         } while (progress);

         validate_ir_tree(shader->ir);

         reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}

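/* Returns how many virtual-GRF components a value of the given type
 * occupies: e.g. float -> 1, vec4 -> 4, mat4 -> 16, vec4[8] -> 32.
 */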
static int
type_size(const struct glsl_type *type)
{
   unsigned int size, i;

   switch (type->base_type) {
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_BOOL:
      return type->components();
   case GLSL_TYPE_ARRAY:
      return type_size(type->fields.array) * type->length;
   case GLSL_TYPE_STRUCT:
      size = 0;
      for (i = 0; i < type->length; i++) {
         size += type_size(type->fields.structure[i].type);
      }
      return size;
   case GLSL_TYPE_SAMPLER:
      /* Samplers take up no register space, since they're baked in at
       * link time.
       */
      return 0;
   default:
      assert(!"not reached");
      return 0;
   }
}

class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM, FIXED_HW_REG, UNIFORM, BAD_FILE. */
   enum register_file file;
   /** virtual register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the virtual register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   bool negate;
   bool abs;
   struct brw_reg fixed_hw_reg;

   /** Value for file == IMM */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};

static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);

class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->header_present = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool header_present;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};

class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      this->variable_ht = hash_table_ctor(0,
                                          hash_table_pointer_hash,
                                          hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;

      this->virtual_grf_sizes = NULL;
      this->virtual_grf_next = 1;
      this->virtual_grf_array_size = 0;
      this->virtual_grf_def = NULL;
      this->virtual_grf_use = NULL;

      this->kill_emitted = false;
   }

   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   fs_reg *variable_storage(ir_variable *var);
   int virtual_grf_alloc(int size);

   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void calculate_urb_setup();
   void assign_urb_setup();
   void assign_regs();
   void assign_regs_trivial();
   void calculate_live_intervals();
   bool propagate_constants();
   bool dead_code_eliminate();
   bool virtual_grf_interferes(int a, int b);
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst, struct brw_reg temp);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   fs_reg *emit_fragcoord_interpolation(ir_variable *ir);
   fs_reg *emit_frontfacing_interpolation(ir_variable *ir);
   fs_reg *emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup_gen4();
   void emit_interpolation_setup_gen6();
   fs_inst *emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate);
   fs_inst *emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate);
   void emit_fb_writes();
   void emit_assignment_writes(fs_reg &l, fs_reg &r,
                               const glsl_type *type, bool predicated);

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);
   void setup_builtin_uniform_values(ir_variable *ir);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;

   int *virtual_grf_sizes;
   int virtual_grf_next;
   int virtual_grf_array_size;
   int *virtual_grf_def;
   int *virtual_grf_use;

   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;
   int urb_setup[FRAG_ATTRIB_MAX];
   bool kill_emitted;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;

   int grf_used;

};

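/* Allocates a new virtual GRF of 'size' consecutive components (each
 * of which gets its own SIMD8 register).  Virtual register number 0 is
 * reserved to mean "fixed hw reg" in fs_reg::reg, which is why
 * virtual_grf_next starts at 1 and slot 0 of virtual_grf_sizes stays
 * unused.
 */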
int
fs_visitor::virtual_grf_alloc(int size)
{
   if (virtual_grf_array_size <= virtual_grf_next) {
      if (virtual_grf_array_size == 0)
         virtual_grf_array_size = 16;
      else
         virtual_grf_array_size *= 2;
      virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
                                         int, virtual_grf_array_size);

      /* This slot is always unused. */
      virtual_grf_sizes[0] = 0;
   }
   virtual_grf_sizes[virtual_grf_next] = size;
   return virtual_grf_next++;
}

/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   this->type = BRW_REGISTER_TYPE_F;
}

int
brw_type_for_base_type(const struct glsl_type *type)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
      return BRW_REGISTER_TYPE_F;
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      return BRW_REGISTER_TYPE_D;
   case GLSL_TYPE_UINT:
      return BRW_REGISTER_TYPE_UD;
   case GLSL_TYPE_ARRAY:
   case GLSL_TYPE_STRUCT:
      /* These should be overridden with the type of the member when
       * dereferenced into.  BRW_REGISTER_TYPE_UD seems like a likely
       * way to trip up if we don't.
       */
      return BRW_REGISTER_TYPE_UD;
   default:
      assert(!"not reached");
      return BRW_REGISTER_TYPE_F;
   }
}

/** Automatic reg constructor. */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->virtual_grf_alloc(type_size(type));
   this->reg_offset = 0;
   this->type = brw_type_for_base_type(type);
}

fs_reg *
fs_visitor::variable_storage(ir_variable *var)
{
   return (fs_reg *)hash_table_find(this->variable_ht, var);
}

/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}


/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const struct gl_builtin_uniform_desc *statevar = NULL;

   for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
      if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0) {
         statevar = &_mesa_builtin_uniform_desc[i];
         break;
      }
   }

   if (!statevar) {
      this->fail = true;
      printf("Failed to find builtin uniform `%s'\n", ir->name);
      return;
   }

   int array_count;
   if (ir->type->is_array()) {
      array_count = ir->type->length;
   } else {
      array_count = 1;
   }

   for (int a = 0; a < array_count; a++) {
      for (unsigned int i = 0; i < statevar->num_elements; i++) {
         struct gl_builtin_uniform_element *element = &statevar->elements[i];
         int tokens[STATE_LENGTH];

         memcpy(tokens, element->tokens, sizeof(element->tokens));
         if (ir->type->is_array()) {
            tokens[1] = a;
         }

         /* This state reference has already been setup by ir_to_mesa,
          * but we'll get the same index back here.
          */
         int index = _mesa_add_state_reference(this->fp->Base.Parameters,
                                               (gl_state_index *)tokens);
         float *vec_values = this->fp->Base.Parameters->ParameterValues[index];

         /* Add each of the unique swizzles of the element as a
          * parameter.  This'll end up matching the expected layout of
          * the array/matrix/structure we're trying to fill in.
          */
         int last_swiz = -1;
         for (unsigned int j = 0; j < 4; j++) {
            int swiz = GET_SWZ(element->swizzle, j);
            if (swiz == last_swiz)
               break;
            last_swiz = swiz;

            c->prog_data.param[c->prog_data.nr_params++] = &vec_values[swiz];
         }
      }
   }
}

fs_reg *
fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   fs_reg wpos = *reg;
   fs_reg neg_y = this->pixel_y;
   neg_y.negate = true;

   /* gl_FragCoord.x */
   if (ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
   } else {
      emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.y */
   if (ir->origin_upper_left && ir->pixel_center_integer) {
      emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
   } else {
      fs_reg pixel_y = this->pixel_y;
      float offset = (ir->pixel_center_integer ? 0.0 : 0.5);

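      /* For a lower-left origin we want (height - 1) - pixel_y, plus the
       * half-pixel offset if centers aren't integer; negate pixel_y and
       * fold the constant terms into 'offset' to do it in one ADD.
       */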
      if (!ir->origin_upper_left) {
         pixel_y.negate = true;
         offset += c->key.drawable_height - 1.0;
      }

      emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
   }
   wpos.reg_offset++;

   /* gl_FragCoord.z */
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 2)));
   wpos.reg_offset++;

   /* gl_FragCoord.w: Already set up in emit_interpolation */
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));

   return reg;
}

fs_reg *
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   /* Interpolation is always in floating point regs. */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
         this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
         if (urb_setup[location] == -1) {
            /* If there's no incoming setup data for this slot, don't
             * emit interpolation for it.
             */
            attr.reg_offset += type->vector_elements;
            location++;
            continue;
         }

         for (unsigned int c = 0; c < type->vector_elements; c++) {
            struct brw_reg interp = interp_reg(location, c);
            emit(fs_inst(FS_OPCODE_LINTERP,
                         attr,
                         this->delta_x,
                         this->delta_y,
                         fs_reg(interp)));
            attr.reg_offset++;
         }
         attr.reg_offset -= type->vector_elements;
         if (intel->gen < 6) {
            for (unsigned int c = 0; c < type->vector_elements; c++) {
               emit(fs_inst(BRW_OPCODE_MUL,
                            attr,
                            attr,
                            this->pixel_w));
               attr.reg_offset++;
            }
         }
         location++;
      }
   }

   return reg;
}

fs_reg *
fs_visitor::emit_frontfacing_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);

   /* The frontfacing comes in as a bit in the thread payload. */
   if (intel->gen >= 6) {
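      /* On gen6 the back-facing flag is bit 15 of g0.0 (hence the
       * shift by 15): shift it down, invert it, and mask the result to
       * a clean 0/1 front-facing value.
       */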
      emit(fs_inst(BRW_OPCODE_ASR,
                   *reg,
                   fs_reg(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_D)),
                   fs_reg(15)));
      emit(fs_inst(BRW_OPCODE_NOT,
                   *reg,
                   *reg));
      emit(fs_inst(BRW_OPCODE_AND,
                   *reg,
                   *reg,
                   fs_reg(1)));
   } else {
      struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
      /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
       * us front face
       */
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
                                   *reg,
                                   fs_reg(r1_6ud),
                                   fs_reg(1u << 31)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
   }

   return reg;
}

void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
         reg = emit_fragcoord_interpolation(ir);
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
         reg = emit_frontfacing_interpolation(ir);
      } else {
         reg = emit_general_interpolation(ir);
      }
      assert(reg);
      hash_table_insert(this->variable_ht, reg, ir);
      return;
   }

   if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}

void
fs_visitor::visit(ir_dereference_variable *ir)
{
   fs_reg *reg = variable_storage(ir->var);
   this->result = *reg;
}

void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}

void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   element_size = type_size(ir->type);
   this->result.type = brw_type_for_base_type(ir->type);

   if (index) {
      assert(this->result.file == UNIFORM ||
             (this->result.file == GRF &&
              this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant array element");
   }
}

void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[2], temp;
   fs_reg result;
   fs_inst *inst;

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         ir_print_visitor v;
         printf("Failed to get tree for expression operand:\n");
         ir->operands[operand]->accept(&v);
         this->fail = true;
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

   switch (ir->operation) {
   case ir_unop_logic_not:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      this->result = op[0];
      break;
   case ir_unop_abs:
      op[0].abs = true;
      this->result = op[0];
      break;
   case ir_unop_sign:
      temp = fs_reg(this, ir->type);

      emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
      inst->predicated = true;

      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
      inst->predicated = true;

      break;
   case ir_unop_rcp:
      emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
      break;

   case ir_unop_exp2:
      emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
      break;
   case ir_unop_log2:
      emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
      emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
      break;
   case ir_unop_cos:
      emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
      break;

   case ir_unop_dFdx:
      emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
      break;
   case ir_unop_dFdy:
      emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
      break;

   case ir_binop_add:
      emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
      break;
   case ir_binop_div:
      assert(!"not reached: should be handled by ir_div_to_mul_rcp");
      break;
   case ir_binop_mod:
      assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
      break;

   case ir_binop_less:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_greater:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_lequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_LE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_gequal:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_GE;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_equal:
   case ir_binop_all_equal: /* same as equal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_Z;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;
   case ir_binop_nequal:
   case ir_binop_any_nequal: /* same as nequal for scalars */
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
      break;

   case ir_binop_logic_xor:
      emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_or:
      emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
      break;

   case ir_binop_logic_and:
      emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
      break;

   case ir_binop_dot:
   case ir_binop_cross:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_unop_sqrt:
      emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
      break;

   case ir_unop_rsq:
      emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
      break;

   case ir_unop_i2f:
   case ir_unop_b2f:
   case ir_unop_b2i:
      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
      break;
   case ir_unop_f2i:
      emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
      break;
   case ir_unop_f2b:
   case ir_unop_i2b:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
      break;

   case ir_unop_trunc:
      emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0]));
      break;
   case ir_unop_ceil:
      op[0].negate = !op[0].negate;
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      this->result.negate = true;
      break;
   case ir_unop_floor:
      inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
      break;
   case ir_unop_fract:
      inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
      break;

   case ir_binop_min:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_L;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;
   case ir_binop_max:
      inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
      inst->conditional_mod = BRW_CONDITIONAL_G;

      inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
      inst->predicated = true;
      break;

   case ir_binop_pow:
      inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
      break;

   case ir_unop_bit_not:
   case ir_unop_u2f:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
}

void
fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r,
                                   const glsl_type *type, bool predicated)
{
   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      for (unsigned int i = 0; i < type->components(); i++) {
         l.type = brw_type_for_base_type(type);
         r.type = brw_type_for_base_type(type);

         fs_inst *inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
         inst->predicated = predicated;

         l.reg_offset++;
         r.reg_offset++;
      }
      break;
   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.array, predicated);
      }
      break;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         emit_assignment_writes(l, r, type->fields.structure[i].type,
                                predicated);
      }
      break;

   case GLSL_TYPE_SAMPLER:
      break;

   default:
      assert(!"not reached");
      break;
   }
}

void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   if (ir->lhs->type->is_scalar() ||
       ir->lhs->type->is_vector()) {
      for (int i = 0; i < ir->lhs->type->vector_elements; i++) {
         if (ir->write_mask & (1 << i)) {
            inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
            if (ir->condition)
               inst->predicated = true;
            r.reg_offset++;
         }
         l.reg_offset++;
      }
   } else {
      emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL);
   }
}

fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   int mlen;
   int base_mrf = 2;
   bool simd16 = false;
   fs_reg orig_dst;

   if (ir->shadow_comparitor) {
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      coordinate));
         coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen = 3;

      if (ir->op == ir_tex) {
         /* There's no plain shadow compare message, so we use shadow
          * compare with a bias of 0.0.
          */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      fs_reg(0.0f)));
         mlen++;
      } else if (ir->op == ir_txb) {
         ir->lod_info.bias->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      } else {
         assert(ir->op == ir_txl);
         ir->lod_info.lod->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      }

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   } else if (ir->op == ir_tex) {
      for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      coordinate));
         coordinate.reg_offset++;
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen = 3;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      assert(ir->op == ir_txb || ir->op == ir_txl);

      for (mlen = 0; mlen < ir->coordinate->type->vector_elements * 2;) {
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      coordinate));
         coordinate.reg_offset++;
         mlen++;

         /* The unused upper half. */
         mlen++;
      }

      /* lod/bias appears after u/v/r. */
      mlen = 6;

      if (ir->op == ir_txb) {
         ir->lod_info.bias->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      } else {
         ir->lod_info.lod->accept(this);
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen),
                      this->result));
         mlen++;
      }

      /* The unused upper half. */
      mlen++;

      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk.  We'll need to move
       * this weirdness around to the expected layout.
       */
      simd16 = true;
      orig_dst = dst;
      dst = fs_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type,
                                                       2));
      dst.type = BRW_REGISTER_TYPE_F;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->mlen = mlen;

   if (simd16) {
      for (int i = 0; i < 4; i++) {
         emit(fs_inst(BRW_OPCODE_MOV, orig_dst, dst));
         orig_dst.reg_offset++;
         dst.reg_offset += 2;
      }
   }

   return inst;
}

fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate)
{
   /* gen5's SIMD8 sampler has slots for u, v, r, and array index up
    * front, followed by optional parameters like the shadow comparitor
    * or LOD bias.  If none of the optional parameters is present, the
    * base slots can be omitted from the message.
    *
    * When an optional parameter does force the base slots to be
    * counted, we still skip writing the unused ones, which may look
    * surprising in the disassembly.
    */
   int mlen;
   int base_mrf = 2;

   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   if (ir->shadow_comparitor) {
      mlen = MAX2(mlen, 4);

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      mlen = MAX2(mlen, 4);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      mlen = MAX2(mlen, 4);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }
   inst->mlen = mlen;

   return inst;
}

void
fs_visitor::visit(ir_texture *ir)
{
   fs_inst *inst = NULL;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   if (intel->gen < 5) {
      inst = emit_texture_gen4(ir, dst, coordinate);
   } else {
      inst = emit_texture_gen5(ir, dst, coordinate);
   }

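   /* Resolve the sampler to a texture unit: the uniform's value gives
    * the sampler index in the program, and SamplerUnits maps that
    * index to the texture image unit it's bound to.
    */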
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
                                      ctx->Shader.CurrentProgram,
                                      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   if (c->key.tex_swizzles[inst->sampler] != SWIZZLE_NOOP) {
      fs_reg swizzle_dst = fs_reg(this, glsl_type::vec4_type);

      for (int i = 0; i < 4; i++) {
         int swiz = GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
         fs_reg l = swizzle_dst;
         l.reg_offset += i;

         if (swiz == SWIZZLE_ZERO) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(0.0f)));
         } else if (swiz == SWIZZLE_ONE) {
            emit(fs_inst(BRW_OPCODE_MOV, l, fs_reg(1.0f)));
         } else {
            fs_reg r = dst;
            r.reg_offset += GET_SWZ(c->key.tex_swizzles[inst->sampler], i);
            emit(fs_inst(BRW_OPCODE_MOV, l, r));
         }
      }
      this->result = swizzle_dst;
   }
}

void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   if (ir->type->vector_elements == 1) {
      this->result.reg_offset += ir->mask.x;
      return;
   }

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      switch (i) {
      case 0:
         swiz = ir->mask.x;
         break;
      case 1:
         swiz = ir->mask.y;
         break;
      case 2:
         swiz = ir->mask.z;
         break;
      case 3:
         swiz = ir->mask.w;
         break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}

void
fs_visitor::visit(ir_discard *ir)
{
   fs_reg temp = fs_reg(this, glsl_type::uint_type);

   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD, temp, temp));
   kill_emitted = true;
}

void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_INT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_BOOL:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}

void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}

void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
                                   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;
      case ir_binop_nequal:
         inst->conditional_mod = BRW_CONDITIONAL_NZ;
         break;
      case ir_binop_gequal:
         inst->conditional_mod = BRW_CONDITIONAL_GE;
         break;
      case ir_binop_lequal:
         inst->conditional_mod = BRW_CONDITIONAL_LE;
         break;
      case ir_binop_greater:
         inst->conditional_mod = BRW_CONDITIONAL_G;
         break;
      case ir_binop_less:
         inst->conditional_mod = BRW_CONDITIONAL_L;
         break;
      default:
         assert(!"not reached: unknown loop condition");
         this->fail = true;
         break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}

void
fs_visitor::visit(ir_loop_jump *ir)
{
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      emit(fs_inst(BRW_OPCODE_BREAK));
      break;
   case ir_loop_jump::jump_continue:
      emit(fs_inst(BRW_OPCODE_CONTINUE));
      break;
   }
}

void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}

void
fs_visitor::visit(ir_function *ir)
{
   /* Ignore function bodies other than main() -- we shouldn't see calls to
    * them since they should all be inlined by the time we get here.
    */
   if (strcmp(ir->name, "main") == 0) {
      const ir_function_signature *sig;
      exec_list empty;

      sig = ir->matching_signature(&empty);

      assert(sig);

      foreach_iter(exec_list_iterator, iter, sig->body) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }
}

void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}

fs_inst *
fs_visitor::emit(fs_inst inst)
{
   fs_inst *list_inst = new(mem_ctx) fs_inst;
   *list_inst = inst;

   list_inst->annotation = this->current_annotation;
   list_inst->ir = this->base_ir;

   this->instructions.push_tail(list_inst);

   return list_inst;
}

/** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
void
fs_visitor::emit_dummy_fs()
{
   /* Everyone's favorite color. */
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 2),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 3),
                fs_reg(0.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 4),
                fs_reg(1.0f)));
   emit(fs_inst(BRW_OPCODE_MOV,
                fs_reg(MRF, 5),
                fs_reg(0.0f)));

   fs_inst *write;
   write = emit(fs_inst(FS_OPCODE_FB_WRITE,
                        fs_reg(0),
                        fs_reg(0)));
}

/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
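   /* Each attribute's plane-equation data is packed two channels per
    * register (four floats each), so pick the register for this channel
    * pair and then the low or high four-float half within it.
    */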
   int regnr = urb_setup[location] * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   assert(urb_setup[location] != -1);

   return brw_vec1_grf(regnr, stride);
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
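   /* g1 holds the X/Y origins of the 2x2 subspans.  brw_imm_v packs
    * eight 4-bit immediates, so adding 0x10101010 (+0,+1,+0,+1,...) to
    * the replicated X origins and 0x11001100 (+0,+0,+1,+1,...) to the
    * Y origins yields per-pixel coordinates.
    */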
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      this->delta_x = fs_reg(this, glsl_type::vec2_type);
      this->delta_y = this->delta_x;
      this->delta_y.reg_offset++;
   } else {
      this->delta_x = fs_reg(this, glsl_type::float_type);
      this->delta_y = fs_reg(this, glsl_type::float_type);
   }
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
   this->current_annotation = NULL;
}

/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute 1/pos.w";
   this->wpos_w = fs_reg(brw_vec8_grf(c->key.source_w_reg, 0));
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));

   this->delta_x = fs_reg(brw_vec8_grf(2, 0));
   this->delta_y = fs_reg(brw_vec8_grf(3, 0));

   this->current_annotation = NULL;
}

void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   GLboolean header_present = GL_TRUE;
   int nr = 0;

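   /* gen6 can omit the two-register message header when writing a
    * single color region and no discard has been emitted (the header
    * carries the pixel mask that a discard modifies).
    */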
   if (intel->gen >= 6 &&
       !this->kill_emitted &&
       c->key.nr_color_regions == 1) {
      header_present = false;
   }

   if (header_present) {
      /* m0, m1 header */
      nr += 2;
   }

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
         /* Pass through the payload depth. */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      if (this->frag_color || this->frag_data) {
         for (int i = 0; i < 4; i++) {
            emit(fs_inst(BRW_OPCODE_MOV,
                         fs_reg(MRF, color_mrf + i),
                         color));
            color.reg_offset++;
         }
      }

      if (this->frag_color)
         color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
      inst->header_present = header_present;
   }

   if (c->key.nr_color_regions == 0) {
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
      inst->header_present = header_present;
   }

   this->current_annotation = NULL;
}

void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;
   struct brw_reg implied_header;

   /* The header is 2 registers, built from g0 and g1: g0 usually goes
    * out via the SEND's implied move (gen6 copies it explicitly below),
    * and g1 is copied to m1 here.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);

   if (inst->header_present) {
      if (intel->gen >= 6) {
         brw_MOV(p,
                 brw_message_reg(0),
                 brw_vec8_grf(0, 0));
         implied_header = brw_null_reg();
      } else {
         implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW);
      }

      brw_MOV(p,
              brw_message_reg(1),
              brw_vec8_grf(1, 0));
   } else {
      implied_header = brw_null_reg();
   }

   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                0, /* base MRF */
                implied_header,
                inst->target,
                inst->mlen,
                0,
                eot);
}

void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

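   /* PLN reads both deltas as one aligned register pair, so it can only
    * be used when delta_y immediately follows delta_x (and, pre-gen6,
    * the pair starts on an even register).  Otherwise fall back to
    * LINE+MAC, which evaluates the same plane equation in two steps
    * through the accumulator.
    */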
2020 if (brw->has_pln &&
2021 delta_y.nr == delta_x.nr + 1 &&
2022 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
2023 brw_PLN(p, dst, interp, delta_x);
2024 } else {
2025 brw_LINE(p, brw_null_reg(), interp, delta_x);
2026 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
2027 }
2028 }
2029
2030 void
2031 fs_visitor::generate_math(fs_inst *inst,
2032 struct brw_reg dst, struct brw_reg *src)
2033 {
2034 int op;
2035
2036 switch (inst->opcode) {
2037 case FS_OPCODE_RCP:
2038 op = BRW_MATH_FUNCTION_INV;
2039 break;
2040 case FS_OPCODE_RSQ:
2041 op = BRW_MATH_FUNCTION_RSQ;
2042 break;
2043 case FS_OPCODE_SQRT:
2044 op = BRW_MATH_FUNCTION_SQRT;
2045 break;
2046 case FS_OPCODE_EXP2:
2047 op = BRW_MATH_FUNCTION_EXP;
2048 break;
2049 case FS_OPCODE_LOG2:
2050 op = BRW_MATH_FUNCTION_LOG;
2051 break;
2052 case FS_OPCODE_POW:
2053 op = BRW_MATH_FUNCTION_POW;
2054 break;
2055 case FS_OPCODE_SIN:
2056 op = BRW_MATH_FUNCTION_SIN;
2057 break;
2058 case FS_OPCODE_COS:
2059 op = BRW_MATH_FUNCTION_COS;
2060 break;
2061 default:
2062 assert(!"not reached: unknown math function");
2063 op = 0;
2064 break;
2065 }
2066
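/* On the pre-gen6 path the math operands travel through the message
 * registers: src[0] reaches m2 via the send's implied move, and POW's
 * second operand has to be staged in m3 by hand.
 */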
2067 if (inst->opcode == FS_OPCODE_POW) {
2068 brw_MOV(p, brw_message_reg(3), src[1]);
2069 }
2070
2071 brw_math(p, dst,
2072 op,
2073 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
2074 BRW_MATH_SATURATE_NONE,
2075 2, src[0],
2076 BRW_MATH_DATA_VECTOR,
2077 BRW_MATH_PRECISION_FULL);
2078 }
2079
2080 void
2081 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2082 {
2083 int msg_type = -1;
2084 int rlen = 4;
2085 uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
2086
2087 if (intel->gen >= 5) {
2088 switch (inst->opcode) {
2089 case FS_OPCODE_TEX:
2090 if (inst->shadow_compare) {
2091 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
2092 } else {
2093 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
2094 }
2095 break;
2096 case FS_OPCODE_TXB:
2097 if (inst->shadow_compare) {
2098 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
2099 } else {
2100 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
2101 }
2102 break;
2103 }
2104 } else {
2105 switch (inst->opcode) {
2106 case FS_OPCODE_TEX:
2107 /* Note that G45 and older determine shadow compare and dispatch width
2108 * from message length for most messages.
2109 */
2110 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
2111 if (inst->shadow_compare) {
2112 assert(inst->mlen == 5);
2113 } else {
2114 assert(inst->mlen <= 6);
2115 }
2116 break;
2117 case FS_OPCODE_TXB:
2118 if (inst->shadow_compare) {
2119 assert(inst->mlen == 5);
2120 msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;
2121 } else {
2122 assert(inst->mlen == 8);
2123 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
2124 simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
2125 }
2126 break;
2127 }
2128 }
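/* FINISHME: FS_OPCODE_TXL is dispatched here by generate_code() but
 * isn't handled by either switch above, so it trips this assertion.
 */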
2129 assert(msg_type != -1);
2130
2131 if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) {
2132 rlen = 8;
2133 dst = vec16(dst);
2134 }
2135
2136 /* Back up one message register, making room for the g0 header ahead of the texel coordinates; the mlen + 1 passed below accounts for it. */
2137 src.nr--;
2138
2139 brw_SAMPLE(p,
2140 retype(dst, BRW_REGISTER_TYPE_UW),
2141 src.nr,
2142 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
2143 SURF_INDEX_TEXTURE(inst->sampler),
2144 inst->sampler,
2145 WRITEMASK_XYZW,
2146 msg_type,
2147 rlen,
2148 inst->mlen + 1,
2149 0,
2150 1,
2151 simd_mode);
2152 }
2153
2154
2155 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
2156 * looking like:
2157 *
2158 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
2159 *
2160 * and we're trying to produce:
2161 *
2162 * DDX DDY
2163 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
2164 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
2165 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
2166 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
2167 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
2168 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
2169 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
2170 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
2171 *
2172 * plus another set of two subspans in 16-pixel dispatch mode.
2173 *
2174 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
2175 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
2176 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
2177 * between each other. We could probably do it like ddx and swizzle the right
2178 * order later, but bail for now and just produce
2179 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
2180 */
2181 void
2182 fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2183 {
2184 struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
2185 BRW_REGISTER_TYPE_F,
2186 BRW_VERTICAL_STRIDE_2,
2187 BRW_WIDTH_2,
2188 BRW_HORIZONTAL_STRIDE_0,
2189 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2190 struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
2191 BRW_REGISTER_TYPE_F,
2192 BRW_VERTICAL_STRIDE_2,
2193 BRW_WIDTH_2,
2194 BRW_HORIZONTAL_STRIDE_0,
2195 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
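/* With subnr 1, the <2;2,0> region reads tr,tr,br,br within each
 * subspan, while src1's subnr 0 reads tl,tl,bl,bl, so the ADD below
 * produces the DDX column shown above.
 */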
2196 brw_ADD(p, dst, src0, negate(src1));
2197 }
2198
2199 void
2200 fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
2201 {
2202 struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
2203 BRW_REGISTER_TYPE_F,
2204 BRW_VERTICAL_STRIDE_4,
2205 BRW_WIDTH_4,
2206 BRW_HORIZONTAL_STRIDE_0,
2207 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
2208 struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
2209 BRW_REGISTER_TYPE_F,
2210 BRW_VERTICAL_STRIDE_4,
2211 BRW_WIDTH_4,
2212 BRW_HORIZONTAL_STRIDE_0,
2213 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
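/* The <4;4,0> region at subnr 0 replicates each subspan's tl four
 * times and subnr 2 replicates bl, yielding the simplified
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) result noted above.
 */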
2214 brw_ADD(p, dst, src0, negate(src1));
2215 }
2216
2217 void
2218 fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp)
2219 {
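/* The channels enabled for this (possibly predicated) instruction are
 * exactly the pixels being discarded, so AND their complement into the
 * pixel mask in the g0 header; later messages such as the FB write
 * then leave those pixels alone.
 */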
2220 struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
2221 temp = brw_uw1_reg(temp.file, temp.nr, 0);
2222
2223 brw_push_insn_state(p);
2224 brw_set_mask_control(p, BRW_MASK_DISABLE);
2225 brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */
2226 brw_AND(p, g0, temp, g0);
2227 brw_pop_insn_state(p);
2228 }
2229
2230 void
2231 fs_visitor::assign_curb_setup()
2232 {
2233 c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
2234 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
2235
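/* The push constants land in the GRFs right after the fixed payload;
 * each GRF holds 8 floats, hence the ALIGN(nr_params, 8) / 8 read
 * length above.
 */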
2236 /* Map the offsets in the UNIFORM file to fixed HW regs. */
2237 foreach_iter(exec_list_iterator, iter, this->instructions) {
2238 fs_inst *inst = (fs_inst *)iter.get();
2239
2240 for (unsigned int i = 0; i < 3; i++) {
2241 if (inst->src[i].file == UNIFORM) {
2242 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
2243 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
2244 constant_nr / 8,
2245 constant_nr % 8);
2246
2247 inst->src[i].file = FIXED_HW_REG;
2248 inst->src[i].fixed_hw_reg = brw_reg;
2249 }
2250 }
2251 }
2252 }
2253
2254 void
2255 fs_visitor::calculate_urb_setup()
2256 {
2257 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
2258 urb_setup[i] = -1;
2259 }
2260
2261 int urb_next = 0;
2262 /* Figure out where each of the incoming setup attributes lands. */
2263 if (intel->gen >= 6) {
2264 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
2265 if (brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)) {
2266 urb_setup[i] = urb_next++;
2267 }
2268 }
2269 } else {
2270 /* FINISHME: The sf doesn't map VS->FS inputs for us very well. */
2271 for (unsigned int i = 0; i < VERT_RESULT_MAX; i++) {
2272 if (c->key.vp_outputs_written & BITFIELD64_BIT(i)) {
2273 int fp_index;
2274
2275 if (i >= VERT_RESULT_VAR0)
2276 fp_index = i - (VERT_RESULT_VAR0 - FRAG_ATTRIB_VAR0);
2277 else if (i <= VERT_RESULT_TEX7)
2278 fp_index = i;
2279 else
2280 fp_index = -1;
2281
2282 if (fp_index >= 0)
2283 urb_setup[fp_index] = urb_next++;
2284 }
2285 }
2286 }
2287
2288 /* Each attribute is 4 setup channels, each of which is half a reg. */
2289 c->prog_data.urb_read_length = urb_next * 2;
2290 }
2291
2292 void
2293 fs_visitor::assign_urb_setup()
2294 {
2295 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
2296
2297 /* Offset all the urb_setup[] index by the actual position of the
2298 * setup regs, now that the location of the constants has been chosen.
2299 */
2300 foreach_iter(exec_list_iterator, iter, this->instructions) {
2301 fs_inst *inst = (fs_inst *)iter.get();
2302
2303 if (inst->opcode != FS_OPCODE_LINTERP)
2304 continue;
2305
2306 assert(inst->src[2].file == FIXED_HW_REG);
2307
2308 inst->src[2].fixed_hw_reg.nr += urb_start;
2309 }
2310
2311 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
2312 }
2313
2314 static void
2315 assign_reg(int *reg_hw_locations, fs_reg *reg)
2316 {
2317 if (reg->file == GRF && reg->reg != 0) {
2318 reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
2319 reg->reg = 0;
2320 }
2321 }
2322
2323 void
2324 fs_visitor::assign_regs_trivial()
2325 {
2326 int last_grf = 0;
2327 int hw_reg_mapping[this->virtual_grf_next];
2328 int i;
2329
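/* Virtual GRF 0 is an unused placeholder; pack the rest contiguously
 * starting right after the payload and URB setup registers.
 */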
2330 hw_reg_mapping[0] = 0;
2331 hw_reg_mapping[1] = this->first_non_payload_grf;
2332 for (i = 2; i < this->virtual_grf_next; i++) {
2333 hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
2334 this->virtual_grf_sizes[i - 1]);
2335 }
2336 last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
2337
2338 foreach_iter(exec_list_iterator, iter, this->instructions) {
2339 fs_inst *inst = (fs_inst *)iter.get();
2340
2341 assign_reg(hw_reg_mapping, &inst->dst);
2342 assign_reg(hw_reg_mapping, &inst->src[0]);
2343 assign_reg(hw_reg_mapping, &inst->src[1]);
2344 }
2345
2346 this->grf_used = last_grf + 1;
2347 }
2348
2349 void
2350 fs_visitor::assign_regs()
2351 {
2352 int last_grf = 0;
2353 int hw_reg_mapping[this->virtual_grf_next + 1];
2354 int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
2355 int class_sizes[base_reg_count];
2356 int class_count = 0;
2357 int aligned_pair_class = -1;
2358
2359 /* Set up the register classes.
2360 *
2361 * The base registers store a scalar value. For texture samples,
2362 * we get virtual GRFs composed of 4 contiguous hw registers. For
2363 * structures and arrays, we store them as contiguous larger things
2364 * than that, though we should be able to do better most of the
2365 * time.
2366 */
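/* For example, a shader using only scalars plus texture results ends
 * up with class_sizes = {1, 4} (plus the size-2 class on pre-gen6
 * PLN hardware).
 */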
2367 class_sizes[class_count++] = 1;
2368 if (brw->has_pln && intel->gen < 6) {
2369 /* Always set up the (unaligned) pairs for gen5, so we can find
2370 * them for making the aligned pair class.
2371 */
2372 class_sizes[class_count++] = 2;
2373 }
2374 for (int r = 1; r < this->virtual_grf_next; r++) {
2375 int i;
2376
2377 for (i = 0; i < class_count; i++) {
2378 if (class_sizes[i] == this->virtual_grf_sizes[r])
2379 break;
2380 }
2381 if (i == class_count) {
2382 if (this->virtual_grf_sizes[r] >= base_reg_count) {
2383 fprintf(stderr, "Object too large to register allocate.\n");
2384 this->fail = true;
2385 }
2386
2387 class_sizes[class_count++] = this->virtual_grf_sizes[r];
2388 }
2389 }
2390
2391 int ra_reg_count = 0;
2392 int class_base_reg[class_count];
2393 int class_reg_count[class_count];
2394 int classes[class_count + 1];
2395
2396 for (int i = 0; i < class_count; i++) {
2397 class_base_reg[i] = ra_reg_count;
2398 class_reg_count[i] = base_reg_count - (class_sizes[i] - 1);
2399 ra_reg_count += class_reg_count[i];
2400 }
2401
2402 struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
2403 for (int i = 0; i < class_count; i++) {
2404 classes[i] = ra_alloc_reg_class(regs);
2405
2406 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2407 ra_class_add_reg(regs, classes[i], class_base_reg[i] + i_r);
2408 }
2409
2410 /* Add conflicts between our contiguous registers aliasing
2411 * base regs and other register classes' contiguous registers
2412 * that alias base regs, or the base regs themselves for classes[0].
2413 */
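/* For instance, a size-2 register based at hw reg N covers N and N+1,
 * so it conflicts with any register of any class that touches either.
 */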
2414 for (int c = 0; c <= i; c++) {
2415 for (int i_r = 0; i_r < class_reg_count[i]; i_r++) {
2416 for (int c_r = MAX2(0, i_r - (class_sizes[c] - 1));
2417 c_r < MIN2(class_reg_count[c], i_r + class_sizes[i]);
2418 c_r++) {
2419
2420 if (0) {
2421 printf("%d/%d conflicts %d/%d\n",
2422 class_sizes[i], this->first_non_payload_grf + i_r,
2423 class_sizes[c], this->first_non_payload_grf + c_r);
2424 }
2425
2426 ra_add_reg_conflict(regs,
2427 class_base_reg[i] + i_r,
2428 class_base_reg[c] + c_r);
2429 }
2430 }
2431 }
2432 }
2433
2434 /* Add a special class for aligned pairs, which we'll put delta_x/y
2435 * in on gen5 so that we can do PLN.
2436 */
2437 if (brw->has_pln && intel->gen < 6) {
2438 int reg_count = (base_reg_count - 1) / 2;
2439 int unaligned_pair_class = 1;
2440 assert(class_sizes[unaligned_pair_class] == 2);
2441
2442 aligned_pair_class = class_count;
2443 classes[aligned_pair_class] = ra_alloc_reg_class(regs);
2444 class_base_reg[aligned_pair_class] = 0;
2445 class_reg_count[aligned_pair_class] = 0;
2446 int start = (this->first_non_payload_grf & 1) ? 1 : 0;
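/* Class registers here map to hw reg first_non_payload_grf + i * 2 +
 * start, so `start` picks the parity that makes each pair begin on an
 * even hardware register, as PLN requires.
 */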
2447
2448 for (int i = 0; i < reg_count; i++) {
2449 ra_class_add_reg(regs, classes[aligned_pair_class],
2450 class_base_reg[unaligned_pair_class] + i * 2 + start);
2451 }
2452 class_count++;
2453 }
2454
2455 ra_set_finalize(regs);
2456
2457 struct ra_graph *g = ra_alloc_interference_graph(regs,
2458 this->virtual_grf_next);
2459 /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
2460 * with nodes.
2461 */
2462 ra_set_node_class(g, 0, classes[0]);
2463
2464 for (int i = 1; i < this->virtual_grf_next; i++) {
2465 for (int c = 0; c < class_count; c++) {
2466 if (class_sizes[c] == this->virtual_grf_sizes[i]) {
2467 if (aligned_pair_class >= 0 &&
2468 this->delta_x.reg == i) {
2469 ra_set_node_class(g, i, classes[aligned_pair_class]);
2470 } else {
2471 ra_set_node_class(g, i, classes[c]);
2472 }
2473 break;
2474 }
2475 }
2476
2477 for (int j = 1; j < i; j++) {
2478 if (virtual_grf_interferes(i, j)) {
2479 ra_add_node_interference(g, i, j);
2480 }
2481 }
2482 }
2483
2484 /* FINISHME: Handle spilling */
2485 if (!ra_allocate_no_spills(g)) {
2486 fprintf(stderr, "Failed to allocate registers.\n");
2487 this->fail = true;
2488 return;
2489 }
2490
2491 /* Get the chosen virtual registers for each node, and map virtual
2492 * regs in the register classes back down to real hardware reg
2493 * numbers.
2494 */
2495 hw_reg_mapping[0] = 0; /* unused */
2496 for (int i = 1; i < this->virtual_grf_next; i++) {
2497 int reg = ra_get_node_reg(g, i);
2498 int hw_reg = -1;
2499
2500 for (int c = 0; c < class_count; c++) {
2501 if (reg >= class_base_reg[c] &&
2502 reg < class_base_reg[c] + class_reg_count[c]) {
2503 hw_reg = reg - class_base_reg[c];
2504 break;
2505 }
2506 }
2507
2508 assert(hw_reg != -1);
2509 hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
2510 last_grf = MAX2(last_grf,
2511 hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
2512 }
2513
2514 foreach_iter(exec_list_iterator, iter, this->instructions) {
2515 fs_inst *inst = (fs_inst *)iter.get();
2516
2517 assign_reg(hw_reg_mapping, &inst->dst);
2518 assign_reg(hw_reg_mapping, &inst->src[0]);
2519 assign_reg(hw_reg_mapping, &inst->src[1]);
2520 }
2521
2522 this->grf_used = last_grf + 1;
2523
2524 talloc_free(g);
2525 talloc_free(regs);
2526 }
2527
2528 void
2529 fs_visitor::calculate_live_intervals()
2530 {
2531 int num_vars = this->virtual_grf_next;
2532 int *def = talloc_array(mem_ctx, int, num_vars);
2533 int *use = talloc_array(mem_ctx, int, num_vars);
2534 int loop_depth = 0;
2535 int loop_start = 0;
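/* Each virtual GRF gets a single [def, use] interval. Loops are
 * handled conservatively: anything touched inside a loop is treated
 * as live across the whole loop, since a value written on one
 * iteration may be read on the next.
 */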
2536
2537 for (int i = 0; i < num_vars; i++) {
2538 def[i] = 1 << 30;
2539 use[i] = -1;
2540 }
2541
2542 int ip = 0;
2543 foreach_iter(exec_list_iterator, iter, this->instructions) {
2544 fs_inst *inst = (fs_inst *)iter.get();
2545
2546 if (inst->opcode == BRW_OPCODE_DO) {
2547 if (loop_depth++ == 0)
2548 loop_start = ip;
2549 } else if (inst->opcode == BRW_OPCODE_WHILE) {
2550 loop_depth--;
2551
2552 if (loop_depth == 0) {
2553 /* FINISHME:
2554 *
2555 * Patch up any vars marked for use within the loop as
2556 * live until the end of it. This is conservative, as there
2557 * will often be variables defined and used inside the
2558 * loop but dead at the end of the loop body.
2559 */
2560 for (int i = 0; i < num_vars; i++) {
2561 if (use[i] == loop_start) {
2562 use[i] = ip;
2563 }
2564 }
2565 }
2566 } else {
2567 int eip = ip;
2568
2569 if (loop_depth)
2570 eip = loop_start;
2571
2572 for (unsigned int i = 0; i < 3; i++) {
2573 if (inst->src[i].file == GRF && inst->src[i].reg != 0) {
2574 use[inst->src[i].reg] = MAX2(use[inst->src[i].reg], eip);
2575 }
2576 }
2577 if (inst->dst.file == GRF && inst->dst.reg != 0) {
2578 def[inst->dst.reg] = MIN2(def[inst->dst.reg], eip);
2579 }
2580 }
2581
2582 ip++;
2583 }
2584
2585 talloc_free(this->virtual_grf_def);
2586 talloc_free(this->virtual_grf_use);
2587 this->virtual_grf_def = def;
2588 this->virtual_grf_use = use;
2589 }
2590
2591 /**
2592 * Attempts to move immediate constants into the immediate
2593 * constant slot of following instructions.
2594 *
2595 * Immediate constants are a bit tricky -- they have to be in the last
2596 * operand slot, and you can't do abs/negate on them.
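*
* For example:
*
*    MOV tmp, 2.0F
*    ADD dst, src0, tmp
*
* becomes
*
*    MOV tmp, 2.0F
*    ADD dst, src0, 2.0F
*
* leaving the now-dead MOV for dead_code_eliminate() to clean up.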
2597 */
2598
2599 bool
2600 fs_visitor::propagate_constants()
2601 {
2602 bool progress = false;
2603
2604 foreach_iter(exec_list_iterator, iter, this->instructions) {
2605 fs_inst *inst = (fs_inst *)iter.get();
2606
2607 if (inst->opcode != BRW_OPCODE_MOV ||
2608 inst->predicated ||
2609 inst->dst.file != GRF || inst->src[0].file != IMM ||
2610 inst->dst.type != inst->src[0].type)
2611 continue;
2612
2613 /* Don't bother with cases where we should have had the
2614 * operation on the constant folded in GLSL already.
2615 */
2616 if (inst->saturate)
2617 continue;
2618
2619 /* Found a move of a constant to a GRF. Find anything else using the GRF
2620 * before it's written, and replace it with the constant if we can.
2621 */
2622 exec_list_iterator scan_iter = iter;
2623 scan_iter.next();
2624 for (; scan_iter.has_next(); scan_iter.next()) {
2625 fs_inst *scan_inst = (fs_inst *)scan_iter.get();
2626
2627 if (scan_inst->opcode == BRW_OPCODE_DO ||
2628 scan_inst->opcode == BRW_OPCODE_WHILE ||
2629 scan_inst->opcode == BRW_OPCODE_ELSE ||
2630 scan_inst->opcode == BRW_OPCODE_ENDIF) {
2631 break;
2632 }
2633
2634 for (int i = 2; i >= 0; i--) {
2635 if (scan_inst->src[i].file != GRF ||
2636 scan_inst->src[i].reg != inst->dst.reg ||
2637 scan_inst->src[i].reg_offset != inst->dst.reg_offset)
2638 continue;
2639
2640 /* Don't bother with cases where we should have had the
2641 * operation on the constant folded in GLSL already.
2642 */
2643 if (scan_inst->src[i].negate || scan_inst->src[i].abs)
2644 continue;
2645
2646 switch (scan_inst->opcode) {
2647 case BRW_OPCODE_MOV:
2648 scan_inst->src[i] = inst->src[0];
2649 progress = true;
2650 break;
2651
2652 case BRW_OPCODE_MUL:
2653 case BRW_OPCODE_ADD:
2654 if (i == 1) {
2655 scan_inst->src[i] = inst->src[0];
2656 progress = true;
2657 } else if (i == 0 && scan_inst->src[1].file != IMM) {
2658 /* Fit this constant in by commuting the operands */
2659 scan_inst->src[0] = scan_inst->src[1];
2660 scan_inst->src[1] = inst->src[0];
progress = true;
2661 }
2662 break;
2663 case BRW_OPCODE_CMP:
2664 if (i == 1) {
2665 scan_inst->src[i] = inst->src[0];
2666 progress = true;
2667 }
break;
2668 }
2669 }
2670
2671 if (scan_inst->dst.file == GRF &&
2672 scan_inst->dst.reg == inst->dst.reg &&
2673 (scan_inst->dst.reg_offset == inst->dst.reg_offset ||
2674 scan_inst->opcode == FS_OPCODE_TEX)) {
2675 break;
2676 }
2677 }
2678 }
2679
2680 return progress;
2681 }
2682 /**
2683 * Must be called after calculate_live_intervals() to remove unused
2684 * writes to registers -- register allocation will fail otherwise
2685 * because something def'd but not used won't be considered to
2686 * interfere with other regs.
2687 */
2688 bool
2689 fs_visitor::dead_code_eliminate()
2690 {
2691 bool progress = false;
2692 int num_vars = this->virtual_grf_next;
2693 bool dead[num_vars];
2694
2695 for (int i = 0; i < num_vars; i++) {
2696 /* This would be ">=", but FS_OPCODE_DISCARD has a src == dst where
2697 * it writes dst then reads it as src.
2698 */
2699 dead[i] = this->virtual_grf_def[i] > this->virtual_grf_use[i];
2700
2701 if (dead[i]) {
2702 /* Mark off its interval so it won't interfere with anything. */
2703 this->virtual_grf_def[i] = -1;
2704 this->virtual_grf_use[i] = -1;
2705 }
2706 }
2707
2708 foreach_iter(exec_list_iterator, iter, this->instructions) {
2709 fs_inst *inst = (fs_inst *)iter.get();
2710
2711 if (inst->dst.file == GRF && dead[inst->dst.reg]) {
2712 inst->remove();
2713 progress = true;
2714 }
2715 }
2716
2717 return progress;
2718 }
2719
2720 bool
2721 fs_visitor::virtual_grf_interferes(int a, int b)
2722 {
2723 int start = MAX2(this->virtual_grf_def[a], this->virtual_grf_def[b]);
2724 int end = MIN2(this->virtual_grf_use[a], this->virtual_grf_use[b]);
2725
2726 /* For dead code, just check if the def interferes with the other range. */
2727 if (this->virtual_grf_use[a] == -1) {
2728 return (this->virtual_grf_def[a] >= this->virtual_grf_def[b] &&
2729 this->virtual_grf_def[a] < this->virtual_grf_use[b]);
2730 }
2731 if (this->virtual_grf_use[b] == -1) {
2732 return (this->virtual_grf_def[b] >= this->virtual_grf_def[a] &&
2733 this->virtual_grf_def[b] < this->virtual_grf_use[a]);
2734 }
2735
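/* Otherwise the intervals interfere iff they overlap: the later def
 * starts no later than the earlier use ends.
 */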
2736 return start <= end;
2737 }
2738
2739 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
2740 {
2741 struct brw_reg brw_reg;
2742
2743 switch (reg->file) {
2744 case GRF:
2745 case ARF:
2746 case MRF:
2747 brw_reg = brw_vec8_reg(reg->file,
2748 reg->hw_reg, 0);
2749 brw_reg = retype(brw_reg, reg->type);
2750 break;
2751 case IMM:
2752 switch (reg->type) {
2753 case BRW_REGISTER_TYPE_F:
2754 brw_reg = brw_imm_f(reg->imm.f);
2755 break;
2756 case BRW_REGISTER_TYPE_D:
2757 brw_reg = brw_imm_d(reg->imm.i);
2758 break;
2759 case BRW_REGISTER_TYPE_UD:
2760 brw_reg = brw_imm_ud(reg->imm.u);
2761 break;
2762 default:
2763 assert(!"not reached");
2764 break;
2765 }
2766 break;
2767 case FIXED_HW_REG:
2768 brw_reg = reg->fixed_hw_reg;
2769 break;
2770 case BAD_FILE:
2771 /* Probably unused. */
2772 brw_reg = brw_null_reg();
2773 break;
2774 case UNIFORM:
2775 assert(!"not reached");
2776 brw_reg = brw_null_reg();
2777 break;
2778 }
2779 if (reg->abs)
2780 brw_reg = brw_abs(brw_reg);
2781 if (reg->negate)
2782 brw_reg = negate(brw_reg);
2783
2784 return brw_reg;
2785 }
2786
2787 void
2788 fs_visitor::generate_code()
2789 {
2790 unsigned int annotation_len = 0;
2791 int last_native_inst = 0;
2792 struct brw_instruction *if_stack[16], *loop_stack[16];
2793 int if_stack_depth = 0, loop_stack_depth = 0;
2794 int if_depth_in_loop[16];
2795
2796 if_depth_in_loop[loop_stack_depth] = 0;
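/* if_depth_in_loop[] counts the IF blocks opened inside each loop
 * nesting level; BREAK and CONTINUE pass that count so the EU knows
 * how many enclosing IFs they are jumping out of.
 */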
2797
2798 memset(if_stack, 0, sizeof(if_stack));
2799 foreach_iter(exec_list_iterator, iter, this->instructions) {
2800 fs_inst *inst = (fs_inst *)iter.get();
2801 struct brw_reg src[3], dst;
2802
2803 for (unsigned int i = 0; i < 3; i++) {
2804 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
2805 }
2806 dst = brw_reg_from_fs_reg(&inst->dst);
2807
2808 brw_set_conditionalmod(p, inst->conditional_mod);
2809 brw_set_predicate_control(p, inst->predicated);
2810
2811 switch (inst->opcode) {
2812 case BRW_OPCODE_MOV:
2813 brw_MOV(p, dst, src[0]);
2814 break;
2815 case BRW_OPCODE_ADD:
2816 brw_ADD(p, dst, src[0], src[1]);
2817 break;
2818 case BRW_OPCODE_MUL:
2819 brw_MUL(p, dst, src[0], src[1]);
2820 break;
2821
2822 case BRW_OPCODE_FRC:
2823 brw_FRC(p, dst, src[0]);
2824 break;
2825 case BRW_OPCODE_RNDD:
2826 brw_RNDD(p, dst, src[0]);
2827 break;
2828 case BRW_OPCODE_RNDZ:
2829 brw_RNDZ(p, dst, src[0]);
2830 break;
2831
2832 case BRW_OPCODE_AND:
2833 brw_AND(p, dst, src[0], src[1]);
2834 break;
2835 case BRW_OPCODE_OR:
2836 brw_OR(p, dst, src[0], src[1]);
2837 break;
2838 case BRW_OPCODE_XOR:
2839 brw_XOR(p, dst, src[0], src[1]);
2840 break;
2841 case BRW_OPCODE_NOT:
2842 brw_NOT(p, dst, src[0]);
2843 break;
2844 case BRW_OPCODE_ASR:
2845 brw_ASR(p, dst, src[0], src[1]);
2846 break;
2847 case BRW_OPCODE_SHR:
2848 brw_SHR(p, dst, src[0], src[1]);
2849 break;
2850 case BRW_OPCODE_SHL:
2851 brw_SHL(p, dst, src[0], src[1]);
2852 break;
2853
2854 case BRW_OPCODE_CMP:
2855 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2856 break;
2857 case BRW_OPCODE_SEL:
2858 brw_SEL(p, dst, src[0], src[1]);
2859 break;
2860
2861 case BRW_OPCODE_IF:
2862 assert(if_stack_depth < 16);
2863 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2864 if_depth_in_loop[loop_stack_depth]++;
2865 if_stack_depth++;
2866 break;
2867 case BRW_OPCODE_ELSE:
2868 if_stack[if_stack_depth - 1] =
2869 brw_ELSE(p, if_stack[if_stack_depth - 1]);
2870 break;
2871 case BRW_OPCODE_ENDIF:
2872 if_stack_depth--;
2873 brw_ENDIF(p, if_stack[if_stack_depth]);
2874 if_depth_in_loop[loop_stack_depth]--;
2875 break;
2876
2877 case BRW_OPCODE_DO:
2878 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
2879 if_depth_in_loop[loop_stack_depth] = 0;
2880 break;
2881
2882 case BRW_OPCODE_BREAK:
2883 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
2884 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2885 break;
2886 case BRW_OPCODE_CONTINUE:
2887 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
2888 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2889 break;
2890
2891 case BRW_OPCODE_WHILE: {
2892 struct brw_instruction *inst0, *inst1;
2893 GLuint br = 1;
2894
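/* Jump counts are in units of 64 bits on gen5 and later, while
 * instructions are 128 bits, so branch distances are scaled by 2.
 */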
2895 if (intel->gen >= 5)
2896 br = 2;
2897
2898 assert(loop_stack_depth > 0);
2899 loop_stack_depth--;
2900 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
2901 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2902 while (inst0 > loop_stack[loop_stack_depth]) {
2903 inst0--;
2904 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
2905 inst0->bits3.if_else.jump_count == 0) {
2906 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
2907 }
2908 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
2909 inst0->bits3.if_else.jump_count == 0) {
2910 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
2911 }
2912 }
2913 }
2914 break;
2915
2916 case FS_OPCODE_RCP:
2917 case FS_OPCODE_RSQ:
2918 case FS_OPCODE_SQRT:
2919 case FS_OPCODE_EXP2:
2920 case FS_OPCODE_LOG2:
2921 case FS_OPCODE_POW:
2922 case FS_OPCODE_SIN:
2923 case FS_OPCODE_COS:
2924 generate_math(inst, dst, src);
2925 break;
2926 case FS_OPCODE_LINTERP:
2927 generate_linterp(inst, dst, src);
2928 break;
2929 case FS_OPCODE_TEX:
2930 case FS_OPCODE_TXB:
2931 case FS_OPCODE_TXL:
2932 generate_tex(inst, dst, src[0]);
2933 break;
2934 case FS_OPCODE_DISCARD:
2935 generate_discard(inst, dst /* src0 == dst */);
2936 break;
2937 case FS_OPCODE_DDX:
2938 generate_ddx(inst, dst, src[0]);
2939 break;
2940 case FS_OPCODE_DDY:
2941 generate_ddy(inst, dst, src[0]);
2942 break;
2943 case FS_OPCODE_FB_WRITE:
2944 generate_fb_write(inst);
2945 break;
2946 default:
2947 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2948 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2949 brw_opcodes[inst->opcode].name);
2950 } else {
2951 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2952 }
2953 this->fail = true;
2954 }
2955
2956 if (annotation_len < p->nr_insn) {
/* A single fs_inst may emit several native instructions, so one
 * doubling isn't guaranteed to be enough.
 */
2957 annotation_len = MAX2(annotation_len * 2, p->nr_insn);
2958 if (annotation_len < 16)
2959 annotation_len = 16;
2960
2961 this->annotation_string = talloc_realloc(this->mem_ctx,
2962 annotation_string,
2963 const char *,
2964 annotation_len);
2965 this->annotation_ir = talloc_realloc(this->mem_ctx,
2966 annotation_ir,
2967 ir_instruction *,
2968 annotation_len);
2969 }
2970
2971 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2972 this->annotation_string[i] = inst->annotation;
2973 this->annotation_ir[i] = inst->ir;
2974 }
2975 last_native_inst = p->nr_insn;
2976 }
2977 }
2978
2979 GLboolean
2980 brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
2981 {
2982 struct brw_compile *p = &c->func;
2983 struct intel_context *intel = &brw->intel;
2984 GLcontext *ctx = &intel->ctx;
2985 struct brw_shader *shader = NULL;
2986 struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
2987
2988 if (!prog)
2989 return GL_FALSE;
2990
2991 if (!using_new_fs)
2992 return GL_FALSE;
2993
2994 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
2995 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
2996 shader = (struct brw_shader *)prog->_LinkedShaders[i];
2997 break;
2998 }
2999 }
3000 if (!shader)
3001 return GL_FALSE;
3002
3003 /* We always use 8-wide mode, at least for now. For one, flow
3004 * control only works in 8-wide. Also, when we're fragment shader
3005 * bound, we're almost always under register pressure as well, so
3006 * 8-wide would save us from the performance cliff of spilling
3007 * regs.
3008 */
3009 c->dispatch_width = 8;
3010
3011 if (INTEL_DEBUG & DEBUG_WM) {
3012 printf("GLSL IR for native fragment shader %d:\n", prog->Name);
3013 _mesa_print_ir(shader->ir, NULL);
3014 printf("\n");
3015 }
3016
3017 /* Now the main event: Visit the shader IR and generate our FS IR for it.
3018 */
3019 fs_visitor v(c, shader);
3020
3021 if (0) {
3022 v.emit_dummy_fs();
3023 } else {
3024 v.calculate_urb_setup();
3025 if (intel->gen < 6)
3026 v.emit_interpolation_setup_gen4();
3027 else
3028 v.emit_interpolation_setup_gen6();
3029
3030 /* Generate FS IR for main(). (The visitor only descends into
3031 * functions called "main".)
3032 */
3033 foreach_iter(exec_list_iterator, iter, *shader->ir) {
3034 ir_instruction *ir = (ir_instruction *)iter.get();
3035 v.base_ir = ir;
3036 ir->accept(&v);
3037 }
3038
3039 v.emit_fb_writes();
3040 v.assign_curb_setup();
3041 v.assign_urb_setup();
3042
3043 bool progress;
3044 do {
3045 progress = false;
3046
3047 v.calculate_live_intervals();
3048 progress = v.propagate_constants() || progress;
3049 progress = v.dead_code_eliminate() || progress;
3050 } while (progress);
3051
3052 if (0)
3053 v.assign_regs_trivial();
3054 else
3055 v.assign_regs();
3056 }
3057
3058 if (!v.fail)
3059 v.generate_code();
3060
3061 assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */
3062
3063 if (v.fail)
3064 return GL_FALSE;
3065
3066 if (INTEL_DEBUG & DEBUG_WM) {
3067 const char *last_annotation_string = NULL;
3068 ir_instruction *last_annotation_ir = NULL;
3069
3070 printf("Native code for fragment shader %d:\n", prog->Name);
3071 for (unsigned int i = 0; i < p->nr_insn; i++) {
3072 if (last_annotation_ir != v.annotation_ir[i]) {
3073 last_annotation_ir = v.annotation_ir[i];
3074 if (last_annotation_ir) {
3075 printf(" ");
3076 last_annotation_ir->print();
3077 printf("\n");
3078 }
3079 }
3080 if (last_annotation_string != v.annotation_string[i]) {
3081 last_annotation_string = v.annotation_string[i];
3082 if (last_annotation_string)
3083 printf(" %s\n", last_annotation_string);
3084 }
3085 brw_disasm(stdout, &p->store[i], intel->gen);
3086 }
3087 printf("\n");
3088 }
3089
3090 c->prog_data.total_grf = v.grf_used;
3091 c->prog_data.total_scratch = 0;
3092
3093 return GL_TRUE;
3094 }