i965: Clean up the virtual GRF handling.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 extern "C" {
29
30 #include <sys/types.h>
31
32 #include "main/macros.h"
33 #include "main/shaderobj.h"
34 #include "main/uniforms.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_print.h"
37 #include "program/prog_optimize.h"
38 #include "program/sampler.h"
39 #include "program/hash_table.h"
40 #include "brw_context.h"
41 #include "brw_eu.h"
42 #include "brw_wm.h"
43 #include "talloc.h"
44 }
45 #include "../glsl/glsl_types.h"
46 #include "../glsl/ir_optimization.h"
47 #include "../glsl/ir_print_visitor.h"
48
/**
 * Register files an fs_reg can live in.  The first four reuse the hardware
 * encodings so they can be translated to struct brw_reg without remapping.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE
};
58
/**
 * Virtual opcodes used by the FS backend in addition to the hardware
 * BRW_OPCODE_* values.  Numbering starts at 256 so both ranges can share
 * fs_inst::opcode without colliding; these are lowered to real SEND/math
 * messages in generate_code().
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};
77
/* -1 means "INTEL_NEW_FS not yet checked in the environment"; set once in
 * brw_link_shader().
 */
static int using_new_fs = -1;
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
80
81 struct gl_shader *
82 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
83 {
84 struct brw_shader *shader;
85
86 shader = talloc_zero(NULL, struct brw_shader);
87 if (shader) {
88 shader->base.Type = type;
89 shader->base.Name = name;
90 _mesa_init_shader(ctx, &shader->base);
91 }
92
93 return &shader->base;
94 }
95
96 struct gl_shader_program *
97 brw_new_shader_program(GLcontext *ctx, GLuint name)
98 {
99 struct brw_shader_program *prog;
100 prog = talloc_zero(NULL, struct brw_shader_program);
101 if (prog) {
102 prog->base.Name = name;
103 _mesa_init_shader_program(ctx, &prog->base);
104 }
105 return &prog->base;
106 }
107
108 GLboolean
109 brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
110 {
111 if (!_mesa_ir_compile_shader(ctx, shader))
112 return GL_FALSE;
113
114 return GL_TRUE;
115 }
116
/**
 * Driver link hook.  When the INTEL_NEW_FS environment variable is set,
 * clones each linked fragment shader's IR and runs the lowering passes the
 * new FS backend requires, then falls through to the shared Mesa IR linker.
 */
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   /* Lazily probe the environment switch exactly once. */
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
         void *mem_ctx = talloc_new(NULL);
         bool progress;

         /* Work on a private clone so the lowering below doesn't disturb
          * shader->base.ir, which the ir_to_mesa path still consumes.
          */
         if (shader->ir)
            talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

         /* One-shot lowering passes: turn matrix ops, mod, div, sub, and
          * exp/log into forms the backend can emit directly.
          */
         do_mat_op_to_vec(shader->ir);
         do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);

         /* Iterate the remaining passes to a fixed point, since each can
          * expose new opportunities for the others.
          */
         do {
            progress = false;

            brw_do_channel_expressions(shader->ir);
            brw_do_vector_splitting(shader->ir);

            progress = do_lower_jumps(shader->ir, true, true,
                                      true, /* main return */
                                      false, /* continue */
                                      false /* loops */
                                      ) || progress;

            progress = do_common_optimization(shader->ir, true, 32) || progress;

            progress = lower_noise(shader->ir) || progress;
            progress =
               lower_variable_index_to_cond_assign(shader->ir,
                                                   GL_TRUE, /* input */
                                                   GL_TRUE, /* output */
                                                   GL_TRUE, /* temp */
                                                   GL_TRUE /* uniform */
                                                   ) || progress;
         } while (progress);

         validate_ir_tree(shader->ir);

         /* Move the clone's allocations off the scratch context so they
          * survive the talloc_free below.
          */
         reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}
177
178 static int
179 type_size(const struct glsl_type *type)
180 {
181 unsigned int size, i;
182
183 switch (type->base_type) {
184 case GLSL_TYPE_UINT:
185 case GLSL_TYPE_INT:
186 case GLSL_TYPE_FLOAT:
187 case GLSL_TYPE_BOOL:
188 return type->components();
189 case GLSL_TYPE_ARRAY:
190 return type_size(type->fields.array) * type->length;
191 case GLSL_TYPE_STRUCT:
192 size = 0;
193 for (i = 0; i < type->length; i++) {
194 size += type_size(type->fields.structure[i].type);
195 }
196 return size;
197 case GLSL_TYPE_SAMPLER:
198 /* Samplers take up no register space, since they're baked in at
199 * link time.
200 */
201 return 0;
202 default:
203 assert(!"not reached");
204 return 0;
205 }
206 }
207
/**
 * Operand/destination description for fs_inst: a register file, a virtual
 * or fixed register number, plus the modifiers the hardware supports.
 */
class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Shared initialization for all constructors.  Note it does not set
    * this->file or this->type; every constructor is responsible for those.
    */
   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg Immediate value constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** virtual register number. 0 = fixed hw reg */
   int reg;
   /** Offset within the virtual register. */
   int reg_offset;
   /** HW register number. Generally unset until register allocation. */
   int hw_reg;
   /** Register type. BRW_REGISTER_TYPE_* */
   int type;
   bool negate;   /* source modifier: negate the value */
   bool abs;      /* source modifier: absolute value */
   struct brw_reg fixed_hw_reg;

   /** Value for file == BRW_IMMMEDIATE_FILE */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};
298
/* Convenience registers: an unset operand, and the hardware null register
 * (used as a CMP destination when only the flags are wanted).
 */
static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);
301
/**
 * One backend instruction: a BRW_OPCODE_* or FS_OPCODE_* with up to three
 * sources and one destination, linked on fs_visitor::instructions.
 */
class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      /* talloc_zero_size: fields init() does not touch (mlen, ir,
       * annotation) come out zeroed for heap-allocated instructions.
       */
      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* NOTE(review): stack-constructed fs_inst values (as passed to emit())
    * leave mlen/ir/annotation uninitialized; emit() appears responsible
    * for setting them before the instruction is kept — confirm.
    */
   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};
386
/**
 * Walks linked GLSL IR for a fragment shader, emitting fs_inst IR, then
 * assigns registers and generates native code.  All per-compile
 * allocations hang off this->mem_ctx and are freed by the destructor.
 */
class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      /* Maps ir_variable * -> fs_reg * storage. */
      this->variable_ht = hash_table_ctor(0,
					  hash_table_pointer_hash,
					  hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;

      /* Virtual GRF numbering starts at 1: fs_reg::reg == 0 means a
       * fixed hardware register, so slot 0 is never handed out.
       */
      this->virtual_grf_sizes = NULL;
      this->virtual_grf_next = 1;
      this->virtual_grf_array_size = 0;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   fs_reg *variable_storage(ir_variable *var);
   int virtual_grf_alloc(int size);

   /* ir_visitor methods: each leaves its value (if any) in this->result. */
   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void assign_urb_setup();
   void assign_regs();
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
			 struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst, struct brw_reg temp);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_fragcoord_interpolation(ir_variable *ir);
   void emit_general_interpolation(ir_variable *ir);
   void emit_interpolation_setup();
   void emit_fb_writes();

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);
   void setup_builtin_uniform_values(ir_variable *ir);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;

   /* Per-virtual-GRF size table; see virtual_grf_alloc(). */
   int *virtual_grf_sizes;
   int virtual_grf_next;
   int virtual_grf_array_size;

   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;

   int grf_used;

};
511
512 int
513 fs_visitor::virtual_grf_alloc(int size)
514 {
515 if (virtual_grf_array_size <= virtual_grf_next) {
516 if (virtual_grf_array_size == 0)
517 virtual_grf_array_size = 16;
518 else
519 virtual_grf_array_size *= 2;
520 virtual_grf_sizes = talloc_realloc(mem_ctx, virtual_grf_sizes,
521 int, virtual_grf_array_size);
522
523 /* This slot is always unused. */
524 virtual_grf_sizes[0] = 0;
525 }
526 virtual_grf_sizes[virtual_grf_next] = size;
527 return virtual_grf_next++;
528 }
529
530 /** Fixed HW reg constructor. */
531 fs_reg::fs_reg(enum register_file file, int hw_reg)
532 {
533 init();
534 this->file = file;
535 this->hw_reg = hw_reg;
536 this->type = BRW_REGISTER_TYPE_F;
537 }
538
539 int
540 brw_type_for_base_type(const struct glsl_type *type)
541 {
542 switch (type->base_type) {
543 case GLSL_TYPE_FLOAT:
544 return BRW_REGISTER_TYPE_F;
545 case GLSL_TYPE_INT:
546 case GLSL_TYPE_BOOL:
547 return BRW_REGISTER_TYPE_D;
548 case GLSL_TYPE_UINT:
549 return BRW_REGISTER_TYPE_UD;
550 case GLSL_TYPE_ARRAY:
551 case GLSL_TYPE_STRUCT:
552 /* These should be overridden with the type of the member when
553 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
554 * way to trip up if we don't.
555 */
556 return BRW_REGISTER_TYPE_UD;
557 default:
558 assert(!"not reached");
559 return BRW_REGISTER_TYPE_F;
560 }
561 }
562
563 /** Automatic reg constructor. */
564 fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
565 {
566 init();
567
568 this->file = GRF;
569 this->reg = v->virtual_grf_alloc(type_size(type));
570 this->reg_offset = 0;
571 this->type = brw_type_for_base_type(type);
572 }
573
574 fs_reg *
575 fs_visitor::variable_storage(ir_variable *var)
576 {
577 return (fs_reg *)hash_table_find(this->variable_ht, var);
578 }
579
580 /* Our support for uniforms is piggy-backed on the struct
581 * gl_fragment_program, because that's where the values actually
582 * get stored, rather than in some global gl_shader_program uniform
583 * store.
584 */
585 int
586 fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
587 {
588 unsigned int offset = 0;
589 float *vec_values;
590
591 if (type->is_matrix()) {
592 const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
593 type->vector_elements,
594 1);
595
596 for (unsigned int i = 0; i < type->matrix_columns; i++) {
597 offset += setup_uniform_values(loc + offset, column);
598 }
599
600 return offset;
601 }
602
603 switch (type->base_type) {
604 case GLSL_TYPE_FLOAT:
605 case GLSL_TYPE_UINT:
606 case GLSL_TYPE_INT:
607 case GLSL_TYPE_BOOL:
608 vec_values = fp->Base.Parameters->ParameterValues[loc];
609 for (unsigned int i = 0; i < type->vector_elements; i++) {
610 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
611 }
612 return 1;
613
614 case GLSL_TYPE_STRUCT:
615 for (unsigned int i = 0; i < type->length; i++) {
616 offset += setup_uniform_values(loc + offset,
617 type->fields.structure[i].type);
618 }
619 return offset;
620
621 case GLSL_TYPE_ARRAY:
622 for (unsigned int i = 0; i < type->length; i++) {
623 offset += setup_uniform_values(loc + offset, type->fields.array);
624 }
625 return offset;
626
627 case GLSL_TYPE_SAMPLER:
628 /* The sampler takes up a slot, but we don't use any values from it. */
629 return 1;
630
631 default:
632 assert(!"not reached");
633 return 0;
634 }
635 }
636
637
/* Our support for builtin uniforms is even scarier than non-builtin.
 * It sits on top of the PROG_STATE_VAR parameters that are
 * automatically updated from GL context state.
 */
void
fs_visitor::setup_builtin_uniform_values(ir_variable *ir)
{
   const struct gl_builtin_uniform_desc *statevar = NULL;

   /* Find the descriptor for this gl_* name.  The table is terminated by
    * an entry with a NULL name, so a failed search leaves statevar
    * pointing at that terminator.
    */
   for (unsigned int i = 0; _mesa_builtin_uniform_desc[i].name; i++) {
      statevar = &_mesa_builtin_uniform_desc[i];
      if (strcmp(ir->name, _mesa_builtin_uniform_desc[i].name) == 0)
	 break;
   }

   if (!statevar->name) {
      this->fail = true;
      printf("Failed to find builtin uniform `%s'\n", ir->name);
      return;
   }

   int array_count;
   if (ir->type->is_array()) {
      array_count = ir->type->length;
   } else {
      array_count = 1;
   }

   for (int a = 0; a < array_count; a++) {
      for (unsigned int i = 0; i < statevar->num_elements; i++) {
	 struct gl_builtin_uniform_element *element = &statevar->elements[i];
	 int tokens[STATE_LENGTH];

	 memcpy(tokens, element->tokens, sizeof(element->tokens));
	 /* For arrays, patch the element index into the state tokens. */
	 if (ir->type->is_array()) {
	    tokens[1] = a;
	 }

	 /* This state reference has already been setup by ir_to_mesa,
	  * but we'll get the same index back here.
	  */
	 int index = _mesa_add_state_reference(this->fp->Base.Parameters,
					       (gl_state_index *)tokens);
	 float *vec_values = this->fp->Base.Parameters->ParameterValues[index];

	 /* Add each of the unique swizzles of the element as a
	  * parameter.  This'll end up matching the expected layout of
	  * the array/matrix/structure we're trying to fill in.
	  *
	  * NOTE(review): this inner `i` shadows the element-loop `i`
	  * above; here it indexes swizzle components, which is what
	  * vec_values[i] wants — confirm the shadowing is intentional.
	  */
	 int last_swiz = -1;
	 for (unsigned int i = 0; i < 4; i++) {
	    int this_swiz = GET_SWZ(element->swizzle, i);
	    /* A repeated swizzle marks the end of the unique components. */
	    if (this_swiz == last_swiz)
	       break;
	    last_swiz = this_swiz;

	    c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
	 }
      }
   }
}
699
700 void
701 fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
702 {
703 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
704 fs_reg wpos = *reg;
705 fs_reg neg_y = this->pixel_y;
706 neg_y.negate = true;
707
708 /* gl_FragCoord.x */
709 if (ir->pixel_center_integer) {
710 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
711 } else {
712 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
713 }
714 wpos.reg_offset++;
715
716 /* gl_FragCoord.y */
717 if (ir->origin_upper_left && ir->pixel_center_integer) {
718 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
719 } else {
720 fs_reg pixel_y = this->pixel_y;
721 float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
722
723 if (!ir->origin_upper_left) {
724 pixel_y.negate = true;
725 offset += c->key.drawable_height - 1.0;
726 }
727
728 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
729 }
730 wpos.reg_offset++;
731
732 /* gl_FragCoord.z */
733 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
734 interp_reg(FRAG_ATTRIB_WPOS, 2)));
735 wpos.reg_offset++;
736
737 /* gl_FragCoord.w: Already set up in emit_interpolation */
738 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));
739
740 hash_table_insert(this->variable_ht, reg, ir);
741 }
742
743
/**
 * Sets up storage for an ordinary "in" variable and emits LINTERP +
 * perspective-divide (MUL by pixel_w) instructions for every component of
 * every array element / matrix column it occupies.
 */
void
fs_visitor::emit_general_interpolation(ir_variable *ir)
{
   fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
   /* Interpolation is always in floating point regs. */
   reg->type = BRW_REGISTER_TYPE_F;
   fs_reg attr = *reg;

   unsigned int array_elements;
   const glsl_type *type;

   if (ir->type->is_array()) {
      array_elements = ir->type->length;
      if (array_elements == 0) {
	 this->fail = true;
      }
      type = ir->type->fields.array;
   } else {
      array_elements = 1;
      type = ir->type;
   }

   /* Each array element / matrix column occupies one input slot. */
   int location = ir->location;
   for (unsigned int i = 0; i < array_elements; i++) {
      for (unsigned int j = 0; j < type->matrix_columns; j++) {
	 if (!(fp->Base.InputsRead & BITFIELD64_BIT(location))) {
	    /* If there's no incoming setup data for this slot, don't
	     * emit interpolation for it (since it's not used, and
	     * we'd fall over later trying to find the setup data.
	     */
	    attr.reg_offset += type->vector_elements;
	    continue;
	 }

	 /* First pass: linear interpolation of each component. */
	 for (unsigned int c = 0; c < type->vector_elements; c++) {
	    struct brw_reg interp = interp_reg(location, c);
	    emit(fs_inst(FS_OPCODE_LINTERP,
			 attr,
			 this->delta_x,
			 this->delta_y,
			 fs_reg(interp)));
	    attr.reg_offset++;
	 }
	 /* Rewind and do a second pass: multiply by 1/w for perspective
	  * correction.
	  */
	 attr.reg_offset -= type->vector_elements;

	 for (unsigned int c = 0; c < type->vector_elements; c++) {
	    emit(fs_inst(BRW_OPCODE_MUL,
			 attr,
			 attr,
			 this->pixel_w));
	    attr.reg_offset++;
	 }
	 location++;
      }
   }

   hash_table_insert(this->variable_ht, reg, ir);
}
802
/**
 * Assigns storage for a variable declaration: special-cases the built-in
 * fragment outputs and inputs, uniform parameter setup, and plain
 * temporaries (which get a fresh virtual GRF).
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   /* Remember the built-in outputs; emit_fb_writes() consumes these. */
   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
	 emit_fragcoord_interpolation(ir);
	 return;
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
	 reg = new(this->mem_ctx) fs_reg(this, ir->type);
	 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
	 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
	  * us front face
	  */
	 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
				      *reg,
				      fs_reg(r1_6ud),
				      fs_reg(1u << 31)));
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 /* Normalize the all-ones CMP result to the 0/1 bool convention. */
	 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
	 emit_general_interpolation(ir);
	 return;
      }
   }

   if (ir->mode == ir_var_uniform) {
      /* Record where this uniform's values start in prog_data.param
       * before the setup calls append to it.
       */
      int param_index = c->prog_data.nr_params;

      if (!strncmp(ir->name, "gl_", 3)) {
	 setup_builtin_uniform_values(ir);
      } else {
	 setup_uniform_values(ir->location, ir->type);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   /* Anything else (temporaries, outputs) gets a fresh virtual GRF. */
   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}
858
859 void
860 fs_visitor::visit(ir_dereference_variable *ir)
861 {
862 fs_reg *reg = variable_storage(ir->var);
863 this->result = *reg;
864 }
865
866 void
867 fs_visitor::visit(ir_dereference_record *ir)
868 {
869 const glsl_type *struct_type = ir->record->type;
870
871 ir->record->accept(this);
872
873 unsigned int offset = 0;
874 for (unsigned int i = 0; i < struct_type->length; i++) {
875 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
876 break;
877 offset += type_size(struct_type->fields.structure[i].type);
878 }
879 this->result.reg_offset += offset;
880 this->result.type = brw_type_for_base_type(ir->type);
881 }
882
883 void
884 fs_visitor::visit(ir_dereference_array *ir)
885 {
886 ir_constant *index;
887 int element_size;
888
889 ir->array->accept(this);
890 index = ir->array_index->as_constant();
891
892 element_size = type_size(ir->type);
893 this->result.type = brw_type_for_base_type(ir->type);
894
895 if (index) {
896 assert(this->result.file == UNIFORM ||
897 (this->result.file == GRF &&
898 this->result.reg != 0));
899 this->result.reg_offset += index->value.i[0] * element_size;
900 } else {
901 assert(!"FINISHME: non-constant array element");
902 }
903 }
904
905 void
906 fs_visitor::visit(ir_expression *ir)
907 {
908 unsigned int operand;
909 fs_reg op[2], temp;
910 fs_reg result;
911 fs_inst *inst;
912
913 for (operand = 0; operand < ir->get_num_operands(); operand++) {
914 ir->operands[operand]->accept(this);
915 if (this->result.file == BAD_FILE) {
916 ir_print_visitor v;
917 printf("Failed to get tree for expression operand:\n");
918 ir->operands[operand]->accept(&v);
919 this->fail = true;
920 }
921 op[operand] = this->result;
922
923 /* Matrix expression operands should have been broken down to vector
924 * operations already.
925 */
926 assert(!ir->operands[operand]->type->is_matrix());
927 /* And then those vector operands should have been broken down to scalar.
928 */
929 assert(!ir->operands[operand]->type->is_vector());
930 }
931
932 /* Storage for our result. If our result goes into an assignment, it will
933 * just get copy-propagated out, so no worries.
934 */
935 this->result = fs_reg(this, ir->type);
936
937 switch (ir->operation) {
938 case ir_unop_logic_not:
939 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
940 break;
941 case ir_unop_neg:
942 op[0].negate = !op[0].negate;
943 this->result = op[0];
944 break;
945 case ir_unop_abs:
946 op[0].abs = true;
947 this->result = op[0];
948 break;
949 case ir_unop_sign:
950 temp = fs_reg(this, ir->type);
951
952 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
953
954 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
955 inst->conditional_mod = BRW_CONDITIONAL_G;
956 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
957 inst->predicated = true;
958
959 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
960 inst->conditional_mod = BRW_CONDITIONAL_L;
961 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
962 inst->predicated = true;
963
964 break;
965 case ir_unop_rcp:
966 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
967 break;
968
969 case ir_unop_exp2:
970 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
971 break;
972 case ir_unop_log2:
973 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
974 break;
975 case ir_unop_exp:
976 case ir_unop_log:
977 assert(!"not reached: should be handled by ir_explog_to_explog2");
978 break;
979 case ir_unop_sin:
980 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
981 break;
982 case ir_unop_cos:
983 emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
984 break;
985
986 case ir_unop_dFdx:
987 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
988 break;
989 case ir_unop_dFdy:
990 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
991 break;
992
993 case ir_binop_add:
994 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
995 break;
996 case ir_binop_sub:
997 assert(!"not reached: should be handled by ir_sub_to_add_neg");
998 break;
999
1000 case ir_binop_mul:
1001 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
1002 break;
1003 case ir_binop_div:
1004 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1005 break;
1006 case ir_binop_mod:
1007 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1008 break;
1009
1010 case ir_binop_less:
1011 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1012 inst->conditional_mod = BRW_CONDITIONAL_L;
1013 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1014 break;
1015 case ir_binop_greater:
1016 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1017 inst->conditional_mod = BRW_CONDITIONAL_G;
1018 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1019 break;
1020 case ir_binop_lequal:
1021 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1022 inst->conditional_mod = BRW_CONDITIONAL_LE;
1023 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1024 break;
1025 case ir_binop_gequal:
1026 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1027 inst->conditional_mod = BRW_CONDITIONAL_GE;
1028 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1029 break;
1030 case ir_binop_equal:
1031 case ir_binop_all_equal: /* same as nequal for scalars */
1032 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1033 inst->conditional_mod = BRW_CONDITIONAL_Z;
1034 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1035 break;
1036 case ir_binop_nequal:
1037 case ir_binop_any_nequal: /* same as nequal for scalars */
1038 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1039 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1040 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
1041 break;
1042
1043 case ir_binop_logic_xor:
1044 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
1045 break;
1046
1047 case ir_binop_logic_or:
1048 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
1049 break;
1050
1051 case ir_binop_logic_and:
1052 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
1053 break;
1054
1055 case ir_binop_dot:
1056 case ir_binop_cross:
1057 case ir_unop_any:
1058 assert(!"not reached: should be handled by brw_fs_channel_expressions");
1059 break;
1060
1061 case ir_unop_noise:
1062 assert(!"not reached: should be handled by lower_noise");
1063 break;
1064
1065 case ir_unop_sqrt:
1066 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
1067 break;
1068
1069 case ir_unop_rsq:
1070 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
1071 break;
1072
1073 case ir_unop_i2f:
1074 case ir_unop_b2f:
1075 case ir_unop_b2i:
1076 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
1077 break;
1078 case ir_unop_f2i:
1079 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
1080 break;
1081 case ir_unop_f2b:
1082 case ir_unop_i2b:
1083 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
1084 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1085
1086 case ir_unop_trunc:
1087 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1088 break;
1089 case ir_unop_ceil:
1090 op[0].negate = ~op[0].negate;
1091 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1092 this->result.negate = true;
1093 break;
1094 case ir_unop_floor:
1095 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
1096 break;
1097 case ir_unop_fract:
1098 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
1099 break;
1100
1101 case ir_binop_min:
1102 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1103 inst->conditional_mod = BRW_CONDITIONAL_L;
1104
1105 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
1106 inst->predicated = true;
1107 break;
1108 case ir_binop_max:
1109 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
1110 inst->conditional_mod = BRW_CONDITIONAL_G;
1111
1112 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
1113 inst->predicated = true;
1114 break;
1115
1116 case ir_binop_pow:
1117 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
1118 break;
1119
1120 case ir_unop_bit_not:
1121 case ir_unop_u2f:
1122 case ir_binop_lshift:
1123 case ir_binop_rshift:
1124 case ir_binop_bit_and:
1125 case ir_binop_bit_xor:
1126 case ir_binop_bit_or:
1127 assert(!"GLSL 1.30 features unsupported");
1128 break;
1129 }
1130 }
1131
/**
 * Emits the MOVs for an assignment, honoring the write mask and an
 * optional condition (lowered to per-instruction predication).
 */
void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   int i;
   int write_mask;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).  This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      write_mask = WRITEMASK_XYZW;
   } else {
      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
      write_mask = ir->write_mask;
   }

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   /* Copy every written component.  The write mask only covers the first
    * vec4's worth; components at index >= 4 (aggregates) always copy.
    * Note r only advances on components actually written, while l always
    * advances, keeping the two in step across masked-out channels.
    */
   for (i = 0; i < type_size(ir->lhs->type); i++) {
      if (i >= 4 || (write_mask & (1 << i))) {
	 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 if (ir->condition)
	    inst->predicated = true;
	 r.reg_offset++;
      }
      l.reg_offset++;
   }
}
1179
/**
 * Sets up the MRF payload (coordinates, shadow comparator, LOD/bias) for a
 * texture operation and emits the corresponding FS_OPCODE_TEX/TXB/TXL
 * instruction.  mlen counts the payload registers, excluding the g0 header
 * that generate_tex() adds.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   int base_mrf = 2;
   fs_inst *inst = NULL;
   unsigned int mlen = 0;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   if (ir->projector) {
      /* Divide the coordinate through by the projector: coord * (1/q). */
      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);

      ir->projector->accept(this);
      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));

      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
	 coordinate.reg_offset++;
	 proj_coordinate.reg_offset++;
      }
      proj_coordinate.reg_offset = 0;

      coordinate = proj_coordinate;
   }

   /* Load the coordinate components into consecutive MRFs. */
   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5)
      mlen = 3;

   if (ir->shadow_comparitor) {
      /* For shadow comparisons, we have to supply u,v,r. */
      mlen = 3;

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   /* Do we ever want to handle writemasking on texture samples?  Is it
    * performance relevant?
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      /* Bias goes in the MRF right after the coordinate payload. */
      ir->lod_info.bias->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   /* Translate the uniform's sampler index to the actual texture unit. */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
				      ctx->Shader.CurrentProgram,
				      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;
   inst->mlen = mlen;
}
1266
1267 void
1268 fs_visitor::visit(ir_swizzle *ir)
1269 {
1270 ir->val->accept(this);
1271 fs_reg val = this->result;
1272
1273 fs_reg result = fs_reg(this, ir->type);
1274 this->result = result;
1275
1276 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1277 fs_reg channel = val;
1278 int swiz = 0;
1279
1280 switch (i) {
1281 case 0:
1282 swiz = ir->mask.x;
1283 break;
1284 case 1:
1285 swiz = ir->mask.y;
1286 break;
1287 case 2:
1288 swiz = ir->mask.z;
1289 break;
1290 case 3:
1291 swiz = ir->mask.w;
1292 break;
1293 }
1294
1295 channel.reg_offset += swiz;
1296 emit(fs_inst(BRW_OPCODE_MOV, result, channel));
1297 result.reg_offset++;
1298 }
1299 }
1300
1301 void
1302 fs_visitor::visit(ir_discard *ir)
1303 {
1304 fs_reg temp = fs_reg(this, glsl_type::uint_type);
1305
1306 assert(ir->condition == NULL); /* FINISHME */
1307
1308 emit(fs_inst(FS_OPCODE_DISCARD, temp, temp));
1309 }
1310
1311 void
1312 fs_visitor::visit(ir_constant *ir)
1313 {
1314 fs_reg reg(this, ir->type);
1315 this->result = reg;
1316
1317 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1318 switch (ir->type->base_type) {
1319 case GLSL_TYPE_FLOAT:
1320 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
1321 break;
1322 case GLSL_TYPE_UINT:
1323 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
1324 break;
1325 case GLSL_TYPE_INT:
1326 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
1327 break;
1328 case GLSL_TYPE_BOOL:
1329 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
1330 break;
1331 default:
1332 assert(!"Non-float/uint/int/bool constant");
1333 }
1334 reg.reg_offset++;
1335 }
1336 }
1337
/**
 * Emits predicated IF/ELSE/ENDIF flow control for an ir_if, evaluating the
 * condition into the flag register first.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}
1376
/**
 * Emits DO/WHILE flow control for an ir_loop, including the optional
 * counter initialization, exit comparison, and increment, plus a
 * safety counter that force-breaks after 10000 iterations.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
	 this->base_ir = ir->from;
	 ir->from->accept(this);

	 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   /* Start a safety counter.  If the user messed up their loop
    * counting, we don't want to hang the GPU.
    */
   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      /* Compare the counter against the loop bound and break when the
       * loop condition fails.
       */
      this->base_ir = ir->to;
      ir->to->accept(this);

      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
				   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;
      case ir_binop_nequal:
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;
      case ir_binop_gequal:
	 inst->conditional_mod = BRW_CONDITIONAL_GE;
	 break;
      case ir_binop_lequal:
	 inst->conditional_mod = BRW_CONDITIONAL_LE;
	 break;
      case ir_binop_greater:
	 inst->conditional_mod = BRW_CONDITIONAL_G;
	 break;
      case ir_binop_less:
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 break;
      default:
	 assert(!"not reached: unknown loop condition");
	 this->fail = true;
	 break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      fs_inst *inst;

      this->base_ir = ir;
      ir->accept(this);

      /* Check the maximum loop iters counter. */
      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
      inst->conditional_mod = BRW_CONDITIONAL_Z;

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}
1461
1462 void
1463 fs_visitor::visit(ir_loop_jump *ir)
1464 {
1465 switch (ir->mode) {
1466 case ir_loop_jump::jump_break:
1467 emit(fs_inst(BRW_OPCODE_BREAK));
1468 break;
1469 case ir_loop_jump::jump_continue:
1470 emit(fs_inst(BRW_OPCODE_CONTINUE));
1471 break;
1472 }
1473 }
1474
/* Function calls should have been inlined away before we get here, so
 * reaching this visitor is a bug.
 */
void
fs_visitor::visit(ir_call *ir)
{
   assert(!"FINISHME");
}
1480
/* With everything inlined into main(), explicit returns are not yet
 * handled.
 */
void
fs_visitor::visit(ir_return *ir)
{
   assert(!"FINISHME");
}
1486
1487 void
1488 fs_visitor::visit(ir_function *ir)
1489 {
1490 /* Ignore function bodies other than main() -- we shouldn't see calls to
1491 * them since they should all be inlined before we get to ir_to_mesa.
1492 */
1493 if (strcmp(ir->name, "main") == 0) {
1494 const ir_function_signature *sig;
1495 exec_list empty;
1496
1497 sig = ir->matching_signature(&empty);
1498
1499 assert(sig);
1500
1501 foreach_iter(exec_list_iterator, iter, sig->body) {
1502 ir_instruction *ir = (ir_instruction *)iter.get();
1503 this->base_ir = ir;
1504
1505 ir->accept(this);
1506 }
1507 }
1508 }
1509
/* Signatures are only visited through visit(ir_function), which walks the
 * body directly, so this should never be called.
 */
void
fs_visitor::visit(ir_function_signature *ir)
{
   assert(!"not reached");
   (void)ir;
}
1516
1517 fs_inst *
1518 fs_visitor::emit(fs_inst inst)
1519 {
1520 fs_inst *list_inst = new(mem_ctx) fs_inst;
1521 *list_inst = inst;
1522
1523 list_inst->annotation = this->current_annotation;
1524 list_inst->ir = this->base_ir;
1525
1526 this->instructions.push_tail(list_inst);
1527
1528 return list_inst;
1529 }
1530
1531 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1532 void
1533 fs_visitor::emit_dummy_fs()
1534 {
1535 /* Everyone's favorite color. */
1536 emit(fs_inst(BRW_OPCODE_MOV,
1537 fs_reg(MRF, 2),
1538 fs_reg(1.0f)));
1539 emit(fs_inst(BRW_OPCODE_MOV,
1540 fs_reg(MRF, 3),
1541 fs_reg(0.0f)));
1542 emit(fs_inst(BRW_OPCODE_MOV,
1543 fs_reg(MRF, 4),
1544 fs_reg(1.0f)));
1545 emit(fs_inst(BRW_OPCODE_MOV,
1546 fs_reg(MRF, 5),
1547 fs_reg(0.0f)));
1548
1549 fs_inst *write;
1550 write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1551 fs_reg(0),
1552 fs_reg(0)));
1553 }
1554
1555 /* The register location here is relative to the start of the URB
1556 * data. It will get adjusted to be a real location before
1557 * generate_code() time.
1558 */
1559 struct brw_reg
1560 fs_visitor::interp_reg(int location, int channel)
1561 {
1562 int regnr = location * 2 + channel / 2;
1563 int stride = (channel & 1) * 4;
1564
1565 return brw_vec1_grf(regnr, stride);
1566 }
1567
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* The payload gives us the subspan origins in g1; the immediate
    * vectors add the per-pixel offsets within each 2x2 subspan.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   this->current_annotation = "compute pixel deltas from v0";
   this->delta_x = fs_reg(this, glsl_type::float_type);
   this->delta_y = fs_reg(this, glsl_type::float_type);
   /* Deltas are pixel position minus the start-vertex position held in
    * g1; these feed every LINTERP.
    */
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_x,
		this->pixel_x,
		fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_y,
		this->pixel_y,
		fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
		interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));
   this->current_annotation = NULL;
}
1612
/**
 * Assembles the framebuffer-write message payload (header, optional AA
 * stencil, color, optional depth) in the MRFs and emits one
 * FS_OPCODE_FB_WRITE per color region, with EOT on the last one.
 */
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   int nr = 0;

   /* m0, m1 header */
   nr += 2;

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
	 /* Hand over gl_FragDepth. */
	 assert(this->frag_depth);
	 fs_reg depth = *(variable_storage(this->frag_depth));

	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
	 /* Pass through the payload depth. */
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
						 "FB write target %d",
						 target);
      if (this->frag_color || this->frag_data) {
	 for (int i = 0; i < 4; i++) {
	    emit(fs_inst(BRW_OPCODE_MOV,
			 fs_reg(MRF, color_mrf + i),
			 color));
	    color.reg_offset++;
	 }
      }

      /* gl_FragColor is broadcast to every target, so rewind; gl_FragData
       * keeps advancing to the next target's four registers.
       */
      if (this->frag_color)
	 color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      if (target == c->key.nr_color_regions - 1)
	 inst->eot = true;
   }

   /* Even with no color buffers bound, a write must be sent to end the
    * thread.
    */
   if (c->key.nr_color_regions == 0) {
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
   }

   this->current_annotation = NULL;
}
1689
/**
 * Generates the render-target write SEND for an FS_OPCODE_FB_WRITE,
 * filling in the m0 (implied) and m1 header registers first.
 */
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
	   brw_message_reg(1),
	   brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
		8, /* dispatch_width */
		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
		0, /* base MRF */
		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
		inst->target,
		inst->mlen,
		0,
		eot);
}
1716
1717 void
1718 fs_visitor::generate_linterp(fs_inst *inst,
1719 struct brw_reg dst, struct brw_reg *src)
1720 {
1721 struct brw_reg delta_x = src[0];
1722 struct brw_reg delta_y = src[1];
1723 struct brw_reg interp = src[2];
1724
1725 if (brw->has_pln &&
1726 delta_y.nr == delta_x.nr + 1 &&
1727 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1728 brw_PLN(p, dst, interp, delta_x);
1729 } else {
1730 brw_LINE(p, brw_null_reg(), interp, delta_x);
1731 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1732 }
1733 }
1734
1735 void
1736 fs_visitor::generate_math(fs_inst *inst,
1737 struct brw_reg dst, struct brw_reg *src)
1738 {
1739 int op;
1740
1741 switch (inst->opcode) {
1742 case FS_OPCODE_RCP:
1743 op = BRW_MATH_FUNCTION_INV;
1744 break;
1745 case FS_OPCODE_RSQ:
1746 op = BRW_MATH_FUNCTION_RSQ;
1747 break;
1748 case FS_OPCODE_SQRT:
1749 op = BRW_MATH_FUNCTION_SQRT;
1750 break;
1751 case FS_OPCODE_EXP2:
1752 op = BRW_MATH_FUNCTION_EXP;
1753 break;
1754 case FS_OPCODE_LOG2:
1755 op = BRW_MATH_FUNCTION_LOG;
1756 break;
1757 case FS_OPCODE_POW:
1758 op = BRW_MATH_FUNCTION_POW;
1759 break;
1760 case FS_OPCODE_SIN:
1761 op = BRW_MATH_FUNCTION_SIN;
1762 break;
1763 case FS_OPCODE_COS:
1764 op = BRW_MATH_FUNCTION_COS;
1765 break;
1766 default:
1767 assert(!"not reached: unknown math function");
1768 op = 0;
1769 break;
1770 }
1771
1772 if (inst->opcode == FS_OPCODE_POW) {
1773 brw_MOV(p, brw_message_reg(3), src[1]);
1774 }
1775
1776 brw_math(p, dst,
1777 op,
1778 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1779 BRW_MATH_SATURATE_NONE,
1780 2, src[0],
1781 BRW_MATH_DATA_VECTOR,
1782 BRW_MATH_PRECISION_FULL);
1783 }
1784
1785 void
1786 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1787 {
1788 int msg_type = -1;
1789 int rlen = 4;
1790
1791 if (intel->gen == 5) {
1792 switch (inst->opcode) {
1793 case FS_OPCODE_TEX:
1794 if (inst->shadow_compare) {
1795 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1796 } else {
1797 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1798 }
1799 break;
1800 case FS_OPCODE_TXB:
1801 if (inst->shadow_compare) {
1802 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1803 } else {
1804 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1805 }
1806 break;
1807 }
1808 } else {
1809 switch (inst->opcode) {
1810 case FS_OPCODE_TEX:
1811 /* Note that G45 and older determines shadow compare and dispatch width
1812 * from message length for most messages.
1813 */
1814 if (inst->shadow_compare) {
1815 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
1816 } else {
1817 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
1818 }
1819 case FS_OPCODE_TXB:
1820 if (inst->shadow_compare) {
1821 assert(!"FINISHME: shadow compare with bias.");
1822 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1823 } else {
1824 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1825 rlen = 8;
1826 }
1827 break;
1828 }
1829 }
1830 assert(msg_type != -1);
1831
1832 /* g0 header. */
1833 src.nr--;
1834
1835 brw_SAMPLE(p,
1836 retype(dst, BRW_REGISTER_TYPE_UW),
1837 src.nr,
1838 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1839 SURF_INDEX_TEXTURE(inst->sampler),
1840 inst->sampler,
1841 WRITEMASK_XYZW,
1842 msg_type,
1843 rlen,
1844 inst->mlen + 1,
1845 0,
1846 1,
1847 BRW_SAMPLER_SIMD_MODE_SIMD8);
1848 }
1849
1850
1851 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
1852 * looking like:
1853 *
1854 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
1855 *
1856 * and we're trying to produce:
1857 *
1858 * DDX DDY
1859 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
1860 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
1861 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
1862 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
1863 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
1864 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
1865 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
1866 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
1867 *
1868 * and add another set of two more subspans if in 16-pixel dispatch mode.
1869 *
1870 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
1871 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
1872 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
1873 * between each other. We could probably do it like ddx and swizzle the right
1874 * order later, but bail for now and just produce
1875 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
1876 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* See the block comment above: the <2;2,0> regions pair up each
    * subspan's right pixels (subnr 1) against its left pixels (subnr 0),
    * so the ADD computes right - left per pair.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
1894
void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* See the block comment above: this is the simplified DDY that uses
    * <4;4,0> regions to broadcast each subspan's top-left (subnr 0) and
    * bottom-left (subnr 2) pixels, giving (tl - bl) replicated four times
    * per subspan rather than the fully per-pixel result.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
1912
/**
 * Generates the discard by ANDing the inverted instruction mask (IMASK)
 * into the g0 pixel mask, turning off the discarded channels for
 * subsequent messages (including the final FB write).
 */
void
fs_visitor::generate_discard(fs_inst *inst, struct brw_reg temp)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   temp = brw_uw1_reg(temp.file, temp.nr, 0);

   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, temp, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, temp, g0);
   brw_pop_insn_state(p);
}
1925
/**
 * Lays out the push-constant (CURB) registers after the payload and
 * rewrites UNIFORM-file sources to the fixed hardware registers they land
 * in.
 */
void
fs_visitor::assign_curb_setup()
{
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   /* Constants are uploaded 8 floats per register. */
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
			   c->prog_data.curb_read_length) & 1) {
      /* Align the start of the interpolation coefficients so that we can use
       * the PLN instruction.
       */
      c->prog_data.first_curbe_grf++;
   }

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
	 if (inst->src[i].file == UNIFORM) {
	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
						  constant_nr / 8,
						  constant_nr % 8);

	    inst->src[i].file = FIXED_HW_REG;
	    inst->src[i].fixed_hw_reg = brw_reg;
	 }
      }
   }
}
1957
/**
 * Lays out the interpolation setup data (URB read) after the CURB and
 * patches each FS_OPCODE_LINTERP's setup source to its real register.
 */
void
fs_visitor::assign_urb_setup()
{
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      /* WPOS setup is always present (see emit_interpolation_setup). */
      if (i != FRAG_ATTRIB_WPOS &&
	  !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
	 continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
	 continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* interp_reg() encoded location*2 + channel/2 as a placeholder;
       * decode the location and rebase onto the real URB register.
       */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
				      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}
1998
1999 static void
2000 trivial_assign_reg(int *reg_hw_locations, fs_reg *reg)
2001 {
2002 if (reg->file == GRF && reg->reg != 0) {
2003 reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
2004 reg->reg = 0;
2005 }
2006 }
2007
2008 void
2009 fs_visitor::assign_regs()
2010 {
2011 int last_grf = 0;
2012 int hw_reg_mapping[this->virtual_grf_next];
2013 int i;
2014
2015 hw_reg_mapping[0] = 0;
2016 hw_reg_mapping[1] = this->first_non_payload_grf;
2017 for (i = 2; i < this->virtual_grf_next; i++) {
2018 hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
2019 this->virtual_grf_sizes[i - 1]);
2020 }
2021 last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
2022
2023 /* FINISHME: trivial assignment of register numbers */
2024 foreach_iter(exec_list_iterator, iter, this->instructions) {
2025 fs_inst *inst = (fs_inst *)iter.get();
2026
2027 trivial_assign_reg(hw_reg_mapping, &inst->dst);
2028 trivial_assign_reg(hw_reg_mapping, &inst->src[0]);
2029 trivial_assign_reg(hw_reg_mapping, &inst->src[1]);
2030 }
2031
2032 this->grf_used = last_grf + 1;
2033 }
2034
/**
 * Converts an fs_reg (the FS IR's register abstraction) into the concrete
 * brw_reg encoding used by the EU assembler, applying abs/negate modifiers.
 */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      /* By this point hw_reg holds the final register number (see
       * assign_regs()/assign_curb_setup()).
       */
      brw_reg = brw_vec8_reg(reg->file,
			    reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
	 brw_reg = brw_imm_f(reg->imm.f);
	 break;
      case BRW_REGISTER_TYPE_D:
	 brw_reg = brw_imm_d(reg->imm.i);
	 break;
      case BRW_REGISTER_TYPE_UD:
	 brw_reg = brw_imm_ud(reg->imm.u);
	 break;
      default:
	 assert(!"not reached");
	 break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      /* Uniforms should have been lowered to FIXED_HW_REG by
       * assign_curb_setup() before code generation.
       */
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}
2082
2083 void
2084 fs_visitor::generate_code()
2085 {
2086 unsigned int annotation_len = 0;
2087 int last_native_inst = 0;
2088 struct brw_instruction *if_stack[16], *loop_stack[16];
2089 int if_stack_depth = 0, loop_stack_depth = 0;
2090 int if_depth_in_loop[16];
2091
2092 if_depth_in_loop[loop_stack_depth] = 0;
2093
2094 memset(&if_stack, 0, sizeof(if_stack));
2095 foreach_iter(exec_list_iterator, iter, this->instructions) {
2096 fs_inst *inst = (fs_inst *)iter.get();
2097 struct brw_reg src[3], dst;
2098
2099 for (unsigned int i = 0; i < 3; i++) {
2100 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
2101 }
2102 dst = brw_reg_from_fs_reg(&inst->dst);
2103
2104 brw_set_conditionalmod(p, inst->conditional_mod);
2105 brw_set_predicate_control(p, inst->predicated);
2106
2107 switch (inst->opcode) {
2108 case BRW_OPCODE_MOV:
2109 brw_MOV(p, dst, src[0]);
2110 break;
2111 case BRW_OPCODE_ADD:
2112 brw_ADD(p, dst, src[0], src[1]);
2113 break;
2114 case BRW_OPCODE_MUL:
2115 brw_MUL(p, dst, src[0], src[1]);
2116 break;
2117
2118 case BRW_OPCODE_FRC:
2119 brw_FRC(p, dst, src[0]);
2120 break;
2121 case BRW_OPCODE_RNDD:
2122 brw_RNDD(p, dst, src[0]);
2123 break;
2124 case BRW_OPCODE_RNDZ:
2125 brw_RNDZ(p, dst, src[0]);
2126 break;
2127
2128 case BRW_OPCODE_AND:
2129 brw_AND(p, dst, src[0], src[1]);
2130 break;
2131 case BRW_OPCODE_OR:
2132 brw_OR(p, dst, src[0], src[1]);
2133 break;
2134 case BRW_OPCODE_XOR:
2135 brw_XOR(p, dst, src[0], src[1]);
2136 break;
2137
2138 case BRW_OPCODE_CMP:
2139 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2140 break;
2141 case BRW_OPCODE_SEL:
2142 brw_SEL(p, dst, src[0], src[1]);
2143 break;
2144
2145 case BRW_OPCODE_IF:
2146 assert(if_stack_depth < 16);
2147 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2148 if_depth_in_loop[loop_stack_depth]++;
2149 if_stack_depth++;
2150 break;
2151 case BRW_OPCODE_ELSE:
2152 if_stack[if_stack_depth - 1] =
2153 brw_ELSE(p, if_stack[if_stack_depth - 1]);
2154 break;
2155 case BRW_OPCODE_ENDIF:
2156 if_stack_depth--;
2157 brw_ENDIF(p , if_stack[if_stack_depth]);
2158 if_depth_in_loop[loop_stack_depth]--;
2159 break;
2160
2161 case BRW_OPCODE_DO:
2162 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
2163 if_depth_in_loop[loop_stack_depth] = 0;
2164 break;
2165
2166 case BRW_OPCODE_BREAK:
2167 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
2168 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2169 break;
2170 case BRW_OPCODE_CONTINUE:
2171 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
2172 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2173 break;
2174
2175 case BRW_OPCODE_WHILE: {
2176 struct brw_instruction *inst0, *inst1;
2177 GLuint br = 1;
2178
2179 if (intel->gen == 5)
2180 br = 2;
2181
2182 assert(loop_stack_depth > 0);
2183 loop_stack_depth--;
2184 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
2185 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2186 while (inst0 > loop_stack[loop_stack_depth]) {
2187 inst0--;
2188 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
2189 inst0->bits3.if_else.jump_count == 0) {
2190 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
2191 }
2192 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
2193 inst0->bits3.if_else.jump_count == 0) {
2194 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
2195 }
2196 }
2197 }
2198 break;
2199
2200 case FS_OPCODE_RCP:
2201 case FS_OPCODE_RSQ:
2202 case FS_OPCODE_SQRT:
2203 case FS_OPCODE_EXP2:
2204 case FS_OPCODE_LOG2:
2205 case FS_OPCODE_POW:
2206 case FS_OPCODE_SIN:
2207 case FS_OPCODE_COS:
2208 generate_math(inst, dst, src);
2209 break;
2210 case FS_OPCODE_LINTERP:
2211 generate_linterp(inst, dst, src);
2212 break;
2213 case FS_OPCODE_TEX:
2214 case FS_OPCODE_TXB:
2215 case FS_OPCODE_TXL:
2216 generate_tex(inst, dst, src[0]);
2217 break;
2218 case FS_OPCODE_DISCARD:
2219 generate_discard(inst, dst /* src0 == dst */);
2220 break;
2221 case FS_OPCODE_DDX:
2222 generate_ddx(inst, dst, src[0]);
2223 break;
2224 case FS_OPCODE_DDY:
2225 generate_ddy(inst, dst, src[0]);
2226 break;
2227 case FS_OPCODE_FB_WRITE:
2228 generate_fb_write(inst);
2229 break;
2230 default:
2231 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2232 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2233 brw_opcodes[inst->opcode].name);
2234 } else {
2235 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2236 }
2237 this->fail = true;
2238 }
2239
2240 if (annotation_len < p->nr_insn) {
2241 annotation_len *= 2;
2242 if (annotation_len < 16)
2243 annotation_len = 16;
2244
2245 this->annotation_string = talloc_realloc(this->mem_ctx,
2246 annotation_string,
2247 const char *,
2248 annotation_len);
2249 this->annotation_ir = talloc_realloc(this->mem_ctx,
2250 annotation_ir,
2251 ir_instruction *,
2252 annotation_len);
2253 }
2254
2255 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2256 this->annotation_string[i] = inst->annotation;
2257 this->annotation_ir[i] = inst->ir;
2258 }
2259 last_native_inst = p->nr_insn;
2260 }
2261 }
2262
/**
 * Entry point for the new GLSL-IR-based fragment shader backend.
 *
 * Returns GL_TRUE if it compiled the current GLSL fragment shader,
 * GL_FALSE to fall back to the old brw_wm path.
 */
GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   struct brw_shader *shader = NULL;
   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;

   if (!prog)
      return GL_FALSE;

   if (!using_new_fs)
      return GL_FALSE;

   /* Find the linked fragment shader stage, if any. */
   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
	 break;
      }
   }
   if (!shader)
      return GL_FALSE;

   /* We always use 8-wide mode, at least for now.  For one, flow
    * control only works in 8-wide.  Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      v.emit_dummy_fs();
   } else {
      v.emit_interpolation_setup();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 v.base_ir = ir;
	 ir->accept(&v);
      }

      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();
      v.assign_regs();
   }

   v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   if (INTEL_DEBUG & DEBUG_WM) {
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      /* Disassemble the generated code, printing each annotation once as
       * it changes.
       */
      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
	 if (last_annotation_ir != v.annotation_ir[i]) {
	    last_annotation_ir = v.annotation_ir[i];
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != v.annotation_string[i]) {
	    last_annotation_string = v.annotation_string[i];
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
	 brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}