i965: Add support for ir_loop counters to the new FS backend.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 extern "C" {
29
30 #include <sys/types.h>
31
32 #include "main/macros.h"
33 #include "main/shaderobj.h"
34 #include "program/prog_parameter.h"
35 #include "program/prog_print.h"
36 #include "program/prog_optimize.h"
37 #include "program/sampler.h"
38 #include "program/hash_table.h"
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "brw_wm.h"
42 #include "talloc.h"
43 }
44 #include "../glsl/glsl_types.h"
45 #include "../glsl/ir_optimization.h"
46 #include "../glsl/ir_print_visitor.h"
47
/* Register files an fs_reg may live in.  The first four alias the
 * hardware register-file encodings; the remainder are backend-internal
 * and must be lowered before final code generation.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE  /* marker for an unset/invalid register */
};
57
/* Backend-specific virtual opcodes.  Numbering starts at 256 to stay
 * clear of the hardware BRW_OPCODE_* space, so both can share
 * fs_inst::opcode.
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};
76
/* Cached result of the INTEL_NEW_FS environment check:
 * -1 = not yet queried, 0 = old backend, nonzero = new FS backend.
 */
static int using_new_fs = -1;

static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
79
80 struct gl_shader *
81 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
82 {
83 struct brw_shader *shader;
84
85 shader = talloc_zero(NULL, struct brw_shader);
86 if (shader) {
87 shader->base.Type = type;
88 shader->base.Name = name;
89 _mesa_init_shader(ctx, &shader->base);
90 }
91
92 return &shader->base;
93 }
94
95 struct gl_shader_program *
96 brw_new_shader_program(GLcontext *ctx, GLuint name)
97 {
98 struct brw_shader_program *prog;
99 prog = talloc_zero(NULL, struct brw_shader_program);
100 if (prog) {
101 prog->base.Name = name;
102 _mesa_init_shader_program(ctx, &prog->base);
103 }
104 return &prog->base;
105 }
106
107 GLboolean
108 brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
109 {
110 if (!_mesa_ir_compile_shader(ctx, shader))
111 return GL_FALSE;
112
113 return GL_TRUE;
114 }
115
/**
 * Link hook.  When the INTEL_NEW_FS environment variable is set, run
 * the new FS backend's IR lowering passes over each linked fragment
 * shader before handing the program to the core Mesa IR linker.
 */
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   /* Lazily query the environment exactly once. */
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
         /* Scratch talloc context for the clone; the surviving IR is
          * reparented out of it before it is freed below.
          */
         void *mem_ctx = talloc_new(NULL);
         bool progress;

         /* Replace any previous backend copy of the IR with a fresh
          * clone of the linked IR, so lowering doesn't disturb base.ir.
          */
         if (shader->ir)
            talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

         /* One-shot lowering passes that only need to run once. */
         do_mat_op_to_vec(shader->ir);
         do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);

         /* Iterate channel expansion, splitting, and optimization to a
          * fixed point.
          */
         do {
            progress = false;

            brw_do_channel_expressions(shader->ir);
            brw_do_vector_splitting(shader->ir);

            progress = do_lower_jumps(shader->ir, true, true,
                                      true, /* main return */
                                      false, /* continue */
                                      false /* loops */
                                      ) || progress;

            progress = do_common_optimization(shader->ir, true, 32) || progress;

            progress = lower_noise(shader->ir) || progress;
            progress =
               lower_variable_index_to_cond_assign(shader->ir,
                                                   GL_TRUE, /* input */
                                                   GL_TRUE, /* output */
                                                   GL_TRUE, /* temp */
                                                   GL_TRUE /* uniform */
                                                   ) || progress;
         } while (progress);

         validate_ir_tree(shader->ir);

         /* Pull the lowered IR out of the scratch context so it
          * survives the talloc_free of mem_ctx.
          */
         reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}
176
177 static int
178 type_size(const struct glsl_type *type)
179 {
180 unsigned int size, i;
181
182 switch (type->base_type) {
183 case GLSL_TYPE_UINT:
184 case GLSL_TYPE_INT:
185 case GLSL_TYPE_FLOAT:
186 case GLSL_TYPE_BOOL:
187 return type->components();
188 case GLSL_TYPE_ARRAY:
189 /* FINISHME: uniform/varying arrays. */
190 return type_size(type->fields.array) * type->length;
191 case GLSL_TYPE_STRUCT:
192 size = 0;
193 for (i = 0; i < type->length; i++) {
194 size += type_size(type->fields.structure[i].type);
195 }
196 return size;
197 case GLSL_TYPE_SAMPLER:
198 /* Samplers take up no register space, since they're baked in at
199 * link time.
200 */
201 return 0;
202 default:
203 assert(!"not reached");
204 return 0;
205 }
206 }
207
/**
 * A register operand of an fs_inst: an abstract GRF, MRF, uniform slot,
 * immediate, or a fixed hardware register.  Copied by value; abstract
 * GRFs are mapped to hardware registers during register allocation.
 */
class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Common field defaults shared by all constructors. */
   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg Immediate value constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** Abstract register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the abstract register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   /* Source modifiers applied when this operand is read. */
   bool negate;
   bool abs;
   struct brw_reg fixed_hw_reg;

   /** Value for file == BRW_IMMMEDIATE_FILE */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};
298
/* Handy shared operands: an intentionally-unset register (BAD_FILE) and
 * the architecture null register (writes are discarded — used as a CMP
 * destination when only the flags are wanted).
 */
static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);
301
/**
 * A single instruction in the FS backend's flat IR: an opcode, a
 * destination, and up to three sources, plus SEND-message and
 * predication state.  Lives on fs_visitor::instructions.
 */
class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      /* talloc_zero_size: fields not set by init() (mlen, ir,
       * annotation, ...) start out zero/NULL.
       */
      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};
386
/**
 * The new FS backend's IR translator: walks lowered GLSL IR and emits a
 * flat fs_inst list, then performs register assignment and native code
 * generation for the fragment shader.
 */
class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      /* Abstract GRF numbering starts at 1; 0 means "fixed hw reg". */
      this->next_abstract_grf = 1;
      this->variable_ht = hash_table_ctor(0,
                                          hash_table_pointer_hash,
                                          hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   /* Look up the fs_reg previously allocated for a variable, or NULL. */
   fs_reg *variable_storage(ir_variable *var);

   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void assign_urb_setup();
   void assign_regs();
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
                         struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_interpolation();
   void emit_pinterp(int location);
   void emit_fb_writes();

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   /* The flat list of emitted fs_inst. */
   exec_list instructions;
   int next_abstract_grf;
   /* Maps ir_variable * -> fs_reg * storage. */
   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;
   fs_reg interp_attrs[64];

   int grf_used;

};
501
/** Fixed HW reg constructor.  Type defaults to float. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   this->type = BRW_REGISTER_TYPE_F;
}
510
511 int
512 brw_type_for_base_type(const struct glsl_type *type)
513 {
514 switch (type->base_type) {
515 case GLSL_TYPE_FLOAT:
516 return BRW_REGISTER_TYPE_F;
517 case GLSL_TYPE_INT:
518 case GLSL_TYPE_BOOL:
519 return BRW_REGISTER_TYPE_D;
520 case GLSL_TYPE_UINT:
521 return BRW_REGISTER_TYPE_UD;
522 case GLSL_TYPE_ARRAY:
523 case GLSL_TYPE_STRUCT:
524 /* These should be overridden with the type of the member when
525 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
526 * way to trip up if we don't.
527 */
528 return BRW_REGISTER_TYPE_UD;
529 default:
530 assert(!"not reached");
531 return BRW_REGISTER_TYPE_F;
532 }
533 }
534
/** Automatic reg constructor.
 *
 * Allocates a fresh abstract GRF big enough for 'type' out of the
 * visitor's counter; register allocation maps it to hardware later.
 */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
   init();

   this->file = GRF;
   this->reg = v->next_abstract_grf;
   this->reg_offset = 0;
   v->next_abstract_grf += type_size(type);
   this->type = brw_type_for_base_type(type);
}
546
/* Return the fs_reg storage registered for 'var', or NULL if the
 * variable has not been visited yet.
 */
fs_reg *
fs_visitor::variable_storage(ir_variable *var)
{
   return (fs_reg *)hash_table_find(this->variable_ht, var);
}
552
/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 *
 * Recursively appends pointers to the parameter values for the uniform
 * at 'loc' into c->prog_data.param, and returns the number of
 * ParameterValues slots consumed (so callers can advance 'loc').
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      /* A matrix is handled as matrix_columns consecutive column
       * vectors.
       */
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
                                                        type->vector_elements,
                                                        1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
         offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* One ParameterValues slot holds the whole vector; point a param
       * at each component.
       */
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
         c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset,
                                        type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
         offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
609
/**
 * Allocate (or bind) storage for a variable the first time it is seen,
 * and record it in variable_ht so later dereferences can find it.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   /* Remember the special fragment outputs for emit_fb_writes(). */
   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (strcmp(ir->name, "gl_FrontFacing") == 0) {
         reg = new(this->mem_ctx) fs_reg(this, ir->type);
         struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
         /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
          * us front face
          */
         fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
                                      *reg,
                                      fs_reg(r1_6ud),
                                      fs_reg(1u << 31)));
         inst->conditional_mod = BRW_CONDITIONAL_L;
         /* Reduce the CMP result to a canonical 0/1 boolean. */
         emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
         /* Ordinary varyings use the storage set up by interpolation. */
         reg = &this->interp_attrs[ir->location];
      }
   }

   if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      setup_uniform_values(ir->location, ir->type);

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   /* Everything else (temporaries, outputs) gets a fresh abstract GRF. */
   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}
657
658 void
659 fs_visitor::visit(ir_dereference_variable *ir)
660 {
661 fs_reg *reg = variable_storage(ir->var);
662 this->result = *reg;
663 }
664
/**
 * Struct field dereference: visit the record, then offset the result by
 * the sizes of all fields preceding the named one.
 */
void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   /* Sum the sizes of the fields before 'field' to find its offset. */
   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
         break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   this->result.type = brw_type_for_base_type(ir->type);
}
681
/**
 * Array (or matrix-column) dereference with a constant index: offset
 * the visited base register by index * element size.  Non-constant
 * indexing is not implemented yet.
 */
void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   if (ir->type->is_matrix()) {
      /* Indexing a matrix yields a column vector of vector_elements. */
      element_size = ir->type->vector_elements;
   } else {
      element_size = type_size(ir->type);
      this->result.type = brw_type_for_base_type(ir->type);
   }

   if (index) {
      assert(this->result.file == UNIFORM ||
             (this->result.file == GRF &&
              this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant matrix column");
   }
}
707
708 void
709 fs_visitor::visit(ir_expression *ir)
710 {
711 unsigned int operand;
712 fs_reg op[2], temp;
713 fs_reg result;
714 fs_inst *inst;
715
716 for (operand = 0; operand < ir->get_num_operands(); operand++) {
717 ir->operands[operand]->accept(this);
718 if (this->result.file == BAD_FILE) {
719 ir_print_visitor v;
720 printf("Failed to get tree for expression operand:\n");
721 ir->operands[operand]->accept(&v);
722 this->fail = true;
723 }
724 op[operand] = this->result;
725
726 /* Matrix expression operands should have been broken down to vector
727 * operations already.
728 */
729 assert(!ir->operands[operand]->type->is_matrix());
730 /* And then those vector operands should have been broken down to scalar.
731 */
732 assert(!ir->operands[operand]->type->is_vector());
733 }
734
735 /* Storage for our result. If our result goes into an assignment, it will
736 * just get copy-propagated out, so no worries.
737 */
738 this->result = fs_reg(this, ir->type);
739
740 switch (ir->operation) {
741 case ir_unop_logic_not:
742 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
743 break;
744 case ir_unop_neg:
745 op[0].negate = !op[0].negate;
746 this->result = op[0];
747 break;
748 case ir_unop_abs:
749 op[0].abs = true;
750 this->result = op[0];
751 break;
752 case ir_unop_sign:
753 temp = fs_reg(this, ir->type);
754
755 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
756
757 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
758 inst->conditional_mod = BRW_CONDITIONAL_G;
759 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
760 inst->predicated = true;
761
762 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
763 inst->conditional_mod = BRW_CONDITIONAL_L;
764 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
765 inst->predicated = true;
766
767 break;
768 case ir_unop_rcp:
769 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
770 break;
771
772 case ir_unop_exp2:
773 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
774 break;
775 case ir_unop_log2:
776 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
777 break;
778 case ir_unop_exp:
779 case ir_unop_log:
780 assert(!"not reached: should be handled by ir_explog_to_explog2");
781 break;
782 case ir_unop_sin:
783 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
784 break;
785 case ir_unop_cos:
786 emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
787 break;
788
789 case ir_unop_dFdx:
790 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
791 break;
792 case ir_unop_dFdy:
793 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
794 break;
795
796 case ir_binop_add:
797 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
798 break;
799 case ir_binop_sub:
800 assert(!"not reached: should be handled by ir_sub_to_add_neg");
801 break;
802
803 case ir_binop_mul:
804 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
805 break;
806 case ir_binop_div:
807 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
808 break;
809 case ir_binop_mod:
810 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
811 break;
812
813 case ir_binop_less:
814 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
815 inst->conditional_mod = BRW_CONDITIONAL_L;
816 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
817 break;
818 case ir_binop_greater:
819 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
820 inst->conditional_mod = BRW_CONDITIONAL_G;
821 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
822 break;
823 case ir_binop_lequal:
824 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
825 inst->conditional_mod = BRW_CONDITIONAL_LE;
826 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
827 break;
828 case ir_binop_gequal:
829 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
830 inst->conditional_mod = BRW_CONDITIONAL_GE;
831 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
832 break;
833 case ir_binop_equal:
834 case ir_binop_all_equal: /* same as nequal for scalars */
835 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
836 inst->conditional_mod = BRW_CONDITIONAL_Z;
837 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
838 break;
839 case ir_binop_nequal:
840 case ir_binop_any_nequal: /* same as nequal for scalars */
841 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
842 inst->conditional_mod = BRW_CONDITIONAL_NZ;
843 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
844 break;
845
846 case ir_binop_logic_xor:
847 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
848 break;
849
850 case ir_binop_logic_or:
851 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
852 break;
853
854 case ir_binop_logic_and:
855 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
856 break;
857
858 case ir_binop_dot:
859 case ir_binop_cross:
860 case ir_unop_any:
861 assert(!"not reached: should be handled by brw_fs_channel_expressions");
862 break;
863
864 case ir_unop_noise:
865 assert(!"not reached: should be handled by lower_noise");
866 break;
867
868 case ir_unop_sqrt:
869 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
870 break;
871
872 case ir_unop_rsq:
873 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
874 break;
875
876 case ir_unop_i2f:
877 case ir_unop_b2f:
878 case ir_unop_b2i:
879 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
880 break;
881 case ir_unop_f2i:
882 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
883 break;
884 case ir_unop_f2b:
885 case ir_unop_i2b:
886 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
887 inst->conditional_mod = BRW_CONDITIONAL_NZ;
888
889 case ir_unop_trunc:
890 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
891 break;
892 case ir_unop_ceil:
893 op[0].negate = ~op[0].negate;
894 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
895 this->result.negate = true;
896 break;
897 case ir_unop_floor:
898 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
899 break;
900 case ir_unop_fract:
901 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
902 break;
903
904 case ir_binop_min:
905 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
906 inst->conditional_mod = BRW_CONDITIONAL_L;
907
908 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
909 inst->predicated = true;
910 break;
911 case ir_binop_max:
912 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
913 inst->conditional_mod = BRW_CONDITIONAL_G;
914
915 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
916 inst->predicated = true;
917 break;
918
919 case ir_binop_pow:
920 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
921 break;
922
923 case ir_unop_bit_not:
924 case ir_unop_u2f:
925 case ir_binop_lshift:
926 case ir_binop_rshift:
927 case ir_binop_bit_and:
928 case ir_binop_bit_xor:
929 case ir_binop_bit_or:
930 assert(!"GLSL 1.30 features unsupported");
931 break;
932 }
933 }
934
/**
 * Emit the MOVs for an assignment, honoring the writemask and an
 * optional condition (lowered to per-instruction predication).
 */
void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   int i;
   int write_mask;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).  This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      write_mask = WRITEMASK_XYZW;
   } else {
      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
      write_mask = ir->write_mask;
   }

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      /* The writemask only covers the first four components; anything
       * beyond that (matrix/array/struct storage) is always written.
       * Note the rhs offset only advances on written components — the
       * rhs has exactly one component per set writemask bit.
       */
      if (i >= 4 || (write_mask & (1 << i))) {
         inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
         if (ir->condition)
            inst->predicated = true;
         r.reg_offset++;
      }
      l.reg_offset++;
   }
}
982
/**
 * Build the sampler SEND message for a texture operation: coordinate
 * (projected if needed), optional shadow reference, optional bias/LOD,
 * then emit the FS_OPCODE_TEX/TXB/TXL instruction.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   int base_mrf = 2;           /* first MRF of the message payload */
   fs_inst *inst = NULL;
   unsigned int mlen = 0;      /* running payload length in MRFs */

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   if (ir->projector) {
      /* Divide through by the projector: coord * (1 / q). */
      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);

      ir->projector->accept(this);
      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));

      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
         coordinate.reg_offset++;
         proj_coordinate.reg_offset++;
      }
      proj_coordinate.reg_offset = 0;

      coordinate = proj_coordinate;
   }

   /* Copy the coordinate into the payload, one MRF per component. */
   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5)
      mlen = 3;

   if (ir->shadow_comparitor) {
      /* For shadow comparisons, we have to supply u,v,r. */
      mlen = 3;

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   /* Do we ever want to handle writemasking on texture samples?  Is it
    * performance relevant?
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      /* The bias goes in the payload slot after the coordinate. */
      ir->lod_info.bias->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      /* Likewise for the explicit LOD. */
      ir->lod_info.lod->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   /* Resolve the GLSL sampler uniform to its hardware sampler unit. */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
                                      ctx->Shader.CurrentProgram,
                                      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;
   inst->mlen = mlen;
}
1069
1070 void
1071 fs_visitor::visit(ir_swizzle *ir)
1072 {
1073 ir->val->accept(this);
1074 fs_reg val = this->result;
1075
1076 fs_reg result = fs_reg(this, ir->type);
1077 this->result = result;
1078
1079 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1080 fs_reg channel = val;
1081 int swiz = 0;
1082
1083 switch (i) {
1084 case 0:
1085 swiz = ir->mask.x;
1086 break;
1087 case 1:
1088 swiz = ir->mask.y;
1089 break;
1090 case 2:
1091 swiz = ir->mask.z;
1092 break;
1093 case 3:
1094 swiz = ir->mask.w;
1095 break;
1096 }
1097
1098 channel.reg_offset += swiz;
1099 emit(fs_inst(BRW_OPCODE_MOV, result, channel));
1100 result.reg_offset++;
1101 }
1102 }
1103
1104 void
1105 fs_visitor::visit(ir_discard *ir)
1106 {
1107 assert(ir->condition == NULL); /* FINISHME */
1108
1109 emit(fs_inst(FS_OPCODE_DISCARD));
1110 }
1111
/**
 * Materialize a (scalar or vector) constant: allocate a register and
 * MOV each component's immediate into it.
 */
void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
         break;
      case GLSL_TYPE_UINT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
         break;
      case GLSL_TYPE_INT:
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
         break;
      case GLSL_TYPE_BOOL:
         /* Booleans are stored as 0/1 integers. */
         emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
         break;
      default:
         assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}
1138
/**
 * Emit an if/else/endif: evaluate the condition into the flag register,
 * then emit predicated IF, the then-block, optional ELSE block, ENDIF.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         this->base_ir = ir;

         ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}
1177
1178 void
1179 fs_visitor::visit(ir_loop *ir)
1180 {
1181 fs_reg counter = reg_undef;
1182
1183 if (ir->counter) {
1184 this->base_ir = ir->counter;
1185 ir->counter->accept(this);
1186 counter = *(variable_storage(ir->counter));
1187
1188 if (ir->from) {
1189 this->base_ir = ir->from;
1190 ir->from->accept(this);
1191
1192 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
1193 }
1194 }
1195
1196 /* Start a safety counter. If the user messed up their loop
1197 * counting, we don't want to hang the GPU.
1198 */
1199 fs_reg max_iter = fs_reg(this, glsl_type::int_type);
1200 emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));
1201
1202 emit(fs_inst(BRW_OPCODE_DO));
1203
1204 if (ir->to) {
1205 this->base_ir = ir->to;
1206 ir->to->accept(this);
1207
1208 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
1209 counter, this->result));
1210 switch (ir->cmp) {
1211 case ir_binop_equal:
1212 inst->conditional_mod = BRW_CONDITIONAL_Z;
1213 break;
1214 case ir_binop_nequal:
1215 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1216 break;
1217 case ir_binop_gequal:
1218 inst->conditional_mod = BRW_CONDITIONAL_GE;
1219 break;
1220 case ir_binop_lequal:
1221 inst->conditional_mod = BRW_CONDITIONAL_LE;
1222 break;
1223 case ir_binop_greater:
1224 inst->conditional_mod = BRW_CONDITIONAL_G;
1225 break;
1226 case ir_binop_less:
1227 inst->conditional_mod = BRW_CONDITIONAL_L;
1228 break;
1229 default:
1230 assert(!"not reached: unknown loop condition");
1231 this->fail = true;
1232 break;
1233 }
1234
1235 inst = emit(fs_inst(BRW_OPCODE_BREAK));
1236 inst->predicated = true;
1237 }
1238
1239 foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
1240 ir_instruction *ir = (ir_instruction *)iter.get();
1241 fs_inst *inst;
1242
1243 this->base_ir = ir;
1244 ir->accept(this);
1245
1246 /* Check the maximum loop iters counter. */
1247 inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
1248 inst->conditional_mod = BRW_CONDITIONAL_Z;
1249
1250 inst = emit(fs_inst(BRW_OPCODE_BREAK));
1251 inst->predicated = true;
1252 }
1253
1254 if (ir->increment) {
1255 this->base_ir = ir->increment;
1256 ir->increment->accept(this);
1257 emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
1258 }
1259
1260 emit(fs_inst(BRW_OPCODE_WHILE));
1261 }
1262
1263 void
1264 fs_visitor::visit(ir_loop_jump *ir)
1265 {
1266 switch (ir->mode) {
1267 case ir_loop_jump::jump_break:
1268 emit(fs_inst(BRW_OPCODE_BREAK));
1269 break;
1270 case ir_loop_jump::jump_continue:
1271 emit(fs_inst(BRW_OPCODE_CONTINUE));
1272 break;
1273 }
1274 }
1275
1276 void
1277 fs_visitor::visit(ir_call *ir)
1278 {
1279 assert(!"FINISHME");
1280 }
1281
1282 void
1283 fs_visitor::visit(ir_return *ir)
1284 {
1285 assert(!"FINISHME");
1286 }
1287
1288 void
1289 fs_visitor::visit(ir_function *ir)
1290 {
1291 /* Ignore function bodies other than main() -- we shouldn't see calls to
1292 * them since they should all be inlined before we get to ir_to_mesa.
1293 */
1294 if (strcmp(ir->name, "main") == 0) {
1295 const ir_function_signature *sig;
1296 exec_list empty;
1297
1298 sig = ir->matching_signature(&empty);
1299
1300 assert(sig);
1301
1302 foreach_iter(exec_list_iterator, iter, sig->body) {
1303 ir_instruction *ir = (ir_instruction *)iter.get();
1304 this->base_ir = ir;
1305
1306 ir->accept(this);
1307 }
1308 }
1309 }
1310
void
fs_visitor::visit(ir_function_signature *ir)
{
   /* Signatures are only walked through visit(ir_function); hitting one
    * directly is a visitor bug.
    */
   assert(!"not reached");
   (void)ir;
}
1317
1318 fs_inst *
1319 fs_visitor::emit(fs_inst inst)
1320 {
1321 fs_inst *list_inst = new(mem_ctx) fs_inst;
1322 *list_inst = inst;
1323
1324 list_inst->annotation = this->current_annotation;
1325 list_inst->ir = this->base_ir;
1326
1327 this->instructions.push_tail(list_inst);
1328
1329 return list_inst;
1330 }
1331
1332 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1333 void
1334 fs_visitor::emit_dummy_fs()
1335 {
1336 /* Everyone's favorite color. */
1337 emit(fs_inst(BRW_OPCODE_MOV,
1338 fs_reg(MRF, 2),
1339 fs_reg(1.0f)));
1340 emit(fs_inst(BRW_OPCODE_MOV,
1341 fs_reg(MRF, 3),
1342 fs_reg(0.0f)));
1343 emit(fs_inst(BRW_OPCODE_MOV,
1344 fs_reg(MRF, 4),
1345 fs_reg(1.0f)));
1346 emit(fs_inst(BRW_OPCODE_MOV,
1347 fs_reg(MRF, 5),
1348 fs_reg(0.0f)));
1349
1350 fs_inst *write;
1351 write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1352 fs_reg(0),
1353 fs_reg(0)));
1354 }
1355
1356 /* The register location here is relative to the start of the URB
1357 * data. It will get adjusted to be a real location before
1358 * generate_code() time.
1359 */
1360 struct brw_reg
1361 fs_visitor::interp_reg(int location, int channel)
1362 {
1363 int regnr = location * 2 + channel / 2;
1364 int stride = (channel & 1) * 4;
1365
1366 return brw_vec1_grf(regnr, stride);
1367 }
1368
1369 /** Emits the interpolation for the varying inputs. */
1370 void
1371 fs_visitor::emit_interpolation()
1372 {
1373 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1374 /* For now, the source regs for the setup URB data will be unset,
1375 * since we don't know until codegen how many push constants we'll
1376 * use, and therefore what the setup URB offset is.
1377 */
1378 fs_reg src_reg = reg_undef;
1379
1380 this->current_annotation = "compute pixel centers";
1381 this->pixel_x = fs_reg(this, glsl_type::uint_type);
1382 this->pixel_y = fs_reg(this, glsl_type::uint_type);
1383 this->pixel_x.type = BRW_REGISTER_TYPE_UW;
1384 this->pixel_y.type = BRW_REGISTER_TYPE_UW;
1385 emit(fs_inst(BRW_OPCODE_ADD,
1386 this->pixel_x,
1387 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1388 fs_reg(brw_imm_v(0x10101010))));
1389 emit(fs_inst(BRW_OPCODE_ADD,
1390 this->pixel_y,
1391 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1392 fs_reg(brw_imm_v(0x11001100))));
1393
1394 this->current_annotation = "compute pixel deltas from v0";
1395 this->delta_x = fs_reg(this, glsl_type::float_type);
1396 this->delta_y = fs_reg(this, glsl_type::float_type);
1397 emit(fs_inst(BRW_OPCODE_ADD,
1398 this->delta_x,
1399 this->pixel_x,
1400 fs_reg(negate(brw_vec1_grf(1, 0)))));
1401 emit(fs_inst(BRW_OPCODE_ADD,
1402 this->delta_y,
1403 this->pixel_y,
1404 fs_reg(negate(brw_vec1_grf(1, 1)))));
1405
1406 this->current_annotation = "compute pos.w and 1/pos.w";
1407 /* Compute wpos. Unlike many other varying inputs, we usually need it
1408 * to produce 1/w, and the varying variable wouldn't show up.
1409 */
1410 fs_reg wpos = fs_reg(this, glsl_type::vec4_type);
1411 this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos;
1412 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */
1413 wpos.reg_offset++;
1414 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */
1415 wpos.reg_offset++;
1416 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
1417 interp_reg(FRAG_ATTRIB_WPOS, 2)));
1418 wpos.reg_offset++;
1419 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
1420 interp_reg(FRAG_ATTRIB_WPOS, 3)));
1421 /* Compute the pixel W value from wpos.w. */
1422 this->pixel_w = fs_reg(this, glsl_type::float_type);
1423 emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));
1424
1425 foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
1426 ir_instruction *ir = (ir_instruction *)iter.get();
1427 ir_variable *var = ir->as_variable();
1428
1429 if (!var)
1430 continue;
1431
1432 if (var->mode != ir_var_in)
1433 continue;
1434
1435 /* If it's already set up (WPOS), skip. */
1436 if (var->location == 0)
1437 continue;
1438
1439 this->current_annotation = talloc_asprintf(this->mem_ctx,
1440 "interpolate %s "
1441 "(FRAG_ATTRIB[%d])",
1442 var->name,
1443 var->location);
1444 emit_pinterp(var->location);
1445 }
1446 this->current_annotation = NULL;
1447 }
1448
1449 void
1450 fs_visitor::emit_pinterp(int location)
1451 {
1452 fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
1453 this->interp_attrs[location] = interp_attr;
1454
1455 for (unsigned int i = 0; i < 4; i++) {
1456 struct brw_reg interp = interp_reg(location, i);
1457 emit(fs_inst(FS_OPCODE_LINTERP,
1458 interp_attr,
1459 this->delta_x,
1460 this->delta_y,
1461 fs_reg(interp)));
1462 interp_attr.reg_offset++;
1463 }
1464 interp_attr.reg_offset -= 4;
1465
1466 for (unsigned int i = 0; i < 4; i++) {
1467 emit(fs_inst(BRW_OPCODE_MUL,
1468 interp_attr,
1469 interp_attr,
1470 this->pixel_w));
1471 interp_attr.reg_offset++;
1472 }
1473 }
1474
void
fs_visitor::emit_fb_writes()
{
   /* Assemble the framebuffer-write message payload in the MRF and emit
    * one FB write per color region.  Payload layout (in order): 2-reg
    * header, optional AA/dest-stencil, 4 regs of color, optional source
    * depth, optional dest depth.
    */
   this->current_annotation = "FB write header";
   int nr = 0;

   /* m0, m1 header */
   nr += 2;

   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
	 /* Hand over gl_FragDepth. */
	 assert(this->frag_depth);
	 fs_reg depth = *(variable_storage(this->frag_depth));

	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
	 /* Pass through the payload depth. */
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   /* The shader's output color comes from either gl_FragColor or
    * gl_FragData[]; only one can be present.
    */
   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
						 "FB write target %d",
						 target);
      if (this->frag_color || this->frag_data) {
	 for (int i = 0; i < 4; i++) {
	    emit(fs_inst(BRW_OPCODE_MOV,
			 fs_reg(MRF, color_mrf + i),
			 color));
	    color.reg_offset++;
	 }
      }

      /* gl_FragColor broadcasts the same vec4 to every target, so rewind;
       * gl_FragData[] keeps advancing to the next array element.
       */
      if (this->frag_color)
	 color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      /* The final write carries end-of-thread so the shader retires. */
      if (target == c->key.nr_color_regions - 1)
	 inst->eot = true;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even with no color buffers bound, one message is still required to
       * end the thread (and deliver any depth output).
       */
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
   }

   this->current_annotation = NULL;
}
1551
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   /* Emit the native code for a framebuffer write whose payload was built
    * into the MRF by emit_fb_writes().
    */
   GLboolean eot = inst->eot;

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
	   brw_message_reg(1),
	   brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   /* Send inst->mlen message registers, starting at base MRF 0, to render
    * target inst->target.
    */
   brw_fb_WRITE(p,
		8, /* dispatch_width */
		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
		0, /* base MRF */
		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
		inst->target,
		inst->mlen,
		0,
		eot);
}
1578
1579 void
1580 fs_visitor::generate_linterp(fs_inst *inst,
1581 struct brw_reg dst, struct brw_reg *src)
1582 {
1583 struct brw_reg delta_x = src[0];
1584 struct brw_reg delta_y = src[1];
1585 struct brw_reg interp = src[2];
1586
1587 if (brw->has_pln &&
1588 delta_y.nr == delta_x.nr + 1 &&
1589 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1590 brw_PLN(p, dst, interp, delta_x);
1591 } else {
1592 brw_LINE(p, brw_null_reg(), interp, delta_x);
1593 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1594 }
1595 }
1596
1597 void
1598 fs_visitor::generate_math(fs_inst *inst,
1599 struct brw_reg dst, struct brw_reg *src)
1600 {
1601 int op;
1602
1603 switch (inst->opcode) {
1604 case FS_OPCODE_RCP:
1605 op = BRW_MATH_FUNCTION_INV;
1606 break;
1607 case FS_OPCODE_RSQ:
1608 op = BRW_MATH_FUNCTION_RSQ;
1609 break;
1610 case FS_OPCODE_SQRT:
1611 op = BRW_MATH_FUNCTION_SQRT;
1612 break;
1613 case FS_OPCODE_EXP2:
1614 op = BRW_MATH_FUNCTION_EXP;
1615 break;
1616 case FS_OPCODE_LOG2:
1617 op = BRW_MATH_FUNCTION_LOG;
1618 break;
1619 case FS_OPCODE_POW:
1620 op = BRW_MATH_FUNCTION_POW;
1621 break;
1622 case FS_OPCODE_SIN:
1623 op = BRW_MATH_FUNCTION_SIN;
1624 break;
1625 case FS_OPCODE_COS:
1626 op = BRW_MATH_FUNCTION_COS;
1627 break;
1628 default:
1629 assert(!"not reached: unknown math function");
1630 op = 0;
1631 break;
1632 }
1633
1634 if (inst->opcode == FS_OPCODE_POW) {
1635 brw_MOV(p, brw_message_reg(3), src[1]);
1636 }
1637
1638 brw_math(p, dst,
1639 op,
1640 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1641 BRW_MATH_SATURATE_NONE,
1642 2, src[0],
1643 BRW_MATH_DATA_VECTOR,
1644 BRW_MATH_PRECISION_FULL);
1645 }
1646
1647 void
1648 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1649 {
1650 int msg_type = -1;
1651 int rlen = 4;
1652
1653 if (intel->gen == 5) {
1654 switch (inst->opcode) {
1655 case FS_OPCODE_TEX:
1656 if (inst->shadow_compare) {
1657 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1658 } else {
1659 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1660 }
1661 break;
1662 case FS_OPCODE_TXB:
1663 if (inst->shadow_compare) {
1664 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1665 } else {
1666 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1667 }
1668 break;
1669 }
1670 } else {
1671 switch (inst->opcode) {
1672 case FS_OPCODE_TEX:
1673 /* Note that G45 and older determines shadow compare and dispatch width
1674 * from message length for most messages.
1675 */
1676 if (inst->shadow_compare) {
1677 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
1678 } else {
1679 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
1680 }
1681 case FS_OPCODE_TXB:
1682 if (inst->shadow_compare) {
1683 assert(!"FINISHME: shadow compare with bias.");
1684 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1685 } else {
1686 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1687 rlen = 8;
1688 }
1689 break;
1690 }
1691 }
1692 assert(msg_type != -1);
1693
1694 /* g0 header. */
1695 src.nr--;
1696
1697 brw_SAMPLE(p,
1698 retype(dst, BRW_REGISTER_TYPE_UW),
1699 src.nr,
1700 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1701 SURF_INDEX_TEXTURE(inst->sampler),
1702 inst->sampler,
1703 WRITEMASK_XYZW,
1704 msg_type,
1705 rlen,
1706 inst->mlen + 1,
1707 0,
1708 1,
1709 BRW_SAMPLER_SIMD_MODE_SIMD8);
1710 }
1711
1712
/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *           DDX                     DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But for DDY, it's harder, as we want to produce the pairs swizzled
 * between each other.  We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Second element of each 2-element group (the right pixel), broadcast
    * across the pair: subreg 1, width 2, hstride 0, vstride 2.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   /* First element of each pair (the left pixel), same replication. */
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   /* dst = right - left, per the DDX table above. */
   brw_ADD(p, dst, src0, negate(src1));
}
1756
void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Top row of each subspan (elements 0-1), broadcast over all four
    * channels: subreg 0, width 4, hstride 0, vstride 4.  See the comment
    * above generate_ddx() -- this produces the simplified
    * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) form.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   /* Bottom row of each subspan (subreg offset 2). */
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   /* dst = top - bottom. */
   brw_ADD(p, dst, src0, negate(src1));
}
1774
void
fs_visitor::generate_discard(fs_inst *inst)
{
   /* Kill the discarded pixels by clearing their bits in g0's pixel
    * dispatch mask: AND it with NOT(IMASK), the negated current execution
    * mask, so later FB writes leave those pixels untouched.
    * NOTE(review): assumes c->emit_mask_reg was reserved by the WM compile
    * setup -- confirm against brw_wm.c.
    */
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, c->emit_mask_reg, g0);
   brw_pop_insn_state(p);
}
1785
1786 static void
1787 trivial_assign_reg(int header_size, fs_reg *reg)
1788 {
1789 if (reg->file == GRF && reg->reg != 0) {
1790 reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
1791 reg->reg = 0;
1792 }
1793 }
1794
1795 void
1796 fs_visitor::assign_curb_setup()
1797 {
1798 c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
1799 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
1800
1801 if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
1802 c->prog_data.curb_read_length) & 1) {
1803 /* Align the start of the interpolation coefficients so that we can use
1804 * the PLN instruction.
1805 */
1806 c->prog_data.first_curbe_grf++;
1807 }
1808
1809 /* Map the offsets in the UNIFORM file to fixed HW regs. */
1810 foreach_iter(exec_list_iterator, iter, this->instructions) {
1811 fs_inst *inst = (fs_inst *)iter.get();
1812
1813 for (unsigned int i = 0; i < 3; i++) {
1814 if (inst->src[i].file == UNIFORM) {
1815 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
1816 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
1817 constant_nr / 8,
1818 constant_nr % 8);
1819
1820 inst->src[i].file = FIXED_HW_REG;
1821 inst->src[i].fixed_hw_reg = brw_reg;
1822 }
1823 }
1824 }
1825 }
1826
void
fs_visitor::assign_urb_setup()
{
   /* Lay out the per-attribute setup data delivered from the URB (after
    * the payload and CURB registers), then rewrite FS_OPCODE_LINTERP's
    * coefficient source to point at the assigned register.
    */
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      /* WPOS setup is always read (emit_interpolation() needs it for W);
       * other attributes only when the program reads them.
       */
      if (i != FRAG_ATTRIB_WPOS &&
	  !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
	 continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
	 continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* interp_reg() encoded attribute as nr / 2, with the low bit picking
       * the first or second register of the attribute's pair; rebase that
       * onto the slot assigned above.
       */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
				      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}
1867
1868 void
1869 fs_visitor::assign_regs()
1870 {
1871 int header_size = this->first_non_payload_grf;
1872 int last_grf = 0;
1873
1874 /* FINISHME: trivial assignment of register numbers */
1875 foreach_iter(exec_list_iterator, iter, this->instructions) {
1876 fs_inst *inst = (fs_inst *)iter.get();
1877
1878 trivial_assign_reg(header_size, &inst->dst);
1879 trivial_assign_reg(header_size, &inst->src[0]);
1880 trivial_assign_reg(header_size, &inst->src[1]);
1881
1882 last_grf = MAX2(last_grf, inst->dst.hw_reg);
1883 last_grf = MAX2(last_grf, inst->src[0].hw_reg);
1884 last_grf = MAX2(last_grf, inst->src[1].hw_reg);
1885 }
1886
1887 this->grf_used = last_grf + 1;
1888 }
1889
1890 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
1891 {
1892 struct brw_reg brw_reg;
1893
1894 switch (reg->file) {
1895 case GRF:
1896 case ARF:
1897 case MRF:
1898 brw_reg = brw_vec8_reg(reg->file,
1899 reg->hw_reg, 0);
1900 brw_reg = retype(brw_reg, reg->type);
1901 break;
1902 case IMM:
1903 switch (reg->type) {
1904 case BRW_REGISTER_TYPE_F:
1905 brw_reg = brw_imm_f(reg->imm.f);
1906 break;
1907 case BRW_REGISTER_TYPE_D:
1908 brw_reg = brw_imm_d(reg->imm.i);
1909 break;
1910 case BRW_REGISTER_TYPE_UD:
1911 brw_reg = brw_imm_ud(reg->imm.u);
1912 break;
1913 default:
1914 assert(!"not reached");
1915 break;
1916 }
1917 break;
1918 case FIXED_HW_REG:
1919 brw_reg = reg->fixed_hw_reg;
1920 break;
1921 case BAD_FILE:
1922 /* Probably unused. */
1923 brw_reg = brw_null_reg();
1924 break;
1925 case UNIFORM:
1926 assert(!"not reached");
1927 brw_reg = brw_null_reg();
1928 break;
1929 }
1930 if (reg->abs)
1931 brw_reg = brw_abs(brw_reg);
1932 if (reg->negate)
1933 brw_reg = negate(brw_reg);
1934
1935 return brw_reg;
1936 }
1937
void
fs_visitor::generate_code()
{
   /* Walk the FS IR instruction list and emit native gen instructions via
    * the brw_eu assembler, tracking flow-control nesting so IF/ELSE/ENDIF
    * and DO/BREAK/CONT/WHILE can be patched, and recording per-native-
    * instruction annotations for debug disassembly.
    */
   unsigned int annotation_len = 0;
   int last_native_inst = 0;
   struct brw_instruction *if_stack[16], *loop_stack[16];
   int if_stack_depth = 0, loop_stack_depth = 0;
   /* Number of IFs opened inside the current loop level; BREAK/CONT need
    * this to know how many flow-control levels they pop.
    */
   int if_depth_in_loop[16];

   if_depth_in_loop[loop_stack_depth] = 0;

   memset(&if_stack, 0, sizeof(if_stack));
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();
      struct brw_reg src[3], dst;

      for (unsigned int i = 0; i < 3; i++) {
	 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
      }
      dst = brw_reg_from_fs_reg(&inst->dst);

      brw_set_conditionalmod(p, inst->conditional_mod);
      brw_set_predicate_control(p, inst->predicated);

      switch (inst->opcode) {
      case BRW_OPCODE_MOV:
	 brw_MOV(p, dst, src[0]);
	 break;
      case BRW_OPCODE_ADD:
	 brw_ADD(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_MUL:
	 brw_MUL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_FRC:
	 brw_FRC(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDD:
	 brw_RNDD(p, dst, src[0]);
	 break;
      case BRW_OPCODE_RNDZ:
	 brw_RNDZ(p, dst, src[0]);
	 break;

      case BRW_OPCODE_AND:
	 brw_AND(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_OR:
	 brw_OR(p, dst, src[0], src[1]);
	 break;
      case BRW_OPCODE_XOR:
	 brw_XOR(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_CMP:
	 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
	 break;
      case BRW_OPCODE_SEL:
	 brw_SEL(p, dst, src[0], src[1]);
	 break;

      case BRW_OPCODE_IF:
	 /* Remember the IF so ELSE/ENDIF can patch its jump target. */
	 assert(if_stack_depth < 16);
	 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
	 if_depth_in_loop[loop_stack_depth]++;
	 if_stack_depth++;
	 break;
      case BRW_OPCODE_ELSE:
	 /* brw_ELSE patches the IF and returns the ELSE for ENDIF. */
	 if_stack[if_stack_depth - 1] =
	    brw_ELSE(p, if_stack[if_stack_depth - 1]);
	 break;
      case BRW_OPCODE_ENDIF:
	 if_stack_depth--;
	 brw_ENDIF(p , if_stack[if_stack_depth]);
	 if_depth_in_loop[loop_stack_depth]--;
	 break;

      case BRW_OPCODE_DO:
	 /* Open a loop; the WHILE case below pops and patches it. */
	 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
	 if_depth_in_loop[loop_stack_depth] = 0;
	 break;

      case BRW_OPCODE_BREAK:
	 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;
      case BRW_OPCODE_CONTINUE:
	 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
	 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
	 break;

      case BRW_OPCODE_WHILE: {
	 struct brw_instruction *inst0, *inst1;
	 GLuint br = 1;

	 /* Jump distances are in units of 64 bits on Ironlake. */
	 if (intel->gen == 5)
	    br = 2;

	 assert(loop_stack_depth > 0);
	 loop_stack_depth--;
	 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
	 /* patch all the BREAK/CONT instructions from last BGNLOOP */
	 while (inst0 > loop_stack[loop_stack_depth]) {
	    inst0--;
	    if (inst0->header.opcode == BRW_OPCODE_BREAK &&
		inst0->bits3.if_else.jump_count == 0) {
	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
	    }
	    else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
		     inst0->bits3.if_else.jump_count == 0) {
	       inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
	    }
	 }
      }
	 break;

      case FS_OPCODE_RCP:
      case FS_OPCODE_RSQ:
      case FS_OPCODE_SQRT:
      case FS_OPCODE_EXP2:
      case FS_OPCODE_LOG2:
      case FS_OPCODE_POW:
      case FS_OPCODE_SIN:
      case FS_OPCODE_COS:
	 generate_math(inst, dst, src);
	 break;
      case FS_OPCODE_LINTERP:
	 generate_linterp(inst, dst, src);
	 break;
      case FS_OPCODE_TEX:
      case FS_OPCODE_TXB:
      case FS_OPCODE_TXL:
	 generate_tex(inst, dst, src[0]);
	 break;
      case FS_OPCODE_DISCARD:
	 generate_discard(inst);
	 break;
      case FS_OPCODE_DDX:
	 generate_ddx(inst, dst, src[0]);
	 break;
      case FS_OPCODE_DDY:
	 generate_ddy(inst, dst, src[0]);
	 break;
      case FS_OPCODE_FB_WRITE:
	 generate_fb_write(inst);
	 break;
      default:
	 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
	    _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
			  brw_opcodes[inst->opcode].name);
	 } else {
	    _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
	 }
	 this->fail = true;
      }

      /* Grow the annotation arrays (doubling) to cover every native
       * instruction emitted so far.
       */
      if (annotation_len < p->nr_insn) {
	 annotation_len *= 2;
	 if (annotation_len < 16)
	    annotation_len = 16;

	 this->annotation_string = talloc_realloc(this->mem_ctx,
						  annotation_string,
						  const char *,
						  annotation_len);
	 this->annotation_ir = talloc_realloc(this->mem_ctx,
					      annotation_ir,
					      ir_instruction *,
					      annotation_len);
      }

      /* Tag every native instruction this fs_inst produced with its
       * annotation and source IR, for debug disassembly.
       */
      for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
	 this->annotation_string[i] = inst->annotation;
	 this->annotation_ir[i] = inst->ir;
      }
      last_native_inst = p->nr_insn;
   }
}
2117
/* Entry point for the new GLSL IR fragment shader backend: compiles the
 * currently bound fragment shader through fs_visitor.  Returns GL_FALSE to
 * fall back to the old brw_wm path when there's no GLSL shader or the new
 * backend is disabled.
 */
GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   struct brw_shader *shader = NULL;
   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;

   if (!prog)
      return GL_FALSE;

   if (!using_new_fs)
      return GL_FALSE;

   /* Find the fragment shader stage of the linked program. */
   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
	 break;
      }
   }
   if (!shader)
      return GL_FALSE;

   /* We always use 8-wide mode, at least for now. For one, flow
    * control only works in 8-wide. Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      /* Flip to 1 for a bringup shader that ignores the app's shader. */
      v.emit_dummy_fs();
   } else {
      v.emit_interpolation();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 v.base_ir = ir;
	 ir->accept(&v);
      }

      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();
      v.assign_regs();
   }

   v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   if (INTEL_DEBUG & DEBUG_WM) {
      /* Disassemble the generated code, printing each annotation once as
       * it changes.
       */
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
	 if (last_annotation_ir != v.annotation_ir[i]) {
	    last_annotation_ir = v.annotation_ir[i];
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != v.annotation_string[i]) {
	    last_annotation_string = v.annotation_string[i];
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
	 brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}