i965: Add support for MRT to the new FS backend.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 extern "C" {
29
30 #include <sys/types.h>
31
32 #include "main/macros.h"
33 #include "main/shaderobj.h"
34 #include "program/prog_parameter.h"
35 #include "program/prog_print.h"
36 #include "program/prog_optimize.h"
37 #include "program/sampler.h"
38 #include "program/hash_table.h"
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "brw_wm.h"
42 #include "talloc.h"
43 }
44 #include "../glsl/glsl_types.h"
45 #include "../glsl/ir_optimization.h"
46 #include "../glsl/ir_print_visitor.h"
47
/**
 * Register files for fs_reg.  The first four map directly onto the hardware
 * register file encodings; the remainder are virtual files resolved later.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg (fs_reg::fixed_hw_reg holds it) */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE /* default for an unset fs_reg; see the fs_reg() constructor */
};
57
/**
 * Virtual opcodes specific to the FS backend.  Numbering starts at 256 to
 * stay clear of the hardware BRW_OPCODE_* values; presumably these are
 * lowered to real instructions in generate_code() and the generate_*()
 * helpers declared on fs_visitor.
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};
76
/* -1 means "not yet checked"; brw_link_shader() sets it from the
 * INTEL_NEW_FS environment variable on first use.
 */
static int using_new_fs = -1;
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
79
80 struct gl_shader *
81 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
82 {
83 struct brw_shader *shader;
84
85 shader = talloc_zero(NULL, struct brw_shader);
86 if (shader) {
87 shader->base.Type = type;
88 shader->base.Name = name;
89 _mesa_init_shader(ctx, &shader->base);
90 }
91
92 return &shader->base;
93 }
94
95 struct gl_shader_program *
96 brw_new_shader_program(GLcontext *ctx, GLuint name)
97 {
98 struct brw_shader_program *prog;
99 prog = talloc_zero(NULL, struct brw_shader_program);
100 if (prog) {
101 prog->base.Name = name;
102 _mesa_init_shader_program(ctx, &prog->base);
103 }
104 return &prog->base;
105 }
106
107 GLboolean
108 brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
109 {
110 if (!_mesa_ir_compile_shader(ctx, shader))
111 return GL_FALSE;
112
113 return GL_TRUE;
114 }
115
/**
 * Link a shader program, running the FS backend's lowering passes on each
 * linked fragment shader when the new backend is enabled.
 *
 * The lowered IR is kept in shader->ir (a clone of the linked IR), leaving
 * shader->base.ir untouched for the fallback path through
 * _mesa_ir_link_shader() below.
 */
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   /* Lazily read INTEL_NEW_FS to decide whether to use the new backend. */
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
	 /* Temporary context for the clone; everything still live is
	  * reparented onto shader->ir before this is freed below.
	  */
	 void *mem_ctx = talloc_new(NULL);
	 bool progress;

	 /* Throw away any lowered IR from a previous link and start from a
	  * fresh clone of the linked IR.
	  */
	 if (shader->ir)
	    talloc_free(shader->ir);
	 shader->ir = new(shader) exec_list;
	 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

	 /* One-shot lowering passes: break matrices, mod, div, sub, and
	  * exp/log down to operations the backend emits directly.
	  */
	 do_mat_op_to_vec(shader->ir);
	 do_mod_to_fract(shader->ir);
	 do_div_to_mul_rcp(shader->ir);
	 do_sub_to_add_neg(shader->ir);
	 do_explog_to_explog2(shader->ir);

	 /* Iterate the remaining passes to a fixed point, since each can
	  * expose new opportunities for the others.
	  */
	 do {
	    progress = false;

	    brw_do_channel_expressions(shader->ir);
	    brw_do_vector_splitting(shader->ir);

	    progress = do_lower_jumps(shader->ir, true, true,
				      true, /* main return */
				      false, /* continue */
				      false /* loops */
				      ) || progress;

	    progress = do_common_optimization(shader->ir, true, 32) || progress;

	    progress = lower_noise(shader->ir) || progress;
	    progress =
	       lower_variable_index_to_cond_assign(shader->ir,
						   GL_TRUE, /* input */
						   GL_TRUE, /* output */
						   GL_TRUE, /* temp */
						   GL_TRUE /* uniform */
						   ) || progress;
	 } while (progress);

	 validate_ir_tree(shader->ir);

	 /* Move surviving IR out of the temporary context so freeing
	  * mem_ctx doesn't take it down too.
	  */
	 reparent_ir(shader->ir, shader->ir);
	 talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}
176
177 static int
178 type_size(const struct glsl_type *type)
179 {
180 unsigned int size, i;
181
182 switch (type->base_type) {
183 case GLSL_TYPE_UINT:
184 case GLSL_TYPE_INT:
185 case GLSL_TYPE_FLOAT:
186 case GLSL_TYPE_BOOL:
187 return type->components();
188 case GLSL_TYPE_ARRAY:
189 /* FINISHME: uniform/varying arrays. */
190 return type_size(type->fields.array) * type->length;
191 case GLSL_TYPE_STRUCT:
192 size = 0;
193 for (i = 0; i < type->length; i++) {
194 size += type_size(type->fields.structure[i].type);
195 }
196 return size;
197 case GLSL_TYPE_SAMPLER:
198 /* Samplers take up no register space, since they're baked in at
199 * link time.
200 */
201 return 0;
202 default:
203 assert(!"not reached");
204 return 0;
205 }
206 }
207
/**
 * An operand (source or destination) of an fs_inst: an abstract or fixed
 * register, an immediate, or a uniform slot.
 */
class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors).
    *
    * NOTE(review): talloc_size() does NOT zero the allocation, so any
    * member a constructor fails to set is uninitialized (contrast with
    * fs_inst, which uses talloc_zero_size).
    */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /** Shared field reset.  Note that neither 'file' nor 'type' is set
    * here; each constructor is responsible for those.
    */
   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor (leaves 'type' unset). */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor (float). */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor (signed int). */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor (unsigned int). */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg Immediate value constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** Abstract register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the abstract register, in type-sized slots. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   /* Source modifiers applied when the operand is read. */
   bool negate;
   bool abs;
   /* Underlying hardware register when file == FIXED_HW_REG. */
   struct brw_reg fixed_hw_reg;

   /** Value for file == BRW_IMMMEDIATE_FILE */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};
298
/** An unset register (file == BAD_FILE; see the default fs_reg ctor). */
static const fs_reg reg_undef;
/** The hardware null register, used when only an instruction's
 * condition-code side effect is wanted (e.g. CMP to reg_null below).
 */
static const fs_reg reg_null(ARF, BRW_ARF_NULL);
301
/**
 * A single FS backend instruction: a hardware opcode or FS_OPCODE_*
 * virtual opcode with up to three sources and one destination.
 */
class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors).
    *
    * talloc_zero_size() zeroes the allocation, so members init() doesn't
    * touch (dst, src[], mlen, ir, annotation) start out 0/NULL.
    */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /** Common field setup shared by all constructors. */
   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};
386
/**
 * IR-tree visitor that translates GLSL IR into the fs_inst instruction
 * list, then assigns registers and emits native code (generate_code).
 */
class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      /* Abstract GRF numbering starts at 1; see fs_reg, where reg == 0
       * means "fixed hw reg".
       */
      this->next_abstract_grf = 1;
      this->variable_ht = hash_table_ctor(0,
					  hash_table_pointer_hash,
					  hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   /** Look up the fs_reg storage for a variable, or NULL if unset. */
   fs_reg *variable_storage(ir_variable *var);

   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void assign_urb_setup();
   void assign_regs();
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
			 struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_interpolation();
   void emit_pinterp(int location);
   void emit_fb_writes();

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;
   int next_abstract_grf;
   /* Maps ir_variable * -> fs_reg * (see variable_storage()). */
   struct hash_table *variable_ht;
   /* The special fragment-output variables, noted in visit(ir_variable). */
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;
   fs_reg interp_attrs[64];

   int grf_used;

};
501
502 /** Fixed HW reg constructor. */
503 fs_reg::fs_reg(enum register_file file, int hw_reg)
504 {
505 init();
506 this->file = file;
507 this->hw_reg = hw_reg;
508 this->type = BRW_REGISTER_TYPE_F;
509 }
510
511 int
512 brw_type_for_base_type(const struct glsl_type *type)
513 {
514 switch (type->base_type) {
515 case GLSL_TYPE_FLOAT:
516 return BRW_REGISTER_TYPE_F;
517 case GLSL_TYPE_INT:
518 case GLSL_TYPE_BOOL:
519 return BRW_REGISTER_TYPE_D;
520 case GLSL_TYPE_UINT:
521 return BRW_REGISTER_TYPE_UD;
522 case GLSL_TYPE_ARRAY:
523 case GLSL_TYPE_STRUCT:
524 /* These should be overridden with the type of the member when
525 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
526 * way to trip up if we don't.
527 */
528 return BRW_REGISTER_TYPE_UD;
529 default:
530 assert(!"not reached");
531 return BRW_REGISTER_TYPE_F;
532 }
533 }
534
535 /** Automatic reg constructor. */
536 fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
537 {
538 init();
539
540 this->file = GRF;
541 this->reg = v->next_abstract_grf;
542 this->reg_offset = 0;
543 v->next_abstract_grf += type_size(type);
544 this->type = brw_type_for_base_type(type);
545 }
546
547 fs_reg *
548 fs_visitor::variable_storage(ir_variable *var)
549 {
550 return (fs_reg *)hash_table_find(this->variable_ht, var);
551 }
552
553 /* Our support for uniforms is piggy-backed on the struct
554 * gl_fragment_program, because that's where the values actually
555 * get stored, rather than in some global gl_shader_program uniform
556 * store.
557 */
/**
 * Recursively register the float values backing a uniform of the given
 * type, starting at parameter location \p loc, into c->prog_data.param[].
 *
 * Returns the number of parameter locations consumed, so callers can walk
 * composite types (matrices are handled column by column).
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      /* A matrix is N column vectors at consecutive locations. */
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
							type->vector_elements,
							1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
	 offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* Point prog_data.param at the live parameter storage so updated
       * uniform values are picked up without re-setup.
       */
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
	 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset,
					type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
609
/**
 * Set up storage for a declared variable: note the special fragment
 * outputs, wire inputs to their interpolated attributes (or compute
 * gl_FrontFacing), register uniform values, and allocate a GRF otherwise.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   /* Remember the built-in output variables for emit_fb_writes(). */
   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (strcmp(ir->name, "gl_FrontFacing") == 0) {
	 reg = new(this->mem_ctx) fs_reg(this, ir->type);
	 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
	 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
	  * us front face
	  */
	 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
				      *reg,
				      fs_reg(r1_6ud),
				      fs_reg(1u << 31)));
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 /* Mask the CMP result down to a clean 0/1 boolean. */
	 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
	 reg = &this->interp_attrs[ir->location];
      }
   }

   if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      setup_uniform_values(ir->location, ir->type);

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   /* NOTE(review): argument order is (ht, data, key) in Mesa's
    * hash_table_insert — reg is the data keyed by the ir_variable, matching
    * the hash_table_find(ht, var) lookup in variable_storage().  Verify
    * against program/hash_table.h if this ever changes.
    */
   hash_table_insert(this->variable_ht, reg, ir);
}
654
655 void
656 fs_visitor::visit(ir_dereference_variable *ir)
657 {
658 fs_reg *reg = variable_storage(ir->var);
659 this->result = *reg;
660 }
661
662 void
663 fs_visitor::visit(ir_dereference_record *ir)
664 {
665 const glsl_type *struct_type = ir->record->type;
666
667 ir->record->accept(this);
668
669 unsigned int offset = 0;
670 for (unsigned int i = 0; i < struct_type->length; i++) {
671 if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
672 break;
673 offset += type_size(struct_type->fields.structure[i].type);
674 }
675 this->result.reg_offset += offset;
676 this->result.type = brw_type_for_base_type(ir->type);
677 }
678
679 void
680 fs_visitor::visit(ir_dereference_array *ir)
681 {
682 ir_constant *index;
683 int element_size;
684
685 ir->array->accept(this);
686 index = ir->array_index->as_constant();
687
688 if (ir->type->is_matrix()) {
689 element_size = ir->type->vector_elements;
690 } else {
691 element_size = type_size(ir->type);
692 this->result.type = brw_type_for_base_type(ir->type);
693 }
694
695 if (index) {
696 assert(this->result.file == UNIFORM ||
697 (this->result.file == GRF &&
698 this->result.reg != 0));
699 this->result.reg_offset += index->value.i[0] * element_size;
700 } else {
701 assert(!"FINISHME: non-constant matrix column");
702 }
703 }
704
705 void
706 fs_visitor::visit(ir_expression *ir)
707 {
708 unsigned int operand;
709 fs_reg op[2], temp;
710 fs_reg result;
711 fs_inst *inst;
712
713 for (operand = 0; operand < ir->get_num_operands(); operand++) {
714 ir->operands[operand]->accept(this);
715 if (this->result.file == BAD_FILE) {
716 ir_print_visitor v;
717 printf("Failed to get tree for expression operand:\n");
718 ir->operands[operand]->accept(&v);
719 this->fail = true;
720 }
721 op[operand] = this->result;
722
723 /* Matrix expression operands should have been broken down to vector
724 * operations already.
725 */
726 assert(!ir->operands[operand]->type->is_matrix());
727 /* And then those vector operands should have been broken down to scalar.
728 */
729 assert(!ir->operands[operand]->type->is_vector());
730 }
731
732 /* Storage for our result. If our result goes into an assignment, it will
733 * just get copy-propagated out, so no worries.
734 */
735 this->result = fs_reg(this, ir->type);
736
737 switch (ir->operation) {
738 case ir_unop_logic_not:
739 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
740 break;
741 case ir_unop_neg:
742 op[0].negate = !op[0].negate;
743 this->result = op[0];
744 break;
745 case ir_unop_abs:
746 op[0].abs = true;
747 this->result = op[0];
748 break;
749 case ir_unop_sign:
750 temp = fs_reg(this, ir->type);
751
752 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
753
754 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
755 inst->conditional_mod = BRW_CONDITIONAL_G;
756 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
757 inst->predicated = true;
758
759 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
760 inst->conditional_mod = BRW_CONDITIONAL_L;
761 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
762 inst->predicated = true;
763
764 break;
765 case ir_unop_rcp:
766 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
767 break;
768
769 case ir_unop_exp2:
770 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
771 break;
772 case ir_unop_log2:
773 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
774 break;
775 case ir_unop_exp:
776 case ir_unop_log:
777 assert(!"not reached: should be handled by ir_explog_to_explog2");
778 break;
779 case ir_unop_sin:
780 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
781 break;
782 case ir_unop_cos:
783 emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
784 break;
785
786 case ir_unop_dFdx:
787 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
788 break;
789 case ir_unop_dFdy:
790 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
791 break;
792
793 case ir_binop_add:
794 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
795 break;
796 case ir_binop_sub:
797 assert(!"not reached: should be handled by ir_sub_to_add_neg");
798 break;
799
800 case ir_binop_mul:
801 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
802 break;
803 case ir_binop_div:
804 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
805 break;
806 case ir_binop_mod:
807 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
808 break;
809
810 case ir_binop_less:
811 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
812 inst->conditional_mod = BRW_CONDITIONAL_L;
813 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
814 break;
815 case ir_binop_greater:
816 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
817 inst->conditional_mod = BRW_CONDITIONAL_G;
818 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
819 break;
820 case ir_binop_lequal:
821 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
822 inst->conditional_mod = BRW_CONDITIONAL_LE;
823 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
824 break;
825 case ir_binop_gequal:
826 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
827 inst->conditional_mod = BRW_CONDITIONAL_GE;
828 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
829 break;
830 case ir_binop_equal:
831 case ir_binop_all_equal: /* same as nequal for scalars */
832 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
833 inst->conditional_mod = BRW_CONDITIONAL_Z;
834 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
835 break;
836 case ir_binop_nequal:
837 case ir_binop_any_nequal: /* same as nequal for scalars */
838 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
839 inst->conditional_mod = BRW_CONDITIONAL_NZ;
840 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
841 break;
842
843 case ir_binop_logic_xor:
844 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
845 break;
846
847 case ir_binop_logic_or:
848 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
849 break;
850
851 case ir_binop_logic_and:
852 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
853 break;
854
855 case ir_binop_dot:
856 case ir_binop_cross:
857 case ir_unop_any:
858 assert(!"not reached: should be handled by brw_fs_channel_expressions");
859 break;
860
861 case ir_unop_noise:
862 assert(!"not reached: should be handled by lower_noise");
863 break;
864
865 case ir_unop_sqrt:
866 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
867 break;
868
869 case ir_unop_rsq:
870 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
871 break;
872
873 case ir_unop_i2f:
874 case ir_unop_b2f:
875 case ir_unop_b2i:
876 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
877 break;
878 case ir_unop_f2i:
879 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
880 break;
881 case ir_unop_f2b:
882 case ir_unop_i2b:
883 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
884 inst->conditional_mod = BRW_CONDITIONAL_NZ;
885
886 case ir_unop_trunc:
887 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
888 break;
889 case ir_unop_ceil:
890 op[0].negate = ~op[0].negate;
891 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
892 this->result.negate = true;
893 break;
894 case ir_unop_floor:
895 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
896 break;
897 case ir_unop_fract:
898 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
899 break;
900
901 case ir_binop_min:
902 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
903 inst->conditional_mod = BRW_CONDITIONAL_L;
904
905 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
906 inst->predicated = true;
907 break;
908 case ir_binop_max:
909 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
910 inst->conditional_mod = BRW_CONDITIONAL_G;
911
912 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
913 inst->predicated = true;
914 break;
915
916 case ir_binop_pow:
917 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
918 break;
919
920 case ir_unop_bit_not:
921 case ir_unop_u2f:
922 case ir_binop_lshift:
923 case ir_binop_rshift:
924 case ir_binop_bit_and:
925 case ir_binop_bit_xor:
926 case ir_binop_bit_or:
927 assert(!"GLSL 1.30 features unsupported");
928 break;
929 }
930 }
931
/**
 * Emit MOVs (optionally predicated on ir->condition) copying the RHS into
 * the LHS storage, honoring the write mask for vectors/scalars.
 */
void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   int i;
   int write_mask;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).  This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      write_mask = WRITEMASK_XYZW;
   } else {
      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
      write_mask = ir->write_mask;
   }

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   /* Walk the destination one slot at a time.  Slots past the first four
    * (matrices/arrays/structs) are always written; the first four obey the
    * write mask.  Note l advances every slot but r only when a MOV is
    * emitted, so masked-out channels don't consume RHS slots.
    */
   for (i = 0; i < type_size(ir->lhs->type); i++) {
      if (i >= 4 || (write_mask & (1 << i))) {
	 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 if (ir->condition)
	    inst->predicated = true;
	 r.reg_offset++;
      }
      l.reg_offset++;
   }
}
979
/**
 * Emit a texture sample: build the sampler SEND payload in MRFs starting
 * at base_mrf (coordinates, then optional shadow comparitor, then
 * bias/lod), emit the FS_OPCODE_TEX/TXB/TXL, and resolve the sampler unit.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   int base_mrf = 2;
   fs_inst *inst = NULL;
   unsigned int mlen = 0;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   if (ir->projector) {
      /* Lower projective texturing: multiply each coordinate component by
       * 1/q rather than using a hardware projective message.
       */
      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);

      ir->projector->accept(this);
      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));

      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
	 coordinate.reg_offset++;
	 proj_coordinate.reg_offset++;
      }
      proj_coordinate.reg_offset = 0;

      coordinate = proj_coordinate;
   }

   /* Payload starts with the coordinate components. */
   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5)
      mlen = 3;

   if (ir->shadow_comparitor) {
      /* For shadow comparisons, we have to supply u,v,r. */
      mlen = 3;

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   /* Do we ever want to handle writemasking on texture samples?  Is it
    * performance relevant?
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      /* LOD bias goes in the slot after the coordinates/comparitor. */
      ir->lod_info.bias->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   /* Map the GLSL sampler uniform through to the bound texture unit. */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
				      ctx->Shader.CurrentProgram,
				      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;
   inst->mlen = mlen;
}
1066
1067 void
1068 fs_visitor::visit(ir_swizzle *ir)
1069 {
1070 ir->val->accept(this);
1071 fs_reg val = this->result;
1072
1073 fs_reg result = fs_reg(this, ir->type);
1074 this->result = result;
1075
1076 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1077 fs_reg channel = val;
1078 int swiz = 0;
1079
1080 switch (i) {
1081 case 0:
1082 swiz = ir->mask.x;
1083 break;
1084 case 1:
1085 swiz = ir->mask.y;
1086 break;
1087 case 2:
1088 swiz = ir->mask.z;
1089 break;
1090 case 3:
1091 swiz = ir->mask.w;
1092 break;
1093 }
1094
1095 channel.reg_offset += swiz;
1096 emit(fs_inst(BRW_OPCODE_MOV, result, channel));
1097 result.reg_offset++;
1098 }
1099 }
1100
1101 void
1102 fs_visitor::visit(ir_discard *ir)
1103 {
1104 assert(ir->condition == NULL); /* FINISHME */
1105
1106 emit(fs_inst(FS_OPCODE_DISCARD));
1107 }
1108
1109 void
1110 fs_visitor::visit(ir_constant *ir)
1111 {
1112 fs_reg reg(this, ir->type);
1113 this->result = reg;
1114
1115 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
1116 switch (ir->type->base_type) {
1117 case GLSL_TYPE_FLOAT:
1118 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
1119 break;
1120 case GLSL_TYPE_UINT:
1121 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
1122 break;
1123 case GLSL_TYPE_INT:
1124 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
1125 break;
1126 case GLSL_TYPE_BOOL:
1127 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
1128 break;
1129 default:
1130 assert(!"Non-float/uint/int/bool constant");
1131 }
1132 reg.reg_offset++;
1133 }
1134 }
1135
/**
 * Emit a predicated IF/ELSE/ENDIF block for an if statement: evaluate the
 * condition into the flag register, then visit the then/else bodies.
 */
void
fs_visitor::visit(ir_if *ir)
{
   fs_inst *inst;

   /* Don't point the annotation at the if statement, because then it plus
    * the then and else blocks get printed.
    */
   this->base_ir = ir->condition;

   /* Generate the condition into the condition code. */
   ir->condition->accept(this);
   inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;

   inst = emit(fs_inst(BRW_OPCODE_IF));
   inst->predicated = true;

   foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      this->base_ir = ir;

      ir->accept(this);
   }

   if (!ir->else_instructions.is_empty()) {
      emit(fs_inst(BRW_OPCODE_ELSE));

      foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 this->base_ir = ir;

	 ir->accept(this);
      }
   }

   emit(fs_inst(BRW_OPCODE_ENDIF));
}
1174
void
fs_visitor::visit(ir_loop *ir)
{
   /* Bounded loop controls (from/to/increment/counter) should have been
    * lowered to a plain body loop before code generation.
    */
   assert(!ir->from);
   assert(!ir->to);
   assert(!ir->increment);
   assert(!ir->counter);

   emit(fs_inst(BRW_OPCODE_DO));

   /* Start a safety counter. If the user messed up their loop
    * counting, we don't want to hang the GPU.
    */
   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      fs_inst *inst;

      this->base_ir = ir;
      ir->accept(this);

      /* Check the maximum loop iters counter: decrement and emit a
       * predicated BREAK.  Note this is emitted after each top-level body
       * instruction, so the counter trips earlier than 10000 iterations.
       */
      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
      inst->conditional_mod = BRW_CONDITIONAL_Z;

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}
1208
1209 void
1210 fs_visitor::visit(ir_loop_jump *ir)
1211 {
1212 switch (ir->mode) {
1213 case ir_loop_jump::jump_break:
1214 emit(fs_inst(BRW_OPCODE_BREAK));
1215 break;
1216 case ir_loop_jump::jump_continue:
1217 emit(fs_inst(BRW_OPCODE_CONTINUE));
1218 break;
1219 }
1220 }
1221
void
fs_visitor::visit(ir_call *ir)
{
   /* Function calls should all have been inlined before code generation. */
   assert(!"FINISHME");
}
1227
void
fs_visitor::visit(ir_return *ir)
{
   /* Early returns from main() are not supported yet. */
   assert(!"FINISHME");
}
1233
1234 void
1235 fs_visitor::visit(ir_function *ir)
1236 {
1237 /* Ignore function bodies other than main() -- we shouldn't see calls to
1238 * them since they should all be inlined before we get to ir_to_mesa.
1239 */
1240 if (strcmp(ir->name, "main") == 0) {
1241 const ir_function_signature *sig;
1242 exec_list empty;
1243
1244 sig = ir->matching_signature(&empty);
1245
1246 assert(sig);
1247
1248 foreach_iter(exec_list_iterator, iter, sig->body) {
1249 ir_instruction *ir = (ir_instruction *)iter.get();
1250 this->base_ir = ir;
1251
1252 ir->accept(this);
1253 }
1254 }
1255 }
1256
void
fs_visitor::visit(ir_function_signature *ir)
{
   /* Signatures are walked via visit(ir_function); this should never be
    * dispatched to directly.
    */
   assert(!"not reached");
   (void)ir;
}
1263
1264 fs_inst *
1265 fs_visitor::emit(fs_inst inst)
1266 {
1267 fs_inst *list_inst = new(mem_ctx) fs_inst;
1268 *list_inst = inst;
1269
1270 list_inst->annotation = this->current_annotation;
1271 list_inst->ir = this->base_ir;
1272
1273 this->instructions.push_tail(list_inst);
1274
1275 return list_inst;
1276 }
1277
1278 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1279 void
1280 fs_visitor::emit_dummy_fs()
1281 {
1282 /* Everyone's favorite color. */
1283 emit(fs_inst(BRW_OPCODE_MOV,
1284 fs_reg(MRF, 2),
1285 fs_reg(1.0f)));
1286 emit(fs_inst(BRW_OPCODE_MOV,
1287 fs_reg(MRF, 3),
1288 fs_reg(0.0f)));
1289 emit(fs_inst(BRW_OPCODE_MOV,
1290 fs_reg(MRF, 4),
1291 fs_reg(1.0f)));
1292 emit(fs_inst(BRW_OPCODE_MOV,
1293 fs_reg(MRF, 5),
1294 fs_reg(0.0f)));
1295
1296 fs_inst *write;
1297 write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1298 fs_reg(0),
1299 fs_reg(0)));
1300 }
1301
1302 /* The register location here is relative to the start of the URB
1303 * data. It will get adjusted to be a real location before
1304 * generate_code() time.
1305 */
1306 struct brw_reg
1307 fs_visitor::interp_reg(int location, int channel)
1308 {
1309 int regnr = location * 2 + channel / 2;
1310 int stride = (channel & 1) * 4;
1311
1312 return brw_vec1_grf(regnr, stride);
1313 }
1314
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   /* For now, the source regs for the setup URB data will be unset,
    * since we don't know until codegen how many push constants we'll
    * use, and therefore what the setup URB offset is.
    */
   fs_reg src_reg = reg_undef;

   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* Pixel centers: the subspan X/Y origins from g1, plus an immediate
    * vector of per-channel offsets for the 2x2 subspans (presumably
    * x += 0,1,0,1,... and y += 0,0,1,1,... -- confirm against the PS
    * thread payload docs).
    */
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_x,
                fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
                fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->pixel_y,
                fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
                fs_reg(brw_imm_v(0x11001100))));

   /* Deltas from the triangle's start position, the inputs to LINTERP/PLN. */
   this->current_annotation = "compute pixel deltas from v0";
   this->delta_x = fs_reg(this, glsl_type::float_type);
   this->delta_y = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_x,
                this->pixel_x,
                fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
                this->delta_y,
                this->pixel_y,
                fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos. Unlike many other varying inputs, we usually need it
    * to produce 1/w, and the varying variable wouldn't show up.
    */
   fs_reg wpos = fs_reg(this, glsl_type::vec4_type);
   this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos;
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */
   wpos.reg_offset++;
   emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */
   wpos.reg_offset++;
   /* z and w are linearly interpolated from the setup data. */
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 2)));
   wpos.reg_offset++;
   emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
                interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));

   /* Interpolate every varying input the shader declares (beyond WPOS). */
   foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      ir_variable *var = ir->as_variable();

      if (!var)
         continue;

      if (var->mode != ir_var_in)
         continue;

      /* If it's already set up (WPOS), skip. */
      if (var->location == 0)
         continue;

      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "interpolate %s "
                                                 "(FRAG_ATTRIB[%d])",
                                                 var->name,
                                                 var->location);
      emit_pinterp(var->location);
   }
   this->current_annotation = NULL;
}
1394
1395 void
1396 fs_visitor::emit_pinterp(int location)
1397 {
1398 fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
1399 this->interp_attrs[location] = interp_attr;
1400
1401 for (unsigned int i = 0; i < 4; i++) {
1402 struct brw_reg interp = interp_reg(location, i);
1403 emit(fs_inst(FS_OPCODE_LINTERP,
1404 interp_attr,
1405 this->delta_x,
1406 this->delta_y,
1407 fs_reg(interp)));
1408 interp_attr.reg_offset++;
1409 }
1410 interp_attr.reg_offset -= 4;
1411
1412 for (unsigned int i = 0; i < 4; i++) {
1413 emit(fs_inst(BRW_OPCODE_MUL,
1414 interp_attr,
1415 interp_attr,
1416 this->pixel_w));
1417 interp_attr.reg_offset++;
1418 }
1419 }
1420
void
fs_visitor::emit_fb_writes()
{
   this->current_annotation = "FB write header";
   int nr = 0;

   /* m0, m1 header */
   nr += 2;

   /* Copy the AA dest stencil payload register into the message when the
    * key requests it.
    */
   if (c->key.aa_dest_stencil_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
         /* Hand over gl_FragDepth. */
         assert(this->frag_depth);
         fs_reg depth = *(variable_storage(this->frag_depth));

         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
         /* Pass through the payload depth. */
         emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
                   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   /* The output color: gl_FragColor (one vec4 shared by all targets) or
    * gl_FragData (an array with one vec4 per target).
    */
   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
                                                 "FB write target %d",
                                                 target);
      if (this->frag_color || this->frag_data) {
         for (int i = 0; i < 4; i++) {
            emit(fs_inst(BRW_OPCODE_MOV,
                         fs_reg(MRF, color_mrf + i),
                         color));
            color.reg_offset++;
         }
      }

      /* For gl_FragColor, rewind so every target re-sends the same vec4;
       * for gl_FragData, keep advancing through the array.
       */
      if (this->frag_color)
         color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      /* Only the last FB write ends the thread. */
      if (target == c->key.nr_color_regions - 1)
         inst->eot = true;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even with no color buffers bound, we must still emit one FB write
       * to terminate the thread (and deliver any depth output).
       */
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
                                   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
   }

   this->current_annotation = NULL;
}
1497
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   GLboolean eot = inst->eot;

   /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
           brw_message_reg(1),
           brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   /* Send the FB write message, payload starting at the base MRF. */
   brw_fb_WRITE(p,
                8, /* dispatch_width */
                retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
                0, /* base MRF */
                retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
                inst->target,
                inst->mlen,
                0, /* response length */
                eot);
}
1524
void
fs_visitor::generate_linterp(fs_inst *inst,
                             struct brw_reg dst, struct brw_reg *src)
{
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   /* PLN reads the deltas as a register pair, so delta_y must immediately
    * follow delta_x; pre-gen6 additionally requires the pair to start on
    * an even register.
    */
   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      /* Fall back to a LINE/MAC pair evaluating the same plane equation. */
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}
1542
void
fs_visitor::generate_math(fs_inst *inst,
                          struct brw_reg dst, struct brw_reg *src)
{
   int op;

   /* Map the FS opcode onto the hardware extended math function. */
   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   /* POW is the only two-operand math function; its second operand goes in
    * the message register after src[0]'s m2.
    */
   if (inst->opcode == FS_OPCODE_POW) {
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   brw_math(p, dst,
            op,
            inst->saturate ? BRW_MATH_SATURATE_SATURATE :
            BRW_MATH_SATURATE_NONE,
            2, src[0], /* message starts at m2 */
            BRW_MATH_DATA_VECTOR,
            BRW_MATH_PRECISION_FULL);
}
1592
1593 void
1594 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1595 {
1596 int msg_type = -1;
1597 int rlen = 4;
1598
1599 if (intel->gen == 5) {
1600 switch (inst->opcode) {
1601 case FS_OPCODE_TEX:
1602 if (inst->shadow_compare) {
1603 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1604 } else {
1605 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1606 }
1607 break;
1608 case FS_OPCODE_TXB:
1609 if (inst->shadow_compare) {
1610 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1611 } else {
1612 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1613 }
1614 break;
1615 }
1616 } else {
1617 switch (inst->opcode) {
1618 case FS_OPCODE_TEX:
1619 /* Note that G45 and older determines shadow compare and dispatch width
1620 * from message length for most messages.
1621 */
1622 if (inst->shadow_compare) {
1623 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
1624 } else {
1625 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
1626 }
1627 case FS_OPCODE_TXB:
1628 if (inst->shadow_compare) {
1629 assert(!"FINISHME: shadow compare with bias.");
1630 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1631 } else {
1632 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1633 rlen = 8;
1634 }
1635 break;
1636 }
1637 }
1638 assert(msg_type != -1);
1639
1640 /* g0 header. */
1641 src.nr--;
1642
1643 brw_SAMPLE(p,
1644 retype(dst, BRW_REGISTER_TYPE_UW),
1645 src.nr,
1646 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1647 SURF_INDEX_TEXTURE(inst->sampler),
1648 inst->sampler,
1649 WRITEMASK_XYZW,
1650 msg_type,
1651 rlen,
1652 inst->mlen + 1,
1653 0,
1654 1,
1655 BRW_SAMPLER_SIMD_MODE_SIMD8);
1656 }
1657
1658
1659 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
1660 * looking like:
1661 *
1662 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
1663 *
1664 * and we're trying to produce:
1665 *
1666 * DDX DDY
1667 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
1668 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
1669 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
1670 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
1671 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
1672 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
1673 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
1674 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
1675 *
1676 * and add another set of two more subspans if in 16-pixel dispatch mode.
1677 *
1678 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
1679 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
1680 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
1681 * between each other. We could probably do it like ddx and swizzle the right
1682 * order later, but bail for now and just produce
1683 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
1684 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* src0 selects the right-hand pixels of each subspan pair (suboffset 1),
    * src1 the left-hand pixels (suboffset 0), each replicated across the
    * pair with a <2,2,0> region; dst = right - left (see comment above).
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_2,
                                 BRW_WIDTH_2,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
1702
void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* src0 selects each subspan's top-left (suboffset 0) and src1 its
    * bottom-left (suboffset 2), replicated across all four pixels with a
    * <4,4,0> region: the simplified ((tl - bl)x4) result described in the
    * comment above, rather than fully per-pixel DDY.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
                                 BRW_REGISTER_TYPE_F,
                                 BRW_VERTICAL_STRIDE_4,
                                 BRW_WIDTH_4,
                                 BRW_HORIZONTAL_STRIDE_0,
                                 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
1720
void
fs_visitor::generate_discard(fs_inst *inst)
{
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
   /* Clear the pixel-enable bits in the g0 header for the discarded
    * channels, so subsequent FB writes won't update them.
    */
   brw_AND(p, g0, c->emit_mask_reg, g0);
   brw_pop_insn_state(p);
}
1731
1732 static void
1733 trivial_assign_reg(int header_size, fs_reg *reg)
1734 {
1735 if (reg->file == GRF && reg->reg != 0) {
1736 reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
1737 reg->reg = 0;
1738 }
1739 }
1740
void
fs_visitor::assign_curb_setup()
{
   /* Push constants (CURB) land right after the fixed payload registers;
    * each CURB register holds 8 float parameters.
    */
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
                           c->prog_data.curb_read_length) & 1) {
      /* Align the start of the interpolation coefficients so that we can use
       * the PLN instruction.
       */
      c->prog_data.first_curbe_grf++;
   }

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
         if (inst->src[i].file == UNIFORM) {
            /* hw_reg carries the base param index for UNIFORM regs here. */
            int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
            struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
                                                  constant_nr / 8,
                                                  constant_nr % 8);

            inst->src[i].file = FIXED_HW_REG;
            inst->src[i].fixed_hw_reg = brw_reg;
         }
      }
   }
}
1772
void
fs_visitor::assign_urb_setup()
{
   /* Setup (URB) data follows the payload and CURB registers. */
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      /* WPOS setup is always read (needed for W); other attributes only
       * when the program uses them.
       */
      if (i != FRAG_ATTRIB_WPOS &&
          !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
         continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
         continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* interp_reg() packed nr = location * 2 + channel / 2; recover the
       * location and rebase onto the assigned setup register, preserving
       * the low bit (which register of the attribute's pair).
       */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
                                      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}
1813
void
fs_visitor::assign_regs()
{
   int header_size = this->first_non_payload_grf;
   int last_grf = 0;

   /* FINISHME: trivial assignment of register numbers */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      trivial_assign_reg(header_size, &inst->dst);
      trivial_assign_reg(header_size, &inst->src[0]);
      trivial_assign_reg(header_size, &inst->src[1]);
      /* NOTE(review): src[2] is never assigned here.  Today only
       * FS_OPCODE_LINTERP has a third source and assign_urb_setup() has
       * already turned it into a FIXED_HW_REG, but any future 3-source
       * GRF use would be missed -- confirm before adding such opcodes.
       */

      last_grf = MAX2(last_grf, inst->dst.hw_reg);
      last_grf = MAX2(last_grf, inst->src[0].hw_reg);
      last_grf = MAX2(last_grf, inst->src[1].hw_reg);
   }

   this->grf_used = last_grf + 1;
}
1835
/* Translate an fs_reg into a hardware brw_reg region for code generation. */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      /* By now hw_reg holds the assigned register number (assign_regs /
       * trivial_assign_reg).
       */
      brw_reg = brw_vec8_reg(reg->file,
                             reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
         brw_reg = brw_imm_f(reg->imm.f);
         break;
      case BRW_REGISTER_TYPE_D:
         brw_reg = brw_imm_d(reg->imm.i);
         break;
      case BRW_REGISTER_TYPE_UD:
         brw_reg = brw_imm_ud(reg->imm.u);
         break;
      default:
         assert(!"not reached");
         break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      /* Uniforms should have been rewritten to FIXED_HW_REG by
       * assign_curb_setup().
       */
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}
1883
1884 void
1885 fs_visitor::generate_code()
1886 {
1887 unsigned int annotation_len = 0;
1888 int last_native_inst = 0;
1889 struct brw_instruction *if_stack[16], *loop_stack[16];
1890 int if_stack_depth = 0, loop_stack_depth = 0;
1891 int if_depth_in_loop[16];
1892
1893 if_depth_in_loop[loop_stack_depth] = 0;
1894
1895 memset(&if_stack, 0, sizeof(if_stack));
1896 foreach_iter(exec_list_iterator, iter, this->instructions) {
1897 fs_inst *inst = (fs_inst *)iter.get();
1898 struct brw_reg src[3], dst;
1899
1900 for (unsigned int i = 0; i < 3; i++) {
1901 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
1902 }
1903 dst = brw_reg_from_fs_reg(&inst->dst);
1904
1905 brw_set_conditionalmod(p, inst->conditional_mod);
1906 brw_set_predicate_control(p, inst->predicated);
1907
1908 switch (inst->opcode) {
1909 case BRW_OPCODE_MOV:
1910 brw_MOV(p, dst, src[0]);
1911 break;
1912 case BRW_OPCODE_ADD:
1913 brw_ADD(p, dst, src[0], src[1]);
1914 break;
1915 case BRW_OPCODE_MUL:
1916 brw_MUL(p, dst, src[0], src[1]);
1917 break;
1918
1919 case BRW_OPCODE_FRC:
1920 brw_FRC(p, dst, src[0]);
1921 break;
1922 case BRW_OPCODE_RNDD:
1923 brw_RNDD(p, dst, src[0]);
1924 break;
1925 case BRW_OPCODE_RNDZ:
1926 brw_RNDZ(p, dst, src[0]);
1927 break;
1928
1929 case BRW_OPCODE_AND:
1930 brw_AND(p, dst, src[0], src[1]);
1931 break;
1932 case BRW_OPCODE_OR:
1933 brw_OR(p, dst, src[0], src[1]);
1934 break;
1935 case BRW_OPCODE_XOR:
1936 brw_XOR(p, dst, src[0], src[1]);
1937 break;
1938
1939 case BRW_OPCODE_CMP:
1940 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
1941 break;
1942 case BRW_OPCODE_SEL:
1943 brw_SEL(p, dst, src[0], src[1]);
1944 break;
1945
1946 case BRW_OPCODE_IF:
1947 assert(if_stack_depth < 16);
1948 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
1949 if_depth_in_loop[loop_stack_depth]++;
1950 if_stack_depth++;
1951 break;
1952 case BRW_OPCODE_ELSE:
1953 if_stack[if_stack_depth - 1] =
1954 brw_ELSE(p, if_stack[if_stack_depth - 1]);
1955 break;
1956 case BRW_OPCODE_ENDIF:
1957 if_stack_depth--;
1958 brw_ENDIF(p , if_stack[if_stack_depth]);
1959 if_depth_in_loop[loop_stack_depth]--;
1960 break;
1961
1962 case BRW_OPCODE_DO:
1963 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
1964 if_depth_in_loop[loop_stack_depth] = 0;
1965 break;
1966
1967 case BRW_OPCODE_BREAK:
1968 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
1969 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1970 break;
1971 case BRW_OPCODE_CONTINUE:
1972 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
1973 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
1974 break;
1975
1976 case BRW_OPCODE_WHILE: {
1977 struct brw_instruction *inst0, *inst1;
1978 GLuint br = 1;
1979
1980 if (intel->gen == 5)
1981 br = 2;
1982
1983 assert(loop_stack_depth > 0);
1984 loop_stack_depth--;
1985 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
1986 /* patch all the BREAK/CONT instructions from last BGNLOOP */
1987 while (inst0 > loop_stack[loop_stack_depth]) {
1988 inst0--;
1989 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
1990 inst0->bits3.if_else.jump_count == 0) {
1991 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
1992 }
1993 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
1994 inst0->bits3.if_else.jump_count == 0) {
1995 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
1996 }
1997 }
1998 }
1999 break;
2000
2001 case FS_OPCODE_RCP:
2002 case FS_OPCODE_RSQ:
2003 case FS_OPCODE_SQRT:
2004 case FS_OPCODE_EXP2:
2005 case FS_OPCODE_LOG2:
2006 case FS_OPCODE_POW:
2007 case FS_OPCODE_SIN:
2008 case FS_OPCODE_COS:
2009 generate_math(inst, dst, src);
2010 break;
2011 case FS_OPCODE_LINTERP:
2012 generate_linterp(inst, dst, src);
2013 break;
2014 case FS_OPCODE_TEX:
2015 case FS_OPCODE_TXB:
2016 case FS_OPCODE_TXL:
2017 generate_tex(inst, dst, src[0]);
2018 break;
2019 case FS_OPCODE_DISCARD:
2020 generate_discard(inst);
2021 break;
2022 case FS_OPCODE_DDX:
2023 generate_ddx(inst, dst, src[0]);
2024 break;
2025 case FS_OPCODE_DDY:
2026 generate_ddy(inst, dst, src[0]);
2027 break;
2028 case FS_OPCODE_FB_WRITE:
2029 generate_fb_write(inst);
2030 break;
2031 default:
2032 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2033 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2034 brw_opcodes[inst->opcode].name);
2035 } else {
2036 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2037 }
2038 this->fail = true;
2039 }
2040
2041 if (annotation_len < p->nr_insn) {
2042 annotation_len *= 2;
2043 if (annotation_len < 16)
2044 annotation_len = 16;
2045
2046 this->annotation_string = talloc_realloc(this->mem_ctx,
2047 annotation_string,
2048 const char *,
2049 annotation_len);
2050 this->annotation_ir = talloc_realloc(this->mem_ctx,
2051 annotation_ir,
2052 ir_instruction *,
2053 annotation_len);
2054 }
2055
2056 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2057 this->annotation_string[i] = inst->annotation;
2058 this->annotation_ir[i] = inst->ir;
2059 }
2060 last_native_inst = p->nr_insn;
2061 }
2062 }
2063
/* Driver entry point: compile the currently-bound GLSL fragment shader with
 * the new FS backend.  Returns GL_FALSE to fall back to the old WM compiler.
 */
GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   struct brw_shader *shader = NULL;
   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;

   if (!prog)
      return GL_FALSE;

   if (!using_new_fs)
      return GL_FALSE;

   /* Find the fragment shader stage of the linked program, if any. */
   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
         shader = (struct brw_shader *)prog->_LinkedShaders[i];
         break;
      }
   }
   if (!shader)
      return GL_FALSE;

   /* We always use 8-wide mode, at least for now. For one, flow
    * control only works in 8-wide. Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      /* Debug path: magenta output, no IR translation. */
      v.emit_dummy_fs();
   } else {
      v.emit_interpolation();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
         ir_instruction *ir = (ir_instruction *)iter.get();
         v.base_ir = ir;
         ir->accept(&v);
      }

      /* Finish the shader and lay out the registers. */
      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();
      v.assign_regs();
   }

   v.generate_code();

   /* The assert plus the check below are deliberately redundant for now:
    * failures should be caught loudly in debug builds until clean
    * fallback is implemented.
    */
   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   if (INTEL_DEBUG & DEBUG_WM) {
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      /* Print the disassembly, showing each annotation only when it
       * changes from the previous native instruction.
       */
      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
         if (last_annotation_ir != v.annotation_ir[i]) {
            last_annotation_ir = v.annotation_ir[i];
            if (last_annotation_ir) {
               printf("   ");
               last_annotation_ir->print();
               printf("\n");
            }
         }
         if (last_annotation_string != v.annotation_string[i]) {
            last_annotation_string = v.annotation_string[i];
            if (last_annotation_string)
               printf("   %s\n", last_annotation_string);
         }
         brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}