i965: Add support for ARB_fragment_coord_conventions to the new FS backend.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 extern "C" {
29
30 #include <sys/types.h>
31
32 #include "main/macros.h"
33 #include "main/shaderobj.h"
34 #include "program/prog_parameter.h"
35 #include "program/prog_print.h"
36 #include "program/prog_optimize.h"
37 #include "program/sampler.h"
38 #include "program/hash_table.h"
39 #include "brw_context.h"
40 #include "brw_eu.h"
41 #include "brw_wm.h"
42 #include "talloc.h"
43 }
44 #include "../glsl/glsl_types.h"
45 #include "../glsl/ir_optimization.h"
46 #include "../glsl/ir_print_visitor.h"
47
/* Register files an fs_reg can live in.  The first four alias the
 * hardware BRW_*_REGISTER_FILE encodings; the remainder are virtual
 * files used only inside this backend.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   FIXED_HW_REG, /* a struct brw_reg */
   UNIFORM, /* prog_data->params[hw_reg] */
   BAD_FILE
};
57
/* Virtual opcodes for this backend.  Numbering starts at 256 so they
 * can share fs_inst::opcode with the hardware BRW_OPCODE_* values
 * without colliding.
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   FS_OPCODE_LINTERP,
   FS_OPCODE_TEX,
   FS_OPCODE_TXB,
   FS_OPCODE_TXL,
   FS_OPCODE_DISCARD,
};
76
/* Tri-state cache for the INTEL_NEW_FS environment toggle:
 * -1 = not checked yet, otherwise 0/1.
 */
static int using_new_fs = -1;
/* Forward declaration; defined later in this file. */
static struct brw_reg brw_reg_from_fs_reg(class fs_reg *reg);
79
80 struct gl_shader *
81 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
82 {
83 struct brw_shader *shader;
84
85 shader = talloc_zero(NULL, struct brw_shader);
86 if (shader) {
87 shader->base.Type = type;
88 shader->base.Name = name;
89 _mesa_init_shader(ctx, &shader->base);
90 }
91
92 return &shader->base;
93 }
94
95 struct gl_shader_program *
96 brw_new_shader_program(GLcontext *ctx, GLuint name)
97 {
98 struct brw_shader_program *prog;
99 prog = talloc_zero(NULL, struct brw_shader_program);
100 if (prog) {
101 prog->base.Name = name;
102 _mesa_init_shader_program(ctx, &prog->base);
103 }
104 return &prog->base;
105 }
106
107 GLboolean
108 brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
109 {
110 if (!_mesa_ir_compile_shader(ctx, shader))
111 return GL_FALSE;
112
113 return GL_TRUE;
114 }
115
/**
 * Link the shader program, running this backend's lowering passes on
 * a private clone of each fragment shader's IR when INTEL_NEW_FS is
 * enabled.  Falls through to the shared Mesa IR linker at the end.
 */
GLboolean
brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
{
   /* Resolve the environment toggle once (-1 means "not checked"). */
   if (using_new_fs == -1)
      using_new_fs = getenv("INTEL_NEW_FS") != NULL;

   for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
      struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];

      if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
         void *mem_ctx = talloc_new(NULL);
         bool progress;

         /* Drop any previous lowered copy, then clone the linked IR
          * into a temporary talloc context; it is reparented onto the
          * list itself below before mem_ctx is freed.
          */
         if (shader->ir)
            talloc_free(shader->ir);
         shader->ir = new(shader) exec_list;
         clone_ir_list(mem_ctx, shader->ir, shader->base.ir);

         /* One-shot lowering passes rewriting constructs this backend
          * doesn't handle natively.
          */
         do_mat_op_to_vec(shader->ir);
         do_mod_to_fract(shader->ir);
         do_div_to_mul_rcp(shader->ir);
         do_sub_to_add_neg(shader->ir);
         do_explog_to_explog2(shader->ir);

         /* Iterate the remaining passes to a fixed point. */
         do {
            progress = false;

            brw_do_channel_expressions(shader->ir);
            brw_do_vector_splitting(shader->ir);

            progress = do_lower_jumps(shader->ir, true, true,
                                      true, /* main return */
                                      false, /* continue */
                                      false /* loops */
                                      ) || progress;

            progress = do_common_optimization(shader->ir, true, 32) || progress;

            progress = lower_noise(shader->ir) || progress;
            progress =
               lower_variable_index_to_cond_assign(shader->ir,
                                                   GL_TRUE, /* input */
                                                   GL_TRUE, /* output */
                                                   GL_TRUE, /* temp */
                                                   GL_TRUE /* uniform */
                                                   ) || progress;
         } while (progress);

         validate_ir_tree(shader->ir);

         /* Make the cloned instructions own themselves so freeing the
          * temporary context doesn't take them with it.
          */
         reparent_ir(shader->ir, shader->ir);
         talloc_free(mem_ctx);
      }
   }

   if (!_mesa_ir_link_shader(ctx, prog))
      return GL_FALSE;

   return GL_TRUE;
}
176
177 static int
178 type_size(const struct glsl_type *type)
179 {
180 unsigned int size, i;
181
182 switch (type->base_type) {
183 case GLSL_TYPE_UINT:
184 case GLSL_TYPE_INT:
185 case GLSL_TYPE_FLOAT:
186 case GLSL_TYPE_BOOL:
187 return type->components();
188 case GLSL_TYPE_ARRAY:
189 /* FINISHME: uniform/varying arrays. */
190 return type_size(type->fields.array) * type->length;
191 case GLSL_TYPE_STRUCT:
192 size = 0;
193 for (i = 0; i < type->length; i++) {
194 size += type_size(type->fields.structure[i].type);
195 }
196 return size;
197 case GLSL_TYPE_SAMPLER:
198 /* Samplers take up no register space, since they're baked in at
199 * link time.
200 */
201 return 0;
202 default:
203 assert(!"not reached");
204 return 0;
205 }
206 }
207
/**
 * An operand (source or destination) in the FS backend IR.
 *
 * Before register allocation, GRF values use an abstract register
 * number ('reg') plus a component offset ('reg_offset'); 'hw_reg'
 * holds the real register once assigned (or immediately for fixed
 * hardware registers).
 */
class fs_reg {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Shared field defaults used by every constructor. */
   void init()
   {
      this->reg = 0;
      this->reg_offset = 0;
      this->negate = 0;
      this->abs = 0;
      this->hw_reg = -1;
   }

   /** Generic unset register constructor. */
   fs_reg()
   {
      init();
      this->file = BAD_FILE;
   }

   /** Immediate value constructor. */
   fs_reg(float f)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_F;
      this->imm.f = f;
   }

   /** Immediate value constructor. */
   fs_reg(int32_t i)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_D;
      this->imm.i = i;
   }

   /** Immediate value constructor. */
   fs_reg(uint32_t u)
   {
      init();
      this->file = IMM;
      this->type = BRW_REGISTER_TYPE_UD;
      this->imm.u = u;
   }

   /** Fixed brw_reg Immediate value constructor. */
   fs_reg(struct brw_reg fixed_hw_reg)
   {
      init();
      this->file = FIXED_HW_REG;
      this->fixed_hw_reg = fixed_hw_reg;
      this->type = fixed_hw_reg.type;
   }

   fs_reg(enum register_file file, int hw_reg);
   fs_reg(class fs_visitor *v, const struct glsl_type *type);

   /** Register file: ARF, GRF, MRF, IMM. */
   enum register_file file;
   /** Abstract register number.  0 = fixed hw reg */
   int reg;
   /** Offset within the abstract register. */
   int reg_offset;
   /** HW register number.  Generally unset until register allocation. */
   int hw_reg;
   /** Register type.  BRW_REGISTER_TYPE_* */
   int type;
   bool negate;
   bool abs;
   struct brw_reg fixed_hw_reg;

   /** Value for file == BRW_IMMMEDIATE_FILE */
   union {
      int32_t i;
      uint32_t u;
      float f;
   } imm;
};
298
/* Common constants: an unset register, and the ARF null register
 * (used as a destination when only condition flags are wanted).
 */
static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);
301
/**
 * A single instruction in the FS backend IR: a hardware BRW_OPCODE_*
 * or virtual FS_OPCODE_* opcode, one destination, up to three
 * sources, plus SEND-message and debug-annotation metadata.
 */
class fs_inst : public exec_node {
public:
   /* Callers of this talloc-based new need not call delete. It's
    * easier to just talloc_free 'ctx' (or any of its ancestors). */
   static void* operator new(size_t size, void *ctx)
   {
      void *node;

      node = talloc_zero_size(ctx, size);
      assert(node != NULL);

      return node;
   }

   /* Defaults shared by all constructors. */
   void init()
   {
      this->opcode = BRW_OPCODE_NOP;
      this->saturate = false;
      this->conditional_mod = BRW_CONDITIONAL_NONE;
      this->predicated = false;
      this->sampler = 0;
      this->target = 0;
      this->eot = false;
      this->shadow_compare = false;
   }

   fs_inst()
   {
      init();
   }

   fs_inst(int opcode)
   {
      init();
      this->opcode = opcode;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
   }

   fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
   {
      init();
      this->opcode = opcode;
      this->dst = dst;
      this->src[0] = src0;
      this->src[1] = src1;
      this->src[2] = src2;
   }

   int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
   fs_reg dst;
   fs_reg src[3];
   bool saturate;
   bool predicated;
   int conditional_mod; /**< BRW_CONDITIONAL_* */

   int mlen; /**< SEND message length */
   int sampler;
   int target; /**< MRT target. */
   bool eot;
   bool shadow_compare;

   /** @{
    * Annotation for the generated IR.  One of the two can be set.
    */
   ir_instruction *ir;
   const char *annotation;
   /** @} */
};
386
/**
 * IR-tree visitor that translates GLSL IR into this backend's
 * fs_inst list, then assigns registers and generates native code.
 *
 * visit() methods communicate the value of the last-visited
 * expression through 'this->result'.
 */
class fs_visitor : public ir_visitor
{
public:

   fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
   {
      this->c = c;
      this->p = &c->func;
      this->brw = p->brw;
      this->fp = brw->fragment_program;
      this->intel = &brw->intel;
      this->ctx = &intel->ctx;
      this->mem_ctx = talloc_new(NULL);
      this->shader = shader;
      this->fail = false;
      /* Abstract GRF numbering starts at 1; 0 means "fixed hw reg". */
      this->next_abstract_grf = 1;
      this->variable_ht = hash_table_ctor(0,
					  hash_table_pointer_hash,
					  hash_table_pointer_compare);

      this->frag_color = NULL;
      this->frag_data = NULL;
      this->frag_depth = NULL;
      this->first_non_payload_grf = 0;

      this->current_annotation = NULL;
      this->annotation_string = NULL;
      this->annotation_ir = NULL;
      this->base_ir = NULL;
   }
   ~fs_visitor()
   {
      talloc_free(this->mem_ctx);
      hash_table_dtor(this->variable_ht);
   }

   /* Look up the fs_reg previously allocated for a variable, if any. */
   fs_reg *variable_storage(ir_variable *var);

   void visit(ir_variable *ir);
   void visit(ir_assignment *ir);
   void visit(ir_dereference_variable *ir);
   void visit(ir_dereference_record *ir);
   void visit(ir_dereference_array *ir);
   void visit(ir_expression *ir);
   void visit(ir_texture *ir);
   void visit(ir_if *ir);
   void visit(ir_constant *ir);
   void visit(ir_swizzle *ir);
   void visit(ir_return *ir);
   void visit(ir_loop *ir);
   void visit(ir_loop_jump *ir);
   void visit(ir_discard *ir);
   void visit(ir_call *ir);
   void visit(ir_function *ir);
   void visit(ir_function_signature *ir);

   /* Append an instruction to this->instructions. */
   fs_inst *emit(fs_inst inst);
   void assign_curb_setup();
   void assign_urb_setup();
   void assign_regs();
   void generate_code();
   void generate_fb_write(fs_inst *inst);
   void generate_linterp(fs_inst *inst, struct brw_reg dst,
			 struct brw_reg *src);
   void generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
   void generate_discard(fs_inst *inst);
   void generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src);
   void generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src);

   void emit_dummy_fs();
   void emit_fragcoord_interpolation(ir_variable *ir);
   void emit_interpolation();
   void emit_pinterp(int location);
   void emit_fb_writes();

   struct brw_reg interp_reg(int location, int channel);
   int setup_uniform_values(int loc, const glsl_type *type);

   struct brw_context *brw;
   const struct gl_fragment_program *fp;
   struct intel_context *intel;
   GLcontext *ctx;
   struct brw_wm_compile *c;
   struct brw_compile *p;
   struct brw_shader *shader;
   void *mem_ctx;
   exec_list instructions;
   int next_abstract_grf;
   /* Maps ir_variable* -> fs_reg* storage. */
   struct hash_table *variable_ht;
   ir_variable *frag_color, *frag_data, *frag_depth;
   int first_non_payload_grf;

   /** @{ debug annotation info */
   const char *current_annotation;
   ir_instruction *base_ir;
   const char **annotation_string;
   ir_instruction **annotation_ir;
   /** @} */

   /* Set when translation has failed and code generation should bail. */
   bool fail;

   /* Result of last visit() method. */
   fs_reg result;

   fs_reg pixel_x;
   fs_reg pixel_y;
   fs_reg wpos_w;
   fs_reg pixel_w;
   fs_reg delta_x;
   fs_reg delta_y;
   fs_reg interp_attrs[64];

   int grf_used;

};
503
/** Fixed HW reg constructor. */
fs_reg::fs_reg(enum register_file file, int hw_reg)
{
   init();
   this->file = file;
   this->hw_reg = hw_reg;
   /* Defaults to float; construct from a struct brw_reg to carry a
    * specific type instead.
    */
   this->type = BRW_REGISTER_TYPE_F;
}
512
513 int
514 brw_type_for_base_type(const struct glsl_type *type)
515 {
516 switch (type->base_type) {
517 case GLSL_TYPE_FLOAT:
518 return BRW_REGISTER_TYPE_F;
519 case GLSL_TYPE_INT:
520 case GLSL_TYPE_BOOL:
521 return BRW_REGISTER_TYPE_D;
522 case GLSL_TYPE_UINT:
523 return BRW_REGISTER_TYPE_UD;
524 case GLSL_TYPE_ARRAY:
525 case GLSL_TYPE_STRUCT:
526 /* These should be overridden with the type of the member when
527 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
528 * way to trip up if we don't.
529 */
530 return BRW_REGISTER_TYPE_UD;
531 default:
532 assert(!"not reached");
533 return BRW_REGISTER_TYPE_F;
534 }
535 }
536
/** Automatic reg constructor. */
fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
{
   init();

   /* Reserve a run of abstract GRF slots sized for 'type';
    * reg_offset then indexes components within that run.
    */
   this->file = GRF;
   this->reg = v->next_abstract_grf;
   this->reg_offset = 0;
   v->next_abstract_grf += type_size(type);
   this->type = brw_type_for_base_type(type);
}
548
549 fs_reg *
550 fs_visitor::variable_storage(ir_variable *var)
551 {
552 return (fs_reg *)hash_table_find(this->variable_ht, var);
553 }
554
/* Our support for uniforms is piggy-backed on the struct
 * gl_fragment_program, because that's where the values actually
 * get stored, rather than in some global gl_shader_program uniform
 * store.
 */
/**
 * Recursively register the uniform at parameter location 'loc' into
 * c->prog_data.param[], one pointer per scalar component.
 *
 * Returns the number of parameter-list slots consumed (matrices count
 * one per column, structs/arrays the sum of their members).
 */
int
fs_visitor::setup_uniform_values(int loc, const glsl_type *type)
{
   unsigned int offset = 0;
   float *vec_values;

   if (type->is_matrix()) {
      /* A matrix is handled as matrix_columns consecutive column
       * vectors.
       */
      const glsl_type *column = glsl_type::get_instance(GLSL_TYPE_FLOAT,
							type->vector_elements,
							1);

      for (unsigned int i = 0; i < type->matrix_columns; i++) {
	 offset += setup_uniform_values(loc + offset, column);
      }

      return offset;
   }

   switch (type->base_type) {
   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_UINT:
   case GLSL_TYPE_INT:
   case GLSL_TYPE_BOOL:
      /* Point prog_data.param at the live parameter storage so later
       * value changes are picked up without re-translation.
       */
      vec_values = fp->Base.Parameters->ParameterValues[loc];
      for (unsigned int i = 0; i < type->vector_elements; i++) {
	 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
      }
      return 1;

   case GLSL_TYPE_STRUCT:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset,
					type->fields.structure[i].type);
      }
      return offset;

   case GLSL_TYPE_ARRAY:
      for (unsigned int i = 0; i < type->length; i++) {
	 offset += setup_uniform_values(loc + offset, type->fields.array);
      }
      return offset;

   case GLSL_TYPE_SAMPLER:
      /* The sampler takes up a slot, but we don't use any values from it. */
      return 1;

   default:
      assert(!"not reached");
      return 0;
   }
}
611
612 void
613 fs_visitor::emit_fragcoord_interpolation(ir_variable *ir)
614 {
615 fs_reg *reg = new(this->mem_ctx) fs_reg(this, ir->type);
616 fs_reg wpos = *reg;
617 fs_reg neg_y = this->pixel_y;
618 neg_y.negate = true;
619
620 /* gl_FragCoord.x */
621 if (ir->pixel_center_integer) {
622 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x));
623 } else {
624 emit(fs_inst(BRW_OPCODE_ADD, wpos, this->pixel_x, fs_reg(0.5f)));
625 }
626 wpos.reg_offset++;
627
628 /* gl_FragCoord.y */
629 if (ir->origin_upper_left && ir->pixel_center_integer) {
630 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y));
631 } else {
632 fs_reg pixel_y = this->pixel_y;
633 float offset = (ir->pixel_center_integer ? 0.0 : 0.5);
634
635 if (!ir->origin_upper_left) {
636 pixel_y.negate = true;
637 offset += c->key.drawable_height - 1.0;
638 }
639
640 emit(fs_inst(BRW_OPCODE_ADD, wpos, pixel_y, fs_reg(offset)));
641 }
642 wpos.reg_offset++;
643
644 /* gl_FragCoord.z */
645 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
646 interp_reg(FRAG_ATTRIB_WPOS, 2)));
647 wpos.reg_offset++;
648
649 /* gl_FragCoord.w: Already set up in emit_interpolation */
650 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->wpos_w));
651
652 hash_table_insert(this->variable_ht, reg, ir);
653 }
654
/**
 * Allocate (or locate) storage for a variable declaration and record
 * it in variable_ht.  Built-in outputs are remembered for later FB
 * writes; inputs get their interpolation set up here.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   if (variable_storage(ir))
      return;

   /* Remember the built-in output variables for emit_fb_writes(). */
   if (strcmp(ir->name, "gl_FragColor") == 0) {
      this->frag_color = ir;
   } else if (strcmp(ir->name, "gl_FragData") == 0) {
      this->frag_data = ir;
   } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
      this->frag_depth = ir;
   }

   if (ir->mode == ir_var_in) {
      if (!strcmp(ir->name, "gl_FragCoord")) {
	 emit_fragcoord_interpolation(ir);
	 return;
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
	 reg = new(this->mem_ctx) fs_reg(this, ir->type);
	 struct brw_reg r1_6ud = retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD);
	 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
	  * us front face
	  */
	 fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP,
				      *reg,
				      fs_reg(r1_6ud),
				      fs_reg(1u << 31)));
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 /* Mask the comparison result down to the backend's 0/1 bool. */
	 emit(fs_inst(BRW_OPCODE_AND, *reg, *reg, fs_reg(1u)));
      } else {
	 /* Ordinary varying: use the storage set up by
	  * emit_interpolation for its attribute slot.
	  */
	 reg = &this->interp_attrs[ir->location];
      }
   }

   if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      setup_uniform_values(ir->location, ir->type);

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
   }

   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}
705
706 void
707 fs_visitor::visit(ir_dereference_variable *ir)
708 {
709 fs_reg *reg = variable_storage(ir->var);
710 this->result = *reg;
711 }
712
/**
 * Evaluate the struct being dereferenced, then advance the resulting
 * register by the combined size of the fields preceding 'ir->field'.
 */
void
fs_visitor::visit(ir_dereference_record *ir)
{
   const glsl_type *struct_type = ir->record->type;

   ir->record->accept(this);

   unsigned int offset = 0;
   for (unsigned int i = 0; i < struct_type->length; i++) {
      if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
	 break;
      offset += type_size(struct_type->fields.structure[i].type);
   }
   this->result.reg_offset += offset;
   /* The struct's storage carried the aggregate type; narrow it to
    * the member's type.
    */
   this->result.type = brw_type_for_base_type(ir->type);
}
729
/**
 * Array (or matrix-column) dereference.  Only constant indices are
 * handled so far; variable indexing was lowered to conditional
 * assignments at link time for everything except matrix columns.
 */
void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *index;
   int element_size;

   ir->array->accept(this);
   index = ir->array_index->as_constant();

   if (ir->type->is_matrix()) {
      /* Indexing a matrix yields a column vector. */
      element_size = ir->type->vector_elements;
   } else {
      element_size = type_size(ir->type);
      this->result.type = brw_type_for_base_type(ir->type);
   }

   if (index) {
      assert(this->result.file == UNIFORM ||
	     (this->result.file == GRF &&
	      this->result.reg != 0));
      this->result.reg_offset += index->value.i[0] * element_size;
   } else {
      assert(!"FINISHME: non-constant matrix column");
   }
}
755
756 void
757 fs_visitor::visit(ir_expression *ir)
758 {
759 unsigned int operand;
760 fs_reg op[2], temp;
761 fs_reg result;
762 fs_inst *inst;
763
764 for (operand = 0; operand < ir->get_num_operands(); operand++) {
765 ir->operands[operand]->accept(this);
766 if (this->result.file == BAD_FILE) {
767 ir_print_visitor v;
768 printf("Failed to get tree for expression operand:\n");
769 ir->operands[operand]->accept(&v);
770 this->fail = true;
771 }
772 op[operand] = this->result;
773
774 /* Matrix expression operands should have been broken down to vector
775 * operations already.
776 */
777 assert(!ir->operands[operand]->type->is_matrix());
778 /* And then those vector operands should have been broken down to scalar.
779 */
780 assert(!ir->operands[operand]->type->is_vector());
781 }
782
783 /* Storage for our result. If our result goes into an assignment, it will
784 * just get copy-propagated out, so no worries.
785 */
786 this->result = fs_reg(this, ir->type);
787
788 switch (ir->operation) {
789 case ir_unop_logic_not:
790 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
791 break;
792 case ir_unop_neg:
793 op[0].negate = !op[0].negate;
794 this->result = op[0];
795 break;
796 case ir_unop_abs:
797 op[0].abs = true;
798 this->result = op[0];
799 break;
800 case ir_unop_sign:
801 temp = fs_reg(this, ir->type);
802
803 emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(0.0f)));
804
805 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
806 inst->conditional_mod = BRW_CONDITIONAL_G;
807 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(1.0f)));
808 inst->predicated = true;
809
810 inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, op[0], fs_reg(0.0f)));
811 inst->conditional_mod = BRW_CONDITIONAL_L;
812 inst = emit(fs_inst(BRW_OPCODE_MOV, this->result, fs_reg(-1.0f)));
813 inst->predicated = true;
814
815 break;
816 case ir_unop_rcp:
817 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
818 break;
819
820 case ir_unop_exp2:
821 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
822 break;
823 case ir_unop_log2:
824 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
825 break;
826 case ir_unop_exp:
827 case ir_unop_log:
828 assert(!"not reached: should be handled by ir_explog_to_explog2");
829 break;
830 case ir_unop_sin:
831 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
832 break;
833 case ir_unop_cos:
834 emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
835 break;
836
837 case ir_unop_dFdx:
838 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
839 break;
840 case ir_unop_dFdy:
841 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
842 break;
843
844 case ir_binop_add:
845 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
846 break;
847 case ir_binop_sub:
848 assert(!"not reached: should be handled by ir_sub_to_add_neg");
849 break;
850
851 case ir_binop_mul:
852 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
853 break;
854 case ir_binop_div:
855 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
856 break;
857 case ir_binop_mod:
858 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
859 break;
860
861 case ir_binop_less:
862 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
863 inst->conditional_mod = BRW_CONDITIONAL_L;
864 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
865 break;
866 case ir_binop_greater:
867 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
868 inst->conditional_mod = BRW_CONDITIONAL_G;
869 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
870 break;
871 case ir_binop_lequal:
872 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
873 inst->conditional_mod = BRW_CONDITIONAL_LE;
874 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
875 break;
876 case ir_binop_gequal:
877 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
878 inst->conditional_mod = BRW_CONDITIONAL_GE;
879 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
880 break;
881 case ir_binop_equal:
882 case ir_binop_all_equal: /* same as nequal for scalars */
883 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
884 inst->conditional_mod = BRW_CONDITIONAL_Z;
885 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
886 break;
887 case ir_binop_nequal:
888 case ir_binop_any_nequal: /* same as nequal for scalars */
889 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
890 inst->conditional_mod = BRW_CONDITIONAL_NZ;
891 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
892 break;
893
894 case ir_binop_logic_xor:
895 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
896 break;
897
898 case ir_binop_logic_or:
899 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
900 break;
901
902 case ir_binop_logic_and:
903 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
904 break;
905
906 case ir_binop_dot:
907 case ir_binop_cross:
908 case ir_unop_any:
909 assert(!"not reached: should be handled by brw_fs_channel_expressions");
910 break;
911
912 case ir_unop_noise:
913 assert(!"not reached: should be handled by lower_noise");
914 break;
915
916 case ir_unop_sqrt:
917 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
918 break;
919
920 case ir_unop_rsq:
921 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
922 break;
923
924 case ir_unop_i2f:
925 case ir_unop_b2f:
926 case ir_unop_b2i:
927 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
928 break;
929 case ir_unop_f2i:
930 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
931 break;
932 case ir_unop_f2b:
933 case ir_unop_i2b:
934 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
935 inst->conditional_mod = BRW_CONDITIONAL_NZ;
936
937 case ir_unop_trunc:
938 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
939 break;
940 case ir_unop_ceil:
941 op[0].negate = ~op[0].negate;
942 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
943 this->result.negate = true;
944 break;
945 case ir_unop_floor:
946 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
947 break;
948 case ir_unop_fract:
949 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
950 break;
951
952 case ir_binop_min:
953 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
954 inst->conditional_mod = BRW_CONDITIONAL_L;
955
956 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
957 inst->predicated = true;
958 break;
959 case ir_binop_max:
960 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
961 inst->conditional_mod = BRW_CONDITIONAL_G;
962
963 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
964 inst->predicated = true;
965 break;
966
967 case ir_binop_pow:
968 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
969 break;
970
971 case ir_unop_bit_not:
972 case ir_unop_u2f:
973 case ir_binop_lshift:
974 case ir_binop_rshift:
975 case ir_binop_bit_and:
976 case ir_binop_bit_xor:
977 case ir_binop_bit_or:
978 assert(!"GLSL 1.30 features unsupported");
979 break;
980 }
981 }
982
/**
 * Emit per-component MOVs from the RHS value into the LHS storage,
 * honoring the write mask and an optional condition (lowered into
 * predicated MOVs).
 */
void
fs_visitor::visit(ir_assignment *ir)
{
   struct fs_reg l, r;
   int i;
   int write_mask;
   fs_inst *inst;

   /* FINISHME: arrays on the lhs */
   ir->lhs->accept(this);
   l = this->result;

   ir->rhs->accept(this);
   r = this->result;

   /* FINISHME: This should really set to the correct maximal writemask for each
    * FINISHME: component written (in the loops below).  This case can only
    * FINISHME: occur for matrices, arrays, and structures.
    */
   if (ir->write_mask == 0) {
      assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
      write_mask = WRITEMASK_XYZW;
   } else {
      assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
      write_mask = ir->write_mask;
   }

   assert(l.file != BAD_FILE);
   assert(r.file != BAD_FILE);

   if (ir->condition) {
      /* Get the condition bool into the predicate. */
      ir->condition->accept(this);
      inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null, this->result, fs_reg(0)));
      inst->conditional_mod = BRW_CONDITIONAL_NZ;
   }

   for (i = 0; i < type_size(ir->lhs->type); i++) {
      /* Components past the first vec4 (matrix/array/struct elements)
       * are always written; within it, obey the write mask.
       */
      if (i >= 4 || (write_mask & (1 << i))) {
	 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
	 if (ir->condition)
	    inst->predicated = true;
	 r.reg_offset++;
      }
      l.reg_offset++;
   }
}
1030
/**
 * Build the SEND-message payload for a texture operation in MRFs
 * starting at base_mrf (coordinate, then shadow comparitor, then
 * bias/lod as the operation requires) and emit the corresponding
 * virtual texturing opcode.
 */
void
fs_visitor::visit(ir_texture *ir)
{
   int base_mrf = 2;
   fs_inst *inst = NULL;
   unsigned int mlen = 0;

   ir->coordinate->accept(this);
   fs_reg coordinate = this->result;

   if (ir->projector) {
      /* Lower projective texturing: multiply each coordinate
       * component by 1/q up front.
       */
      fs_reg inv_proj = fs_reg(this, glsl_type::float_type);

      ir->projector->accept(this);
      emit(fs_inst(FS_OPCODE_RCP, inv_proj, this->result));

      fs_reg proj_coordinate = fs_reg(this, ir->coordinate->type);
      for (unsigned int i = 0; i < ir->coordinate->type->vector_elements; i++) {
	 emit(fs_inst(BRW_OPCODE_MUL, proj_coordinate, coordinate, inv_proj));
	 coordinate.reg_offset++;
	 proj_coordinate.reg_offset++;
      }
      proj_coordinate.reg_offset = 0;

      coordinate = proj_coordinate;
   }

   /* Copy the coordinate components into consecutive MRFs. */
   for (mlen = 0; mlen < ir->coordinate->type->vector_elements; mlen++) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), coordinate));
      coordinate.reg_offset++;
   }

   /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
   if (intel->gen < 5)
      mlen = 3;

   if (ir->shadow_comparitor) {
      /* For shadow comparisons, we have to supply u,v,r. */
      mlen = 3;

      ir->shadow_comparitor->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;
   }

   /* Do we ever want to handle writemasking on texture samples?  Is it
    * performance relevant?
    */
   fs_reg dst = fs_reg(this, glsl_type::vec4_type);

   switch (ir->op) {
   case ir_tex:
      inst = emit(fs_inst(FS_OPCODE_TEX, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txb:
      /* LOD bias goes in the slot after the coordinate/comparitor. */
      ir->lod_info.bias->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXB, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txl:
      ir->lod_info.lod->accept(this);
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, base_mrf + mlen), this->result));
      mlen++;

      inst = emit(fs_inst(FS_OPCODE_TXL, dst, fs_reg(MRF, base_mrf)));
      break;
   case ir_txd:
   case ir_txf:
      assert(!"GLSL 1.30 features unsupported");
      break;
   }

   /* Map the GLSL sampler uniform to the bound texture unit. */
   inst->sampler =
      _mesa_get_sampler_uniform_value(ir->sampler,
				      ctx->Shader.CurrentProgram,
				      &brw->fragment_program->Base);
   inst->sampler = c->fp->program.Base.SamplerUnits[inst->sampler];

   this->result = dst;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;
   inst->mlen = mlen;
}
1117
/**
 * Expand a swizzle into per-component MOVs from the swizzled source
 * components into fresh result storage.
 */
void
fs_visitor::visit(ir_swizzle *ir)
{
   ir->val->accept(this);
   fs_reg val = this->result;

   fs_reg result = fs_reg(this, ir->type);
   this->result = result;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      fs_reg channel = val;
      int swiz = 0;

      /* Pick the source component for this destination channel. */
      switch (i) {
      case 0:
	 swiz = ir->mask.x;
	 break;
      case 1:
	 swiz = ir->mask.y;
	 break;
      case 2:
	 swiz = ir->mask.z;
	 break;
      case 3:
	 swiz = ir->mask.w;
	 break;
      }

      channel.reg_offset += swiz;
      emit(fs_inst(BRW_OPCODE_MOV, result, channel));
      result.reg_offset++;
   }
}
1151
/**
 * Emit an unconditional fragment discard.  Conditional discard is not
 * handled yet.
 */
void
fs_visitor::visit(ir_discard *ir)
{
   assert(ir->condition == NULL); /* FINISHME */

   emit(fs_inst(FS_OPCODE_DISCARD));
}
1159
/**
 * Load a constant by emitting one immediate MOV per component into
 * fresh storage.  Bools are stored as integer 0/1.
 */
void
fs_visitor::visit(ir_constant *ir)
{
   fs_reg reg(this, ir->type);
   this->result = reg;

   for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
      switch (ir->type->base_type) {
      case GLSL_TYPE_FLOAT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
	 break;
      case GLSL_TYPE_UINT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
	 break;
      case GLSL_TYPE_INT:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
	 break;
      case GLSL_TYPE_BOOL:
	 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
	 break;
      default:
	 assert(!"Non-float/uint/int/bool constant");
      }
      reg.reg_offset++;
   }
}
1186
1187 void
1188 fs_visitor::visit(ir_if *ir)
1189 {
1190 fs_inst *inst;
1191
1192 /* Don't point the annotation at the if statement, because then it plus
1193 * the then and else blocks get printed.
1194 */
1195 this->base_ir = ir->condition;
1196
1197 /* Generate the condition into the condition code. */
1198 ir->condition->accept(this);
1199 inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
1200 inst->conditional_mod = BRW_CONDITIONAL_NZ;
1201
1202 inst = emit(fs_inst(BRW_OPCODE_IF));
1203 inst->predicated = true;
1204
1205 foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
1206 ir_instruction *ir = (ir_instruction *)iter.get();
1207 this->base_ir = ir;
1208
1209 ir->accept(this);
1210 }
1211
1212 if (!ir->else_instructions.is_empty()) {
1213 emit(fs_inst(BRW_OPCODE_ELSE));
1214
1215 foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
1216 ir_instruction *ir = (ir_instruction *)iter.get();
1217 this->base_ir = ir;
1218
1219 ir->accept(this);
1220 }
1221 }
1222
1223 emit(fs_inst(BRW_OPCODE_ENDIF));
1224 }
1225
void
fs_visitor::visit(ir_loop *ir)
{
   /* Emit a DO ... WHILE loop.  The GLSL compiler may annotate simple
    * loops with a counter variable, its initial value (from), an end
    * condition (to/cmp) and an increment; each piece is lowered here
    * when present.
    */
   fs_reg counter = reg_undef;

   if (ir->counter) {
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
	 this->base_ir = ir->from;
	 ir->from->accept(this);

	 /* Initialize the counter to the loop's starting value. */
	 emit(fs_inst(BRW_OPCODE_MOV, counter, this->result));
      }
   }

   /* Start a safety counter. If the user messed up their loop
    * counting, we don't want to hang the GPU.
    */
   fs_reg max_iter = fs_reg(this, glsl_type::int_type);
   emit(fs_inst(BRW_OPCODE_MOV, max_iter, fs_reg(10000)));

   emit(fs_inst(BRW_OPCODE_DO));

   if (ir->to) {
      this->base_ir = ir->to;
      ir->to->accept(this);

      /* Compare the counter against the end value and emit a predicated
       * BREAK that fires when the loop's termination condition holds.
       */
      fs_inst *inst = emit(fs_inst(BRW_OPCODE_CMP, reg_null,
				   counter, this->result));
      switch (ir->cmp) {
      case ir_binop_equal:
	 inst->conditional_mod = BRW_CONDITIONAL_Z;
	 break;
      case ir_binop_nequal:
	 inst->conditional_mod = BRW_CONDITIONAL_NZ;
	 break;
      case ir_binop_gequal:
	 inst->conditional_mod = BRW_CONDITIONAL_GE;
	 break;
      case ir_binop_lequal:
	 inst->conditional_mod = BRW_CONDITIONAL_LE;
	 break;
      case ir_binop_greater:
	 inst->conditional_mod = BRW_CONDITIONAL_G;
	 break;
      case ir_binop_less:
	 inst->conditional_mod = BRW_CONDITIONAL_L;
	 break;
      default:
	 assert(!"not reached: unknown loop condition");
	 this->fail = true;
	 break;
      }

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   foreach_iter(exec_list_iterator, iter, ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      fs_inst *inst;

      this->base_ir = ir;
      ir->accept(this);

      /* Check the maximum loop iters counter.
       * NOTE(review): this decrement/BREAK pair is emitted after *every*
       * body instruction, so max_iter is decremented several times per
       * trip through the loop.  It still bounds the iteration count,
       * just more tightly than 10000 trips.
       */
      inst = emit(fs_inst(BRW_OPCODE_ADD, max_iter, max_iter, fs_reg(-1)));
      inst->conditional_mod = BRW_CONDITIONAL_Z;

      inst = emit(fs_inst(BRW_OPCODE_BREAK));
      inst->predicated = true;
   }

   if (ir->increment) {
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      /* Step the counter at the bottom of the loop body. */
      emit(fs_inst(BRW_OPCODE_ADD, counter, counter, this->result));
   }

   emit(fs_inst(BRW_OPCODE_WHILE));
}
1310
1311 void
1312 fs_visitor::visit(ir_loop_jump *ir)
1313 {
1314 switch (ir->mode) {
1315 case ir_loop_jump::jump_break:
1316 emit(fs_inst(BRW_OPCODE_BREAK));
1317 break;
1318 case ir_loop_jump::jump_continue:
1319 emit(fs_inst(BRW_OPCODE_CONTINUE));
1320 break;
1321 }
1322 }
1323
1324 void
1325 fs_visitor::visit(ir_call *ir)
1326 {
1327 assert(!"FINISHME");
1328 }
1329
1330 void
1331 fs_visitor::visit(ir_return *ir)
1332 {
1333 assert(!"FINISHME");
1334 }
1335
1336 void
1337 fs_visitor::visit(ir_function *ir)
1338 {
1339 /* Ignore function bodies other than main() -- we shouldn't see calls to
1340 * them since they should all be inlined before we get to ir_to_mesa.
1341 */
1342 if (strcmp(ir->name, "main") == 0) {
1343 const ir_function_signature *sig;
1344 exec_list empty;
1345
1346 sig = ir->matching_signature(&empty);
1347
1348 assert(sig);
1349
1350 foreach_iter(exec_list_iterator, iter, sig->body) {
1351 ir_instruction *ir = (ir_instruction *)iter.get();
1352 this->base_ir = ir;
1353
1354 ir->accept(this);
1355 }
1356 }
1357 }
1358
void
fs_visitor::visit(ir_function_signature *ir)
{
   /* Signatures are only visited through visit(ir_function), which walks
    * the body directly; reaching here is a visitor bug.
    */
   assert(!"not reached");
   (void)ir;
}
1365
1366 fs_inst *
1367 fs_visitor::emit(fs_inst inst)
1368 {
1369 fs_inst *list_inst = new(mem_ctx) fs_inst;
1370 *list_inst = inst;
1371
1372 list_inst->annotation = this->current_annotation;
1373 list_inst->ir = this->base_ir;
1374
1375 this->instructions.push_tail(list_inst);
1376
1377 return list_inst;
1378 }
1379
1380 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1381 void
1382 fs_visitor::emit_dummy_fs()
1383 {
1384 /* Everyone's favorite color. */
1385 emit(fs_inst(BRW_OPCODE_MOV,
1386 fs_reg(MRF, 2),
1387 fs_reg(1.0f)));
1388 emit(fs_inst(BRW_OPCODE_MOV,
1389 fs_reg(MRF, 3),
1390 fs_reg(0.0f)));
1391 emit(fs_inst(BRW_OPCODE_MOV,
1392 fs_reg(MRF, 4),
1393 fs_reg(1.0f)));
1394 emit(fs_inst(BRW_OPCODE_MOV,
1395 fs_reg(MRF, 5),
1396 fs_reg(0.0f)));
1397
1398 fs_inst *write;
1399 write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1400 fs_reg(0),
1401 fs_reg(0)));
1402 }
1403
/* The register location here is relative to the start of the URB
 * data.  It will get adjusted to be a real location before
 * generate_code() time.
 */
struct brw_reg
fs_visitor::interp_reg(int location, int channel)
{
   /* Each attribute occupies two registers of setup data, with two
    * channels of coefficients per register: channel / 2 selects the
    * register, and odd channels live in the second half (suboffset 4).
    */
   int regnr = location * 2 + channel / 2;
   int stride = (channel & 1) * 4;

   return brw_vec1_grf(regnr, stride);
}
1416
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
   /* For now, the source regs for the setup URB data will be unset,
    * since we don't know until codegen how many push constants we'll
    * use, and therefore what the setup URB offset is.
    */
   fs_reg src_reg = reg_undef;

   /* Compute per-pixel X/Y from the subspan origins packed in g1 plus the
    * per-channel offset vectors (brw_imm_v immediates).
    */
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_x,
		fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
		fs_reg(brw_imm_v(0x10101010))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->pixel_y,
		fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
		fs_reg(brw_imm_v(0x11001100))));

   /* The linear interpolation coefficients are relative to v0, so compute
    * the pixel's delta from v0's position (held in g1.0/g1.1).
    */
   this->current_annotation = "compute pixel deltas from v0";
   this->delta_x = fs_reg(this, glsl_type::float_type);
   this->delta_y = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_x,
		this->pixel_x,
		fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(fs_inst(BRW_OPCODE_ADD,
		this->delta_y,
		this->pixel_y,
		fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_LINTERP, wpos_w, this->delta_x, this->delta_y,
		interp_reg(FRAG_ATTRIB_WPOS, 3)));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos_w));

   /* Emit perspective-correct interpolation for every varying input. */
   foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
      ir_instruction *ir = (ir_instruction *)iter.get();
      ir_variable *var = ir->as_variable();

      if (!var)
	 continue;

      if (var->mode != ir_var_in)
	 continue;

      /* If it's already set up (WPOS), skip. */
      if (var->location == 0)
	 continue;

      this->current_annotation = talloc_asprintf(this->mem_ctx,
						 "interpolate %s "
						 "(FRAG_ATTRIB[%d])",
						 var->name,
						 var->location);
      emit_pinterp(var->location);
   }
   this->current_annotation = NULL;
}
1488
1489 void
1490 fs_visitor::emit_pinterp(int location)
1491 {
1492 fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
1493 this->interp_attrs[location] = interp_attr;
1494
1495 for (unsigned int i = 0; i < 4; i++) {
1496 struct brw_reg interp = interp_reg(location, i);
1497 emit(fs_inst(FS_OPCODE_LINTERP,
1498 interp_attr,
1499 this->delta_x,
1500 this->delta_y,
1501 fs_reg(interp)));
1502 interp_attr.reg_offset++;
1503 }
1504 interp_attr.reg_offset -= 4;
1505
1506 for (unsigned int i = 0; i < 4; i++) {
1507 emit(fs_inst(BRW_OPCODE_MUL,
1508 interp_attr,
1509 interp_attr,
1510 this->pixel_w));
1511 interp_attr.reg_offset++;
1512 }
1513 }
1514
void
fs_visitor::emit_fb_writes()
{
   /* Assemble the framebuffer-write message payload in MRFs and emit one
    * FS_OPCODE_FB_WRITE per color region (MRT target), with EOT on the
    * last one to terminate the thread.
    */
   this->current_annotation = "FB write header";
   int nr = 0;

   /* m0, m1 header */
   nr += 2;

   if (c->key.aa_dest_stencil_reg) {
      /* Pass antialiasing/stencil data from the thread payload through. */
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.aa_dest_stencil_reg, 0))));
   }

   /* Reserve space for color. It'll be filled in per MRT below. */
   int color_mrf = nr;
   nr += 4;

   if (c->key.source_depth_to_render_target) {
      if (c->key.computes_depth) {
	 /* Hand over gl_FragDepth. */
	 assert(this->frag_depth);
	 fs_reg depth = *(variable_storage(this->frag_depth));

	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++), depth));
      } else {
	 /* Pass through the payload depth. */
	 emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		      fs_reg(brw_vec8_grf(c->key.source_depth_reg, 0))));
      }
   }

   if (c->key.dest_depth_reg) {
      emit(fs_inst(BRW_OPCODE_MOV, fs_reg(MRF, nr++),
		   fs_reg(brw_vec8_grf(c->key.dest_depth_reg, 0))));
   }

   fs_reg color = reg_undef;
   if (this->frag_color)
      color = *(variable_storage(this->frag_color));
   else if (this->frag_data)
      color = *(variable_storage(this->frag_data));

   for (int target = 0; target < c->key.nr_color_regions; target++) {
      this->current_annotation = talloc_asprintf(this->mem_ctx,
						 "FB write target %d",
						 target);
      if (this->frag_color || this->frag_data) {
	 for (int i = 0; i < 4; i++) {
	    emit(fs_inst(BRW_OPCODE_MOV,
			 fs_reg(MRF, color_mrf + i),
			 color));
	    color.reg_offset++;
	 }
      }

      /* gl_FragColor is broadcast to every target, so rewind to channel 0;
       * gl_FragData keeps advancing so each target gets its own vec4.
       */
      if (this->frag_color)
	 color.reg_offset -= 4;

      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->target = target;
      inst->mlen = nr;
      if (target == c->key.nr_color_regions - 1)
	 inst->eot = true;
   }

   if (c->key.nr_color_regions == 0) {
      /* Even with no color buffers bound, a write must terminate the
       * thread.
       */
      fs_inst *inst = emit(fs_inst(FS_OPCODE_FB_WRITE,
				   reg_undef, reg_undef));
      inst->mlen = nr;
      inst->eot = true;
   }

   this->current_annotation = NULL;
}
1591
void
fs_visitor::generate_fb_write(fs_inst *inst)
{
   /* Emit the native render-target-write SEND for one FS_OPCODE_FB_WRITE.
    * The color/depth payload was already placed in MRFs by
    * emit_fb_writes(); here we fill in the header and issue the message.
    */
   GLboolean eot = inst->eot;

   /* Header is 2 regs, g0 and g1 are the contents.  g0 will be implied
    * move, here's g1.
    */
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_set_compression_control(p, BRW_COMPRESSION_NONE);
   brw_MOV(p,
	   brw_message_reg(1),
	   brw_vec8_grf(1, 0));
   brw_pop_insn_state(p);

   brw_fb_WRITE(p,
		8, /* dispatch_width */
		retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
		0, /* base MRF */
		retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
		inst->target,
		inst->mlen,
		0,
		eot);
}
1618
void
fs_visitor::generate_linterp(fs_inst *inst,
			     struct brw_reg dst, struct brw_reg *src)
{
   /* Emit linear interpolation: dst = a0 + a1*x + a2*y, where the a
    * coefficients come from src[2] (the per-attribute setup data) and
    * (x, y) are the pixel deltas in src[0]/src[1].
    */
   struct brw_reg delta_x = src[0];
   struct brw_reg delta_y = src[1];
   struct brw_reg interp = src[2];

   /* Use PLN when available; it requires delta_x/delta_y to be adjacent
    * registers, and (before gen6) the pair must start on an even register.
    * Otherwise fall back to a LINE+MAC pair.
    */
   if (brw->has_pln &&
       delta_y.nr == delta_x.nr + 1 &&
       (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
      brw_PLN(p, dst, interp, delta_x);
   } else {
      brw_LINE(p, brw_null_reg(), interp, delta_x);
      brw_MAC(p, dst, suboffset(interp, 1), delta_y);
   }
}
1636
void
fs_visitor::generate_math(fs_inst *inst,
			  struct brw_reg dst, struct brw_reg *src)
{
   /* Emit a message to the shared math unit for the transcendental
    * FS opcodes, mapping each to the hardware math function code.
    */
   int op;

   switch (inst->opcode) {
   case FS_OPCODE_RCP:
      op = BRW_MATH_FUNCTION_INV;
      break;
   case FS_OPCODE_RSQ:
      op = BRW_MATH_FUNCTION_RSQ;
      break;
   case FS_OPCODE_SQRT:
      op = BRW_MATH_FUNCTION_SQRT;
      break;
   case FS_OPCODE_EXP2:
      op = BRW_MATH_FUNCTION_EXP;
      break;
   case FS_OPCODE_LOG2:
      op = BRW_MATH_FUNCTION_LOG;
      break;
   case FS_OPCODE_POW:
      op = BRW_MATH_FUNCTION_POW;
      break;
   case FS_OPCODE_SIN:
      op = BRW_MATH_FUNCTION_SIN;
      break;
   case FS_OPCODE_COS:
      op = BRW_MATH_FUNCTION_COS;
      break;
   default:
      assert(!"not reached: unknown math function");
      op = 0;
      break;
   }

   /* POW is the only two-operand function; its second argument is passed
    * in the following message register (m3).
    */
   if (inst->opcode == FS_OPCODE_POW) {
      brw_MOV(p, brw_message_reg(3), src[1]);
   }

   /* NOTE(review): the "2" below looks like the base message register for
    * the math message -- confirm against the brw_math() prototype.
    */
   brw_math(p, dst,
	    op,
	    inst->saturate ? BRW_MATH_SATURATE_SATURATE :
	    BRW_MATH_SATURATE_NONE,
	    2, src[0],
	    BRW_MATH_DATA_VECTOR,
	    BRW_MATH_PRECISION_FULL);
}
1686
1687 void
1688 fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
1689 {
1690 int msg_type = -1;
1691 int rlen = 4;
1692
1693 if (intel->gen == 5) {
1694 switch (inst->opcode) {
1695 case FS_OPCODE_TEX:
1696 if (inst->shadow_compare) {
1697 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5;
1698 } else {
1699 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_GEN5;
1700 }
1701 break;
1702 case FS_OPCODE_TXB:
1703 if (inst->shadow_compare) {
1704 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5;
1705 } else {
1706 msg_type = BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5;
1707 }
1708 break;
1709 }
1710 } else {
1711 switch (inst->opcode) {
1712 case FS_OPCODE_TEX:
1713 /* Note that G45 and older determines shadow compare and dispatch width
1714 * from message length for most messages.
1715 */
1716 if (inst->shadow_compare) {
1717 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE;
1718 } else {
1719 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE;
1720 }
1721 case FS_OPCODE_TXB:
1722 if (inst->shadow_compare) {
1723 assert(!"FINISHME: shadow compare with bias.");
1724 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1725 } else {
1726 msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS;
1727 rlen = 8;
1728 }
1729 break;
1730 }
1731 }
1732 assert(msg_type != -1);
1733
1734 /* g0 header. */
1735 src.nr--;
1736
1737 brw_SAMPLE(p,
1738 retype(dst, BRW_REGISTER_TYPE_UW),
1739 src.nr,
1740 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1741 SURF_INDEX_TEXTURE(inst->sampler),
1742 inst->sampler,
1743 WRITEMASK_XYZW,
1744 msg_type,
1745 rlen,
1746 inst->mlen + 1,
1747 0,
1748 1,
1749 BRW_SAMPLER_SIMD_MODE_SIMD8);
1750 }
1751
1752
/* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
 * looking like:
 *
 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
 *
 * and we're trying to produce:
 *
 *           DDX                     DDY
 * dst: (ss0.tr - ss0.tl)     (ss0.tl - ss0.bl)
 *      (ss0.tr - ss0.tl)     (ss0.tr - ss0.br)
 *      (ss0.br - ss0.bl)     (ss0.tl - ss0.bl)
 *      (ss0.br - ss0.bl)     (ss0.tr - ss0.br)
 *      (ss1.tr - ss1.tl)     (ss1.tl - ss1.bl)
 *      (ss1.tr - ss1.tl)     (ss1.tr - ss1.br)
 *      (ss1.br - ss1.bl)     (ss1.tl - ss1.bl)
 *      (ss1.br - ss1.bl)     (ss1.tr - ss1.br)
 *
 * and add another set of two more subspans if in 16-pixel dispatch mode.
 *
 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
 * pair.  But for DDY, it's harder, as we want to produce the pairs swizzled
 * between each other.  We could probably do it like ddx and swizzle the right
 * order later, but bail for now and just produce
 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
 */
void
fs_visitor::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* src0 starts at the .tr element (suboffset 1), src1 at .tl; the
    * width-2/vstride-2/hstride-0 regioning repeats each difference across
    * the pair, per the comment above.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 1,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_2,
				 BRW_WIDTH_2,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
1796
void
fs_visitor::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
{
   /* Approximate DDY as (tl - bl) broadcast across each subspan: src0 is
    * the .tl element, src1 the .bl element (suboffset 2), with
    * width-4/vstride-4/hstride-0 regioning repeating the value four times.
    * See the comment above generate_ddx() for why this is approximate.
    */
   struct brw_reg src0 = brw_reg(src.file, src.nr, 0,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   struct brw_reg src1 = brw_reg(src.file, src.nr, 2,
				 BRW_REGISTER_TYPE_F,
				 BRW_VERTICAL_STRIDE_4,
				 BRW_WIDTH_4,
				 BRW_HORIZONTAL_STRIDE_0,
				 BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
   brw_ADD(p, dst, src0, negate(src1));
}
1814
void
fs_visitor::generate_discard(fs_inst *inst)
{
   /* Kill the current channels by ANDing the execution mask in g0 with
    * the inverted channel-enable mask, so discarded pixels never produce
    * output.
    */
   struct brw_reg g0 = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW);
   brw_push_insn_state(p);
   brw_set_mask_control(p, BRW_MASK_DISABLE);
   brw_NOT(p, c->emit_mask_reg, brw_mask_reg(1)); /* IMASK */
   brw_AND(p, g0, c->emit_mask_reg, g0);
   brw_pop_insn_state(p);
}
1825
/* Map a virtual GRF (reg != 0) to a fixed hardware register number placed
 * after the payload/setup registers.  reg 0 is left alone (it's either
 * already assigned or not a virtual register).
 */
static void
trivial_assign_reg(int header_size, fs_reg *reg)
{
   if (reg->file == GRF && reg->reg != 0) {
      reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
      reg->reg = 0;
   }
}
1834
void
fs_visitor::assign_curb_setup()
{
   /* Lay out the push-constant (CURB) registers after the thread payload
    * and rewrite UNIFORM-file operands to the fixed hardware registers
    * they land in.
    */
   c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
   /* Constants are pushed 8 floats to a register. */
   c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;

   if (intel->gen == 5 && (c->prog_data.first_curbe_grf +
			   c->prog_data.curb_read_length) & 1) {
      /* Align the start of the interpolation coefficients so that we can use
       * the PLN instruction.
       */
      c->prog_data.first_curbe_grf++;
   }

   /* Map the offsets in the UNIFORM file to fixed HW regs. */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      for (unsigned int i = 0; i < 3; i++) {
	 if (inst->src[i].file == UNIFORM) {
	    int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
	    struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
						  constant_nr / 8,
						  constant_nr % 8);

	    inst->src[i].file = FIXED_HW_REG;
	    inst->src[i].fixed_hw_reg = brw_reg;
	 }
      }
   }
}
1866
void
fs_visitor::assign_urb_setup()
{
   /* Lay out the interpolation setup data (URB) after the push constants
    * and rewrite FS_OPCODE_LINTERP's coefficient operand from its
    * attribute-relative register to the real hardware register.
    */
   int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
   int interp_reg_nr[FRAG_ATTRIB_MAX];

   c->prog_data.urb_read_length = 0;

   /* Figure out where each of the incoming setup attributes lands. */
   for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
      interp_reg_nr[i] = -1;

      /* WPOS setup is always present (needed for W); other attributes only
       * when the program reads them.
       */
      if (i != FRAG_ATTRIB_WPOS &&
	  !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
	 continue;

      /* Each attribute is 4 setup channels, each of which is half a reg. */
      interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
      c->prog_data.urb_read_length += 2;
   }

   /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
    * the correct setup input.
    */
   foreach_iter(exec_list_iterator, iter, this->instructions) {
      fs_inst *inst = (fs_inst *)iter.get();

      if (inst->opcode != FS_OPCODE_LINTERP)
	 continue;

      assert(inst->src[2].file == FIXED_HW_REG);

      /* interp_reg() encoded attribute and register-within-attribute in
       * the placeholder register number; decode and relocate it.
       */
      int location = inst->src[2].fixed_hw_reg.nr / 2;
      assert(interp_reg_nr[location] != -1);
      inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
				      (inst->src[2].fixed_hw_reg.nr & 1));
   }

   this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
}
1907
1908 void
1909 fs_visitor::assign_regs()
1910 {
1911 int header_size = this->first_non_payload_grf;
1912 int last_grf = 0;
1913
1914 /* FINISHME: trivial assignment of register numbers */
1915 foreach_iter(exec_list_iterator, iter, this->instructions) {
1916 fs_inst *inst = (fs_inst *)iter.get();
1917
1918 trivial_assign_reg(header_size, &inst->dst);
1919 trivial_assign_reg(header_size, &inst->src[0]);
1920 trivial_assign_reg(header_size, &inst->src[1]);
1921
1922 last_grf = MAX2(last_grf, inst->dst.hw_reg);
1923 last_grf = MAX2(last_grf, inst->src[0].hw_reg);
1924 last_grf = MAX2(last_grf, inst->src[1].hw_reg);
1925 }
1926
1927 this->grf_used = last_grf + 1;
1928 }
1929
/* Translate an abstract fs_reg into the concrete brw_reg encoding the
 * assembler consumes.  By this point UNIFORM registers must have been
 * rewritten to FIXED_HW_REG by assign_curb_setup().
 */
static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
{
   struct brw_reg brw_reg;

   switch (reg->file) {
   case GRF:
   case ARF:
   case MRF:
      brw_reg = brw_vec8_reg(reg->file,
			    reg->hw_reg, 0);
      brw_reg = retype(brw_reg, reg->type);
      break;
   case IMM:
      /* Immediates carry their value in the matching union member. */
      switch (reg->type) {
      case BRW_REGISTER_TYPE_F:
	 brw_reg = brw_imm_f(reg->imm.f);
	 break;
      case BRW_REGISTER_TYPE_D:
	 brw_reg = brw_imm_d(reg->imm.i);
	 break;
      case BRW_REGISTER_TYPE_UD:
	 brw_reg = brw_imm_ud(reg->imm.u);
	 break;
      default:
	 assert(!"not reached");
	 break;
      }
      break;
   case FIXED_HW_REG:
      brw_reg = reg->fixed_hw_reg;
      break;
   case BAD_FILE:
      /* Probably unused. */
      brw_reg = brw_null_reg();
      break;
   case UNIFORM:
      /* Should have been lowered to FIXED_HW_REG already. */
      assert(!"not reached");
      brw_reg = brw_null_reg();
      break;
   }
   /* Apply source modifiers last. */
   if (reg->abs)
      brw_reg = brw_abs(brw_reg);
   if (reg->negate)
      brw_reg = negate(brw_reg);

   return brw_reg;
}
1977
1978 void
1979 fs_visitor::generate_code()
1980 {
1981 unsigned int annotation_len = 0;
1982 int last_native_inst = 0;
1983 struct brw_instruction *if_stack[16], *loop_stack[16];
1984 int if_stack_depth = 0, loop_stack_depth = 0;
1985 int if_depth_in_loop[16];
1986
1987 if_depth_in_loop[loop_stack_depth] = 0;
1988
1989 memset(&if_stack, 0, sizeof(if_stack));
1990 foreach_iter(exec_list_iterator, iter, this->instructions) {
1991 fs_inst *inst = (fs_inst *)iter.get();
1992 struct brw_reg src[3], dst;
1993
1994 for (unsigned int i = 0; i < 3; i++) {
1995 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
1996 }
1997 dst = brw_reg_from_fs_reg(&inst->dst);
1998
1999 brw_set_conditionalmod(p, inst->conditional_mod);
2000 brw_set_predicate_control(p, inst->predicated);
2001
2002 switch (inst->opcode) {
2003 case BRW_OPCODE_MOV:
2004 brw_MOV(p, dst, src[0]);
2005 break;
2006 case BRW_OPCODE_ADD:
2007 brw_ADD(p, dst, src[0], src[1]);
2008 break;
2009 case BRW_OPCODE_MUL:
2010 brw_MUL(p, dst, src[0], src[1]);
2011 break;
2012
2013 case BRW_OPCODE_FRC:
2014 brw_FRC(p, dst, src[0]);
2015 break;
2016 case BRW_OPCODE_RNDD:
2017 brw_RNDD(p, dst, src[0]);
2018 break;
2019 case BRW_OPCODE_RNDZ:
2020 brw_RNDZ(p, dst, src[0]);
2021 break;
2022
2023 case BRW_OPCODE_AND:
2024 brw_AND(p, dst, src[0], src[1]);
2025 break;
2026 case BRW_OPCODE_OR:
2027 brw_OR(p, dst, src[0], src[1]);
2028 break;
2029 case BRW_OPCODE_XOR:
2030 brw_XOR(p, dst, src[0], src[1]);
2031 break;
2032
2033 case BRW_OPCODE_CMP:
2034 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
2035 break;
2036 case BRW_OPCODE_SEL:
2037 brw_SEL(p, dst, src[0], src[1]);
2038 break;
2039
2040 case BRW_OPCODE_IF:
2041 assert(if_stack_depth < 16);
2042 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
2043 if_depth_in_loop[loop_stack_depth]++;
2044 if_stack_depth++;
2045 break;
2046 case BRW_OPCODE_ELSE:
2047 if_stack[if_stack_depth - 1] =
2048 brw_ELSE(p, if_stack[if_stack_depth - 1]);
2049 break;
2050 case BRW_OPCODE_ENDIF:
2051 if_stack_depth--;
2052 brw_ENDIF(p , if_stack[if_stack_depth]);
2053 if_depth_in_loop[loop_stack_depth]--;
2054 break;
2055
2056 case BRW_OPCODE_DO:
2057 loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
2058 if_depth_in_loop[loop_stack_depth] = 0;
2059 break;
2060
2061 case BRW_OPCODE_BREAK:
2062 brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
2063 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2064 break;
2065 case BRW_OPCODE_CONTINUE:
2066 brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
2067 brw_set_predicate_control(p, BRW_PREDICATE_NONE);
2068 break;
2069
2070 case BRW_OPCODE_WHILE: {
2071 struct brw_instruction *inst0, *inst1;
2072 GLuint br = 1;
2073
2074 if (intel->gen == 5)
2075 br = 2;
2076
2077 assert(loop_stack_depth > 0);
2078 loop_stack_depth--;
2079 inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
2080 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2081 while (inst0 > loop_stack[loop_stack_depth]) {
2082 inst0--;
2083 if (inst0->header.opcode == BRW_OPCODE_BREAK &&
2084 inst0->bits3.if_else.jump_count == 0) {
2085 inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
2086 }
2087 else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
2088 inst0->bits3.if_else.jump_count == 0) {
2089 inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
2090 }
2091 }
2092 }
2093 break;
2094
2095 case FS_OPCODE_RCP:
2096 case FS_OPCODE_RSQ:
2097 case FS_OPCODE_SQRT:
2098 case FS_OPCODE_EXP2:
2099 case FS_OPCODE_LOG2:
2100 case FS_OPCODE_POW:
2101 case FS_OPCODE_SIN:
2102 case FS_OPCODE_COS:
2103 generate_math(inst, dst, src);
2104 break;
2105 case FS_OPCODE_LINTERP:
2106 generate_linterp(inst, dst, src);
2107 break;
2108 case FS_OPCODE_TEX:
2109 case FS_OPCODE_TXB:
2110 case FS_OPCODE_TXL:
2111 generate_tex(inst, dst, src[0]);
2112 break;
2113 case FS_OPCODE_DISCARD:
2114 generate_discard(inst);
2115 break;
2116 case FS_OPCODE_DDX:
2117 generate_ddx(inst, dst, src[0]);
2118 break;
2119 case FS_OPCODE_DDY:
2120 generate_ddy(inst, dst, src[0]);
2121 break;
2122 case FS_OPCODE_FB_WRITE:
2123 generate_fb_write(inst);
2124 break;
2125 default:
2126 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
2127 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
2128 brw_opcodes[inst->opcode].name);
2129 } else {
2130 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
2131 }
2132 this->fail = true;
2133 }
2134
2135 if (annotation_len < p->nr_insn) {
2136 annotation_len *= 2;
2137 if (annotation_len < 16)
2138 annotation_len = 16;
2139
2140 this->annotation_string = talloc_realloc(this->mem_ctx,
2141 annotation_string,
2142 const char *,
2143 annotation_len);
2144 this->annotation_ir = talloc_realloc(this->mem_ctx,
2145 annotation_ir,
2146 ir_instruction *,
2147 annotation_len);
2148 }
2149
2150 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
2151 this->annotation_string[i] = inst->annotation;
2152 this->annotation_ir[i] = inst->ir;
2153 }
2154 last_native_inst = p->nr_insn;
2155 }
2156 }
2157
GLboolean
brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
{
   /* Entry point for the new GLSL-IR-based fragment shader backend.
    * Returns GL_TRUE if code was generated, GL_FALSE to fall back to the
    * old Mesa-IR path (no GLSL program bound, backend disabled, or no
    * fragment shader in the program).
    */
   struct brw_compile *p = &c->func;
   struct intel_context *intel = &brw->intel;
   GLcontext *ctx = &intel->ctx;
   struct brw_shader *shader = NULL;
   struct gl_shader_program *prog = ctx->Shader.CurrentProgram;

   if (!prog)
      return GL_FALSE;

   if (!using_new_fs)
      return GL_FALSE;

   /* Find the fragment shader stage among the linked shaders. */
   for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
      if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
	 shader = (struct brw_shader *)prog->_LinkedShaders[i];
	 break;
      }
   }
   if (!shader)
      return GL_FALSE;

   /* We always use 8-wide mode, at least for now.  For one, flow
    * control only works in 8-wide.  Also, when we're fragment shader
    * bound, we're almost always under register pressure as well, so
    * 8-wide would save us from the performance cliff of spilling
    * regs.
    */
   c->dispatch_width = 8;

   if (INTEL_DEBUG & DEBUG_WM) {
      printf("GLSL IR for native fragment shader %d:\n", prog->Name);
      _mesa_print_ir(shader->ir, NULL);
      printf("\n");
   }

   /* Now the main event: Visit the shader IR and generate our FS IR for it.
    */
   fs_visitor v(c, shader);

   if (0) {
      /* Bringup path: ignore the shader and output magenta. */
      v.emit_dummy_fs();
   } else {
      v.emit_interpolation();

      /* Generate FS IR for main().  (the visitor only descends into
       * functions called "main").
       */
      foreach_iter(exec_list_iterator, iter, *shader->ir) {
	 ir_instruction *ir = (ir_instruction *)iter.get();
	 v.base_ir = ir;
	 ir->accept(&v);
      }

      v.emit_fb_writes();
      v.assign_curb_setup();
      v.assign_urb_setup();
      v.assign_regs();
   }

   v.generate_code();

   assert(!v.fail); /* FINISHME: Cleanly fail, tested at link time, etc. */

   if (v.fail)
      return GL_FALSE;

   if (INTEL_DEBUG & DEBUG_WM) {
      /* Print the disassembly, interleaved with the IR and annotation
       * strings recorded during generate_code().
       */
      const char *last_annotation_string = NULL;
      ir_instruction *last_annotation_ir = NULL;

      printf("Native code for fragment shader %d:\n", prog->Name);
      for (unsigned int i = 0; i < p->nr_insn; i++) {
	 if (last_annotation_ir != v.annotation_ir[i]) {
	    last_annotation_ir = v.annotation_ir[i];
	    if (last_annotation_ir) {
	       printf("   ");
	       last_annotation_ir->print();
	       printf("\n");
	    }
	 }
	 if (last_annotation_string != v.annotation_string[i]) {
	    last_annotation_string = v.annotation_string[i];
	    if (last_annotation_string)
	       printf("   %s\n", last_annotation_string);
	 }
	 brw_disasm(stdout, &p->store[i], intel->gen);
      }
      printf("\n");
   }

   c->prog_data.total_grf = v.grf_used;
   c->prog_data.total_scratch = 0;

   return GL_TRUE;
}