i965: Move libdrm/C++ hack introduced in fa2deb3d to intel_context.h
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 extern "C" {
29
30 #include <sys/types.h>
31
32 #include "main/macros.h"
33 #include "main/shaderobj.h"
34 #include "program/prog_parameter.h"
35 #include "program/prog_print.h"
36 #include "program/prog_optimize.h"
37 #include "program/hash_table.h"
38 #include "brw_context.h"
39 #include "brw_eu.h"
40 #include "brw_wm.h"
41 #include "talloc.h"
42 }
43 #include "../glsl/glsl_types.h"
44 #include "../glsl/ir_optimization.h"
45 #include "../glsl/ir_print_visitor.h"
46
47 enum register_file {
48 ARF = BRW_ARCHITECTURE_REGISTER_FILE,
49 GRF = BRW_GENERAL_REGISTER_FILE,
50 MRF = BRW_MESSAGE_REGISTER_FILE,
51 IMM = BRW_IMMEDIATE_VALUE,
52 FIXED_HW_REG, /* a struct brw_reg */
53 UNIFORM, /* prog_data->params[hw_reg] */
54 BAD_FILE
55 };
56
/**
 * Backend-only opcodes stored in fs_inst::opcode alongside BRW_OPCODE_*
 * values.  Numbering starts at 256.
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,

   /* Math operations. */
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,

   /* Derivatives. */
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,

   /* Varying interpolation. */
   FS_OPCODE_LINTERP,
};
71
/**
 * Cached result of the INTEL_NEW_FS environment check: -1 until
 * brw_link_shader() has looked it up, then 0 or 1.
 */
static int using_new_fs = -1;
73
74 struct gl_shader *
75 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
76 {
77 struct brw_shader *shader;
78
79 shader = talloc_zero(NULL, struct brw_shader);
80 if (shader) {
81 shader->base.Type = type;
82 shader->base.Name = name;
83 _mesa_init_shader(ctx, &shader->base);
84 }
85
86 return &shader->base;
87 }
88
89 struct gl_shader_program *
90 brw_new_shader_program(GLcontext *ctx, GLuint name)
91 {
92 struct brw_shader_program *prog;
93 prog = talloc_zero(NULL, struct brw_shader_program);
94 if (prog) {
95 prog->base.Name = name;
96 _mesa_init_shader_program(ctx, &prog->base);
97 }
98 return &prog->base;
99 }
100
101 GLboolean
102 brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
103 {
104 if (!_mesa_ir_compile_shader(ctx, shader))
105 return GL_FALSE;
106
107 return GL_TRUE;
108 }
109
110 GLboolean
111 brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
112 {
113 if (using_new_fs == -1)
114 using_new_fs = getenv("INTEL_NEW_FS") != NULL;
115
116 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
117 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
118
119 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
120 void *mem_ctx = talloc_new(NULL);
121 bool progress;
122
123 if (shader->ir)
124 talloc_free(shader->ir);
125 shader->ir = new(shader) exec_list;
126 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
127
128 do_mat_op_to_vec(shader->ir);
129 do_mod_to_fract(shader->ir);
130 do_div_to_mul_rcp(shader->ir);
131 do_sub_to_add_neg(shader->ir);
132 do_explog_to_explog2(shader->ir);
133
134 brw_do_channel_expressions(shader->ir);
135 brw_do_vector_splitting(shader->ir);
136
137 do {
138 progress = false;
139
140 progress = do_common_optimization(shader->ir, true) || progress;
141 } while (progress);
142
143 validate_ir_tree(shader->ir);
144
145 reparent_ir(shader->ir, shader->ir);
146 talloc_free(mem_ctx);
147 }
148 }
149
150 if (!_mesa_ir_link_shader(ctx, prog))
151 return GL_FALSE;
152
153 return GL_TRUE;
154 }
155
156 static int
157 type_size(const struct glsl_type *type)
158 {
159 unsigned int size, i;
160
161 switch (type->base_type) {
162 case GLSL_TYPE_UINT:
163 case GLSL_TYPE_INT:
164 case GLSL_TYPE_FLOAT:
165 case GLSL_TYPE_BOOL:
166 return type->components();
167 case GLSL_TYPE_ARRAY:
168 /* FINISHME: uniform/varying arrays. */
169 return type_size(type->fields.array) * type->length;
170 case GLSL_TYPE_STRUCT:
171 size = 0;
172 for (i = 0; i < type->length; i++) {
173 size += type_size(type->fields.structure[i].type);
174 }
175 return size;
176 case GLSL_TYPE_SAMPLER:
177 /* Samplers take up no register space, since they're baked in at
178 * link time.
179 */
180 return 0;
181 default:
182 assert(!"not reached");
183 return 0;
184 }
185 }
186
187 class fs_reg {
188 public:
189 /* Callers of this talloc-based new need not call delete. It's
190 * easier to just talloc_free 'ctx' (or any of its ancestors). */
191 static void* operator new(size_t size, void *ctx)
192 {
193 void *node;
194
195 node = talloc_size(ctx, size);
196 assert(node != NULL);
197
198 return node;
199 }
200
201 /** Generic unset register constructor. */
202 fs_reg()
203 {
204 this->file = BAD_FILE;
205 this->reg = 0;
206 this->reg_offset = 0;
207 this->hw_reg = -1;
208 this->negate = 0;
209 this->abs = 0;
210 }
211
212 /** Immediate value constructor. */
213 fs_reg(float f)
214 {
215 this->file = IMM;
216 this->reg = 0;
217 this->hw_reg = 0;
218 this->type = BRW_REGISTER_TYPE_F;
219 this->imm.f = f;
220 this->negate = 0;
221 this->abs = 0;
222 }
223
224 /** Immediate value constructor. */
225 fs_reg(int32_t i)
226 {
227 this->file = IMM;
228 this->reg = 0;
229 this->hw_reg = 0;
230 this->type = BRW_REGISTER_TYPE_D;
231 this->imm.i = i;
232 this->negate = 0;
233 this->abs = 0;
234 }
235
236 /** Immediate value constructor. */
237 fs_reg(uint32_t u)
238 {
239 this->file = IMM;
240 this->reg = 0;
241 this->hw_reg = 0;
242 this->type = BRW_REGISTER_TYPE_UD;
243 this->imm.u = u;
244 this->negate = 0;
245 this->abs = 0;
246 }
247
248 /** Fixed brw_reg Immediate value constructor. */
249 fs_reg(struct brw_reg fixed_hw_reg)
250 {
251 this->file = FIXED_HW_REG;
252 this->fixed_hw_reg = fixed_hw_reg;
253 this->reg = 0;
254 this->hw_reg = 0;
255 this->type = fixed_hw_reg.type;
256 this->negate = 0;
257 this->abs = 0;
258 }
259
260 fs_reg(enum register_file file, int hw_reg);
261 fs_reg(class fs_visitor *v, const struct glsl_type *type);
262
263 /** Register file: ARF, GRF, MRF, IMM. */
264 enum register_file file;
265 /** Abstract register number. 0 = fixed hw reg */
266 int reg;
267 /** Offset within the abstract register. */
268 int reg_offset;
269 /** HW register number. Generally unset until register allocation. */
270 int hw_reg;
271 /** Register type. BRW_REGISTER_TYPE_* */
272 int type;
273 bool negate;
274 bool abs;
275 struct brw_reg fixed_hw_reg;
276
277 /** Value for file == BRW_IMMMEDIATE_FILE */
278 union {
279 int32_t i;
280 uint32_t u;
281 float f;
282 } imm;
283 };
284
285 static const fs_reg reg_undef;
286 static const fs_reg reg_null(ARF, BRW_ARF_NULL);
287
288 class fs_inst : public exec_node {
289 public:
290 /* Callers of this talloc-based new need not call delete. It's
291 * easier to just talloc_free 'ctx' (or any of its ancestors). */
292 static void* operator new(size_t size, void *ctx)
293 {
294 void *node;
295
296 node = talloc_zero_size(ctx, size);
297 assert(node != NULL);
298
299 return node;
300 }
301
302 fs_inst()
303 {
304 this->opcode = BRW_OPCODE_NOP;
305 this->saturate = false;
306 this->conditional_mod = BRW_CONDITIONAL_NONE;
307 this->predicated = false;
308 }
309
310 fs_inst(int opcode)
311 {
312 this->opcode = opcode;
313 this->saturate = false;
314 this->conditional_mod = BRW_CONDITIONAL_NONE;
315 this->predicated = false;
316 }
317
318 fs_inst(int opcode, fs_reg dst, fs_reg src0)
319 {
320 this->opcode = opcode;
321 this->dst = dst;
322 this->src[0] = src0;
323 this->saturate = false;
324 this->conditional_mod = BRW_CONDITIONAL_NONE;
325 this->predicated = false;
326 }
327
328 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
329 {
330 this->opcode = opcode;
331 this->dst = dst;
332 this->src[0] = src0;
333 this->src[1] = src1;
334 this->saturate = false;
335 this->conditional_mod = BRW_CONDITIONAL_NONE;
336 this->predicated = false;
337 }
338
339 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
340 {
341 this->opcode = opcode;
342 this->dst = dst;
343 this->src[0] = src0;
344 this->src[1] = src1;
345 this->src[2] = src2;
346 this->saturate = false;
347 this->conditional_mod = BRW_CONDITIONAL_NONE;
348 this->predicated = false;
349 }
350
351 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
352 fs_reg dst;
353 fs_reg src[3];
354 bool saturate;
355 bool predicated;
356 int conditional_mod; /**< BRW_CONDITIONAL_* */
357
358 /** @{
359 * Annotation for the generated IR. One of the two can be set.
360 */
361 ir_instruction *ir;
362 const char *annotation;
363 /** @} */
364 };
365
366 class fs_visitor : public ir_visitor
367 {
368 public:
369
370 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
371 {
372 this->c = c;
373 this->p = &c->func;
374 this->brw = p->brw;
375 this->intel = &brw->intel;
376 this->ctx = &intel->ctx;
377 this->mem_ctx = talloc_new(NULL);
378 this->shader = shader;
379 this->fail = false;
380 this->next_abstract_grf = 1;
381 this->variable_ht = hash_table_ctor(0,
382 hash_table_pointer_hash,
383 hash_table_pointer_compare);
384
385 this->frag_color = NULL;
386 this->frag_data = NULL;
387 this->frag_depth = NULL;
388 this->first_non_payload_grf = 0;
389
390 this->current_annotation = NULL;
391 this->annotation_string = NULL;
392 this->annotation_ir = NULL;
393 }
394 ~fs_visitor()
395 {
396 talloc_free(this->mem_ctx);
397 hash_table_dtor(this->variable_ht);
398 }
399
400 fs_reg *variable_storage(ir_variable *var);
401
402 void visit(ir_variable *ir);
403 void visit(ir_assignment *ir);
404 void visit(ir_dereference_variable *ir);
405 void visit(ir_dereference_record *ir);
406 void visit(ir_dereference_array *ir);
407 void visit(ir_expression *ir);
408 void visit(ir_texture *ir);
409 void visit(ir_if *ir);
410 void visit(ir_constant *ir);
411 void visit(ir_swizzle *ir);
412 void visit(ir_return *ir);
413 void visit(ir_loop *ir);
414 void visit(ir_loop_jump *ir);
415 void visit(ir_discard *ir);
416 void visit(ir_call *ir);
417 void visit(ir_function *ir);
418 void visit(ir_function_signature *ir);
419
420 fs_inst *emit(fs_inst inst);
421 void assign_curb_setup();
422 void assign_urb_setup();
423 void assign_regs();
424 void generate_code();
425 void generate_fb_write(fs_inst *inst);
426 void generate_linterp(fs_inst *inst, struct brw_reg dst,
427 struct brw_reg *src);
428 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
429
430 void emit_dummy_fs();
431 void emit_interpolation();
432 void emit_pinterp(int location);
433 void emit_fb_writes();
434
435 struct brw_reg interp_reg(int location, int channel);
436
437 struct brw_context *brw;
438 struct intel_context *intel;
439 GLcontext *ctx;
440 struct brw_wm_compile *c;
441 struct brw_compile *p;
442 struct brw_shader *shader;
443 void *mem_ctx;
444 exec_list instructions;
445 int next_abstract_grf;
446 struct hash_table *variable_ht;
447 ir_variable *frag_color, *frag_data, *frag_depth;
448 int first_non_payload_grf;
449
450 /** @{ debug annotation info */
451 const char *current_annotation;
452 ir_instruction *base_ir;
453 const char **annotation_string;
454 ir_instruction **annotation_ir;
455 /** @} */
456
457 bool fail;
458
459 /* Result of last visit() method. */
460 fs_reg result;
461
462 fs_reg pixel_x;
463 fs_reg pixel_y;
464 fs_reg pixel_w;
465 fs_reg delta_x;
466 fs_reg delta_y;
467 fs_reg interp_attrs[64];
468
469 int grf_used;
470
471 };
472
473 /** Fixed HW reg constructor. */
474 fs_reg::fs_reg(enum register_file file, int hw_reg)
475 {
476 this->file = file;
477 this->reg = 0;
478 this->reg_offset = 0;
479 this->hw_reg = hw_reg;
480 this->type = BRW_REGISTER_TYPE_F;
481 this->negate = 0;
482 this->abs = 0;
483 }
484
485 /** Automatic reg constructor. */
486 fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
487 {
488 this->file = GRF;
489 this->reg = v->next_abstract_grf;
490 this->reg_offset = 0;
491 v->next_abstract_grf += type_size(type);
492 this->hw_reg = -1;
493 this->negate = 0;
494 this->abs = 0;
495
496 switch (type->base_type) {
497 case GLSL_TYPE_FLOAT:
498 this->type = BRW_REGISTER_TYPE_F;
499 break;
500 case GLSL_TYPE_INT:
501 case GLSL_TYPE_BOOL:
502 this->type = BRW_REGISTER_TYPE_D;
503 break;
504 case GLSL_TYPE_UINT:
505 this->type = BRW_REGISTER_TYPE_UD;
506 break;
507 default:
508 assert(!"not reached");
509 this->type = BRW_REGISTER_TYPE_F;
510 break;
511 }
512 }
513
514 fs_reg *
515 fs_visitor::variable_storage(ir_variable *var)
516 {
517 return (fs_reg *)hash_table_find(this->variable_ht, var);
518 }
519
520 void
521 fs_visitor::visit(ir_variable *ir)
522 {
523 fs_reg *reg = NULL;
524
525 if (strcmp(ir->name, "gl_FragColor") == 0) {
526 this->frag_color = ir;
527 } else if (strcmp(ir->name, "gl_FragData") == 0) {
528 this->frag_data = ir;
529 } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
530 this->frag_depth = ir;
531 assert(!"FINISHME: this hangs currently.");
532 }
533
534 if (ir->mode == ir_var_in) {
535 reg = &this->interp_attrs[ir->location];
536 }
537
538 if (ir->mode == ir_var_uniform) {
539 const float *vec_values;
540 int param_index = c->prog_data.nr_params;
541
542 /* FINISHME: This is wildly incomplete. */
543 assert(ir->type->is_scalar() || ir->type->is_vector());
544
545 const struct gl_program *fp = &this->brw->fragment_program->Base;
546 /* Our support for uniforms is piggy-backed on the struct
547 * gl_fragment_program, because that's where the values actually
548 * get stored, rather than in some global gl_shader_program uniform
549 * store.
550 */
551 vec_values = fp->Parameters->ParameterValues[ir->location];
552 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
553 c->prog_data.param[c->prog_data.nr_params++] = &vec_values[i];
554 }
555
556 reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
557 }
558
559 if (!reg)
560 reg = new(this->mem_ctx) fs_reg(this, ir->type);
561
562 hash_table_insert(this->variable_ht, reg, ir);
563 }
564
565 void
566 fs_visitor::visit(ir_dereference_variable *ir)
567 {
568 fs_reg *reg = variable_storage(ir->var);
569 this->result = *reg;
570 }
571
572 void
573 fs_visitor::visit(ir_dereference_record *ir)
574 {
575 assert(!"FINISHME");
576 }
577
578 void
579 fs_visitor::visit(ir_dereference_array *ir)
580 {
581 ir_constant *index;
582 int element_size;
583
584 ir->array->accept(this);
585 index = ir->array_index->as_constant();
586
587 if (ir->type->is_matrix()) {
588 element_size = ir->type->vector_elements;
589 } else {
590 element_size = type_size(ir->type);
591 }
592
593 if (index) {
594 assert(this->result.file == UNIFORM ||
595 (this->result.file == GRF &&
596 this->result.reg != 0));
597 this->result.reg_offset += index->value.i[0] * element_size;
598 } else {
599 assert(!"FINISHME: non-constant matrix column");
600 }
601 }
602
603 void
604 fs_visitor::visit(ir_expression *ir)
605 {
606 unsigned int operand;
607 fs_reg op[2], temp;
608 fs_reg result;
609 fs_inst *inst;
610
611 for (operand = 0; operand < ir->get_num_operands(); operand++) {
612 ir->operands[operand]->accept(this);
613 if (this->result.file == BAD_FILE) {
614 ir_print_visitor v;
615 printf("Failed to get tree for expression operand:\n");
616 ir->operands[operand]->accept(&v);
617 this->fail = true;
618 }
619 op[operand] = this->result;
620
621 /* Matrix expression operands should have been broken down to vector
622 * operations already.
623 */
624 assert(!ir->operands[operand]->type->is_matrix());
625 /* And then those vector operands should have been broken down to scalar.
626 */
627 assert(!ir->operands[operand]->type->is_vector());
628 }
629
630 /* Storage for our result. If our result goes into an assignment, it will
631 * just get copy-propagated out, so no worries.
632 */
633 this->result = fs_reg(this, ir->type);
634
635 switch (ir->operation) {
636 case ir_unop_logic_not:
637 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
638 break;
639 case ir_unop_neg:
640 op[0].negate = ~op[0].negate;
641 this->result = op[0];
642 break;
643 case ir_unop_abs:
644 op[0].abs = true;
645 this->result = op[0];
646 break;
647 case ir_unop_sign:
648 temp = fs_reg(this, ir->type);
649
650 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
651 inst->conditional_mod = BRW_CONDITIONAL_G;
652
653 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)));
654 inst->conditional_mod = BRW_CONDITIONAL_L;
655
656 temp.negate = true;
657 emit(fs_inst(BRW_OPCODE_ADD, this->result, this->result, temp));
658
659 break;
660 case ir_unop_rcp:
661 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
662 break;
663
664 case ir_unop_exp2:
665 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
666 break;
667 case ir_unop_log2:
668 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
669 break;
670 case ir_unop_exp:
671 case ir_unop_log:
672 assert(!"not reached: should be handled by ir_explog_to_explog2");
673 break;
674 case ir_unop_sin:
675 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
676 break;
677 case ir_unop_cos:
678 emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
679 break;
680
681 case ir_unop_dFdx:
682 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
683 break;
684 case ir_unop_dFdy:
685 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
686 break;
687
688 case ir_binop_add:
689 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
690 break;
691 case ir_binop_sub:
692 assert(!"not reached: should be handled by ir_sub_to_add_neg");
693 break;
694
695 case ir_binop_mul:
696 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
697 break;
698 case ir_binop_div:
699 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
700 break;
701 case ir_binop_mod:
702 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
703 break;
704
705 case ir_binop_less:
706 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
707 inst->conditional_mod = BRW_CONDITIONAL_L;
708 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
709 break;
710 case ir_binop_greater:
711 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
712 inst->conditional_mod = BRW_CONDITIONAL_G;
713 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
714 break;
715 case ir_binop_lequal:
716 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
717 inst->conditional_mod = BRW_CONDITIONAL_LE;
718 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
719 break;
720 case ir_binop_gequal:
721 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
722 inst->conditional_mod = BRW_CONDITIONAL_GE;
723 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
724 break;
725 case ir_binop_equal:
726 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
727 inst->conditional_mod = BRW_CONDITIONAL_Z;
728 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
729 break;
730 case ir_binop_nequal:
731 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
732 inst->conditional_mod = BRW_CONDITIONAL_NZ;
733 emit(fs_inst(BRW_OPCODE_AND, this->result, this->result, fs_reg(0x1)));
734 break;
735
736 case ir_binop_logic_xor:
737 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
738 break;
739
740 case ir_binop_logic_or:
741 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
742 break;
743
744 case ir_binop_logic_and:
745 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
746 break;
747
748 case ir_binop_dot:
749 case ir_binop_cross:
750 case ir_unop_any:
751 assert(!"not reached: should be handled by brw_channel_expressions");
752 break;
753
754 case ir_unop_sqrt:
755 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
756 break;
757
758 case ir_unop_rsq:
759 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
760 break;
761
762 case ir_unop_i2f:
763 case ir_unop_b2f:
764 case ir_unop_b2i:
765 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
766 break;
767 case ir_unop_f2i:
768 emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0]));
769 break;
770 case ir_unop_f2b:
771 case ir_unop_i2b:
772 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
773 inst->conditional_mod = BRW_CONDITIONAL_NZ;
774
775 case ir_unop_trunc:
776 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
777 break;
778 case ir_unop_ceil:
779 op[0].negate = ~op[0].negate;
780 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
781 this->result.negate = true;
782 break;
783 case ir_unop_floor:
784 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
785 break;
786 case ir_unop_fract:
787 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
788 break;
789
790 case ir_binop_min:
791 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
792 inst->conditional_mod = BRW_CONDITIONAL_L;
793
794 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
795 inst->predicated = true;
796 break;
797 case ir_binop_max:
798 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
799 inst->conditional_mod = BRW_CONDITIONAL_G;
800
801 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
802 inst->predicated = true;
803 break;
804
805 case ir_binop_pow:
806 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
807 break;
808
809 case ir_unop_bit_not:
810 case ir_unop_u2f:
811 case ir_binop_lshift:
812 case ir_binop_rshift:
813 case ir_binop_bit_and:
814 case ir_binop_bit_xor:
815 case ir_binop_bit_or:
816 assert(!"GLSL 1.30 features unsupported");
817 break;
818 }
819 }
820
821 void
822 fs_visitor::visit(ir_assignment *ir)
823 {
824 struct fs_reg l, r;
825 int i;
826 int write_mask;
827 fs_inst *inst;
828
829 /* FINISHME: arrays on the lhs */
830 ir->lhs->accept(this);
831 l = this->result;
832
833 ir->rhs->accept(this);
834 r = this->result;
835
836 /* FINISHME: This should really set to the correct maximal writemask for each
837 * FINISHME: component written (in the loops below). This case can only
838 * FINISHME: occur for matrices, arrays, and structures.
839 */
840 if (ir->write_mask == 0) {
841 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
842 write_mask = WRITEMASK_XYZW;
843 } else {
844 assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
845 write_mask = ir->write_mask;
846 }
847
848 assert(l.file != BAD_FILE);
849 assert(r.file != BAD_FILE);
850
851 if (ir->condition) {
852 /* Get the condition bool into the predicate. */
853 ir->condition->accept(this);
854 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, fs_reg(0)));
855 inst->conditional_mod = BRW_CONDITIONAL_NZ;
856 }
857
858 for (i = 0; i < type_size(ir->lhs->type); i++) {
859 if (i >= 4 || (write_mask & (1 << i))) {
860 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
861 if (ir->condition)
862 inst->predicated = true;
863 }
864 l.reg_offset++;
865 r.reg_offset++;
866 }
867 }
868
869 void
870 fs_visitor::visit(ir_texture *ir)
871 {
872 assert(!"FINISHME");
873 }
874
875 void
876 fs_visitor::visit(ir_swizzle *ir)
877 {
878 ir->val->accept(this);
879 fs_reg val = this->result;
880
881 fs_reg result = fs_reg(this, ir->type);
882 this->result = result;
883
884 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
885 fs_reg channel = val;
886 int swiz = 0;
887
888 switch (i) {
889 case 0:
890 swiz = ir->mask.x;
891 break;
892 case 1:
893 swiz = ir->mask.y;
894 break;
895 case 2:
896 swiz = ir->mask.z;
897 break;
898 case 3:
899 swiz = ir->mask.w;
900 break;
901 }
902
903 channel.reg_offset += swiz;
904 emit(fs_inst(BRW_OPCODE_MOV, result, channel));
905 result.reg_offset++;
906 }
907 }
908
909 void
910 fs_visitor::visit(ir_discard *ir)
911 {
912 assert(!"FINISHME");
913 }
914
915 void
916 fs_visitor::visit(ir_constant *ir)
917 {
918 fs_reg reg(this, ir->type);
919 this->result = reg;
920
921 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
922 switch (ir->type->base_type) {
923 case GLSL_TYPE_FLOAT:
924 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
925 break;
926 case GLSL_TYPE_UINT:
927 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
928 break;
929 case GLSL_TYPE_INT:
930 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
931 break;
932 case GLSL_TYPE_BOOL:
933 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
934 break;
935 default:
936 assert(!"Non-float/uint/int/bool constant");
937 }
938 reg.reg_offset++;
939 }
940 }
941
942 void
943 fs_visitor::visit(ir_if *ir)
944 {
945 fs_inst *inst;
946
947 /* Don't point the annotation at the if statement, because then it plus
948 * the then and else blocks get printed.
949 */
950 this->base_ir = ir->condition;
951
952 /* Generate the condition into the condition code. */
953 ir->condition->accept(this);
954 inst = emit(fs_inst(BRW_OPCODE_MOV, fs_reg(brw_null_reg()), this->result));
955 inst->conditional_mod = BRW_CONDITIONAL_NZ;
956
957 inst = emit(fs_inst(BRW_OPCODE_IF));
958 inst->predicated = true;
959
960 foreach_iter(exec_list_iterator, iter, ir->then_instructions) {
961 ir_instruction *ir = (ir_instruction *)iter.get();
962 this->base_ir = ir;
963
964 ir->accept(this);
965 }
966
967 if (!ir->else_instructions.is_empty()) {
968 emit(fs_inst(BRW_OPCODE_ELSE));
969
970 foreach_iter(exec_list_iterator, iter, ir->else_instructions) {
971 ir_instruction *ir = (ir_instruction *)iter.get();
972 this->base_ir = ir;
973
974 ir->accept(this);
975 }
976 }
977
978 emit(fs_inst(BRW_OPCODE_ENDIF));
979 }
980
981 void
982 fs_visitor::visit(ir_loop *ir)
983 {
984 assert(!"FINISHME");
985 }
986
987 void
988 fs_visitor::visit(ir_loop_jump *ir)
989 {
990 assert(!"FINISHME");
991 }
992
993 void
994 fs_visitor::visit(ir_call *ir)
995 {
996 assert(!"FINISHME");
997 }
998
999 void
1000 fs_visitor::visit(ir_return *ir)
1001 {
1002 assert(!"FINISHME");
1003 }
1004
1005 void
1006 fs_visitor::visit(ir_function *ir)
1007 {
1008 /* Ignore function bodies other than main() -- we shouldn't see calls to
1009 * them since they should all be inlined before we get to ir_to_mesa.
1010 */
1011 if (strcmp(ir->name, "main") == 0) {
1012 const ir_function_signature *sig;
1013 exec_list empty;
1014
1015 sig = ir->matching_signature(&empty);
1016
1017 assert(sig);
1018
1019 foreach_iter(exec_list_iterator, iter, sig->body) {
1020 ir_instruction *ir = (ir_instruction *)iter.get();
1021 this->base_ir = ir;
1022
1023 ir->accept(this);
1024 }
1025 }
1026 }
1027
1028 void
1029 fs_visitor::visit(ir_function_signature *ir)
1030 {
1031 assert(!"not reached");
1032 (void)ir;
1033 }
1034
1035 fs_inst *
1036 fs_visitor::emit(fs_inst inst)
1037 {
1038 fs_inst *list_inst = new(mem_ctx) fs_inst;
1039 *list_inst = inst;
1040
1041 list_inst->annotation = this->current_annotation;
1042 list_inst->ir = this->base_ir;
1043
1044 this->instructions.push_tail(list_inst);
1045
1046 return list_inst;
1047 }
1048
1049 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1050 void
1051 fs_visitor::emit_dummy_fs()
1052 {
1053 /* Everyone's favorite color. */
1054 emit(fs_inst(BRW_OPCODE_MOV,
1055 fs_reg(MRF, 2),
1056 fs_reg(1.0f)));
1057 emit(fs_inst(BRW_OPCODE_MOV,
1058 fs_reg(MRF, 3),
1059 fs_reg(0.0f)));
1060 emit(fs_inst(BRW_OPCODE_MOV,
1061 fs_reg(MRF, 4),
1062 fs_reg(1.0f)));
1063 emit(fs_inst(BRW_OPCODE_MOV,
1064 fs_reg(MRF, 5),
1065 fs_reg(0.0f)));
1066
1067 fs_inst *write;
1068 write = emit(fs_inst(FS_OPCODE_FB_WRITE,
1069 fs_reg(0),
1070 fs_reg(0)));
1071 }
1072
1073 /* The register location here is relative to the start of the URB
1074 * data. It will get adjusted to be a real location before
1075 * generate_code() time.
1076 */
1077 struct brw_reg
1078 fs_visitor::interp_reg(int location, int channel)
1079 {
1080 int regnr = location * 2 + channel / 2;
1081 int stride = (channel & 1) * 4;
1082
1083 return brw_vec1_grf(regnr, stride);
1084 }
1085
1086 /** Emits the interpolation for the varying inputs. */
1087 void
1088 fs_visitor::emit_interpolation()
1089 {
1090 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1091 /* For now, the source regs for the setup URB data will be unset,
1092 * since we don't know until codegen how many push constants we'll
1093 * use, and therefore what the setup URB offset is.
1094 */
1095 fs_reg src_reg = reg_undef;
1096
1097 this->current_annotation = "compute pixel centers";
1098 this->pixel_x = fs_reg(this, glsl_type::uint_type);
1099 this->pixel_y = fs_reg(this, glsl_type::uint_type);
1100 emit(fs_inst(BRW_OPCODE_ADD,
1101 this->pixel_x,
1102 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1103 fs_reg(brw_imm_v(0x10101010))));
1104 emit(fs_inst(BRW_OPCODE_ADD,
1105 this->pixel_y,
1106 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1107 fs_reg(brw_imm_v(0x11001100))));
1108
1109 this->current_annotation = "compute pixel deltas from v0";
1110 this->delta_x = fs_reg(this, glsl_type::float_type);
1111 this->delta_y = fs_reg(this, glsl_type::float_type);
1112 emit(fs_inst(BRW_OPCODE_ADD,
1113 this->delta_x,
1114 this->pixel_x,
1115 fs_reg(negate(brw_vec1_grf(1, 0)))));
1116 emit(fs_inst(BRW_OPCODE_ADD,
1117 this->delta_y,
1118 this->pixel_y,
1119 fs_reg(brw_vec1_grf(1, 1))));
1120
1121 this->current_annotation = "compute pos.w and 1/pos.w";
1122 /* Compute wpos. Unlike many other varying inputs, we usually need it
1123 * to produce 1/w, and the varying variable wouldn't show up.
1124 */
1125 fs_reg wpos = fs_reg(this, glsl_type::vec4_type);
1126 this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos;
1127 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */
1128 wpos.reg_offset++;
1129 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */
1130 wpos.reg_offset++;
1131 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
1132 interp_reg(FRAG_ATTRIB_WPOS, 2)));
1133 wpos.reg_offset++;
1134 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
1135 interp_reg(FRAG_ATTRIB_WPOS, 3)));
1136 /* Compute the pixel W value from wpos.w. */
1137 this->pixel_w = fs_reg(this, glsl_type::float_type);
1138 emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));
1139
1140 /* FINISHME: gl_FrontFacing */
1141
1142 foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
1143 ir_instruction *ir = (ir_instruction *)iter.get();
1144 ir_variable *var = ir->as_variable();
1145
1146 if (!var)
1147 continue;
1148
1149 if (var->mode != ir_var_in)
1150 continue;
1151
1152 /* If it's already set up (WPOS), skip. */
1153 if (var->location == 0)
1154 continue;
1155
1156 this->current_annotation = talloc_asprintf(this->mem_ctx,
1157 "interpolate %s "
1158 "(FRAG_ATTRIB[%d])",
1159 var->name,
1160 var->location);
1161 emit_pinterp(var->location);
1162 }
1163 this->current_annotation = NULL;
1164 }
1165
1166 void
1167 fs_visitor::emit_pinterp(int location)
1168 {
1169 fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
1170 this->interp_attrs[location] = interp_attr;
1171
1172 for (unsigned int i = 0; i < 4; i++) {
1173 struct brw_reg interp = interp_reg(location, i);
1174 emit(fs_inst(FS_OPCODE_LINTERP,
1175 interp_attr,
1176 this->delta_x,
1177 this->delta_y,
1178 fs_reg(interp)));
1179 interp_attr.reg_offset++;
1180 }
1181 interp_attr.reg_offset -= 4;
1182
1183 for (unsigned int i = 0; i < 4; i++) {
1184 emit(fs_inst(BRW_OPCODE_MUL,
1185 interp_attr,
1186 interp_attr,
1187 this->pixel_w));
1188 interp_attr.reg_offset++;
1189 }
1190 }
1191
1192 void
1193 fs_visitor::emit_fb_writes()
1194 {
1195 this->current_annotation = "FB write";
1196
1197 assert(this->frag_color || !"FINISHME: MRT");
1198 fs_reg color = *(variable_storage(this->frag_color));
1199
1200 for (int i = 0; i < 4; i++) {
1201 emit(fs_inst(BRW_OPCODE_MOV,
1202 fs_reg(MRF, 2 + i),
1203 color));
1204 color.reg_offset++;
1205 }
1206
1207 emit(fs_inst(FS_OPCODE_FB_WRITE,
1208 fs_reg(0),
1209 fs_reg(0)));
1210
1211 this->current_annotation = NULL;
1212 }
1213
1214 void
1215 fs_visitor::generate_fb_write(fs_inst *inst)
1216 {
1217 GLboolean eot = 1; /* FINISHME: MRT */
1218 /* FINISHME: AADS */
1219
1220 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
1221 * move, here's g1.
1222 */
1223 brw_push_insn_state(p);
1224 brw_set_mask_control(p, BRW_MASK_DISABLE);
1225 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1226 brw_MOV(p,
1227 brw_message_reg(1),
1228 brw_vec8_grf(1, 0));
1229 brw_pop_insn_state(p);
1230
1231 int nr = 2 + 4;
1232
1233 brw_fb_WRITE(p,
1234 8, /* dispatch_width */
1235 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1236 0, /* base MRF */
1237 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1238 0, /* FINISHME: MRT target */
1239 nr,
1240 0,
1241 eot);
1242 }
1243
1244 void
1245 fs_visitor::generate_linterp(fs_inst *inst,
1246 struct brw_reg dst, struct brw_reg *src)
1247 {
1248 struct brw_reg delta_x = src[0];
1249 struct brw_reg delta_y = src[1];
1250 struct brw_reg interp = src[2];
1251
1252 if (brw->has_pln &&
1253 delta_y.nr == delta_x.nr + 1 &&
1254 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1255 brw_PLN(p, dst, interp, delta_x);
1256 } else {
1257 brw_LINE(p, brw_null_reg(), interp, delta_x);
1258 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1259 }
1260 }
1261
1262 void
1263 fs_visitor::generate_math(fs_inst *inst,
1264 struct brw_reg dst, struct brw_reg *src)
1265 {
1266 int op;
1267
1268 switch (inst->opcode) {
1269 case FS_OPCODE_RCP:
1270 op = BRW_MATH_FUNCTION_INV;
1271 break;
1272 case FS_OPCODE_RSQ:
1273 op = BRW_MATH_FUNCTION_RSQ;
1274 break;
1275 case FS_OPCODE_SQRT:
1276 op = BRW_MATH_FUNCTION_SQRT;
1277 break;
1278 case FS_OPCODE_EXP2:
1279 op = BRW_MATH_FUNCTION_EXP;
1280 break;
1281 case FS_OPCODE_LOG2:
1282 op = BRW_MATH_FUNCTION_LOG;
1283 break;
1284 case FS_OPCODE_POW:
1285 op = BRW_MATH_FUNCTION_POW;
1286 break;
1287 case FS_OPCODE_SIN:
1288 op = BRW_MATH_FUNCTION_SIN;
1289 break;
1290 case FS_OPCODE_COS:
1291 op = BRW_MATH_FUNCTION_COS;
1292 break;
1293 default:
1294 assert(!"not reached: unknown math function");
1295 op = 0;
1296 break;
1297 }
1298
1299 if (inst->opcode == FS_OPCODE_POW) {
1300 brw_MOV(p, brw_message_reg(3), src[1]);
1301 }
1302
1303 brw_math(p, dst,
1304 op,
1305 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1306 BRW_MATH_SATURATE_NONE,
1307 2, src[0],
1308 BRW_MATH_DATA_VECTOR,
1309 BRW_MATH_PRECISION_FULL);
1310 }
1311
1312 static void
1313 trivial_assign_reg(int header_size, fs_reg *reg)
1314 {
1315 if (reg->file == GRF && reg->reg != 0) {
1316 reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
1317 reg->reg = 0;
1318 }
1319 }
1320
1321 void
1322 fs_visitor::assign_curb_setup()
1323 {
1324 c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
1325 c->prog_data.curb_read_length = ALIGN(c->prog_data.nr_params, 8) / 8;
1326
1327 /* Map the offsets in the UNIFORM file to fixed HW regs. */
1328 foreach_iter(exec_list_iterator, iter, this->instructions) {
1329 fs_inst *inst = (fs_inst *)iter.get();
1330
1331 for (unsigned int i = 0; i < 3; i++) {
1332 if (inst->src[i].file == UNIFORM) {
1333 int constant_nr = inst->src[i].hw_reg + inst->src[i].reg_offset;
1334 struct brw_reg brw_reg = brw_vec1_grf(c->prog_data.first_curbe_grf +
1335 constant_nr / 8,
1336 constant_nr % 8);
1337
1338 inst->src[i].file = FIXED_HW_REG;
1339 inst->src[i].fixed_hw_reg = brw_reg;
1340 }
1341 }
1342 }
1343 }
1344
1345 void
1346 fs_visitor::assign_urb_setup()
1347 {
1348 int urb_start = c->prog_data.first_curbe_grf + c->prog_data.curb_read_length;
1349 int interp_reg_nr[FRAG_ATTRIB_MAX];
1350
1351 c->prog_data.urb_read_length = 0;
1352
1353 /* Figure out where each of the incoming setup attributes lands. */
1354 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
1355 interp_reg_nr[i] = -1;
1356
1357 if (i != FRAG_ATTRIB_WPOS &&
1358 !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
1359 continue;
1360
1361 /* Each attribute is 4 setup channels, each of which is half a reg. */
1362 interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
1363 c->prog_data.urb_read_length += 2;
1364 }
1365
1366 /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
1367 * the correct setup input.
1368 */
1369 foreach_iter(exec_list_iterator, iter, this->instructions) {
1370 fs_inst *inst = (fs_inst *)iter.get();
1371
1372 if (inst->opcode != FS_OPCODE_LINTERP)
1373 continue;
1374
1375 assert(inst->src[2].file == FIXED_HW_REG);
1376
1377 int location = inst->src[2].fixed_hw_reg.nr / 2;
1378 assert(interp_reg_nr[location] != -1);
1379 inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
1380 (inst->src[2].fixed_hw_reg.nr & 1));
1381 }
1382
1383 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
1384 }
1385
1386 void
1387 fs_visitor::assign_regs()
1388 {
1389 int header_size = this->first_non_payload_grf;
1390 int last_grf = 0;
1391
1392 /* FINISHME: trivial assignment of register numbers */
1393 foreach_iter(exec_list_iterator, iter, this->instructions) {
1394 fs_inst *inst = (fs_inst *)iter.get();
1395
1396 trivial_assign_reg(header_size, &inst->dst);
1397 trivial_assign_reg(header_size, &inst->src[0]);
1398 trivial_assign_reg(header_size, &inst->src[1]);
1399
1400 last_grf = MAX2(last_grf, inst->dst.hw_reg);
1401 last_grf = MAX2(last_grf, inst->src[0].hw_reg);
1402 last_grf = MAX2(last_grf, inst->src[1].hw_reg);
1403 }
1404
1405 this->grf_used = last_grf + 1;
1406 }
1407
1408 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
1409 {
1410 struct brw_reg brw_reg;
1411
1412 switch (reg->file) {
1413 case GRF:
1414 case ARF:
1415 case MRF:
1416 brw_reg = brw_vec8_reg(reg->file,
1417 reg->hw_reg, 0);
1418 brw_reg = retype(brw_reg, reg->type);
1419 break;
1420 case IMM:
1421 switch (reg->type) {
1422 case BRW_REGISTER_TYPE_F:
1423 brw_reg = brw_imm_f(reg->imm.f);
1424 break;
1425 case BRW_REGISTER_TYPE_D:
1426 brw_reg = brw_imm_d(reg->imm.i);
1427 break;
1428 case BRW_REGISTER_TYPE_UD:
1429 brw_reg = brw_imm_ud(reg->imm.u);
1430 break;
1431 default:
1432 assert(!"not reached");
1433 break;
1434 }
1435 break;
1436 case FIXED_HW_REG:
1437 brw_reg = reg->fixed_hw_reg;
1438 break;
1439 case BAD_FILE:
1440 /* Probably unused. */
1441 brw_reg = brw_null_reg();
1442 break;
1443 case UNIFORM:
1444 assert(!"not reached");
1445 brw_reg = brw_null_reg();
1446 break;
1447 }
1448 if (reg->abs)
1449 brw_reg = brw_abs(brw_reg);
1450 if (reg->negate)
1451 brw_reg = negate(brw_reg);
1452
1453 return brw_reg;
1454 }
1455
1456 void
1457 fs_visitor::generate_code()
1458 {
1459 unsigned int annotation_len = 0;
1460 int last_native_inst = 0;
1461 struct brw_instruction *if_stack[16];
1462 int if_stack_depth = 0;
1463
1464 memset(&if_stack, 0, sizeof(if_stack));
1465 foreach_iter(exec_list_iterator, iter, this->instructions) {
1466 fs_inst *inst = (fs_inst *)iter.get();
1467 struct brw_reg src[3], dst;
1468
1469 for (unsigned int i = 0; i < 3; i++) {
1470 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
1471 }
1472 dst = brw_reg_from_fs_reg(&inst->dst);
1473
1474 brw_set_conditionalmod(p, inst->conditional_mod);
1475 brw_set_predicate_control(p, inst->predicated);
1476
1477 switch (inst->opcode) {
1478 case BRW_OPCODE_MOV:
1479 brw_MOV(p, dst, src[0]);
1480 break;
1481 case BRW_OPCODE_ADD:
1482 brw_ADD(p, dst, src[0], src[1]);
1483 break;
1484 case BRW_OPCODE_MUL:
1485 brw_MUL(p, dst, src[0], src[1]);
1486 break;
1487
1488 case BRW_OPCODE_FRC:
1489 brw_FRC(p, dst, src[0]);
1490 break;
1491 case BRW_OPCODE_RNDD:
1492 brw_RNDD(p, dst, src[0]);
1493 break;
1494 case BRW_OPCODE_RNDZ:
1495 brw_RNDZ(p, dst, src[0]);
1496 break;
1497
1498 case BRW_OPCODE_AND:
1499 brw_AND(p, dst, src[0], src[1]);
1500 break;
1501 case BRW_OPCODE_OR:
1502 brw_OR(p, dst, src[0], src[1]);
1503 break;
1504 case BRW_OPCODE_XOR:
1505 brw_XOR(p, dst, src[0], src[1]);
1506 break;
1507
1508 case BRW_OPCODE_CMP:
1509 brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
1510 break;
1511 case BRW_OPCODE_SEL:
1512 brw_SEL(p, dst, src[0], src[1]);
1513 break;
1514
1515 case BRW_OPCODE_IF:
1516 assert(if_stack_depth < 16);
1517 if_stack[if_stack_depth] = brw_IF(p, BRW_EXECUTE_8);
1518 if_stack_depth++;
1519 break;
1520 case BRW_OPCODE_ELSE:
1521 if_stack[if_stack_depth - 1] =
1522 brw_ELSE(p, if_stack[if_stack_depth - 1]);
1523 break;
1524 case BRW_OPCODE_ENDIF:
1525 if_stack_depth--;
1526 brw_ENDIF(p , if_stack[if_stack_depth]);
1527 break;
1528 case FS_OPCODE_RCP:
1529 case FS_OPCODE_RSQ:
1530 case FS_OPCODE_SQRT:
1531 case FS_OPCODE_EXP2:
1532 case FS_OPCODE_LOG2:
1533 case FS_OPCODE_POW:
1534 case FS_OPCODE_SIN:
1535 case FS_OPCODE_COS:
1536 generate_math(inst, dst, src);
1537 break;
1538 case FS_OPCODE_LINTERP:
1539 generate_linterp(inst, dst, src);
1540 break;
1541 case FS_OPCODE_FB_WRITE:
1542 generate_fb_write(inst);
1543 break;
1544 default:
1545 if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
1546 _mesa_problem(ctx, "Unsupported opcode `%s' in FS",
1547 brw_opcodes[inst->opcode].name);
1548 } else {
1549 _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode);
1550 }
1551 this->fail = true;
1552 }
1553
1554 if (annotation_len < p->nr_insn) {
1555 annotation_len *= 2;
1556 if (annotation_len < 16)
1557 annotation_len = 16;
1558
1559 this->annotation_string = talloc_realloc(this->mem_ctx,
1560 annotation_string,
1561 const char *,
1562 annotation_len);
1563 this->annotation_ir = talloc_realloc(this->mem_ctx,
1564 annotation_ir,
1565 ir_instruction *,
1566 annotation_len);
1567 }
1568
1569 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
1570 this->annotation_string[i] = inst->annotation;
1571 this->annotation_ir[i] = inst->ir;
1572 }
1573 last_native_inst = p->nr_insn;
1574 }
1575 }
1576
1577 GLboolean
1578 brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
1579 {
1580 struct brw_compile *p = &c->func;
1581 struct intel_context *intel = &brw->intel;
1582 GLcontext *ctx = &intel->ctx;
1583 struct brw_shader *shader = NULL;
1584 struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
1585
1586 if (!prog)
1587 return GL_FALSE;
1588
1589 if (!using_new_fs)
1590 return GL_FALSE;
1591
1592 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
1593 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
1594 shader = (struct brw_shader *)prog->_LinkedShaders[i];
1595 break;
1596 }
1597 }
1598 if (!shader)
1599 return GL_FALSE;
1600
1601 /* We always use 8-wide mode, at least for now. For one, flow
1602 * control only works in 8-wide. Also, when we're fragment shader
1603 * bound, we're almost always under register pressure as well, so
1604 * 8-wide would save us from the performance cliff of spilling
1605 * regs.
1606 */
1607 c->dispatch_width = 8;
1608
1609 if (INTEL_DEBUG & DEBUG_WM) {
1610 printf("GLSL IR for native fragment shader %d:\n", prog->Name);
1611 _mesa_print_ir(shader->ir, NULL);
1612 printf("\n");
1613 }
1614
1615 /* Now the main event: Visit the shader IR and generate our FS IR for it.
1616 */
1617 fs_visitor v(c, shader);
1618
1619 if (0) {
1620 v.emit_dummy_fs();
1621 } else {
1622 v.emit_interpolation();
1623
1624 /* Generate FS IR for main(). (the visitor only descends into
1625 * functions called "main").
1626 */
1627 foreach_iter(exec_list_iterator, iter, *shader->ir) {
1628 ir_instruction *ir = (ir_instruction *)iter.get();
1629 v.base_ir = ir;
1630 ir->accept(&v);
1631 }
1632
1633 if (v.fail)
1634 return GL_FALSE;
1635
1636 v.emit_fb_writes();
1637 v.assign_curb_setup();
1638 v.assign_urb_setup();
1639 v.assign_regs();
1640 }
1641
1642 v.generate_code();
1643
1644 if (INTEL_DEBUG & DEBUG_WM) {
1645 const char *last_annotation_string = NULL;
1646 ir_instruction *last_annotation_ir = NULL;
1647
1648 printf("Native code for fragment shader %d:\n", prog->Name);
1649 for (unsigned int i = 0; i < p->nr_insn; i++) {
1650 if (last_annotation_ir != v.annotation_ir[i]) {
1651 last_annotation_ir = v.annotation_ir[i];
1652 if (last_annotation_ir) {
1653 printf(" ");
1654 last_annotation_ir->print();
1655 printf("\n");
1656 }
1657 }
1658 if (last_annotation_string != v.annotation_string[i]) {
1659 last_annotation_string = v.annotation_string[i];
1660 if (last_annotation_string)
1661 printf(" %s\n", last_annotation_string);
1662 }
1663 brw_disasm(stdout, &p->store[i], intel->gen);
1664 }
1665 printf("\n");
1666 }
1667
1668 c->prog_data.total_grf = v.grf_used;
1669 c->prog_data.total_scratch = 0;
1670
1671 return GL_TRUE;
1672 }