11c79b11bfc0e97d711b2ab9b057059cc72e6602
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs.cpp
1 /*
2 * Copyright © 2010 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28 extern "C" {
29
30 #include <sys/types.h>
31 /* Evil hack for using libdrm in a c++ compiler. */
32 #define virtual virt
33 #include "i915_drm.h"
34 #include "intel_bufmgr.h"
35 #undef virtual
36
37 #include "main/macros.h"
38 #include "main/shaderobj.h"
39 #include "program/prog_parameter.h"
40 #include "program/prog_print.h"
41 #include "program/prog_optimize.h"
42 #include "program/hash_table.h"
43 #include "brw_context.h"
44 #include "brw_eu.h"
45 #include "brw_wm.h"
46 #include "talloc.h"
47 }
48 #include "../glsl/glsl_types.h"
49 #include "../glsl/ir_optimization.h"
50 #include "../glsl/ir_print_visitor.h"
51
/**
 * Register file an fs_reg refers to.
 *
 * The first four values reuse the BRW_* register-file encodings; the last
 * two are additional values local to this file.
 */
enum register_file {
   ARF = BRW_ARCHITECTURE_REGISTER_FILE,
   GRF = BRW_GENERAL_REGISTER_FILE,
   MRF = BRW_MESSAGE_REGISTER_FILE,
   IMM = BRW_IMMEDIATE_VALUE,
   /* The register is described by fs_reg::fixed_hw_reg (a struct brw_reg). */
   FIXED_HW_REG,
   /* Unset register: what the default fs_reg constructor produces. */
   BAD_FILE
};
60
/**
 * Opcodes specific to this backend, stored in fs_inst::opcode alongside the
 * BRW_OPCODE_* values.
 *
 * NOTE(review): numbering starts at 256, presumably so these never collide
 * with a BRW_OPCODE_* value -- confirm against brw_defines.h.
 */
enum fs_opcodes {
   FS_OPCODE_FB_WRITE = 256,
   /* RCP through COS are lowered by fs_visitor::generate_math(). */
   FS_OPCODE_RCP,
   FS_OPCODE_RSQ,
   FS_OPCODE_SQRT,
   FS_OPCODE_EXP2,
   FS_OPCODE_LOG2,
   FS_OPCODE_POW,
   FS_OPCODE_SIN,
   FS_OPCODE_COS,
   FS_OPCODE_DDX,
   FS_OPCODE_DDY,
   /* Lowered by fs_visitor::generate_linterp(). */
   FS_OPCODE_LINTERP,
};
75
/* Tri-state cache of the INTEL_NEW_FS environment check: -1 means "not yet
 * looked up"; brw_link_shader() replaces it with 0 or 1 on first use.
 */
static int using_new_fs = -1;
77
78 struct gl_shader *
79 brw_new_shader(GLcontext *ctx, GLuint name, GLuint type)
80 {
81 struct brw_shader *shader;
82
83 shader = talloc_zero(NULL, struct brw_shader);
84 shader->base.Type = type;
85 shader->base.Name = name;
86 if (shader) {
87 _mesa_init_shader(ctx, &shader->base);
88 }
89
90 return &shader->base;
91 }
92
93 struct gl_shader_program *
94 brw_new_shader_program(GLcontext *ctx, GLuint name)
95 {
96 struct brw_shader_program *prog;
97 prog = talloc_zero(NULL, struct brw_shader_program);
98 if (prog) {
99 _mesa_init_shader_program(ctx, &prog->base);
100 }
101 return &prog->base;
102 }
103
104 GLboolean
105 brw_compile_shader(GLcontext *ctx, struct gl_shader *shader)
106 {
107 if (!_mesa_ir_compile_shader(ctx, shader))
108 return GL_FALSE;
109
110 return GL_TRUE;
111 }
112
113 GLboolean
114 brw_link_shader(GLcontext *ctx, struct gl_shader_program *prog)
115 {
116 if (using_new_fs == -1)
117 using_new_fs = getenv("INTEL_NEW_FS") != NULL;
118
119 for (unsigned i = 0; i < prog->_NumLinkedShaders; i++) {
120 struct brw_shader *shader = (struct brw_shader *)prog->_LinkedShaders[i];
121
122 if (using_new_fs && shader->base.Type == GL_FRAGMENT_SHADER) {
123 void *mem_ctx = talloc_new(NULL);
124 bool progress;
125
126 shader->ir = new(shader) exec_list;
127 clone_ir_list(mem_ctx, shader->ir, shader->base.ir);
128
129 do_mat_op_to_vec(shader->ir);
130 do_div_to_mul_rcp(shader->ir);
131 do_sub_to_add_neg(shader->ir);
132 do_explog_to_explog2(shader->ir);
133
134 brw_do_channel_expressions(shader->ir);
135 brw_do_vector_splitting(shader->ir);
136
137 do {
138 progress = false;
139
140 progress = do_common_optimization(shader->ir, true) || progress;
141 } while (progress);
142
143 reparent_ir(shader->ir, shader);
144 talloc_free(mem_ctx);
145 }
146 }
147
148 if (!_mesa_ir_link_shader(ctx, prog))
149 return GL_FALSE;
150
151 return GL_TRUE;
152 }
153
154 static int
155 type_size(const struct glsl_type *type)
156 {
157 unsigned int size, i;
158
159 switch (type->base_type) {
160 case GLSL_TYPE_UINT:
161 case GLSL_TYPE_INT:
162 case GLSL_TYPE_FLOAT:
163 case GLSL_TYPE_BOOL:
164 if (type->is_matrix()) {
165 /* In case of incoming uniform/varying matrices, match their
166 * allocation behavior. FINISHME: We could just use
167 * glsl_type->components() for variables and temps within the
168 * shader.
169 */
170 return type->matrix_columns * 4;
171 } else {
172 return type->vector_elements;
173 }
174 case GLSL_TYPE_ARRAY:
175 /* FINISHME: uniform/varying arrays. */
176 return type_size(type->fields.array) * type->length;
177 case GLSL_TYPE_STRUCT:
178 size = 0;
179 for (i = 0; i < type->length; i++) {
180 size += type_size(type->fields.structure[i].type);
181 }
182 return size;
183 case GLSL_TYPE_SAMPLER:
184 /* Samplers take up no register space, since they're baked in at
185 * link time.
186 */
187 return 0;
188 default:
189 assert(!"not reached");
190 return 0;
191 }
192 }
193
194 class fs_reg {
195 public:
196 /* Callers of this talloc-based new need not call delete. It's
197 * easier to just talloc_free 'ctx' (or any of its ancestors). */
198 static void* operator new(size_t size, void *ctx)
199 {
200 void *node;
201
202 node = talloc_size(ctx, size);
203 assert(node != NULL);
204
205 return node;
206 }
207
208 /** Generic unset register constructor. */
209 fs_reg()
210 {
211 this->file = BAD_FILE;
212 this->reg = 0;
213 this->reg_offset = 0;
214 this->hw_reg = -1;
215 this->negate = 0;
216 this->abs = 0;
217 }
218
219 /** Immediate value constructor. */
220 fs_reg(float f)
221 {
222 this->file = IMM;
223 this->reg = 0;
224 this->hw_reg = 0;
225 this->type = BRW_REGISTER_TYPE_F;
226 this->imm.f = f;
227 this->negate = 0;
228 this->abs = 0;
229 }
230
231 /** Immediate value constructor. */
232 fs_reg(int32_t i)
233 {
234 this->file = IMM;
235 this->reg = 0;
236 this->hw_reg = 0;
237 this->type = BRW_REGISTER_TYPE_D;
238 this->imm.i = i;
239 this->negate = 0;
240 this->abs = 0;
241 }
242
243 /** Immediate value constructor. */
244 fs_reg(uint32_t u)
245 {
246 this->file = IMM;
247 this->reg = 0;
248 this->hw_reg = 0;
249 this->type = BRW_REGISTER_TYPE_UD;
250 this->imm.u = u;
251 this->negate = 0;
252 this->abs = 0;
253 }
254
255 /** Fixed brw_reg Immediate value constructor. */
256 fs_reg(struct brw_reg fixed_hw_reg)
257 {
258 this->file = FIXED_HW_REG;
259 this->fixed_hw_reg = fixed_hw_reg;
260 this->reg = 0;
261 this->hw_reg = 0;
262 this->type = fixed_hw_reg.type;
263 this->negate = 0;
264 this->abs = 0;
265 }
266
267 fs_reg(enum register_file file, int hw_reg);
268 fs_reg(class fs_visitor *v, const struct glsl_type *type);
269
270 /** Register file: ARF, GRF, MRF, IMM. */
271 enum register_file file;
272 /** Abstract register number. 0 = fixed hw reg */
273 int reg;
274 /** Offset within the abstract register. */
275 int reg_offset;
276 /** HW register number. Generally unset until register allocation. */
277 int hw_reg;
278 /** Register type. BRW_REGISTER_TYPE_* */
279 int type;
280 bool negate;
281 bool abs;
282 struct brw_reg fixed_hw_reg;
283
284 /** Value for file == BRW_IMMMEDIATE_FILE */
285 union {
286 int32_t i;
287 uint32_t u;
288 float f;
289 } imm;
290 };
291
/* Shared constant operands: reg_undef is a default-constructed (BAD_FILE)
 * register, reg_null is the ARF null register.
 */
static const fs_reg reg_undef;
static const fs_reg reg_null(ARF, BRW_ARF_NULL);
294
295 class fs_inst : public exec_node {
296 public:
297 /* Callers of this talloc-based new need not call delete. It's
298 * easier to just talloc_free 'ctx' (or any of its ancestors). */
299 static void* operator new(size_t size, void *ctx)
300 {
301 void *node;
302
303 node = talloc_zero_size(ctx, size);
304 assert(node != NULL);
305
306 return node;
307 }
308
309 fs_inst()
310 {
311 this->opcode = BRW_OPCODE_NOP;
312 this->saturate = false;
313 this->conditional_mod = BRW_CONDITIONAL_NONE;
314 this->predicated = false;
315 }
316
317 fs_inst(int opcode, fs_reg dst, fs_reg src0)
318 {
319 this->opcode = opcode;
320 this->dst = dst;
321 this->src[0] = src0;
322 this->saturate = false;
323 this->conditional_mod = BRW_CONDITIONAL_NONE;
324 this->predicated = false;
325 }
326
327 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1)
328 {
329 this->opcode = opcode;
330 this->dst = dst;
331 this->src[0] = src0;
332 this->src[1] = src1;
333 this->saturate = false;
334 this->conditional_mod = BRW_CONDITIONAL_NONE;
335 this->predicated = false;
336 }
337
338 fs_inst(int opcode, fs_reg dst, fs_reg src0, fs_reg src1, fs_reg src2)
339 {
340 this->opcode = opcode;
341 this->dst = dst;
342 this->src[0] = src0;
343 this->src[1] = src1;
344 this->src[2] = src2;
345 this->saturate = false;
346 this->conditional_mod = BRW_CONDITIONAL_NONE;
347 this->predicated = false;
348 }
349
350 int opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
351 fs_reg dst;
352 fs_reg src[3];
353 bool saturate;
354 bool predicated;
355 int conditional_mod; /**< BRW_CONDITIONAL_* */
356
357 /** @{
358 * Annotation for the generated IR. One of the two can be set.
359 */
360 ir_instruction *ir;
361 const char *annotation;
362 /** @} */
363 };
364
365 class fs_visitor : public ir_visitor
366 {
367 public:
368
369 fs_visitor(struct brw_wm_compile *c, struct brw_shader *shader)
370 {
371 this->c = c;
372 this->p = &c->func;
373 this->brw = p->brw;
374 this->intel = &brw->intel;
375 this->mem_ctx = talloc_new(NULL);
376 this->shader = shader;
377 this->fail = false;
378 this->next_abstract_grf = 1;
379 this->variable_ht = hash_table_ctor(0,
380 hash_table_pointer_hash,
381 hash_table_pointer_compare);
382
383 this->frag_color = NULL;
384 this->frag_data = NULL;
385 this->frag_depth = NULL;
386 this->first_non_payload_grf = 0;
387
388 this->current_annotation = NULL;
389 this->annotation_string = NULL;
390 this->annotation_ir = NULL;
391 }
392 ~fs_visitor()
393 {
394 talloc_free(this->mem_ctx);
395 hash_table_dtor(this->variable_ht);
396 }
397
398 fs_reg *variable_storage(ir_variable *var);
399
400 void visit(ir_variable *ir);
401 void visit(ir_assignment *ir);
402 void visit(ir_dereference_variable *ir);
403 void visit(ir_dereference_record *ir);
404 void visit(ir_dereference_array *ir);
405 void visit(ir_expression *ir);
406 void visit(ir_texture *ir);
407 void visit(ir_if *ir);
408 void visit(ir_constant *ir);
409 void visit(ir_swizzle *ir);
410 void visit(ir_return *ir);
411 void visit(ir_loop *ir);
412 void visit(ir_loop_jump *ir);
413 void visit(ir_discard *ir);
414 void visit(ir_call *ir);
415 void visit(ir_function *ir);
416 void visit(ir_function_signature *ir);
417
418 fs_inst *emit(fs_inst inst);
419 void assign_urb_setup();
420 void assign_regs();
421 void generate_code();
422 void generate_fb_write(fs_inst *inst);
423 void generate_linterp(fs_inst *inst, struct brw_reg dst,
424 struct brw_reg *src);
425 void generate_math(fs_inst *inst, struct brw_reg dst, struct brw_reg *src);
426
427 void emit_dummy_fs();
428 void emit_interpolation();
429 void emit_pinterp(int location);
430 void emit_fb_writes();
431
432 struct brw_reg interp_reg(int location, int channel);
433
434 struct brw_context *brw;
435 struct intel_context *intel;
436 struct brw_wm_compile *c;
437 struct brw_compile *p;
438 struct brw_shader *shader;
439 void *mem_ctx;
440 exec_list instructions;
441 int next_abstract_grf;
442 struct hash_table *variable_ht;
443 ir_variable *frag_color, *frag_data, *frag_depth;
444 int first_non_payload_grf;
445
446 /** @{ debug annotation info */
447 const char *current_annotation;
448 ir_instruction *base_ir;
449 const char **annotation_string;
450 ir_instruction **annotation_ir;
451 /** @} */
452
453 bool fail;
454
455 /* Result of last visit() method. */
456 fs_reg result;
457
458 fs_reg pixel_x;
459 fs_reg pixel_y;
460 fs_reg pixel_w;
461 fs_reg delta_x;
462 fs_reg delta_y;
463 fs_reg interp_attrs[64];
464
465 int grf_used;
466
467 };
468
469 /** Fixed HW reg constructor. */
470 fs_reg::fs_reg(enum register_file file, int hw_reg)
471 {
472 this->file = file;
473 this->reg = 0;
474 this->reg_offset = 0;
475 this->hw_reg = hw_reg;
476 this->type = BRW_REGISTER_TYPE_F;
477 this->negate = 0;
478 this->abs = 0;
479 }
480
481 /** Automatic reg constructor. */
482 fs_reg::fs_reg(class fs_visitor *v, const struct glsl_type *type)
483 {
484 this->file = GRF;
485 this->reg = v->next_abstract_grf;
486 this->reg_offset = 0;
487 v->next_abstract_grf += type_size(type);
488 this->hw_reg = -1;
489 this->negate = 0;
490 this->abs = 0;
491
492 switch (type->base_type) {
493 case GLSL_TYPE_FLOAT:
494 this->type = BRW_REGISTER_TYPE_F;
495 break;
496 case GLSL_TYPE_INT:
497 case GLSL_TYPE_BOOL:
498 this->type = BRW_REGISTER_TYPE_D;
499 break;
500 case GLSL_TYPE_UINT:
501 this->type = BRW_REGISTER_TYPE_UD;
502 break;
503 default:
504 assert(!"not reached");
505 this->type = BRW_REGISTER_TYPE_F;
506 break;
507 }
508 }
509
510 fs_reg *
511 fs_visitor::variable_storage(ir_variable *var)
512 {
513 return (fs_reg *)hash_table_find(this->variable_ht, var);
514 }
515
516 void
517 fs_visitor::visit(ir_variable *ir)
518 {
519 fs_reg *reg = NULL;
520
521 /* FINISHME */
522 assert(ir->mode != ir_var_uniform);
523
524 if (strcmp(ir->name, "gl_FragColor") == 0) {
525 this->frag_color = ir;
526 } else if (strcmp(ir->name, "gl_FragData") == 0) {
527 this->frag_data = ir;
528 } else if (strcmp(ir->name, "gl_FragDepth") == 0) {
529 this->frag_depth = ir;
530 }
531
532 if (ir->mode == ir_var_in) {
533 reg = &this->interp_attrs[ir->location];
534 }
535
536 if (!reg)
537 reg = new(this->mem_ctx) fs_reg(this, ir->type);
538
539 hash_table_insert(this->variable_ht, reg, ir);
540 }
541
542 void
543 fs_visitor::visit(ir_dereference_variable *ir)
544 {
545 fs_reg *reg = variable_storage(ir->var);
546 this->result = *reg;
547 }
548
549 void
550 fs_visitor::visit(ir_dereference_record *ir)
551 {
552 assert(!"FINISHME");
553 }
554
555 void
556 fs_visitor::visit(ir_dereference_array *ir)
557 {
558 assert(!"FINISHME");
559 }
560
561 void
562 fs_visitor::visit(ir_expression *ir)
563 {
564 unsigned int operand;
565 fs_reg op[2], temp;
566 fs_reg result;
567 fs_inst *inst;
568
569 for (operand = 0; operand < ir->get_num_operands(); operand++) {
570 ir->operands[operand]->accept(this);
571 if (this->result.file == BAD_FILE) {
572 ir_print_visitor v;
573 printf("Failed to get tree for expression operand:\n");
574 ir->operands[operand]->accept(&v);
575 this->fail = true;
576 }
577 op[operand] = this->result;
578
579 /* Matrix expression operands should have been broken down to vector
580 * operations already.
581 */
582 assert(!ir->operands[operand]->type->is_matrix());
583 /* And then those vector operands should have been broken down to scalar.
584 */
585 assert(!ir->operands[operand]->type->is_vector());
586 }
587
588 /* Storage for our result. If our result goes into an assignment, it will
589 * just get copy-propagated out, so no worries.
590 */
591 this->result = fs_reg(this, ir->type);
592
593 switch (ir->operation) {
594 case ir_unop_logic_not:
595 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], fs_reg(-1)));
596 break;
597 case ir_unop_neg:
598 this->result = op[0];
599 op[0].negate = ~op[0].negate;
600 break;
601 case ir_unop_abs:
602 this->result = op[0];
603 op[0].abs = true;
604 break;
605 case ir_unop_sign:
606 temp = fs_reg(this, ir->type);
607
608 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
609 inst->conditional_mod = BRW_CONDITIONAL_G;
610
611 inst = emit(fs_inst(BRW_OPCODE_CMP, temp, op[0], fs_reg(0.0f)));
612 inst->conditional_mod = BRW_CONDITIONAL_L;
613
614 temp.negate = true;
615 emit(fs_inst(BRW_OPCODE_ADD, this->result, this->result, temp));
616
617 break;
618 case ir_unop_rcp:
619 emit(fs_inst(FS_OPCODE_RCP, this->result, op[0]));
620 break;
621
622 case ir_unop_exp2:
623 emit(fs_inst(FS_OPCODE_EXP2, this->result, op[0]));
624 break;
625 case ir_unop_log2:
626 emit(fs_inst(FS_OPCODE_LOG2, this->result, op[0]));
627 break;
628 case ir_unop_exp:
629 case ir_unop_log:
630 assert(!"not reached: should be handled by ir_explog_to_explog2");
631 break;
632 case ir_unop_sin:
633 emit(fs_inst(FS_OPCODE_SIN, this->result, op[0]));
634 break;
635 case ir_unop_cos:
636 emit(fs_inst(FS_OPCODE_COS, this->result, op[0]));
637 break;
638
639 case ir_unop_dFdx:
640 emit(fs_inst(FS_OPCODE_DDX, this->result, op[0]));
641 break;
642 case ir_unop_dFdy:
643 emit(fs_inst(FS_OPCODE_DDY, this->result, op[0]));
644 break;
645
646 case ir_binop_add:
647 emit(fs_inst(BRW_OPCODE_ADD, this->result, op[0], op[1]));
648 break;
649 case ir_binop_sub:
650 assert(!"not reached: should be handled by ir_sub_to_add_neg");
651 break;
652
653 case ir_binop_mul:
654 emit(fs_inst(BRW_OPCODE_MUL, this->result, op[0], op[1]));
655 break;
656 case ir_binop_div:
657 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
658 break;
659 case ir_binop_mod:
660 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
661 break;
662
663 case ir_binop_less:
664 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
665 inst->conditional_mod = BRW_CONDITIONAL_L;
666 break;
667 case ir_binop_greater:
668 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
669 inst->conditional_mod = BRW_CONDITIONAL_G;
670 break;
671 case ir_binop_lequal:
672 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
673 inst->conditional_mod = BRW_CONDITIONAL_LE;
674 break;
675 case ir_binop_gequal:
676 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
677 inst->conditional_mod = BRW_CONDITIONAL_GE;
678 break;
679 case ir_binop_equal:
680 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
681 inst->conditional_mod = BRW_CONDITIONAL_Z;
682 break;
683 case ir_binop_nequal:
684 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
685 inst->conditional_mod = BRW_CONDITIONAL_NZ;
686 break;
687
688 case ir_binop_logic_xor:
689 emit(fs_inst(BRW_OPCODE_XOR, this->result, op[0], op[1]));
690 break;
691
692 case ir_binop_logic_or:
693 emit(fs_inst(BRW_OPCODE_OR, this->result, op[0], op[1]));
694 break;
695
696 case ir_binop_logic_and:
697 emit(fs_inst(BRW_OPCODE_AND, this->result, op[0], op[1]));
698 break;
699
700 case ir_binop_dot:
701 case ir_binop_cross:
702 case ir_unop_any:
703 assert(!"not reached: should be handled by brw_channel_expressions");
704 break;
705
706 case ir_unop_sqrt:
707 emit(fs_inst(FS_OPCODE_SQRT, this->result, op[0]));
708 break;
709
710 case ir_unop_rsq:
711 emit(fs_inst(FS_OPCODE_RSQ, this->result, op[0]));
712 break;
713
714 case ir_unop_i2f:
715 case ir_unop_b2f:
716 case ir_unop_b2i:
717 emit(fs_inst(BRW_OPCODE_MOV, this->result, op[0]));
718 break;
719 case ir_unop_f2i:
720 emit(fs_inst(BRW_OPCODE_RNDZ, this->result, op[0]));
721 break;
722 case ir_unop_f2b:
723 case ir_unop_i2b:
724 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], fs_reg(0.0f)));
725 inst->conditional_mod = BRW_CONDITIONAL_NZ;
726
727 case ir_unop_trunc:
728 emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
729 break;
730 case ir_unop_ceil:
731 op[0].negate = ~op[0].negate;
732 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
733 this->result.negate = true;
734 break;
735 case ir_unop_floor:
736 inst = emit(fs_inst(BRW_OPCODE_RNDD, this->result, op[0]));
737 break;
738 case ir_unop_fract:
739 inst = emit(fs_inst(BRW_OPCODE_FRC, this->result, op[0]));
740 break;
741
742 case ir_binop_min:
743 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
744 inst->conditional_mod = BRW_CONDITIONAL_L;
745
746 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
747 inst->predicated = true;
748 break;
749 case ir_binop_max:
750 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, op[0], op[1]));
751 inst->conditional_mod = BRW_CONDITIONAL_G;
752
753 inst = emit(fs_inst(BRW_OPCODE_SEL, this->result, op[0], op[1]));
754 inst->predicated = true;
755 break;
756
757 case ir_binop_pow:
758 inst = emit(fs_inst(FS_OPCODE_POW, this->result, op[0], op[1]));
759 break;
760
761 case ir_unop_bit_not:
762 case ir_unop_u2f:
763 case ir_binop_lshift:
764 case ir_binop_rshift:
765 case ir_binop_bit_and:
766 case ir_binop_bit_xor:
767 case ir_binop_bit_or:
768 assert(!"GLSL 1.30 features unsupported");
769 break;
770 }
771 }
772
773 void
774 fs_visitor::visit(ir_assignment *ir)
775 {
776 struct fs_reg l, r;
777 int i;
778 int write_mask;
779 fs_inst *inst;
780
781 /* FINISHME: arrays on the lhs */
782 ir->lhs->accept(this);
783 l = this->result;
784
785 ir->rhs->accept(this);
786 r = this->result;
787
788 /* FINISHME: This should really set to the correct maximal writemask for each
789 * FINISHME: component written (in the loops below). This case can only
790 * FINISHME: occur for matrices, arrays, and structures.
791 */
792 if (ir->write_mask == 0) {
793 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
794 write_mask = WRITEMASK_XYZW;
795 } else {
796 assert(ir->lhs->type->is_vector() || ir->lhs->type->is_scalar());
797 write_mask = ir->write_mask;
798 }
799
800 assert(l.file != BAD_FILE);
801 assert(r.file != BAD_FILE);
802
803 if (ir->condition) {
804 /* Get the condition bool into the predicate. */
805 ir->condition->accept(this);
806 inst = emit(fs_inst(BRW_OPCODE_CMP, this->result, fs_reg(0)));
807 inst->conditional_mod = BRW_CONDITIONAL_NZ;
808 }
809
810 for (i = 0; i < type_size(ir->lhs->type); i++) {
811 if (i < 4 && !(write_mask & (1 << i)))
812 continue;
813
814 inst = emit(fs_inst(BRW_OPCODE_MOV, l, r));
815 if (ir->condition)
816 inst->predicated = true;
817 l.reg_offset++;
818 r.reg_offset++;
819 }
820 }
821
822 void
823 fs_visitor::visit(ir_texture *ir)
824 {
825 assert(!"FINISHME");
826 }
827
828 void
829 fs_visitor::visit(ir_swizzle *ir)
830 {
831 ir->val->accept(this);
832 fs_reg val = this->result;
833
834 fs_reg result = fs_reg(this, ir->type);
835 this->result = result;
836
837 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
838 fs_reg channel = val;
839 int swiz = 0;
840
841 switch (i) {
842 case 0:
843 swiz = ir->mask.x;
844 break;
845 case 1:
846 swiz = ir->mask.y;
847 break;
848 case 2:
849 swiz = ir->mask.z;
850 break;
851 case 3:
852 swiz = ir->mask.w;
853 break;
854 }
855
856 channel.reg_offset += swiz;
857 emit(fs_inst(BRW_OPCODE_MOV, result, channel));
858 result.reg_offset++;
859 }
860 }
861
862 void
863 fs_visitor::visit(ir_discard *ir)
864 {
865 assert(!"FINISHME");
866 }
867
868 void
869 fs_visitor::visit(ir_constant *ir)
870 {
871 fs_reg reg(this, ir->type);
872 this->result = reg;
873
874 for (unsigned int i = 0; i < ir->type->vector_elements; i++) {
875 switch (ir->type->base_type) {
876 case GLSL_TYPE_FLOAT:
877 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.f[i])));
878 break;
879 case GLSL_TYPE_UINT:
880 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.u[i])));
881 break;
882 case GLSL_TYPE_INT:
883 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg(ir->value.i[i])));
884 break;
885 case GLSL_TYPE_BOOL:
886 emit(fs_inst(BRW_OPCODE_MOV, reg, fs_reg((int)ir->value.b[i])));
887 break;
888 default:
889 assert(!"Non-float/uint/int/bool constant");
890 }
891 reg.reg_offset++;
892 }
893 }
894
895 void
896 fs_visitor::visit(ir_if *ir)
897 {
898 assert(!"FINISHME");
899 }
900
901 void
902 fs_visitor::visit(ir_loop *ir)
903 {
904 assert(!"FINISHME");
905 }
906
907 void
908 fs_visitor::visit(ir_loop_jump *ir)
909 {
910 assert(!"FINISHME");
911 }
912
913 void
914 fs_visitor::visit(ir_call *ir)
915 {
916 assert(!"FINISHME");
917 }
918
919 void
920 fs_visitor::visit(ir_return *ir)
921 {
922 assert(!"FINISHME");
923 }
924
925 void
926 fs_visitor::visit(ir_function *ir)
927 {
928 /* Ignore function bodies other than main() -- we shouldn't see calls to
929 * them since they should all be inlined before we get to ir_to_mesa.
930 */
931 if (strcmp(ir->name, "main") == 0) {
932 const ir_function_signature *sig;
933 exec_list empty;
934
935 sig = ir->matching_signature(&empty);
936
937 assert(sig);
938
939 foreach_iter(exec_list_iterator, iter, sig->body) {
940 ir_instruction *ir = (ir_instruction *)iter.get();
941 this->base_ir = ir;
942
943 ir->accept(this);
944 }
945 }
946 }
947
948 void
949 fs_visitor::visit(ir_function_signature *ir)
950 {
951 assert(!"not reached");
952 (void)ir;
953 }
954
955 fs_inst *
956 fs_visitor::emit(fs_inst inst)
957 {
958 fs_inst *list_inst = new(mem_ctx) fs_inst;
959 *list_inst = inst;
960
961 list_inst->annotation = this->current_annotation;
962 list_inst->ir = this->base_ir;
963
964 this->instructions.push_tail(list_inst);
965
966 return list_inst;
967 }
968
969 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
970 void
971 fs_visitor::emit_dummy_fs()
972 {
973 /* Everyone's favorite color. */
974 emit(fs_inst(BRW_OPCODE_MOV,
975 fs_reg(MRF, 2),
976 fs_reg(1.0f)));
977 emit(fs_inst(BRW_OPCODE_MOV,
978 fs_reg(MRF, 3),
979 fs_reg(0.0f)));
980 emit(fs_inst(BRW_OPCODE_MOV,
981 fs_reg(MRF, 4),
982 fs_reg(1.0f)));
983 emit(fs_inst(BRW_OPCODE_MOV,
984 fs_reg(MRF, 5),
985 fs_reg(0.0f)));
986
987 fs_inst *write;
988 write = emit(fs_inst(FS_OPCODE_FB_WRITE,
989 fs_reg(0),
990 fs_reg(0)));
991 }
992
993 /* The register location here is relative to the start of the URB
994 * data. It will get adjusted to be a real location before
995 * generate_code() time.
996 */
997 struct brw_reg
998 fs_visitor::interp_reg(int location, int channel)
999 {
1000 int regnr = location * 2 + channel / 2;
1001 int stride = (channel & 1) * 4;
1002
1003 return brw_vec1_grf(regnr, stride);
1004 }
1005
1006 /** Emits the interpolation for the varying inputs. */
1007 void
1008 fs_visitor::emit_interpolation()
1009 {
1010 struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);
1011 /* For now, the source regs for the setup URB data will be unset,
1012 * since we don't know until codegen how many push constants we'll
1013 * use, and therefore what the setup URB offset is.
1014 */
1015 fs_reg src_reg = reg_undef;
1016
1017 this->current_annotation = "compute pixel centers";
1018 this->pixel_x = fs_reg(this, glsl_type::uint_type);
1019 this->pixel_y = fs_reg(this, glsl_type::uint_type);
1020 emit(fs_inst(BRW_OPCODE_ADD,
1021 this->pixel_x,
1022 fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
1023 fs_reg(brw_imm_v(0x10101010))));
1024 emit(fs_inst(BRW_OPCODE_ADD,
1025 this->pixel_y,
1026 fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
1027 fs_reg(brw_imm_v(0x11001100))));
1028
1029 this->current_annotation = "compute pixel deltas from v0";
1030 this->delta_x = fs_reg(this, glsl_type::float_type);
1031 this->delta_y = fs_reg(this, glsl_type::float_type);
1032 emit(fs_inst(BRW_OPCODE_ADD,
1033 this->delta_x,
1034 this->pixel_x,
1035 fs_reg(negate(brw_vec1_grf(1, 0)))));
1036 emit(fs_inst(BRW_OPCODE_ADD,
1037 this->delta_y,
1038 this->pixel_y,
1039 fs_reg(brw_vec1_grf(1, 1))));
1040
1041 this->current_annotation = "compute pos.w and 1/pos.w";
1042 /* Compute wpos. Unlike many other varying inputs, we usually need it
1043 * to produce 1/w, and the varying variable wouldn't show up.
1044 */
1045 fs_reg wpos = fs_reg(this, glsl_type::vec4_type);
1046 this->interp_attrs[FRAG_ATTRIB_WPOS] = wpos;
1047 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_x)); /* FINISHME: ARB_fcc */
1048 wpos.reg_offset++;
1049 emit(fs_inst(BRW_OPCODE_MOV, wpos, this->pixel_y)); /* FINISHME: ARB_fcc */
1050 wpos.reg_offset++;
1051 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
1052 interp_reg(FRAG_ATTRIB_WPOS, 2)));
1053 wpos.reg_offset++;
1054 emit(fs_inst(FS_OPCODE_LINTERP, wpos, this->delta_x, this->delta_y,
1055 interp_reg(FRAG_ATTRIB_WPOS, 3)));
1056 /* Compute the pixel W value from wpos.w. */
1057 this->pixel_w = fs_reg(this, glsl_type::float_type);
1058 emit(fs_inst(FS_OPCODE_RCP, this->pixel_w, wpos));
1059
1060 /* FINISHME: gl_FrontFacing */
1061
1062 foreach_iter(exec_list_iterator, iter, *this->shader->ir) {
1063 ir_instruction *ir = (ir_instruction *)iter.get();
1064 ir_variable *var = ir->as_variable();
1065
1066 if (!var)
1067 continue;
1068
1069 if (var->mode != ir_var_in)
1070 continue;
1071
1072 /* If it's already set up (WPOS), skip. */
1073 if (var->location == 0)
1074 continue;
1075
1076 this->current_annotation = talloc_asprintf(this->mem_ctx,
1077 "interpolate %s "
1078 "(FRAG_ATTRIB[%d])",
1079 var->name,
1080 var->location);
1081 emit_pinterp(var->location);
1082 }
1083 this->current_annotation = NULL;
1084 }
1085
1086 void
1087 fs_visitor::emit_pinterp(int location)
1088 {
1089 fs_reg interp_attr = fs_reg(this, glsl_type::vec4_type);
1090 this->interp_attrs[location] = interp_attr;
1091
1092 for (unsigned int i = 0; i < 4; i++) {
1093 struct brw_reg interp = interp_reg(location, i);
1094 emit(fs_inst(FS_OPCODE_LINTERP,
1095 interp_attr,
1096 this->delta_x,
1097 this->delta_y,
1098 fs_reg(interp)));
1099 interp_attr.reg_offset++;
1100 }
1101 interp_attr.reg_offset -= 4;
1102
1103 for (unsigned int i = 0; i < 4; i++) {
1104 emit(fs_inst(BRW_OPCODE_MUL,
1105 interp_attr,
1106 interp_attr,
1107 this->pixel_w));
1108 interp_attr.reg_offset++;
1109 }
1110 }
1111
1112 void
1113 fs_visitor::emit_fb_writes()
1114 {
1115 this->current_annotation = "FB write";
1116
1117 assert(this->frag_color || !"FINISHME: MRT");
1118 fs_reg color = *(variable_storage(this->frag_color));
1119
1120 for (int i = 0; i < 4; i++) {
1121 emit(fs_inst(BRW_OPCODE_MOV,
1122 fs_reg(MRF, 2 + i),
1123 color));
1124 color.reg_offset++;
1125 }
1126
1127 emit(fs_inst(FS_OPCODE_FB_WRITE,
1128 fs_reg(0),
1129 fs_reg(0)));
1130
1131 this->current_annotation = NULL;
1132 }
1133
1134 void
1135 fs_visitor::generate_fb_write(fs_inst *inst)
1136 {
1137 GLboolean eot = 1; /* FINISHME: MRT */
1138 /* FINISHME: AADS */
1139
1140 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
1141 * move, here's g1.
1142 */
1143 brw_push_insn_state(p);
1144 brw_set_mask_control(p, BRW_MASK_DISABLE);
1145 brw_set_compression_control(p, BRW_COMPRESSION_NONE);
1146 brw_MOV(p,
1147 brw_message_reg(1),
1148 brw_vec8_grf(1, 0));
1149 brw_pop_insn_state(p);
1150
1151 int nr = 2 + 4;
1152
1153 brw_fb_WRITE(p,
1154 8, /* dispatch_width */
1155 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW),
1156 0, /* base MRF */
1157 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW),
1158 0, /* FINISHME: MRT target */
1159 nr,
1160 0,
1161 eot);
1162 }
1163
1164 void
1165 fs_visitor::generate_linterp(fs_inst *inst,
1166 struct brw_reg dst, struct brw_reg *src)
1167 {
1168 struct brw_reg delta_x = src[0];
1169 struct brw_reg delta_y = src[1];
1170 struct brw_reg interp = src[2];
1171
1172 if (brw->has_pln &&
1173 delta_y.nr == delta_x.nr + 1 &&
1174 (intel->gen >= 6 || (delta_x.nr & 1) == 0)) {
1175 brw_PLN(p, dst, interp, delta_x);
1176 } else {
1177 brw_LINE(p, brw_null_reg(), interp, delta_x);
1178 brw_MAC(p, dst, suboffset(interp, 1), delta_y);
1179 }
1180 }
1181
1182 void
1183 fs_visitor::generate_math(fs_inst *inst,
1184 struct brw_reg dst, struct brw_reg *src)
1185 {
1186 int op;
1187
1188 switch (inst->opcode) {
1189 case FS_OPCODE_RCP:
1190 op = BRW_MATH_FUNCTION_INV;
1191 break;
1192 case FS_OPCODE_RSQ:
1193 op = BRW_MATH_FUNCTION_RSQ;
1194 break;
1195 case FS_OPCODE_SQRT:
1196 op = BRW_MATH_FUNCTION_SQRT;
1197 break;
1198 case FS_OPCODE_EXP2:
1199 op = BRW_MATH_FUNCTION_EXP;
1200 break;
1201 case FS_OPCODE_LOG2:
1202 op = BRW_MATH_FUNCTION_LOG;
1203 break;
1204 case FS_OPCODE_POW:
1205 op = BRW_MATH_FUNCTION_POW;
1206 break;
1207 case FS_OPCODE_SIN:
1208 op = BRW_MATH_FUNCTION_SIN;
1209 break;
1210 case FS_OPCODE_COS:
1211 op = BRW_MATH_FUNCTION_COS;
1212 break;
1213 default:
1214 assert(!"not reached: unknown math function");
1215 op = 0;
1216 break;
1217 }
1218
1219 if (inst->opcode == FS_OPCODE_POW) {
1220 brw_MOV(p, brw_message_reg(3), src[1]);
1221 }
1222
1223 brw_math(p, dst,
1224 op,
1225 inst->saturate ? BRW_MATH_SATURATE_SATURATE :
1226 BRW_MATH_SATURATE_NONE,
1227 2, src[0],
1228 BRW_MATH_DATA_VECTOR,
1229 BRW_MATH_PRECISION_FULL);
1230 }
1231
1232 static void
1233 trivial_assign_reg(int header_size, fs_reg *reg)
1234 {
1235 if (reg->file == GRF && reg->reg != 0) {
1236 reg->hw_reg = header_size + reg->reg - 1 + reg->reg_offset;
1237 reg->reg = 0;
1238 }
1239 }
1240
1241 void
1242 fs_visitor::assign_urb_setup()
1243 {
1244 int urb_start = c->key.nr_payload_regs; /* FINISHME: push constants */
1245 int interp_reg_nr[FRAG_ATTRIB_MAX];
1246
1247 c->prog_data.urb_read_length = 0;
1248
1249 /* Figure out where each of the incoming setup attributes lands. */
1250 for (unsigned int i = 0; i < FRAG_ATTRIB_MAX; i++) {
1251 interp_reg_nr[i] = -1;
1252
1253 if (i != FRAG_ATTRIB_WPOS &&
1254 !(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(i)))
1255 continue;
1256
1257 /* Each attribute is 4 setup channels, each of which is half a reg. */
1258 interp_reg_nr[i] = urb_start + c->prog_data.urb_read_length;
1259 c->prog_data.urb_read_length += 2;
1260 }
1261
1262 /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
1263 * the correct setup input.
1264 */
1265 foreach_iter(exec_list_iterator, iter, this->instructions) {
1266 fs_inst *inst = (fs_inst *)iter.get();
1267
1268 if (inst->opcode != FS_OPCODE_LINTERP)
1269 continue;
1270
1271 assert(inst->src[2].file == FIXED_HW_REG);
1272
1273 int location = inst->src[2].fixed_hw_reg.nr / 2;
1274 assert(interp_reg_nr[location] != -1);
1275 inst->src[2].fixed_hw_reg.nr = (interp_reg_nr[location] +
1276 (inst->src[2].fixed_hw_reg.nr & 1));
1277 }
1278
1279 this->first_non_payload_grf = urb_start + c->prog_data.urb_read_length;
1280 }
1281
1282 void
1283 fs_visitor::assign_regs()
1284 {
1285 int header_size = this->first_non_payload_grf;
1286 int last_grf = 0;
1287
1288 /* FINISHME: trivial assignment of register numbers */
1289 foreach_iter(exec_list_iterator, iter, this->instructions) {
1290 fs_inst *inst = (fs_inst *)iter.get();
1291
1292 trivial_assign_reg(header_size, &inst->dst);
1293 trivial_assign_reg(header_size, &inst->src[0]);
1294 trivial_assign_reg(header_size, &inst->src[1]);
1295
1296 last_grf = MAX2(last_grf, inst->dst.hw_reg);
1297 last_grf = MAX2(last_grf, inst->src[0].hw_reg);
1298 last_grf = MAX2(last_grf, inst->src[1].hw_reg);
1299 }
1300
1301 this->grf_used = last_grf;
1302 }
1303
1304 static struct brw_reg brw_reg_from_fs_reg(fs_reg *reg)
1305 {
1306 struct brw_reg brw_reg;
1307
1308 switch (reg->file) {
1309 case GRF:
1310 case ARF:
1311 case MRF:
1312 brw_reg = brw_vec8_reg(reg->file,
1313 reg->hw_reg, 0);
1314 brw_reg = retype(brw_reg, reg->type);
1315 break;
1316 case IMM:
1317 switch (reg->type) {
1318 case BRW_REGISTER_TYPE_F:
1319 brw_reg = brw_imm_f(reg->imm.f);
1320 break;
1321 case BRW_REGISTER_TYPE_D:
1322 brw_reg = brw_imm_f(reg->imm.i);
1323 break;
1324 case BRW_REGISTER_TYPE_UD:
1325 brw_reg = brw_imm_f(reg->imm.u);
1326 break;
1327 default:
1328 assert(!"not reached");
1329 break;
1330 }
1331 break;
1332 case FIXED_HW_REG:
1333 brw_reg = reg->fixed_hw_reg;
1334 break;
1335 case BAD_FILE:
1336 /* Probably unused. */
1337 brw_reg = brw_null_reg();
1338 }
1339 if (reg->abs)
1340 brw_reg = brw_abs(brw_reg);
1341 if (reg->negate)
1342 brw_reg = negate(brw_reg);
1343
1344 return brw_reg;
1345 }
1346
1347 void
1348 fs_visitor::generate_code()
1349 {
1350 unsigned int annotation_len = 0;
1351 int last_native_inst = 0;
1352
1353 foreach_iter(exec_list_iterator, iter, this->instructions) {
1354 fs_inst *inst = (fs_inst *)iter.get();
1355 struct brw_reg src[3], dst;
1356
1357 for (unsigned int i = 0; i < 3; i++) {
1358 src[i] = brw_reg_from_fs_reg(&inst->src[i]);
1359 }
1360 dst = brw_reg_from_fs_reg(&inst->dst);
1361
1362 brw_set_conditionalmod(p, inst->conditional_mod);
1363 brw_set_predicate_control(p, inst->predicated);
1364
1365 switch (inst->opcode) {
1366 case BRW_OPCODE_MOV:
1367 brw_MOV(p, dst, src[0]);
1368 break;
1369 case BRW_OPCODE_ADD:
1370 brw_ADD(p, dst, src[0], src[1]);
1371 break;
1372 case BRW_OPCODE_MUL:
1373 brw_MUL(p, dst, src[0], src[1]);
1374 break;
1375 case FS_OPCODE_RCP:
1376 case FS_OPCODE_RSQ:
1377 case FS_OPCODE_SQRT:
1378 case FS_OPCODE_EXP2:
1379 case FS_OPCODE_LOG2:
1380 case FS_OPCODE_POW:
1381 case FS_OPCODE_SIN:
1382 case FS_OPCODE_COS:
1383 generate_math(inst, dst, src);
1384 break;
1385 case FS_OPCODE_LINTERP:
1386 generate_linterp(inst, dst, src);
1387 break;
1388 case FS_OPCODE_FB_WRITE:
1389 generate_fb_write(inst);
1390 break;
1391 default:
1392 assert(!"not reached");
1393 }
1394
1395 if (annotation_len < p->nr_insn) {
1396 annotation_len *= 2;
1397 if (annotation_len < 16)
1398 annotation_len = 16;
1399
1400 this->annotation_string = talloc_realloc(this->mem_ctx,
1401 annotation_string,
1402 const char *,
1403 annotation_len);
1404 this->annotation_ir = talloc_realloc(this->mem_ctx,
1405 annotation_ir,
1406 ir_instruction *,
1407 annotation_len);
1408 }
1409
1410 for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
1411 this->annotation_string[i] = inst->annotation;
1412 this->annotation_ir[i] = inst->ir;
1413 }
1414 last_native_inst = p->nr_insn;
1415 }
1416 }
1417
1418 GLboolean
1419 brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c)
1420 {
1421 struct brw_compile *p = &c->func;
1422 struct intel_context *intel = &brw->intel;
1423 GLcontext *ctx = &intel->ctx;
1424 struct brw_shader *shader = NULL;
1425 struct gl_shader_program *prog = ctx->Shader.CurrentProgram;
1426
1427 if (!prog)
1428 return GL_FALSE;
1429
1430 if (!using_new_fs)
1431 return GL_FALSE;
1432
1433 for (unsigned int i = 0; i < prog->_NumLinkedShaders; i++) {
1434 if (prog->_LinkedShaders[i]->Type == GL_FRAGMENT_SHADER) {
1435 shader = (struct brw_shader *)prog->_LinkedShaders[i];
1436 break;
1437 }
1438 }
1439 if (!shader)
1440 return GL_FALSE;
1441
1442 /* We always use 8-wide mode, at least for now. For one, flow
1443 * control only works in 8-wide. Also, when we're fragment shader
1444 * bound, we're almost always under register pressure as well, so
1445 * 8-wide would save us from the performance cliff of spilling
1446 * regs.
1447 */
1448 c->dispatch_width = 8;
1449
1450 if (INTEL_DEBUG & DEBUG_WM) {
1451 printf("GLSL IR for native fragment shader %d:\n", prog->Name);
1452 _mesa_print_ir(shader->ir, NULL);
1453 printf("\n");
1454 }
1455
1456 /* Now the main event: Visit the shader IR and generate our FS IR for it.
1457 */
1458 fs_visitor v(c, shader);
1459
1460 if (0) {
1461 v.emit_dummy_fs();
1462 } else {
1463 v.emit_interpolation();
1464
1465 /* Generate FS IR for main(). (the visitor only descends into
1466 * functions called "main").
1467 */
1468 foreach_iter(exec_list_iterator, iter, *shader->ir) {
1469 ir_instruction *ir = (ir_instruction *)iter.get();
1470 v.base_ir = ir;
1471 ir->accept(&v);
1472 }
1473
1474 if (v.fail)
1475 return GL_FALSE;
1476
1477 v.emit_fb_writes();
1478 v.assign_urb_setup();
1479 v.assign_regs();
1480 }
1481
1482 v.generate_code();
1483
1484 if (INTEL_DEBUG & DEBUG_WM) {
1485 const char *last_annotation_string = NULL;
1486 ir_instruction *last_annotation_ir = NULL;
1487
1488 printf("Native code for fragment shader %d:\n", prog->Name);
1489 for (unsigned int i = 0; i < p->nr_insn; i++) {
1490 if (last_annotation_ir != v.annotation_ir[i]) {
1491 last_annotation_ir = v.annotation_ir[i];
1492 if (last_annotation_ir) {
1493 printf(" ");
1494 last_annotation_ir->print();
1495 printf("\n");
1496 }
1497 }
1498 if (last_annotation_string != v.annotation_string[i]) {
1499 last_annotation_string = v.annotation_string[i];
1500 if (last_annotation_string)
1501 printf(" %s\n", last_annotation_string);
1502 }
1503 brw_disasm(stdout, &p->store[i], intel->gen);
1504 }
1505 printf("\n");
1506 }
1507
1508 c->prog_data.nr_params = 0; /* FINISHME */
1509 c->prog_data.first_curbe_grf = c->key.nr_payload_regs;
1510 c->prog_data.curb_read_length = 0; /* FINISHME */
1511 c->prog_data.total_grf = v.grf_used;
1512 c->prog_data.total_scratch = 0;
1513
1514 return GL_TRUE;
1515 }