2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
30 #include <sys/types.h>
32 #include "main/macros.h"
33 #include "main/shaderobj.h"
34 #include "main/uniforms.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_print.h"
37 #include "program/prog_optimize.h"
38 #include "program/register_allocate.h"
39 #include "program/sampler.h"
40 #include "program/hash_table.h"
41 #include "brw_context.h"
46 #include "../glsl/glsl_types.h"
47 #include "../glsl/ir_optimization.h"
48 #include "../glsl/ir_print_visitor.h"
51 ARF
= BRW_ARCHITECTURE_REGISTER_FILE
,
52 GRF
= BRW_GENERAL_REGISTER_FILE
,
53 MRF
= BRW_MESSAGE_REGISTER_FILE
,
54 IMM
= BRW_IMMEDIATE_VALUE
,
55 FIXED_HW_REG
, /* a struct brw_reg */
56 UNIFORM
, /* prog_data->params[hw_reg] */
61 FS_OPCODE_FB_WRITE
= 256,
79 static int using_new_fs
= -1;
80 static struct brw_reg
brw_reg_from_fs_reg(class fs_reg
*reg
);
83 brw_new_shader(GLcontext
*ctx
, GLuint name
, GLuint type
)
85 struct brw_shader
*shader
;
87 shader
= talloc_zero(NULL
, struct brw_shader
);
89 shader
->base
.Type
= type
;
90 shader
->base
.Name
= name
;
91 _mesa_init_shader(ctx
, &shader
->base
);
97 struct gl_shader_program
*
98 brw_new_shader_program(GLcontext
*ctx
, GLuint name
)
100 struct brw_shader_program
*prog
;
101 prog
= talloc_zero(NULL
, struct brw_shader_program
);
103 prog
->base
.Name
= name
;
104 _mesa_init_shader_program(ctx
, &prog
->base
);
110 brw_compile_shader(GLcontext
*ctx
, struct gl_shader
*shader
)
112 if (!_mesa_ir_compile_shader(ctx
, shader
))
119 brw_link_shader(GLcontext
*ctx
, struct gl_shader_program
*prog
)
121 if (using_new_fs
== -1)
122 using_new_fs
= getenv("INTEL_NEW_FS") != NULL
;
124 for (unsigned i
= 0; i
< prog
->_NumLinkedShaders
; i
++) {
125 struct brw_shader
*shader
= (struct brw_shader
*)prog
->_LinkedShaders
[i
];
127 if (using_new_fs
&& shader
->base
.Type
== GL_FRAGMENT_SHADER
) {
128 void *mem_ctx
= talloc_new(NULL
);
132 talloc_free(shader
->ir
);
133 shader
->ir
= new(shader
) exec_list
;
134 clone_ir_list(mem_ctx
, shader
->ir
, shader
->base
.ir
);
136 do_mat_op_to_vec(shader
->ir
);
137 do_mod_to_fract(shader
->ir
);
138 do_div_to_mul_rcp(shader
->ir
);
139 do_sub_to_add_neg(shader
->ir
);
140 do_explog_to_explog2(shader
->ir
);
145 brw_do_channel_expressions(shader
->ir
);
146 brw_do_vector_splitting(shader
->ir
);
148 progress
= do_lower_jumps(shader
->ir
, true, true,
149 true, /* main return */
150 false, /* continue */
154 progress
= do_common_optimization(shader
->ir
, true, 32) || progress
;
156 progress
= lower_noise(shader
->ir
) || progress
;
158 lower_variable_index_to_cond_assign(shader
->ir
,
160 GL_TRUE
, /* output */
162 GL_TRUE
/* uniform */
166 validate_ir_tree(shader
->ir
);
168 reparent_ir(shader
->ir
, shader
->ir
);
169 talloc_free(mem_ctx
);
173 if (!_mesa_ir_link_shader(ctx
, prog
))
180 type_size(const struct glsl_type
*type
)
182 unsigned int size
, i
;
184 switch (type
->base_type
) {
187 case GLSL_TYPE_FLOAT
:
189 return type
->components();
190 case GLSL_TYPE_ARRAY
:
191 return type_size(type
->fields
.array
) * type
->length
;
192 case GLSL_TYPE_STRUCT
:
194 for (i
= 0; i
< type
->length
; i
++) {
195 size
+= type_size(type
->fields
.structure
[i
].type
);
198 case GLSL_TYPE_SAMPLER
:
199 /* Samplers take up no register space, since they're baked in at
204 assert(!"not reached");
211 /* Callers of this talloc-based new need not call delete. It's
212 * easier to just talloc_free 'ctx' (or any of its ancestors). */
213 static void* operator new(size_t size
, void *ctx
)
217 node
= talloc_size(ctx
, size
);
218 assert(node
!= NULL
);
226 this->reg_offset
= 0;
232 /** Generic unset register constructor. */
236 this->file
= BAD_FILE
;
239 /** Immediate value constructor. */
244 this->type
= BRW_REGISTER_TYPE_F
;
248 /** Immediate value constructor. */
253 this->type
= BRW_REGISTER_TYPE_D
;
257 /** Immediate value constructor. */
262 this->type
= BRW_REGISTER_TYPE_UD
;
266 /** Fixed brw_reg Immediate value constructor. */
267 fs_reg(struct brw_reg fixed_hw_reg
)
270 this->file
= FIXED_HW_REG
;
271 this->fixed_hw_reg
= fixed_hw_reg
;
272 this->type
= fixed_hw_reg
.type
;
275 fs_reg(enum register_file file
, int hw_reg
);
276 fs_reg(class fs_visitor
*v
, const struct glsl_type
*type
);
278 /** Register file: ARF, GRF, MRF, IMM. */
279 enum register_file file
;
280 /** virtual register number. 0 = fixed hw reg */
282 /** Offset within the virtual register. */
284 /** HW register number. Generally unset until register allocation. */
286 /** Register type. BRW_REGISTER_TYPE_* */
290 struct brw_reg fixed_hw_reg
;
292 /** Value for file == BRW_IMMMEDIATE_FILE */
300 static const fs_reg reg_undef
;
301 static const fs_reg
reg_null(ARF
, BRW_ARF_NULL
);
303 class fs_inst
: public exec_node
{
305 /* Callers of this talloc-based new need not call delete. It's
306 * easier to just talloc_free 'ctx' (or any of its ancestors). */
307 static void* operator new(size_t size
, void *ctx
)
311 node
= talloc_zero_size(ctx
, size
);
312 assert(node
!= NULL
);
319 this->opcode
= BRW_OPCODE_NOP
;
320 this->saturate
= false;
321 this->conditional_mod
= BRW_CONDITIONAL_NONE
;
322 this->predicated
= false;
326 this->shadow_compare
= false;
337 this->opcode
= opcode
;
340 fs_inst(int opcode
, fs_reg dst
, fs_reg src0
)
343 this->opcode
= opcode
;
348 fs_inst(int opcode
, fs_reg dst
, fs_reg src0
, fs_reg src1
)
351 this->opcode
= opcode
;
357 fs_inst(int opcode
, fs_reg dst
, fs_reg src0
, fs_reg src1
, fs_reg src2
)
360 this->opcode
= opcode
;
367 int opcode
; /* BRW_OPCODE_* or FS_OPCODE_* */
372 int conditional_mod
; /**< BRW_CONDITIONAL_* */
374 int mlen
; /**< SEND message length */
376 int target
; /**< MRT target. */
381 * Annotation for the generated IR. One of the two can be set.
384 const char *annotation
;
388 class fs_visitor
: public ir_visitor
392 fs_visitor(struct brw_wm_compile
*c
, struct brw_shader
*shader
)
397 this->fp
= brw
->fragment_program
;
398 this->intel
= &brw
->intel
;
399 this->ctx
= &intel
->ctx
;
400 this->mem_ctx
= talloc_new(NULL
);
401 this->shader
= shader
;
403 this->variable_ht
= hash_table_ctor(0,
404 hash_table_pointer_hash
,
405 hash_table_pointer_compare
);
407 this->frag_color
= NULL
;
408 this->frag_data
= NULL
;
409 this->frag_depth
= NULL
;
410 this->first_non_payload_grf
= 0;
412 this->current_annotation
= NULL
;
413 this->annotation_string
= NULL
;
414 this->annotation_ir
= NULL
;
415 this->base_ir
= NULL
;
417 this->virtual_grf_sizes
= NULL
;
418 this->virtual_grf_next
= 1;
419 this->virtual_grf_array_size
= 0;
420 this->virtual_grf_def
= NULL
;
421 this->virtual_grf_use
= NULL
;
425 talloc_free(this->mem_ctx
);
426 hash_table_dtor(this->variable_ht
);
429 fs_reg
*variable_storage(ir_variable
*var
);
430 int virtual_grf_alloc(int size
);
432 void visit(ir_variable
*ir
);
433 void visit(ir_assignment
*ir
);
434 void visit(ir_dereference_variable
*ir
);
435 void visit(ir_dereference_record
*ir
);
436 void visit(ir_dereference_array
*ir
);
437 void visit(ir_expression
*ir
);
438 void visit(ir_texture
*ir
);
439 void visit(ir_if
*ir
);
440 void visit(ir_constant
*ir
);
441 void visit(ir_swizzle
*ir
);
442 void visit(ir_return
*ir
);
443 void visit(ir_loop
*ir
);
444 void visit(ir_loop_jump
*ir
);
445 void visit(ir_discard
*ir
);
446 void visit(ir_call
*ir
);
447 void visit(ir_function
*ir
);
448 void visit(ir_function_signature
*ir
);
450 fs_inst
*emit(fs_inst inst
);
451 void assign_curb_setup();
452 void assign_urb_setup();
454 void assign_regs_trivial();
455 void calculate_live_intervals();
456 bool virtual_grf_interferes(int a
, int b
);
457 void generate_code();
458 void generate_fb_write(fs_inst
*inst
);
459 void generate_linterp(fs_inst
*inst
, struct brw_reg dst
,
460 struct brw_reg
*src
);
461 void generate_tex(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
);
462 void generate_math(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg
*src
);
463 void generate_discard(fs_inst
*inst
, struct brw_reg temp
);
464 void generate_ddx(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
);
465 void generate_ddy(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
);
467 void emit_dummy_fs();
468 void emit_fragcoord_interpolation(ir_variable
*ir
);
469 void emit_general_interpolation(ir_variable
*ir
);
470 void emit_interpolation_setup();
471 void emit_fb_writes();
473 struct brw_reg
interp_reg(int location
, int channel
);
474 int setup_uniform_values(int loc
, const glsl_type
*type
);
475 void setup_builtin_uniform_values(ir_variable
*ir
);
477 struct brw_context
*brw
;
478 const struct gl_fragment_program
*fp
;
479 struct intel_context
*intel
;
481 struct brw_wm_compile
*c
;
482 struct brw_compile
*p
;
483 struct brw_shader
*shader
;
485 exec_list instructions
;
487 int *virtual_grf_sizes
;
488 int virtual_grf_next
;
489 int virtual_grf_array_size
;
490 int *virtual_grf_def
;
491 int *virtual_grf_use
;
493 struct hash_table
*variable_ht
;
494 ir_variable
*frag_color
, *frag_data
, *frag_depth
;
495 int first_non_payload_grf
;
497 /** @{ debug annotation info */
498 const char *current_annotation
;
499 ir_instruction
*base_ir
;
500 const char **annotation_string
;
501 ir_instruction
**annotation_ir
;
506 /* Result of last visit() method. */
521 fs_visitor::virtual_grf_alloc(int size
)
523 if (virtual_grf_array_size
<= virtual_grf_next
) {
524 if (virtual_grf_array_size
== 0)
525 virtual_grf_array_size
= 16;
527 virtual_grf_array_size
*= 2;
528 virtual_grf_sizes
= talloc_realloc(mem_ctx
, virtual_grf_sizes
,
529 int, virtual_grf_array_size
);
531 /* This slot is always unused. */
532 virtual_grf_sizes
[0] = 0;
534 virtual_grf_sizes
[virtual_grf_next
] = size
;
535 return virtual_grf_next
++;
538 /** Fixed HW reg constructor. */
539 fs_reg::fs_reg(enum register_file file
, int hw_reg
)
543 this->hw_reg
= hw_reg
;
544 this->type
= BRW_REGISTER_TYPE_F
;
548 brw_type_for_base_type(const struct glsl_type
*type
)
550 switch (type
->base_type
) {
551 case GLSL_TYPE_FLOAT
:
552 return BRW_REGISTER_TYPE_F
;
555 return BRW_REGISTER_TYPE_D
;
557 return BRW_REGISTER_TYPE_UD
;
558 case GLSL_TYPE_ARRAY
:
559 case GLSL_TYPE_STRUCT
:
560 /* These should be overridden with the type of the member when
561 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
562 * way to trip up if we don't.
564 return BRW_REGISTER_TYPE_UD
;
566 assert(!"not reached");
567 return BRW_REGISTER_TYPE_F
;
571 /** Automatic reg constructor. */
572 fs_reg::fs_reg(class fs_visitor
*v
, const struct glsl_type
*type
)
577 this->reg
= v
->virtual_grf_alloc(type_size(type
));
578 this->reg_offset
= 0;
579 this->type
= brw_type_for_base_type(type
);
583 fs_visitor::variable_storage(ir_variable
*var
)
585 return (fs_reg
*)hash_table_find(this->variable_ht
, var
);
588 /* Our support for uniforms is piggy-backed on the struct
589 * gl_fragment_program, because that's where the values actually
590 * get stored, rather than in some global gl_shader_program uniform
594 fs_visitor::setup_uniform_values(int loc
, const glsl_type
*type
)
596 unsigned int offset
= 0;
599 if (type
->is_matrix()) {
600 const glsl_type
*column
= glsl_type::get_instance(GLSL_TYPE_FLOAT
,
601 type
->vector_elements
,
604 for (unsigned int i
= 0; i
< type
->matrix_columns
; i
++) {
605 offset
+= setup_uniform_values(loc
+ offset
, column
);
611 switch (type
->base_type
) {
612 case GLSL_TYPE_FLOAT
:
616 vec_values
= fp
->Base
.Parameters
->ParameterValues
[loc
];
617 for (unsigned int i
= 0; i
< type
->vector_elements
; i
++) {
618 c
->prog_data
.param
[c
->prog_data
.nr_params
++] = &vec_values
[i
];
622 case GLSL_TYPE_STRUCT
:
623 for (unsigned int i
= 0; i
< type
->length
; i
++) {
624 offset
+= setup_uniform_values(loc
+ offset
,
625 type
->fields
.structure
[i
].type
);
629 case GLSL_TYPE_ARRAY
:
630 for (unsigned int i
= 0; i
< type
->length
; i
++) {
631 offset
+= setup_uniform_values(loc
+ offset
, type
->fields
.array
);
635 case GLSL_TYPE_SAMPLER
:
636 /* The sampler takes up a slot, but we don't use any values from it. */
640 assert(!"not reached");
646 /* Our support for builtin uniforms is even scarier than non-builtin.
647 * It sits on top of the PROG_STATE_VAR parameters that are
648 * automatically updated from GL context state.
651 fs_visitor::setup_builtin_uniform_values(ir_variable
*ir
)
653 const struct gl_builtin_uniform_desc
*statevar
= NULL
;
655 for (unsigned int i
= 0; _mesa_builtin_uniform_desc
[i
].name
; i
++) {
656 statevar
= &_mesa_builtin_uniform_desc
[i
];
657 if (strcmp(ir
->name
, _mesa_builtin_uniform_desc
[i
].name
) == 0)
661 if (!statevar
->name
) {
663 printf("Failed to find builtin uniform `%s'\n", ir
->name
);
668 if (ir
->type
->is_array()) {
669 array_count
= ir
->type
->length
;
674 for (int a
= 0; a
< array_count
; a
++) {
675 for (unsigned int i
= 0; i
< statevar
->num_elements
; i
++) {
676 struct gl_builtin_uniform_element
*element
= &statevar
->elements
[i
];
677 int tokens
[STATE_LENGTH
];
679 memcpy(tokens
, element
->tokens
, sizeof(element
->tokens
));
680 if (ir
->type
->is_array()) {
684 /* This state reference has already been setup by ir_to_mesa,
685 * but we'll get the same index back here.
687 int index
= _mesa_add_state_reference(this->fp
->Base
.Parameters
,
688 (gl_state_index
*)tokens
);
689 float *vec_values
= this->fp
->Base
.Parameters
->ParameterValues
[index
];
691 /* Add each of the unique swizzles of the element as a
692 * parameter. This'll end up matching the expected layout of
693 * the array/matrix/structure we're trying to fill in.
696 for (unsigned int i
= 0; i
< 4; i
++) {
697 int this_swiz
= GET_SWZ(element
->swizzle
, i
);
698 if (this_swiz
== last_swiz
)
700 last_swiz
= this_swiz
;
702 c
->prog_data
.param
[c
->prog_data
.nr_params
++] = &vec_values
[i
];
709 fs_visitor::emit_fragcoord_interpolation(ir_variable
*ir
)
711 fs_reg
*reg
= new(this->mem_ctx
) fs_reg(this, ir
->type
);
713 fs_reg neg_y
= this->pixel_y
;
717 if (ir
->pixel_center_integer
) {
718 emit(fs_inst(BRW_OPCODE_MOV
, wpos
, this->pixel_x
));
720 emit(fs_inst(BRW_OPCODE_ADD
, wpos
, this->pixel_x
, fs_reg(0.5f
)));
725 if (ir
->origin_upper_left
&& ir
->pixel_center_integer
) {
726 emit(fs_inst(BRW_OPCODE_MOV
, wpos
, this->pixel_y
));
728 fs_reg pixel_y
= this->pixel_y
;
729 float offset
= (ir
->pixel_center_integer
? 0.0 : 0.5);
731 if (!ir
->origin_upper_left
) {
732 pixel_y
.negate
= true;
733 offset
+= c
->key
.drawable_height
- 1.0;
736 emit(fs_inst(BRW_OPCODE_ADD
, wpos
, pixel_y
, fs_reg(offset
)));
741 emit(fs_inst(FS_OPCODE_LINTERP
, wpos
, this->delta_x
, this->delta_y
,
742 interp_reg(FRAG_ATTRIB_WPOS
, 2)));
745 /* gl_FragCoord.w: Already set up in emit_interpolation */
746 emit(fs_inst(BRW_OPCODE_MOV
, wpos
, this->wpos_w
));
748 hash_table_insert(this->variable_ht
, reg
, ir
);
753 fs_visitor::emit_general_interpolation(ir_variable
*ir
)
755 fs_reg
*reg
= new(this->mem_ctx
) fs_reg(this, ir
->type
);
756 /* Interpolation is always in floating point regs. */
757 reg
->type
= BRW_REGISTER_TYPE_F
;
760 unsigned int array_elements
;
761 const glsl_type
*type
;
763 if (ir
->type
->is_array()) {
764 array_elements
= ir
->type
->length
;
765 if (array_elements
== 0) {
768 type
= ir
->type
->fields
.array
;
774 int location
= ir
->location
;
775 for (unsigned int i
= 0; i
< array_elements
; i
++) {
776 for (unsigned int j
= 0; j
< type
->matrix_columns
; j
++) {
777 if (!(fp
->Base
.InputsRead
& BITFIELD64_BIT(location
))) {
778 /* If there's no incoming setup data for this slot, don't
779 * emit interpolation for it (since it's not used, and
780 * we'd fall over later trying to find the setup data.
782 attr
.reg_offset
+= type
->vector_elements
;
786 for (unsigned int c
= 0; c
< type
->vector_elements
; c
++) {
787 struct brw_reg interp
= interp_reg(location
, c
);
788 emit(fs_inst(FS_OPCODE_LINTERP
,
795 attr
.reg_offset
-= type
->vector_elements
;
797 for (unsigned int c
= 0; c
< type
->vector_elements
; c
++) {
798 emit(fs_inst(BRW_OPCODE_MUL
,
808 hash_table_insert(this->variable_ht
, reg
, ir
);
812 fs_visitor::visit(ir_variable
*ir
)
816 if (variable_storage(ir
))
819 if (strcmp(ir
->name
, "gl_FragColor") == 0) {
820 this->frag_color
= ir
;
821 } else if (strcmp(ir
->name
, "gl_FragData") == 0) {
822 this->frag_data
= ir
;
823 } else if (strcmp(ir
->name
, "gl_FragDepth") == 0) {
824 this->frag_depth
= ir
;
827 if (ir
->mode
== ir_var_in
) {
828 if (!strcmp(ir
->name
, "gl_FragCoord")) {
829 emit_fragcoord_interpolation(ir
);
831 } else if (!strcmp(ir
->name
, "gl_FrontFacing")) {
832 reg
= new(this->mem_ctx
) fs_reg(this, ir
->type
);
833 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
834 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
837 fs_inst
*inst
= emit(fs_inst(BRW_OPCODE_CMP
,
841 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
842 emit(fs_inst(BRW_OPCODE_AND
, *reg
, *reg
, fs_reg(1u)));
844 emit_general_interpolation(ir
);
849 if (ir
->mode
== ir_var_uniform
) {
850 int param_index
= c
->prog_data
.nr_params
;
852 if (!strncmp(ir
->name
, "gl_", 3)) {
853 setup_builtin_uniform_values(ir
);
855 setup_uniform_values(ir
->location
, ir
->type
);
858 reg
= new(this->mem_ctx
) fs_reg(UNIFORM
, param_index
);
862 reg
= new(this->mem_ctx
) fs_reg(this, ir
->type
);
864 hash_table_insert(this->variable_ht
, reg
, ir
);
868 fs_visitor::visit(ir_dereference_variable
*ir
)
870 fs_reg
*reg
= variable_storage(ir
->var
);
875 fs_visitor::visit(ir_dereference_record
*ir
)
877 const glsl_type
*struct_type
= ir
->record
->type
;
879 ir
->record
->accept(this);
881 unsigned int offset
= 0;
882 for (unsigned int i
= 0; i
< struct_type
->length
; i
++) {
883 if (strcmp(struct_type
->fields
.structure
[i
].name
, ir
->field
) == 0)
885 offset
+= type_size(struct_type
->fields
.structure
[i
].type
);
887 this->result
.reg_offset
+= offset
;
888 this->result
.type
= brw_type_for_base_type(ir
->type
);
892 fs_visitor::visit(ir_dereference_array
*ir
)
897 ir
->array
->accept(this);
898 index
= ir
->array_index
->as_constant();
900 element_size
= type_size(ir
->type
);
901 this->result
.type
= brw_type_for_base_type(ir
->type
);
904 assert(this->result
.file
== UNIFORM
||
905 (this->result
.file
== GRF
&&
906 this->result
.reg
!= 0));
907 this->result
.reg_offset
+= index
->value
.i
[0] * element_size
;
909 assert(!"FINISHME: non-constant array element");
914 fs_visitor::visit(ir_expression
*ir
)
916 unsigned int operand
;
921 for (operand
= 0; operand
< ir
->get_num_operands(); operand
++) {
922 ir
->operands
[operand
]->accept(this);
923 if (this->result
.file
== BAD_FILE
) {
925 printf("Failed to get tree for expression operand:\n");
926 ir
->operands
[operand
]->accept(&v
);
929 op
[operand
] = this->result
;
931 /* Matrix expression operands should have been broken down to vector
932 * operations already.
934 assert(!ir
->operands
[operand
]->type
->is_matrix());
935 /* And then those vector operands should have been broken down to scalar.
937 assert(!ir
->operands
[operand
]->type
->is_vector());
940 /* Storage for our result. If our result goes into an assignment, it will
941 * just get copy-propagated out, so no worries.
943 this->result
= fs_reg(this, ir
->type
);
945 switch (ir
->operation
) {
946 case ir_unop_logic_not
:
947 emit(fs_inst(BRW_OPCODE_ADD
, this->result
, op
[0], fs_reg(-1)));
950 op
[0].negate
= !op
[0].negate
;
951 this->result
= op
[0];
955 this->result
= op
[0];
958 temp
= fs_reg(this, ir
->type
);
960 emit(fs_inst(BRW_OPCODE_MOV
, this->result
, fs_reg(0.0f
)));
962 inst
= emit(fs_inst(BRW_OPCODE_CMP
, reg_null
, op
[0], fs_reg(0.0f
)));
963 inst
->conditional_mod
= BRW_CONDITIONAL_G
;
964 inst
= emit(fs_inst(BRW_OPCODE_MOV
, this->result
, fs_reg(1.0f
)));
965 inst
->predicated
= true;
967 inst
= emit(fs_inst(BRW_OPCODE_CMP
, reg_null
, op
[0], fs_reg(0.0f
)));
968 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
969 inst
= emit(fs_inst(BRW_OPCODE_MOV
, this->result
, fs_reg(-1.0f
)));
970 inst
->predicated
= true;
974 emit(fs_inst(FS_OPCODE_RCP
, this->result
, op
[0]));
978 emit(fs_inst(FS_OPCODE_EXP2
, this->result
, op
[0]));
981 emit(fs_inst(FS_OPCODE_LOG2
, this->result
, op
[0]));
985 assert(!"not reached: should be handled by ir_explog_to_explog2");
988 emit(fs_inst(FS_OPCODE_SIN
, this->result
, op
[0]));
991 emit(fs_inst(FS_OPCODE_COS
, this->result
, op
[0]));
995 emit(fs_inst(FS_OPCODE_DDX
, this->result
, op
[0]));
998 emit(fs_inst(FS_OPCODE_DDY
, this->result
, op
[0]));
1002 emit(fs_inst(BRW_OPCODE_ADD
, this->result
, op
[0], op
[1]));
1005 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1009 emit(fs_inst(BRW_OPCODE_MUL
, this->result
, op
[0], op
[1]));
1012 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1015 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1019 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1020 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
1021 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1023 case ir_binop_greater
:
1024 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1025 inst
->conditional_mod
= BRW_CONDITIONAL_G
;
1026 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1028 case ir_binop_lequal
:
1029 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1030 inst
->conditional_mod
= BRW_CONDITIONAL_LE
;
1031 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1033 case ir_binop_gequal
:
1034 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1035 inst
->conditional_mod
= BRW_CONDITIONAL_GE
;
1036 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1038 case ir_binop_equal
:
1039 case ir_binop_all_equal
: /* same as nequal for scalars */
1040 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1041 inst
->conditional_mod
= BRW_CONDITIONAL_Z
;
1042 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1044 case ir_binop_nequal
:
1045 case ir_binop_any_nequal
: /* same as nequal for scalars */
1046 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1047 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
1048 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1051 case ir_binop_logic_xor
:
1052 emit(fs_inst(BRW_OPCODE_XOR
, this->result
, op
[0], op
[1]));
1055 case ir_binop_logic_or
:
1056 emit(fs_inst(BRW_OPCODE_OR
, this->result
, op
[0], op
[1]));
1059 case ir_binop_logic_and
:
1060 emit(fs_inst(BRW_OPCODE_AND
, this->result
, op
[0], op
[1]));
1064 case ir_binop_cross
:
1066 assert(!"not reached: should be handled by brw_fs_channel_expressions");
1070 assert(!"not reached: should be handled by lower_noise");
1074 emit(fs_inst(FS_OPCODE_SQRT
, this->result
, op
[0]));
1078 emit(fs_inst(FS_OPCODE_RSQ
, this->result
, op
[0]));
1084 emit(fs_inst(BRW_OPCODE_MOV
, this->result
, op
[0]));
1087 emit(fs_inst(BRW_OPCODE_MOV
, this->result
, op
[0]));
1091 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], fs_reg(0.0f
)));
1092 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
1095 emit(fs_inst(BRW_OPCODE_RNDD
, this->result
, op
[0]));
1098 op
[0].negate
= ~op
[0].negate
;
1099 inst
= emit(fs_inst(BRW_OPCODE_RNDD
, this->result
, op
[0]));
1100 this->result
.negate
= true;
1103 inst
= emit(fs_inst(BRW_OPCODE_RNDD
, this->result
, op
[0]));
1106 inst
= emit(fs_inst(BRW_OPCODE_FRC
, this->result
, op
[0]));
1110 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1111 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
1113 inst
= emit(fs_inst(BRW_OPCODE_SEL
, this->result
, op
[0], op
[1]));
1114 inst
->predicated
= true;
1117 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1118 inst
->conditional_mod
= BRW_CONDITIONAL_G
;
1120 inst
= emit(fs_inst(BRW_OPCODE_SEL
, this->result
, op
[0], op
[1]));
1121 inst
->predicated
= true;
1125 inst
= emit(fs_inst(FS_OPCODE_POW
, this->result
, op
[0], op
[1]));
1128 case ir_unop_bit_not
:
1130 case ir_binop_lshift
:
1131 case ir_binop_rshift
:
1132 case ir_binop_bit_and
:
1133 case ir_binop_bit_xor
:
1134 case ir_binop_bit_or
:
1135 assert(!"GLSL 1.30 features unsupported");
1141 fs_visitor::visit(ir_assignment
*ir
)
1148 /* FINISHME: arrays on the lhs */
1149 ir
->lhs
->accept(this);
1152 ir
->rhs
->accept(this);
1155 /* FINISHME: This should really set to the correct maximal writemask for each
1156 * FINISHME: component written (in the loops below). This case can only
1157 * FINISHME: occur for matrices, arrays, and structures.
1159 if (ir
->write_mask
== 0) {
1160 assert(!ir
->lhs
->type
->is_scalar() && !ir
->lhs
->type
->is_vector());
1161 write_mask
= WRITEMASK_XYZW
;
1163 assert(ir
->lhs
->type
->is_vector() || ir
->lhs
->type
->is_scalar());
1164 write_mask
= ir
->write_mask
;
1167 assert(l
.file
!= BAD_FILE
);
1168 assert(r
.file
!= BAD_FILE
);
1170 if (ir
->condition
) {
1171 /* Get the condition bool into the predicate. */
1172 ir
->condition
->accept(this);
1173 inst
= emit(fs_inst(BRW_OPCODE_CMP
, reg_null
, this->result
, fs_reg(0)));
1174 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
1177 for (i
= 0; i
< type_size(ir
->lhs
->type
); i
++) {
1178 if (i
>= 4 || (write_mask
& (1 << i
))) {
1179 inst
= emit(fs_inst(BRW_OPCODE_MOV
, l
, r
));
1181 inst
->predicated
= true;
1189 fs_visitor::visit(ir_texture
*ir
)
1192 fs_inst
*inst
= NULL
;
1193 unsigned int mlen
= 0;
1195 ir
->coordinate
->accept(this);
1196 fs_reg coordinate
= this->result
;
1198 if (ir
->projector
) {
1199 fs_reg inv_proj
= fs_reg(this, glsl_type::float_type
);
1201 ir
->projector
->accept(this);
1202 emit(fs_inst(FS_OPCODE_RCP
, inv_proj
, this->result
));
1204 fs_reg proj_coordinate
= fs_reg(this, ir
->coordinate
->type
);
1205 for (unsigned int i
= 0; i
< ir
->coordinate
->type
->vector_elements
; i
++) {
1206 emit(fs_inst(BRW_OPCODE_MUL
, proj_coordinate
, coordinate
, inv_proj
));
1207 coordinate
.reg_offset
++;
1208 proj_coordinate
.reg_offset
++;
1210 proj_coordinate
.reg_offset
= 0;
1212 coordinate
= proj_coordinate
;
1215 for (mlen
= 0; mlen
< ir
->coordinate
->type
->vector_elements
; mlen
++) {
1216 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, base_mrf
+ mlen
), coordinate
));
1217 coordinate
.reg_offset
++;
1220 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
1224 if (ir
->shadow_comparitor
) {
1225 /* For shadow comparisons, we have to supply u,v,r. */
1228 ir
->shadow_comparitor
->accept(this);
1229 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, base_mrf
+ mlen
), this->result
));
1233 /* Do we ever want to handle writemasking on texture samples? Is it
1234 * performance relevant?
1236 fs_reg dst
= fs_reg(this, glsl_type::vec4_type
);
1240 inst
= emit(fs_inst(FS_OPCODE_TEX
, dst
, fs_reg(MRF
, base_mrf
)));
1243 ir
->lod_info
.bias
->accept(this);
1244 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, base_mrf
+ mlen
), this->result
));
1247 inst
= emit(fs_inst(FS_OPCODE_TXB
, dst
, fs_reg(MRF
, base_mrf
)));
1250 ir
->lod_info
.lod
->accept(this);
1251 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, base_mrf
+ mlen
), this->result
));
1254 inst
= emit(fs_inst(FS_OPCODE_TXL
, dst
, fs_reg(MRF
, base_mrf
)));
1258 assert(!"GLSL 1.30 features unsupported");
1263 _mesa_get_sampler_uniform_value(ir
->sampler
,
1264 ctx
->Shader
.CurrentProgram
,
1265 &brw
->fragment_program
->Base
);
1266 inst
->sampler
= c
->fp
->program
.Base
.SamplerUnits
[inst
->sampler
];
1270 if (ir
->shadow_comparitor
)
1271 inst
->shadow_compare
= true;
1276 fs_visitor::visit(ir_swizzle
*ir
)
1278 ir
->val
->accept(this);
1279 fs_reg val
= this->result
;
1281 fs_reg result
= fs_reg(this, ir
->type
);
1282 this->result
= result
;
1284 for (unsigned int i
= 0; i
< ir
->type
->vector_elements
; i
++) {
1285 fs_reg channel
= val
;
1303 channel
.reg_offset
+= swiz
;
1304 emit(fs_inst(BRW_OPCODE_MOV
, result
, channel
));
1305 result
.reg_offset
++;
1310 fs_visitor::visit(ir_discard
*ir
)
1312 fs_reg temp
= fs_reg(this, glsl_type::uint_type
);
1314 assert(ir
->condition
== NULL
); /* FINISHME */
1316 emit(fs_inst(FS_OPCODE_DISCARD
, temp
, temp
));
1320 fs_visitor::visit(ir_constant
*ir
)
1322 fs_reg
reg(this, ir
->type
);
1325 for (unsigned int i
= 0; i
< ir
->type
->vector_elements
; i
++) {
1326 switch (ir
->type
->base_type
) {
1327 case GLSL_TYPE_FLOAT
:
1328 emit(fs_inst(BRW_OPCODE_MOV
, reg
, fs_reg(ir
->value
.f
[i
])));
1330 case GLSL_TYPE_UINT
:
1331 emit(fs_inst(BRW_OPCODE_MOV
, reg
, fs_reg(ir
->value
.u
[i
])));
1334 emit(fs_inst(BRW_OPCODE_MOV
, reg
, fs_reg(ir
->value
.i
[i
])));
1336 case GLSL_TYPE_BOOL
:
1337 emit(fs_inst(BRW_OPCODE_MOV
, reg
, fs_reg((int)ir
->value
.b
[i
])));
1340 assert(!"Non-float/uint/int/bool constant");
1347 fs_visitor::visit(ir_if
*ir
)
1351 /* Don't point the annotation at the if statement, because then it plus
1352 * the then and else blocks get printed.
1354 this->base_ir
= ir
->condition
;
1356 /* Generate the condition into the condition code. */
1357 ir
->condition
->accept(this);
1358 inst
= emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(brw_null_reg()), this->result
));
1359 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
1361 inst
= emit(fs_inst(BRW_OPCODE_IF
));
1362 inst
->predicated
= true;
1364 foreach_iter(exec_list_iterator
, iter
, ir
->then_instructions
) {
1365 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
1371 if (!ir
->else_instructions
.is_empty()) {
1372 emit(fs_inst(BRW_OPCODE_ELSE
));
1374 foreach_iter(exec_list_iterator
, iter
, ir
->else_instructions
) {
1375 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
1382 emit(fs_inst(BRW_OPCODE_ENDIF
));
1386 fs_visitor::visit(ir_loop
*ir
)
1388 fs_reg counter
= reg_undef
;
1391 this->base_ir
= ir
->counter
;
1392 ir
->counter
->accept(this);
1393 counter
= *(variable_storage(ir
->counter
));
1396 this->base_ir
= ir
->from
;
1397 ir
->from
->accept(this);
1399 emit(fs_inst(BRW_OPCODE_MOV
, counter
, this->result
));
1403 /* Start a safety counter. If the user messed up their loop
1404 * counting, we don't want to hang the GPU.
1406 fs_reg max_iter
= fs_reg(this, glsl_type::int_type
);
1407 emit(fs_inst(BRW_OPCODE_MOV
, max_iter
, fs_reg(10000)));
1409 emit(fs_inst(BRW_OPCODE_DO
));
1412 this->base_ir
= ir
->to
;
1413 ir
->to
->accept(this);
1415 fs_inst
*inst
= emit(fs_inst(BRW_OPCODE_CMP
, reg_null
,
1416 counter
, this->result
));
1418 case ir_binop_equal
:
1419 inst
->conditional_mod
= BRW_CONDITIONAL_Z
;
1421 case ir_binop_nequal
:
1422 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
1424 case ir_binop_gequal
:
1425 inst
->conditional_mod
= BRW_CONDITIONAL_GE
;
1427 case ir_binop_lequal
:
1428 inst
->conditional_mod
= BRW_CONDITIONAL_LE
;
1430 case ir_binop_greater
:
1431 inst
->conditional_mod
= BRW_CONDITIONAL_G
;
1434 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
1437 assert(!"not reached: unknown loop condition");
1442 inst
= emit(fs_inst(BRW_OPCODE_BREAK
));
1443 inst
->predicated
= true;
1446 foreach_iter(exec_list_iterator
, iter
, ir
->body_instructions
) {
1447 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
1453 /* Check the maximum loop iters counter. */
1454 inst
= emit(fs_inst(BRW_OPCODE_ADD
, max_iter
, max_iter
, fs_reg(-1)));
1455 inst
->conditional_mod
= BRW_CONDITIONAL_Z
;
1457 inst
= emit(fs_inst(BRW_OPCODE_BREAK
));
1458 inst
->predicated
= true;
1461 if (ir
->increment
) {
1462 this->base_ir
= ir
->increment
;
1463 ir
->increment
->accept(this);
1464 emit(fs_inst(BRW_OPCODE_ADD
, counter
, counter
, this->result
));
1467 emit(fs_inst(BRW_OPCODE_WHILE
));
1471 fs_visitor::visit(ir_loop_jump
*ir
)
1474 case ir_loop_jump::jump_break
:
1475 emit(fs_inst(BRW_OPCODE_BREAK
));
1477 case ir_loop_jump::jump_continue
:
1478 emit(fs_inst(BRW_OPCODE_CONTINUE
));
/* Function calls should have been inlined away before this backend runs,
 * so reaching this visitor is a compiler bug.
 * NOTE(review): the return type, braces, and any surrounding lines were
 * dropped by the extraction; only the signature fragment and the assert
 * survive below.
 */
1484 fs_visitor::visit(ir_call
*ir
)
1486 assert(!"FINISHME");
/* ir_return lowering is unimplemented in this backend; main() has no
 * return value to emit, and other functions are expected to be inlined.
 * NOTE(review): return type and braces are missing from this extraction.
 */
1490 fs_visitor::visit(ir_return
*ir
)
1492 assert(!"FINISHME");
1496 fs_visitor::visit(ir_function
*ir
)
1498 /* Ignore function bodies other than main() -- we shouldn't see calls to
1499 * them since they should all be inlined before we get to ir_to_mesa.
1501 if (strcmp(ir
->name
, "main") == 0) {
1502 const ir_function_signature
*sig
;
1505 sig
= ir
->matching_signature(&empty
);
1509 foreach_iter(exec_list_iterator
, iter
, sig
->body
) {
1510 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
/* Signatures are walked explicitly from visit(ir_function) (which descends
 * into main's body directly), so the generic visitor entry must never fire.
 * NOTE(review): return type and braces are missing from this extraction.
 */
1519 fs_visitor::visit(ir_function_signature
*ir
)
1521 assert(!"not reached");
1526 fs_visitor::emit(fs_inst inst
)
1528 fs_inst
*list_inst
= new(mem_ctx
) fs_inst
;
1531 list_inst
->annotation
= this->current_annotation
;
1532 list_inst
->ir
= this->base_ir
;
1534 this->instructions
.push_tail(list_inst
);
1539 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1541 fs_visitor::emit_dummy_fs()
1543 /* Everyone's favorite color. */
1544 emit(fs_inst(BRW_OPCODE_MOV
,
1547 emit(fs_inst(BRW_OPCODE_MOV
,
1550 emit(fs_inst(BRW_OPCODE_MOV
,
1553 emit(fs_inst(BRW_OPCODE_MOV
,
1558 write
= emit(fs_inst(FS_OPCODE_FB_WRITE
,
1563 /* The register location here is relative to the start of the URB
1564 * data. It will get adjusted to be a real location before
1565 * generate_code() time.
1568 fs_visitor::interp_reg(int location
, int channel
)
1570 int regnr
= location
* 2 + channel
/ 2;
1571 int stride
= (channel
& 1) * 4;
1573 return brw_vec1_grf(regnr
, stride
);
1576 /** Emits the interpolation for the varying inputs. */
1578 fs_visitor::emit_interpolation_setup()
1580 struct brw_reg g1_uw
= retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW
);
1582 this->current_annotation
= "compute pixel centers";
1583 this->pixel_x
= fs_reg(this, glsl_type::uint_type
);
1584 this->pixel_y
= fs_reg(this, glsl_type::uint_type
);
1585 this->pixel_x
.type
= BRW_REGISTER_TYPE_UW
;
1586 this->pixel_y
.type
= BRW_REGISTER_TYPE_UW
;
1587 emit(fs_inst(BRW_OPCODE_ADD
,
1589 fs_reg(stride(suboffset(g1_uw
, 4), 2, 4, 0)),
1590 fs_reg(brw_imm_v(0x10101010))));
1591 emit(fs_inst(BRW_OPCODE_ADD
,
1593 fs_reg(stride(suboffset(g1_uw
, 5), 2, 4, 0)),
1594 fs_reg(brw_imm_v(0x11001100))));
1596 this->current_annotation
= "compute pixel deltas from v0";
1597 this->delta_x
= fs_reg(this, glsl_type::float_type
);
1598 this->delta_y
= fs_reg(this, glsl_type::float_type
);
1599 emit(fs_inst(BRW_OPCODE_ADD
,
1602 fs_reg(negate(brw_vec1_grf(1, 0)))));
1603 emit(fs_inst(BRW_OPCODE_ADD
,
1606 fs_reg(negate(brw_vec1_grf(1, 1)))));
1608 this->current_annotation
= "compute pos.w and 1/pos.w";
1609 /* Compute wpos.w. It's always in our setup, since it's needed to
1610 * interpolate the other attributes.
1612 this->wpos_w
= fs_reg(this, glsl_type::float_type
);
1613 emit(fs_inst(FS_OPCODE_LINTERP
, wpos_w
, this->delta_x
, this->delta_y
,
1614 interp_reg(FRAG_ATTRIB_WPOS
, 3)));
1615 /* Compute the pixel 1/W value from wpos.w. */
1616 this->pixel_w
= fs_reg(this, glsl_type::float_type
);
1617 emit(fs_inst(FS_OPCODE_RCP
, this->pixel_w
, wpos_w
));
1618 this->current_annotation
= NULL
;
1622 fs_visitor::emit_fb_writes()
1624 this->current_annotation
= "FB write header";
1630 if (c
->key
.aa_dest_stencil_reg
) {
1631 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, nr
++),
1632 fs_reg(brw_vec8_grf(c
->key
.aa_dest_stencil_reg
, 0))));
1635 /* Reserve space for color. It'll be filled in per MRT below. */
1639 if (c
->key
.source_depth_to_render_target
) {
1640 if (c
->key
.computes_depth
) {
1641 /* Hand over gl_FragDepth. */
1642 assert(this->frag_depth
);
1643 fs_reg depth
= *(variable_storage(this->frag_depth
));
1645 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, nr
++), depth
));
1647 /* Pass through the payload depth. */
1648 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, nr
++),
1649 fs_reg(brw_vec8_grf(c
->key
.source_depth_reg
, 0))));
1653 if (c
->key
.dest_depth_reg
) {
1654 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, nr
++),
1655 fs_reg(brw_vec8_grf(c
->key
.dest_depth_reg
, 0))));
1658 fs_reg color
= reg_undef
;
1659 if (this->frag_color
)
1660 color
= *(variable_storage(this->frag_color
));
1661 else if (this->frag_data
)
1662 color
= *(variable_storage(this->frag_data
));
1664 for (int target
= 0; target
< c
->key
.nr_color_regions
; target
++) {
1665 this->current_annotation
= talloc_asprintf(this->mem_ctx
,
1666 "FB write target %d",
1668 if (this->frag_color
|| this->frag_data
) {
1669 for (int i
= 0; i
< 4; i
++) {
1670 emit(fs_inst(BRW_OPCODE_MOV
,
1671 fs_reg(MRF
, color_mrf
+ i
),
1677 if (this->frag_color
)
1678 color
.reg_offset
-= 4;
1680 fs_inst
*inst
= emit(fs_inst(FS_OPCODE_FB_WRITE
,
1681 reg_undef
, reg_undef
));
1682 inst
->target
= target
;
1684 if (target
== c
->key
.nr_color_regions
- 1)
1688 if (c
->key
.nr_color_regions
== 0) {
1689 fs_inst
*inst
= emit(fs_inst(FS_OPCODE_FB_WRITE
,
1690 reg_undef
, reg_undef
));
1695 this->current_annotation
= NULL
;
1699 fs_visitor::generate_fb_write(fs_inst
*inst
)
1701 GLboolean eot
= inst
->eot
;
1703 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
1706 brw_push_insn_state(p
);
1707 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1708 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1711 brw_vec8_grf(1, 0));
1712 brw_pop_insn_state(p
);
1715 8, /* dispatch_width */
1716 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
1718 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1726 fs_visitor::generate_linterp(fs_inst
*inst
,
1727 struct brw_reg dst
, struct brw_reg
*src
)
1729 struct brw_reg delta_x
= src
[0];
1730 struct brw_reg delta_y
= src
[1];
1731 struct brw_reg interp
= src
[2];
1734 delta_y
.nr
== delta_x
.nr
+ 1 &&
1735 (intel
->gen
>= 6 || (delta_x
.nr
& 1) == 0)) {
1736 brw_PLN(p
, dst
, interp
, delta_x
);
1738 brw_LINE(p
, brw_null_reg(), interp
, delta_x
);
1739 brw_MAC(p
, dst
, suboffset(interp
, 1), delta_y
);
1744 fs_visitor::generate_math(fs_inst
*inst
,
1745 struct brw_reg dst
, struct brw_reg
*src
)
1749 switch (inst
->opcode
) {
1751 op
= BRW_MATH_FUNCTION_INV
;
1754 op
= BRW_MATH_FUNCTION_RSQ
;
1756 case FS_OPCODE_SQRT
:
1757 op
= BRW_MATH_FUNCTION_SQRT
;
1759 case FS_OPCODE_EXP2
:
1760 op
= BRW_MATH_FUNCTION_EXP
;
1762 case FS_OPCODE_LOG2
:
1763 op
= BRW_MATH_FUNCTION_LOG
;
1766 op
= BRW_MATH_FUNCTION_POW
;
1769 op
= BRW_MATH_FUNCTION_SIN
;
1772 op
= BRW_MATH_FUNCTION_COS
;
1775 assert(!"not reached: unknown math function");
1780 if (inst
->opcode
== FS_OPCODE_POW
) {
1781 brw_MOV(p
, brw_message_reg(3), src
[1]);
1786 inst
->saturate
? BRW_MATH_SATURATE_SATURATE
:
1787 BRW_MATH_SATURATE_NONE
,
1789 BRW_MATH_DATA_VECTOR
,
1790 BRW_MATH_PRECISION_FULL
);
1794 fs_visitor::generate_tex(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
)
1799 if (intel
->gen
== 5) {
1800 switch (inst
->opcode
) {
1802 if (inst
->shadow_compare
) {
1803 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5
;
1805 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_GEN5
;
1809 if (inst
->shadow_compare
) {
1810 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5
;
1812 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5
;
1817 switch (inst
->opcode
) {
1819 /* Note that G45 and older determines shadow compare and dispatch width
1820 * from message length for most messages.
1822 if (inst
->shadow_compare
) {
1823 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
1825 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
1828 if (inst
->shadow_compare
) {
1829 assert(!"FINISHME: shadow compare with bias.");
1830 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
1832 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
1838 assert(msg_type
!= -1);
1844 retype(dst
, BRW_REGISTER_TYPE_UW
),
1846 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1847 SURF_INDEX_TEXTURE(inst
->sampler
),
1855 BRW_SAMPLER_SIMD_MODE_SIMD8
);
1859 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
1862 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
1864 * and we're trying to produce:
1867 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
1868 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
1869 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
1870 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
1871 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
1872 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
1873 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
1874 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
1876 * and add another set of two more subspans if in 16-pixel dispatch mode.
1878 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
1879 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
1880 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
1881 * between each other. We could probably do it like ddx and swizzle the right
1882 * order later, but bail for now and just produce
1883 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
/* Emit DDX: per the subspan layout described in the comment above, take the
 * top-right pixel minus the top-left pixel of each 2x2 subspan by building
 * two strided views of the same source GRF (suboffset 1 vs. 0, vertical
 * stride 2) and subtracting them with a single ADD.
 * NOTE(review): the BRW_WIDTH_* arguments of both brw_reg() calls (original
 * lines 1891 and 1897) were dropped by this extraction — presumably
 * BRW_WIDTH_2 to pair with VERTICAL_STRIDE_2; confirm against the original.
 * The return type and braces are also missing from view.
 */
1886 fs_visitor::generate_ddx(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
)
1888 struct brw_reg src0
= brw_reg(src
.file
, src
.nr
, 1,
1889 BRW_REGISTER_TYPE_F
,
1890 BRW_VERTICAL_STRIDE_2
,
1892 BRW_HORIZONTAL_STRIDE_0
,
1893 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
1894 struct brw_reg src1
= brw_reg(src
.file
, src
.nr
, 0,
1895 BRW_REGISTER_TYPE_F
,
1896 BRW_VERTICAL_STRIDE_2
,
1898 BRW_HORIZONTAL_STRIDE_0
,
1899 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
1900 brw_ADD(p
, dst
, src0
, negate(src1
));
/* Emit DDY: per the bail-out noted in the comment above, compute
 * (top-left - bottom-left) replicated across each subspan, using two views
 * of the source at suboffsets 0 and 2 with vertical stride 4, subtracted
 * with one ADD.
 * NOTE(review): the BRW_WIDTH_* arguments of both brw_reg() calls (original
 * lines 1909 and 1915) were dropped by this extraction — presumably
 * BRW_WIDTH_4 to pair with VERTICAL_STRIDE_4; confirm against the original.
 * The return type and braces are also missing from view.
 */
1904 fs_visitor::generate_ddy(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
)
1906 struct brw_reg src0
= brw_reg(src
.file
, src
.nr
, 0,
1907 BRW_REGISTER_TYPE_F
,
1908 BRW_VERTICAL_STRIDE_4
,
1910 BRW_HORIZONTAL_STRIDE_0
,
1911 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
1912 struct brw_reg src1
= brw_reg(src
.file
, src
.nr
, 2,
1913 BRW_REGISTER_TYPE_F
,
1914 BRW_VERTICAL_STRIDE_4
,
1916 BRW_HORIZONTAL_STRIDE_0
,
1917 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
1918 brw_ADD(p
, dst
, src0
, negate(src1
));
1922 fs_visitor::generate_discard(fs_inst
*inst
, struct brw_reg temp
)
1924 struct brw_reg g0
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1925 temp
= brw_uw1_reg(temp
.file
, temp
.nr
, 0);
1927 brw_push_insn_state(p
);
1928 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1929 brw_NOT(p
, temp
, brw_mask_reg(1)); /* IMASK */
1930 brw_AND(p
, g0
, temp
, g0
);
1931 brw_pop_insn_state(p
);
1935 fs_visitor::assign_curb_setup()
1937 c
->prog_data
.first_curbe_grf
= c
->key
.nr_payload_regs
;
1938 c
->prog_data
.curb_read_length
= ALIGN(c
->prog_data
.nr_params
, 8) / 8;
1940 if (intel
->gen
== 5 && (c
->prog_data
.first_curbe_grf
+
1941 c
->prog_data
.curb_read_length
) & 1) {
1942 /* Align the start of the interpolation coefficients so that we can use
1943 * the PLN instruction.
1945 c
->prog_data
.first_curbe_grf
++;
1948 /* Map the offsets in the UNIFORM file to fixed HW regs. */
1949 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
1950 fs_inst
*inst
= (fs_inst
*)iter
.get();
1952 for (unsigned int i
= 0; i
< 3; i
++) {
1953 if (inst
->src
[i
].file
== UNIFORM
) {
1954 int constant_nr
= inst
->src
[i
].hw_reg
+ inst
->src
[i
].reg_offset
;
1955 struct brw_reg brw_reg
= brw_vec1_grf(c
->prog_data
.first_curbe_grf
+
1959 inst
->src
[i
].file
= FIXED_HW_REG
;
1960 inst
->src
[i
].fixed_hw_reg
= brw_reg
;
1967 fs_visitor::assign_urb_setup()
1969 int urb_start
= c
->prog_data
.first_curbe_grf
+ c
->prog_data
.curb_read_length
;
1970 int interp_reg_nr
[FRAG_ATTRIB_MAX
];
1972 c
->prog_data
.urb_read_length
= 0;
1974 /* Figure out where each of the incoming setup attributes lands. */
1975 for (unsigned int i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
1976 interp_reg_nr
[i
] = -1;
1978 if (i
!= FRAG_ATTRIB_WPOS
&&
1979 !(brw
->fragment_program
->Base
.InputsRead
& BITFIELD64_BIT(i
)))
1982 /* Each attribute is 4 setup channels, each of which is half a reg. */
1983 interp_reg_nr
[i
] = urb_start
+ c
->prog_data
.urb_read_length
;
1984 c
->prog_data
.urb_read_length
+= 2;
1987 /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
1988 * the correct setup input.
1990 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
1991 fs_inst
*inst
= (fs_inst
*)iter
.get();
1993 if (inst
->opcode
!= FS_OPCODE_LINTERP
)
1996 assert(inst
->src
[2].file
== FIXED_HW_REG
);
1998 int location
= inst
->src
[2].fixed_hw_reg
.nr
/ 2;
1999 assert(interp_reg_nr
[location
] != -1);
2000 inst
->src
[2].fixed_hw_reg
.nr
= (interp_reg_nr
[location
] +
2001 (inst
->src
[2].fixed_hw_reg
.nr
& 1));
2004 this->first_non_payload_grf
= urb_start
+ c
->prog_data
.urb_read_length
;
/* Rewrite one virtual-GRF reference to its allocated hardware register,
 * using the mapping table built by the register allocator. Register 0 is
 * the reserved placeholder and is left untouched, as are non-GRF files.
 * NOTE(review): the storage class/return type, braces, and at least one
 * trailing statement (original line 2012) were dropped by this extraction;
 * only the fragments below survive.
 */
2008 assign_reg(int *reg_hw_locations
, fs_reg
*reg
)
2010 if (reg
->file
== GRF
&& reg
->reg
!= 0) {
2011 reg
->hw_reg
= reg_hw_locations
[reg
->reg
] + reg
->reg_offset
;
2017 fs_visitor::assign_regs_trivial()
2020 int hw_reg_mapping
[this->virtual_grf_next
];
2023 hw_reg_mapping
[0] = 0;
2024 hw_reg_mapping
[1] = this->first_non_payload_grf
;
2025 for (i
= 2; i
< this->virtual_grf_next
; i
++) {
2026 hw_reg_mapping
[i
] = (hw_reg_mapping
[i
- 1] +
2027 this->virtual_grf_sizes
[i
- 1]);
2029 last_grf
= hw_reg_mapping
[i
- 1] + this->virtual_grf_sizes
[i
- 1];
2031 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
2032 fs_inst
*inst
= (fs_inst
*)iter
.get();
2034 assign_reg(hw_reg_mapping
, &inst
->dst
);
2035 assign_reg(hw_reg_mapping
, &inst
->src
[0]);
2036 assign_reg(hw_reg_mapping
, &inst
->src
[1]);
2039 this->grf_used
= last_grf
+ 1;
2043 fs_visitor::assign_regs()
2046 int hw_reg_mapping
[this->virtual_grf_next
+ 1];
2047 int base_reg_count
= BRW_MAX_GRF
- this->first_non_payload_grf
;
2048 int class_sizes
[base_reg_count
];
2049 int class_count
= 0;
2051 calculate_live_intervals();
2053 /* Set up the register classes.
2055 * The base registers store a scalar value. For texture samples,
2056 * we get virtual GRFs composed of 4 contiguous hw register. For
2057 * structures and arrays, we store them as contiguous larger things
2058 * than that, though we should be able to do better most of the
2061 class_sizes
[class_count
++] = 1;
2062 for (int r
= 1; r
< this->virtual_grf_next
; r
++) {
2065 for (i
= 0; i
< class_count
; i
++) {
2066 if (class_sizes
[i
] == this->virtual_grf_sizes
[r
])
2069 if (i
== class_count
) {
2070 class_sizes
[class_count
++] = this->virtual_grf_sizes
[r
];
2074 int ra_reg_count
= 0;
2075 int class_base_reg
[class_count
];
2076 int class_reg_count
[class_count
];
2077 int classes
[class_count
];
2079 for (int i
= 0; i
< class_count
; i
++) {
2080 class_base_reg
[i
] = ra_reg_count
;
2081 class_reg_count
[i
] = base_reg_count
- (class_sizes
[i
] - 1);
2082 ra_reg_count
+= class_reg_count
[i
];
2085 struct ra_regs
*regs
= ra_alloc_reg_set(ra_reg_count
);
2086 for (int i
= 0; i
< class_count
; i
++) {
2087 classes
[i
] = ra_alloc_reg_class(regs
);
2089 for (int i_r
= 0; i_r
< class_reg_count
[i
]; i_r
++) {
2090 ra_class_add_reg(regs
, classes
[i
], class_base_reg
[i
] + i_r
);
2093 /* Add conflicts between our contiguous registers aliasing
2094 * base regs and other register classes' contiguous registers
2095 * that alias base regs, or the base regs themselves for classes[0].
2097 for (int c
= 0; c
<= i
; c
++) {
2098 for (int i_r
= 0; i_r
< class_reg_count
[i
] - 1; i_r
++) {
2099 for (int c_r
= MAX2(0, i_r
- (class_sizes
[c
] - 1));
2100 c_r
<= MIN2(class_reg_count
[c
] - 1, i_r
+ class_sizes
[i
] - 1);
2104 printf("%d/%d conflicts %d/%d\n",
2105 class_sizes
[i
], i_r
,
2106 class_sizes
[c
], c_r
);
2109 ra_add_reg_conflict(regs
,
2110 class_base_reg
[i
] + i_r
,
2111 class_base_reg
[c
] + c_r
);
2117 ra_set_finalize(regs
);
2119 struct ra_graph
*g
= ra_alloc_interference_graph(regs
,
2120 this->virtual_grf_next
);
2121 /* Node 0 is just a placeholder to keep virtual_grf[] mapping 1:1
2124 ra_set_node_class(g
, 0, classes
[0]);
2126 for (int i
= 1; i
< this->virtual_grf_next
; i
++) {
2127 for (int c
= 0; c
< class_count
; c
++) {
2128 if (class_sizes
[c
] == this->virtual_grf_sizes
[i
]) {
2129 ra_set_node_class(g
, i
, classes
[c
]);
2134 for (int j
= 1; j
< i
; j
++) {
2135 if (virtual_grf_interferes(i
, j
)) {
2136 ra_add_node_interference(g
, i
, j
);
2141 /* FINISHME: Handle spilling */
2142 if (!ra_allocate_no_spills(g
)) {
2143 fprintf(stderr
, "Failed to allocate registers.\n");
2148 /* Get the chosen virtual registers for each node, and map virtual
2149 * regs in the register classes back down to real hardware reg
2152 hw_reg_mapping
[0] = 0; /* unused */
2153 for (int i
= 1; i
< this->virtual_grf_next
; i
++) {
2154 int reg
= ra_get_node_reg(g
, i
);
2157 for (int c
= 0; c
< class_count
; c
++) {
2158 if (reg
>= class_base_reg
[c
] &&
2159 reg
< class_base_reg
[c
] + class_reg_count
[c
] - 1) {
2160 hw_reg
= reg
- class_base_reg
[c
];
2165 assert(hw_reg
!= -1);
2166 hw_reg_mapping
[i
] = this->first_non_payload_grf
+ hw_reg
;
2167 last_grf
= MAX2(last_grf
,
2168 hw_reg_mapping
[i
] + this->virtual_grf_sizes
[i
] - 1);
2171 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
2172 fs_inst
*inst
= (fs_inst
*)iter
.get();
2174 assign_reg(hw_reg_mapping
, &inst
->dst
);
2175 assign_reg(hw_reg_mapping
, &inst
->src
[0]);
2176 assign_reg(hw_reg_mapping
, &inst
->src
[1]);
2179 this->grf_used
= last_grf
+ 1;
2186 fs_visitor::calculate_live_intervals()
2188 int num_vars
= this->virtual_grf_next
;
2189 int *def
= talloc_array(mem_ctx
, int, num_vars
);
2190 int *use
= talloc_array(mem_ctx
, int, num_vars
);
2194 for (int i
= 0; i
< num_vars
; i
++) {
2200 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
2201 fs_inst
*inst
= (fs_inst
*)iter
.get();
2203 if (inst
->opcode
== BRW_OPCODE_DO
) {
2204 if (loop_depth
++ == 0)
2206 } else if (inst
->opcode
== BRW_OPCODE_WHILE
) {
2209 if (loop_depth
== 0) {
2212 * Patches up any vars marked for use within the loop as
2213 * live until the end. This is conservative, as there
2214 * will often be variables defined and used inside the
2215 * loop but dead at the end of the loop body.
2217 for (int i
= 0; i
< num_vars
; i
++) {
2218 if (use
[i
] == loop_start
) {
2229 for (unsigned int i
= 0; i
< 3; i
++) {
2230 if (inst
->src
[i
].file
== GRF
&& inst
->src
[i
].reg
!= 0) {
2231 def
[inst
->src
[i
].reg
] = MIN2(def
[inst
->src
[i
].reg
], eip
);
2232 use
[inst
->src
[i
].reg
] = MAX2(use
[inst
->src
[i
].reg
], eip
);
2235 if (inst
->dst
.file
== GRF
&& inst
->dst
.reg
!= 0) {
2236 def
[inst
->dst
.reg
] = MIN2(def
[inst
->dst
.reg
], eip
);
2237 use
[inst
->dst
.reg
] = MAX2(use
[inst
->dst
.reg
], eip
);
2244 this->virtual_grf_def
= def
;
2245 this->virtual_grf_use
= use
;
2249 fs_visitor::virtual_grf_interferes(int a
, int b
)
2251 int start
= MAX2(this->virtual_grf_def
[a
], this->virtual_grf_def
[b
]);
2252 int end
= MIN2(this->virtual_grf_use
[a
], this->virtual_grf_use
[b
]);
2254 return start
<= end
;
2257 static struct brw_reg
brw_reg_from_fs_reg(fs_reg
*reg
)
2259 struct brw_reg brw_reg
;
2261 switch (reg
->file
) {
2265 brw_reg
= brw_vec8_reg(reg
->file
,
2267 brw_reg
= retype(brw_reg
, reg
->type
);
2270 switch (reg
->type
) {
2271 case BRW_REGISTER_TYPE_F
:
2272 brw_reg
= brw_imm_f(reg
->imm
.f
);
2274 case BRW_REGISTER_TYPE_D
:
2275 brw_reg
= brw_imm_d(reg
->imm
.i
);
2277 case BRW_REGISTER_TYPE_UD
:
2278 brw_reg
= brw_imm_ud(reg
->imm
.u
);
2281 assert(!"not reached");
2286 brw_reg
= reg
->fixed_hw_reg
;
2289 /* Probably unused. */
2290 brw_reg
= brw_null_reg();
2293 assert(!"not reached");
2294 brw_reg
= brw_null_reg();
2298 brw_reg
= brw_abs(brw_reg
);
2300 brw_reg
= negate(brw_reg
);
2306 fs_visitor::generate_code()
2308 unsigned int annotation_len
= 0;
2309 int last_native_inst
= 0;
2310 struct brw_instruction
*if_stack
[16], *loop_stack
[16];
2311 int if_stack_depth
= 0, loop_stack_depth
= 0;
2312 int if_depth_in_loop
[16];
2314 if_depth_in_loop
[loop_stack_depth
] = 0;
2316 memset(&if_stack
, 0, sizeof(if_stack
));
2317 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
2318 fs_inst
*inst
= (fs_inst
*)iter
.get();
2319 struct brw_reg src
[3], dst
;
2321 for (unsigned int i
= 0; i
< 3; i
++) {
2322 src
[i
] = brw_reg_from_fs_reg(&inst
->src
[i
]);
2324 dst
= brw_reg_from_fs_reg(&inst
->dst
);
2326 brw_set_conditionalmod(p
, inst
->conditional_mod
);
2327 brw_set_predicate_control(p
, inst
->predicated
);
2329 switch (inst
->opcode
) {
2330 case BRW_OPCODE_MOV
:
2331 brw_MOV(p
, dst
, src
[0]);
2333 case BRW_OPCODE_ADD
:
2334 brw_ADD(p
, dst
, src
[0], src
[1]);
2336 case BRW_OPCODE_MUL
:
2337 brw_MUL(p
, dst
, src
[0], src
[1]);
2340 case BRW_OPCODE_FRC
:
2341 brw_FRC(p
, dst
, src
[0]);
2343 case BRW_OPCODE_RNDD
:
2344 brw_RNDD(p
, dst
, src
[0]);
2346 case BRW_OPCODE_RNDZ
:
2347 brw_RNDZ(p
, dst
, src
[0]);
2350 case BRW_OPCODE_AND
:
2351 brw_AND(p
, dst
, src
[0], src
[1]);
2354 brw_OR(p
, dst
, src
[0], src
[1]);
2356 case BRW_OPCODE_XOR
:
2357 brw_XOR(p
, dst
, src
[0], src
[1]);
2360 case BRW_OPCODE_CMP
:
2361 brw_CMP(p
, dst
, inst
->conditional_mod
, src
[0], src
[1]);
2363 case BRW_OPCODE_SEL
:
2364 brw_SEL(p
, dst
, src
[0], src
[1]);
2368 assert(if_stack_depth
< 16);
2369 if_stack
[if_stack_depth
] = brw_IF(p
, BRW_EXECUTE_8
);
2370 if_depth_in_loop
[loop_stack_depth
]++;
2373 case BRW_OPCODE_ELSE
:
2374 if_stack
[if_stack_depth
- 1] =
2375 brw_ELSE(p
, if_stack
[if_stack_depth
- 1]);
2377 case BRW_OPCODE_ENDIF
:
2379 brw_ENDIF(p
, if_stack
[if_stack_depth
]);
2380 if_depth_in_loop
[loop_stack_depth
]--;
2384 loop_stack
[loop_stack_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2385 if_depth_in_loop
[loop_stack_depth
] = 0;
2388 case BRW_OPCODE_BREAK
:
2389 brw_BREAK(p
, if_depth_in_loop
[loop_stack_depth
]);
2390 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2392 case BRW_OPCODE_CONTINUE
:
2393 brw_CONT(p
, if_depth_in_loop
[loop_stack_depth
]);
2394 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2397 case BRW_OPCODE_WHILE
: {
2398 struct brw_instruction
*inst0
, *inst1
;
2401 if (intel
->gen
== 5)
2404 assert(loop_stack_depth
> 0);
2406 inst0
= inst1
= brw_WHILE(p
, loop_stack
[loop_stack_depth
]);
2407 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2408 while (inst0
> loop_stack
[loop_stack_depth
]) {
2410 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
&&
2411 inst0
->bits3
.if_else
.jump_count
== 0) {
2412 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
2414 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
2415 inst0
->bits3
.if_else
.jump_count
== 0) {
2416 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
2424 case FS_OPCODE_SQRT
:
2425 case FS_OPCODE_EXP2
:
2426 case FS_OPCODE_LOG2
:
2430 generate_math(inst
, dst
, src
);
2432 case FS_OPCODE_LINTERP
:
2433 generate_linterp(inst
, dst
, src
);
2438 generate_tex(inst
, dst
, src
[0]);
2440 case FS_OPCODE_DISCARD
:
2441 generate_discard(inst
, dst
/* src0 == dst */);
2444 generate_ddx(inst
, dst
, src
[0]);
2447 generate_ddy(inst
, dst
, src
[0]);
2449 case FS_OPCODE_FB_WRITE
:
2450 generate_fb_write(inst
);
2453 if (inst
->opcode
< (int)ARRAY_SIZE(brw_opcodes
)) {
2454 _mesa_problem(ctx
, "Unsupported opcode `%s' in FS",
2455 brw_opcodes
[inst
->opcode
].name
);
2457 _mesa_problem(ctx
, "Unsupported opcode %d in FS", inst
->opcode
);
2462 if (annotation_len
< p
->nr_insn
) {
2463 annotation_len
*= 2;
2464 if (annotation_len
< 16)
2465 annotation_len
= 16;
2467 this->annotation_string
= talloc_realloc(this->mem_ctx
,
2471 this->annotation_ir
= talloc_realloc(this->mem_ctx
,
2477 for (unsigned int i
= last_native_inst
; i
< p
->nr_insn
; i
++) {
2478 this->annotation_string
[i
] = inst
->annotation
;
2479 this->annotation_ir
[i
] = inst
->ir
;
2481 last_native_inst
= p
->nr_insn
;
2486 brw_wm_fs_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2488 struct brw_compile
*p
= &c
->func
;
2489 struct intel_context
*intel
= &brw
->intel
;
2490 GLcontext
*ctx
= &intel
->ctx
;
2491 struct brw_shader
*shader
= NULL
;
2492 struct gl_shader_program
*prog
= ctx
->Shader
.CurrentProgram
;
2500 for (unsigned int i
= 0; i
< prog
->_NumLinkedShaders
; i
++) {
2501 if (prog
->_LinkedShaders
[i
]->Type
== GL_FRAGMENT_SHADER
) {
2502 shader
= (struct brw_shader
*)prog
->_LinkedShaders
[i
];
2509 /* We always use 8-wide mode, at least for now. For one, flow
2510 * control only works in 8-wide. Also, when we're fragment shader
2511 * bound, we're almost always under register pressure as well, so
2512 * 8-wide would save us from the performance cliff of spilling
2515 c
->dispatch_width
= 8;
2517 if (INTEL_DEBUG
& DEBUG_WM
) {
2518 printf("GLSL IR for native fragment shader %d:\n", prog
->Name
);
2519 _mesa_print_ir(shader
->ir
, NULL
);
2523 /* Now the main event: Visit the shader IR and generate our FS IR for it.
2525 fs_visitor
v(c
, shader
);
2530 v
.emit_interpolation_setup();
2532 /* Generate FS IR for main(). (the visitor only descends into
2533 * functions called "main").
2535 foreach_iter(exec_list_iterator
, iter
, *shader
->ir
) {
2536 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
2542 v
.assign_curb_setup();
2543 v
.assign_urb_setup();
2545 v
.assign_regs_trivial();
2552 assert(!v
.fail
); /* FINISHME: Cleanly fail, tested at link time, etc. */
2557 if (INTEL_DEBUG
& DEBUG_WM
) {
2558 const char *last_annotation_string
= NULL
;
2559 ir_instruction
*last_annotation_ir
= NULL
;
2561 printf("Native code for fragment shader %d:\n", prog
->Name
);
2562 for (unsigned int i
= 0; i
< p
->nr_insn
; i
++) {
2563 if (last_annotation_ir
!= v
.annotation_ir
[i
]) {
2564 last_annotation_ir
= v
.annotation_ir
[i
];
2565 if (last_annotation_ir
) {
2567 last_annotation_ir
->print();
2571 if (last_annotation_string
!= v
.annotation_string
[i
]) {
2572 last_annotation_string
= v
.annotation_string
[i
];
2573 if (last_annotation_string
)
2574 printf(" %s\n", last_annotation_string
);
2576 brw_disasm(stdout
, &p
->store
[i
], intel
->gen
);
2581 c
->prog_data
.total_grf
= v
.grf_used
;
2582 c
->prog_data
.total_scratch
= 0;