2 * Copyright © 2010 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
24 * Eric Anholt <eric@anholt.net>
30 #include <sys/types.h>
32 #include "main/macros.h"
33 #include "main/shaderobj.h"
34 #include "main/uniforms.h"
35 #include "program/prog_parameter.h"
36 #include "program/prog_print.h"
37 #include "program/prog_optimize.h"
38 #include "program/sampler.h"
39 #include "program/hash_table.h"
40 #include "brw_context.h"
45 #include "../glsl/glsl_types.h"
46 #include "../glsl/ir_optimization.h"
47 #include "../glsl/ir_print_visitor.h"
50 ARF
= BRW_ARCHITECTURE_REGISTER_FILE
,
51 GRF
= BRW_GENERAL_REGISTER_FILE
,
52 MRF
= BRW_MESSAGE_REGISTER_FILE
,
53 IMM
= BRW_IMMEDIATE_VALUE
,
54 FIXED_HW_REG
, /* a struct brw_reg */
55 UNIFORM
, /* prog_data->params[hw_reg] */
60 FS_OPCODE_FB_WRITE
= 256,
78 static int using_new_fs
= -1;
79 static struct brw_reg
brw_reg_from_fs_reg(class fs_reg
*reg
);
82 brw_new_shader(GLcontext
*ctx
, GLuint name
, GLuint type
)
84 struct brw_shader
*shader
;
86 shader
= talloc_zero(NULL
, struct brw_shader
);
88 shader
->base
.Type
= type
;
89 shader
->base
.Name
= name
;
90 _mesa_init_shader(ctx
, &shader
->base
);
96 struct gl_shader_program
*
97 brw_new_shader_program(GLcontext
*ctx
, GLuint name
)
99 struct brw_shader_program
*prog
;
100 prog
= talloc_zero(NULL
, struct brw_shader_program
);
102 prog
->base
.Name
= name
;
103 _mesa_init_shader_program(ctx
, &prog
->base
);
109 brw_compile_shader(GLcontext
*ctx
, struct gl_shader
*shader
)
111 if (!_mesa_ir_compile_shader(ctx
, shader
))
118 brw_link_shader(GLcontext
*ctx
, struct gl_shader_program
*prog
)
120 if (using_new_fs
== -1)
121 using_new_fs
= getenv("INTEL_NEW_FS") != NULL
;
123 for (unsigned i
= 0; i
< prog
->_NumLinkedShaders
; i
++) {
124 struct brw_shader
*shader
= (struct brw_shader
*)prog
->_LinkedShaders
[i
];
126 if (using_new_fs
&& shader
->base
.Type
== GL_FRAGMENT_SHADER
) {
127 void *mem_ctx
= talloc_new(NULL
);
131 talloc_free(shader
->ir
);
132 shader
->ir
= new(shader
) exec_list
;
133 clone_ir_list(mem_ctx
, shader
->ir
, shader
->base
.ir
);
135 do_mat_op_to_vec(shader
->ir
);
136 do_mod_to_fract(shader
->ir
);
137 do_div_to_mul_rcp(shader
->ir
);
138 do_sub_to_add_neg(shader
->ir
);
139 do_explog_to_explog2(shader
->ir
);
144 brw_do_channel_expressions(shader
->ir
);
145 brw_do_vector_splitting(shader
->ir
);
147 progress
= do_lower_jumps(shader
->ir
, true, true,
148 true, /* main return */
149 false, /* continue */
153 progress
= do_common_optimization(shader
->ir
, true, 32) || progress
;
155 progress
= lower_noise(shader
->ir
) || progress
;
157 lower_variable_index_to_cond_assign(shader
->ir
,
159 GL_TRUE
, /* output */
161 GL_TRUE
/* uniform */
165 validate_ir_tree(shader
->ir
);
167 reparent_ir(shader
->ir
, shader
->ir
);
168 talloc_free(mem_ctx
);
172 if (!_mesa_ir_link_shader(ctx
, prog
))
179 type_size(const struct glsl_type
*type
)
181 unsigned int size
, i
;
183 switch (type
->base_type
) {
186 case GLSL_TYPE_FLOAT
:
188 return type
->components();
189 case GLSL_TYPE_ARRAY
:
190 return type_size(type
->fields
.array
) * type
->length
;
191 case GLSL_TYPE_STRUCT
:
193 for (i
= 0; i
< type
->length
; i
++) {
194 size
+= type_size(type
->fields
.structure
[i
].type
);
197 case GLSL_TYPE_SAMPLER
:
198 /* Samplers take up no register space, since they're baked in at
203 assert(!"not reached");
210 /* Callers of this talloc-based new need not call delete. It's
211 * easier to just talloc_free 'ctx' (or any of its ancestors). */
212 static void* operator new(size_t size
, void *ctx
)
216 node
= talloc_size(ctx
, size
);
217 assert(node
!= NULL
);
225 this->reg_offset
= 0;
231 /** Generic unset register constructor. */
235 this->file
= BAD_FILE
;
238 /** Immediate value constructor. */
243 this->type
= BRW_REGISTER_TYPE_F
;
247 /** Immediate value constructor. */
252 this->type
= BRW_REGISTER_TYPE_D
;
256 /** Immediate value constructor. */
261 this->type
= BRW_REGISTER_TYPE_UD
;
265 /** Fixed brw_reg Immediate value constructor. */
266 fs_reg(struct brw_reg fixed_hw_reg
)
269 this->file
= FIXED_HW_REG
;
270 this->fixed_hw_reg
= fixed_hw_reg
;
271 this->type
= fixed_hw_reg
.type
;
274 fs_reg(enum register_file file
, int hw_reg
);
275 fs_reg(class fs_visitor
*v
, const struct glsl_type
*type
);
277 /** Register file: ARF, GRF, MRF, IMM. */
278 enum register_file file
;
279 /** virtual register number. 0 = fixed hw reg */
281 /** Offset within the virtual register. */
283 /** HW register number. Generally unset until register allocation. */
285 /** Register type. BRW_REGISTER_TYPE_* */
289 struct brw_reg fixed_hw_reg
;
291 /** Value for file == BRW_IMMMEDIATE_FILE */
299 static const fs_reg reg_undef
;
300 static const fs_reg
reg_null(ARF
, BRW_ARF_NULL
);
302 class fs_inst
: public exec_node
{
304 /* Callers of this talloc-based new need not call delete. It's
305 * easier to just talloc_free 'ctx' (or any of its ancestors). */
306 static void* operator new(size_t size
, void *ctx
)
310 node
= talloc_zero_size(ctx
, size
);
311 assert(node
!= NULL
);
318 this->opcode
= BRW_OPCODE_NOP
;
319 this->saturate
= false;
320 this->conditional_mod
= BRW_CONDITIONAL_NONE
;
321 this->predicated
= false;
325 this->shadow_compare
= false;
336 this->opcode
= opcode
;
339 fs_inst(int opcode
, fs_reg dst
, fs_reg src0
)
342 this->opcode
= opcode
;
347 fs_inst(int opcode
, fs_reg dst
, fs_reg src0
, fs_reg src1
)
350 this->opcode
= opcode
;
356 fs_inst(int opcode
, fs_reg dst
, fs_reg src0
, fs_reg src1
, fs_reg src2
)
359 this->opcode
= opcode
;
366 int opcode
; /* BRW_OPCODE_* or FS_OPCODE_* */
371 int conditional_mod
; /**< BRW_CONDITIONAL_* */
373 int mlen
; /**< SEND message length */
375 int target
; /**< MRT target. */
380 * Annotation for the generated IR. One of the two can be set.
383 const char *annotation
;
387 class fs_visitor
: public ir_visitor
391 fs_visitor(struct brw_wm_compile
*c
, struct brw_shader
*shader
)
396 this->fp
= brw
->fragment_program
;
397 this->intel
= &brw
->intel
;
398 this->ctx
= &intel
->ctx
;
399 this->mem_ctx
= talloc_new(NULL
);
400 this->shader
= shader
;
402 this->variable_ht
= hash_table_ctor(0,
403 hash_table_pointer_hash
,
404 hash_table_pointer_compare
);
406 this->frag_color
= NULL
;
407 this->frag_data
= NULL
;
408 this->frag_depth
= NULL
;
409 this->first_non_payload_grf
= 0;
411 this->current_annotation
= NULL
;
412 this->annotation_string
= NULL
;
413 this->annotation_ir
= NULL
;
414 this->base_ir
= NULL
;
416 this->virtual_grf_sizes
= NULL
;
417 this->virtual_grf_next
= 1;
418 this->virtual_grf_array_size
= 0;
422 talloc_free(this->mem_ctx
);
423 hash_table_dtor(this->variable_ht
);
426 fs_reg
*variable_storage(ir_variable
*var
);
427 int virtual_grf_alloc(int size
);
429 void visit(ir_variable
*ir
);
430 void visit(ir_assignment
*ir
);
431 void visit(ir_dereference_variable
*ir
);
432 void visit(ir_dereference_record
*ir
);
433 void visit(ir_dereference_array
*ir
);
434 void visit(ir_expression
*ir
);
435 void visit(ir_texture
*ir
);
436 void visit(ir_if
*ir
);
437 void visit(ir_constant
*ir
);
438 void visit(ir_swizzle
*ir
);
439 void visit(ir_return
*ir
);
440 void visit(ir_loop
*ir
);
441 void visit(ir_loop_jump
*ir
);
442 void visit(ir_discard
*ir
);
443 void visit(ir_call
*ir
);
444 void visit(ir_function
*ir
);
445 void visit(ir_function_signature
*ir
);
447 fs_inst
*emit(fs_inst inst
);
448 void assign_curb_setup();
449 void assign_urb_setup();
451 void generate_code();
452 void generate_fb_write(fs_inst
*inst
);
453 void generate_linterp(fs_inst
*inst
, struct brw_reg dst
,
454 struct brw_reg
*src
);
455 void generate_tex(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
);
456 void generate_math(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg
*src
);
457 void generate_discard(fs_inst
*inst
, struct brw_reg temp
);
458 void generate_ddx(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
);
459 void generate_ddy(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
);
461 void emit_dummy_fs();
462 void emit_fragcoord_interpolation(ir_variable
*ir
);
463 void emit_general_interpolation(ir_variable
*ir
);
464 void emit_interpolation_setup();
465 void emit_fb_writes();
467 struct brw_reg
interp_reg(int location
, int channel
);
468 int setup_uniform_values(int loc
, const glsl_type
*type
);
469 void setup_builtin_uniform_values(ir_variable
*ir
);
471 struct brw_context
*brw
;
472 const struct gl_fragment_program
*fp
;
473 struct intel_context
*intel
;
475 struct brw_wm_compile
*c
;
476 struct brw_compile
*p
;
477 struct brw_shader
*shader
;
479 exec_list instructions
;
481 int *virtual_grf_sizes
;
482 int virtual_grf_next
;
483 int virtual_grf_array_size
;
485 struct hash_table
*variable_ht
;
486 ir_variable
*frag_color
, *frag_data
, *frag_depth
;
487 int first_non_payload_grf
;
489 /** @{ debug annotation info */
490 const char *current_annotation
;
491 ir_instruction
*base_ir
;
492 const char **annotation_string
;
493 ir_instruction
**annotation_ir
;
498 /* Result of last visit() method. */
513 fs_visitor::virtual_grf_alloc(int size
)
515 if (virtual_grf_array_size
<= virtual_grf_next
) {
516 if (virtual_grf_array_size
== 0)
517 virtual_grf_array_size
= 16;
519 virtual_grf_array_size
*= 2;
520 virtual_grf_sizes
= talloc_realloc(mem_ctx
, virtual_grf_sizes
,
521 int, virtual_grf_array_size
);
523 /* This slot is always unused. */
524 virtual_grf_sizes
[0] = 0;
526 virtual_grf_sizes
[virtual_grf_next
] = size
;
527 return virtual_grf_next
++;
530 /** Fixed HW reg constructor. */
531 fs_reg::fs_reg(enum register_file file
, int hw_reg
)
535 this->hw_reg
= hw_reg
;
536 this->type
= BRW_REGISTER_TYPE_F
;
540 brw_type_for_base_type(const struct glsl_type
*type
)
542 switch (type
->base_type
) {
543 case GLSL_TYPE_FLOAT
:
544 return BRW_REGISTER_TYPE_F
;
547 return BRW_REGISTER_TYPE_D
;
549 return BRW_REGISTER_TYPE_UD
;
550 case GLSL_TYPE_ARRAY
:
551 case GLSL_TYPE_STRUCT
:
552 /* These should be overridden with the type of the member when
553 * dereferenced into. BRW_REGISTER_TYPE_UD seems like a likely
554 * way to trip up if we don't.
556 return BRW_REGISTER_TYPE_UD
;
558 assert(!"not reached");
559 return BRW_REGISTER_TYPE_F
;
563 /** Automatic reg constructor. */
564 fs_reg::fs_reg(class fs_visitor
*v
, const struct glsl_type
*type
)
569 this->reg
= v
->virtual_grf_alloc(type_size(type
));
570 this->reg_offset
= 0;
571 this->type
= brw_type_for_base_type(type
);
575 fs_visitor::variable_storage(ir_variable
*var
)
577 return (fs_reg
*)hash_table_find(this->variable_ht
, var
);
580 /* Our support for uniforms is piggy-backed on the struct
581 * gl_fragment_program, because that's where the values actually
582 * get stored, rather than in some global gl_shader_program uniform
586 fs_visitor::setup_uniform_values(int loc
, const glsl_type
*type
)
588 unsigned int offset
= 0;
591 if (type
->is_matrix()) {
592 const glsl_type
*column
= glsl_type::get_instance(GLSL_TYPE_FLOAT
,
593 type
->vector_elements
,
596 for (unsigned int i
= 0; i
< type
->matrix_columns
; i
++) {
597 offset
+= setup_uniform_values(loc
+ offset
, column
);
603 switch (type
->base_type
) {
604 case GLSL_TYPE_FLOAT
:
608 vec_values
= fp
->Base
.Parameters
->ParameterValues
[loc
];
609 for (unsigned int i
= 0; i
< type
->vector_elements
; i
++) {
610 c
->prog_data
.param
[c
->prog_data
.nr_params
++] = &vec_values
[i
];
614 case GLSL_TYPE_STRUCT
:
615 for (unsigned int i
= 0; i
< type
->length
; i
++) {
616 offset
+= setup_uniform_values(loc
+ offset
,
617 type
->fields
.structure
[i
].type
);
621 case GLSL_TYPE_ARRAY
:
622 for (unsigned int i
= 0; i
< type
->length
; i
++) {
623 offset
+= setup_uniform_values(loc
+ offset
, type
->fields
.array
);
627 case GLSL_TYPE_SAMPLER
:
628 /* The sampler takes up a slot, but we don't use any values from it. */
632 assert(!"not reached");
638 /* Our support for builtin uniforms is even scarier than non-builtin.
639 * It sits on top of the PROG_STATE_VAR parameters that are
640 * automatically updated from GL context state.
643 fs_visitor::setup_builtin_uniform_values(ir_variable
*ir
)
645 const struct gl_builtin_uniform_desc
*statevar
= NULL
;
647 for (unsigned int i
= 0; _mesa_builtin_uniform_desc
[i
].name
; i
++) {
648 statevar
= &_mesa_builtin_uniform_desc
[i
];
649 if (strcmp(ir
->name
, _mesa_builtin_uniform_desc
[i
].name
) == 0)
653 if (!statevar
->name
) {
655 printf("Failed to find builtin uniform `%s'\n", ir
->name
);
660 if (ir
->type
->is_array()) {
661 array_count
= ir
->type
->length
;
666 for (int a
= 0; a
< array_count
; a
++) {
667 for (unsigned int i
= 0; i
< statevar
->num_elements
; i
++) {
668 struct gl_builtin_uniform_element
*element
= &statevar
->elements
[i
];
669 int tokens
[STATE_LENGTH
];
671 memcpy(tokens
, element
->tokens
, sizeof(element
->tokens
));
672 if (ir
->type
->is_array()) {
676 /* This state reference has already been setup by ir_to_mesa,
677 * but we'll get the same index back here.
679 int index
= _mesa_add_state_reference(this->fp
->Base
.Parameters
,
680 (gl_state_index
*)tokens
);
681 float *vec_values
= this->fp
->Base
.Parameters
->ParameterValues
[index
];
683 /* Add each of the unique swizzles of the element as a
684 * parameter. This'll end up matching the expected layout of
685 * the array/matrix/structure we're trying to fill in.
688 for (unsigned int i
= 0; i
< 4; i
++) {
689 int this_swiz
= GET_SWZ(element
->swizzle
, i
);
690 if (this_swiz
== last_swiz
)
692 last_swiz
= this_swiz
;
694 c
->prog_data
.param
[c
->prog_data
.nr_params
++] = &vec_values
[i
];
701 fs_visitor::emit_fragcoord_interpolation(ir_variable
*ir
)
703 fs_reg
*reg
= new(this->mem_ctx
) fs_reg(this, ir
->type
);
705 fs_reg neg_y
= this->pixel_y
;
709 if (ir
->pixel_center_integer
) {
710 emit(fs_inst(BRW_OPCODE_MOV
, wpos
, this->pixel_x
));
712 emit(fs_inst(BRW_OPCODE_ADD
, wpos
, this->pixel_x
, fs_reg(0.5f
)));
717 if (ir
->origin_upper_left
&& ir
->pixel_center_integer
) {
718 emit(fs_inst(BRW_OPCODE_MOV
, wpos
, this->pixel_y
));
720 fs_reg pixel_y
= this->pixel_y
;
721 float offset
= (ir
->pixel_center_integer
? 0.0 : 0.5);
723 if (!ir
->origin_upper_left
) {
724 pixel_y
.negate
= true;
725 offset
+= c
->key
.drawable_height
- 1.0;
728 emit(fs_inst(BRW_OPCODE_ADD
, wpos
, pixel_y
, fs_reg(offset
)));
733 emit(fs_inst(FS_OPCODE_LINTERP
, wpos
, this->delta_x
, this->delta_y
,
734 interp_reg(FRAG_ATTRIB_WPOS
, 2)));
737 /* gl_FragCoord.w: Already set up in emit_interpolation */
738 emit(fs_inst(BRW_OPCODE_MOV
, wpos
, this->wpos_w
));
740 hash_table_insert(this->variable_ht
, reg
, ir
);
745 fs_visitor::emit_general_interpolation(ir_variable
*ir
)
747 fs_reg
*reg
= new(this->mem_ctx
) fs_reg(this, ir
->type
);
748 /* Interpolation is always in floating point regs. */
749 reg
->type
= BRW_REGISTER_TYPE_F
;
752 unsigned int array_elements
;
753 const glsl_type
*type
;
755 if (ir
->type
->is_array()) {
756 array_elements
= ir
->type
->length
;
757 if (array_elements
== 0) {
760 type
= ir
->type
->fields
.array
;
766 int location
= ir
->location
;
767 for (unsigned int i
= 0; i
< array_elements
; i
++) {
768 for (unsigned int j
= 0; j
< type
->matrix_columns
; j
++) {
769 if (!(fp
->Base
.InputsRead
& BITFIELD64_BIT(location
))) {
770 /* If there's no incoming setup data for this slot, don't
771 * emit interpolation for it (since it's not used, and
772 * we'd fall over later trying to find the setup data.
774 attr
.reg_offset
+= type
->vector_elements
;
778 for (unsigned int c
= 0; c
< type
->vector_elements
; c
++) {
779 struct brw_reg interp
= interp_reg(location
, c
);
780 emit(fs_inst(FS_OPCODE_LINTERP
,
787 attr
.reg_offset
-= type
->vector_elements
;
789 for (unsigned int c
= 0; c
< type
->vector_elements
; c
++) {
790 emit(fs_inst(BRW_OPCODE_MUL
,
800 hash_table_insert(this->variable_ht
, reg
, ir
);
804 fs_visitor::visit(ir_variable
*ir
)
808 if (variable_storage(ir
))
811 if (strcmp(ir
->name
, "gl_FragColor") == 0) {
812 this->frag_color
= ir
;
813 } else if (strcmp(ir
->name
, "gl_FragData") == 0) {
814 this->frag_data
= ir
;
815 } else if (strcmp(ir
->name
, "gl_FragDepth") == 0) {
816 this->frag_depth
= ir
;
819 if (ir
->mode
== ir_var_in
) {
820 if (!strcmp(ir
->name
, "gl_FragCoord")) {
821 emit_fragcoord_interpolation(ir
);
823 } else if (!strcmp(ir
->name
, "gl_FrontFacing")) {
824 reg
= new(this->mem_ctx
) fs_reg(this, ir
->type
);
825 struct brw_reg r1_6ud
= retype(brw_vec1_grf(1, 6), BRW_REGISTER_TYPE_UD
);
826 /* bit 31 is "primitive is back face", so checking < (1 << 31) gives
829 fs_inst
*inst
= emit(fs_inst(BRW_OPCODE_CMP
,
833 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
834 emit(fs_inst(BRW_OPCODE_AND
, *reg
, *reg
, fs_reg(1u)));
836 emit_general_interpolation(ir
);
841 if (ir
->mode
== ir_var_uniform
) {
842 int param_index
= c
->prog_data
.nr_params
;
844 if (!strncmp(ir
->name
, "gl_", 3)) {
845 setup_builtin_uniform_values(ir
);
847 setup_uniform_values(ir
->location
, ir
->type
);
850 reg
= new(this->mem_ctx
) fs_reg(UNIFORM
, param_index
);
854 reg
= new(this->mem_ctx
) fs_reg(this, ir
->type
);
856 hash_table_insert(this->variable_ht
, reg
, ir
);
860 fs_visitor::visit(ir_dereference_variable
*ir
)
862 fs_reg
*reg
= variable_storage(ir
->var
);
867 fs_visitor::visit(ir_dereference_record
*ir
)
869 const glsl_type
*struct_type
= ir
->record
->type
;
871 ir
->record
->accept(this);
873 unsigned int offset
= 0;
874 for (unsigned int i
= 0; i
< struct_type
->length
; i
++) {
875 if (strcmp(struct_type
->fields
.structure
[i
].name
, ir
->field
) == 0)
877 offset
+= type_size(struct_type
->fields
.structure
[i
].type
);
879 this->result
.reg_offset
+= offset
;
880 this->result
.type
= brw_type_for_base_type(ir
->type
);
884 fs_visitor::visit(ir_dereference_array
*ir
)
889 ir
->array
->accept(this);
890 index
= ir
->array_index
->as_constant();
892 element_size
= type_size(ir
->type
);
893 this->result
.type
= brw_type_for_base_type(ir
->type
);
896 assert(this->result
.file
== UNIFORM
||
897 (this->result
.file
== GRF
&&
898 this->result
.reg
!= 0));
899 this->result
.reg_offset
+= index
->value
.i
[0] * element_size
;
901 assert(!"FINISHME: non-constant array element");
906 fs_visitor::visit(ir_expression
*ir
)
908 unsigned int operand
;
913 for (operand
= 0; operand
< ir
->get_num_operands(); operand
++) {
914 ir
->operands
[operand
]->accept(this);
915 if (this->result
.file
== BAD_FILE
) {
917 printf("Failed to get tree for expression operand:\n");
918 ir
->operands
[operand
]->accept(&v
);
921 op
[operand
] = this->result
;
923 /* Matrix expression operands should have been broken down to vector
924 * operations already.
926 assert(!ir
->operands
[operand
]->type
->is_matrix());
927 /* And then those vector operands should have been broken down to scalar.
929 assert(!ir
->operands
[operand
]->type
->is_vector());
932 /* Storage for our result. If our result goes into an assignment, it will
933 * just get copy-propagated out, so no worries.
935 this->result
= fs_reg(this, ir
->type
);
937 switch (ir
->operation
) {
938 case ir_unop_logic_not
:
939 emit(fs_inst(BRW_OPCODE_ADD
, this->result
, op
[0], fs_reg(-1)));
942 op
[0].negate
= !op
[0].negate
;
943 this->result
= op
[0];
947 this->result
= op
[0];
950 temp
= fs_reg(this, ir
->type
);
952 emit(fs_inst(BRW_OPCODE_MOV
, this->result
, fs_reg(0.0f
)));
954 inst
= emit(fs_inst(BRW_OPCODE_CMP
, reg_null
, op
[0], fs_reg(0.0f
)));
955 inst
->conditional_mod
= BRW_CONDITIONAL_G
;
956 inst
= emit(fs_inst(BRW_OPCODE_MOV
, this->result
, fs_reg(1.0f
)));
957 inst
->predicated
= true;
959 inst
= emit(fs_inst(BRW_OPCODE_CMP
, reg_null
, op
[0], fs_reg(0.0f
)));
960 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
961 inst
= emit(fs_inst(BRW_OPCODE_MOV
, this->result
, fs_reg(-1.0f
)));
962 inst
->predicated
= true;
966 emit(fs_inst(FS_OPCODE_RCP
, this->result
, op
[0]));
970 emit(fs_inst(FS_OPCODE_EXP2
, this->result
, op
[0]));
973 emit(fs_inst(FS_OPCODE_LOG2
, this->result
, op
[0]));
977 assert(!"not reached: should be handled by ir_explog_to_explog2");
980 emit(fs_inst(FS_OPCODE_SIN
, this->result
, op
[0]));
983 emit(fs_inst(FS_OPCODE_COS
, this->result
, op
[0]));
987 emit(fs_inst(FS_OPCODE_DDX
, this->result
, op
[0]));
990 emit(fs_inst(FS_OPCODE_DDY
, this->result
, op
[0]));
994 emit(fs_inst(BRW_OPCODE_ADD
, this->result
, op
[0], op
[1]));
997 assert(!"not reached: should be handled by ir_sub_to_add_neg");
1001 emit(fs_inst(BRW_OPCODE_MUL
, this->result
, op
[0], op
[1]));
1004 assert(!"not reached: should be handled by ir_div_to_mul_rcp");
1007 assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
1011 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1012 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
1013 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1015 case ir_binop_greater
:
1016 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1017 inst
->conditional_mod
= BRW_CONDITIONAL_G
;
1018 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1020 case ir_binop_lequal
:
1021 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1022 inst
->conditional_mod
= BRW_CONDITIONAL_LE
;
1023 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1025 case ir_binop_gequal
:
1026 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1027 inst
->conditional_mod
= BRW_CONDITIONAL_GE
;
1028 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1030 case ir_binop_equal
:
1031 case ir_binop_all_equal
: /* same as nequal for scalars */
1032 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1033 inst
->conditional_mod
= BRW_CONDITIONAL_Z
;
1034 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1036 case ir_binop_nequal
:
1037 case ir_binop_any_nequal
: /* same as nequal for scalars */
1038 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1039 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
1040 emit(fs_inst(BRW_OPCODE_AND
, this->result
, this->result
, fs_reg(0x1)));
1043 case ir_binop_logic_xor
:
1044 emit(fs_inst(BRW_OPCODE_XOR
, this->result
, op
[0], op
[1]));
1047 case ir_binop_logic_or
:
1048 emit(fs_inst(BRW_OPCODE_OR
, this->result
, op
[0], op
[1]));
1051 case ir_binop_logic_and
:
1052 emit(fs_inst(BRW_OPCODE_AND
, this->result
, op
[0], op
[1]));
1056 case ir_binop_cross
:
1058 assert(!"not reached: should be handled by brw_fs_channel_expressions");
1062 assert(!"not reached: should be handled by lower_noise");
1066 emit(fs_inst(FS_OPCODE_SQRT
, this->result
, op
[0]));
1070 emit(fs_inst(FS_OPCODE_RSQ
, this->result
, op
[0]));
1076 emit(fs_inst(BRW_OPCODE_MOV
, this->result
, op
[0]));
1079 emit(fs_inst(BRW_OPCODE_MOV
, this->result
, op
[0]));
1083 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], fs_reg(0.0f
)));
1084 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
1087 emit(fs_inst(BRW_OPCODE_RNDD
, this->result
, op
[0]));
1090 op
[0].negate
= ~op
[0].negate
;
1091 inst
= emit(fs_inst(BRW_OPCODE_RNDD
, this->result
, op
[0]));
1092 this->result
.negate
= true;
1095 inst
= emit(fs_inst(BRW_OPCODE_RNDD
, this->result
, op
[0]));
1098 inst
= emit(fs_inst(BRW_OPCODE_FRC
, this->result
, op
[0]));
1102 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1103 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
1105 inst
= emit(fs_inst(BRW_OPCODE_SEL
, this->result
, op
[0], op
[1]));
1106 inst
->predicated
= true;
1109 inst
= emit(fs_inst(BRW_OPCODE_CMP
, this->result
, op
[0], op
[1]));
1110 inst
->conditional_mod
= BRW_CONDITIONAL_G
;
1112 inst
= emit(fs_inst(BRW_OPCODE_SEL
, this->result
, op
[0], op
[1]));
1113 inst
->predicated
= true;
1117 inst
= emit(fs_inst(FS_OPCODE_POW
, this->result
, op
[0], op
[1]));
1120 case ir_unop_bit_not
:
1122 case ir_binop_lshift
:
1123 case ir_binop_rshift
:
1124 case ir_binop_bit_and
:
1125 case ir_binop_bit_xor
:
1126 case ir_binop_bit_or
:
1127 assert(!"GLSL 1.30 features unsupported");
1133 fs_visitor::visit(ir_assignment
*ir
)
1140 /* FINISHME: arrays on the lhs */
1141 ir
->lhs
->accept(this);
1144 ir
->rhs
->accept(this);
1147 /* FINISHME: This should really set to the correct maximal writemask for each
1148 * FINISHME: component written (in the loops below). This case can only
1149 * FINISHME: occur for matrices, arrays, and structures.
1151 if (ir
->write_mask
== 0) {
1152 assert(!ir
->lhs
->type
->is_scalar() && !ir
->lhs
->type
->is_vector());
1153 write_mask
= WRITEMASK_XYZW
;
1155 assert(ir
->lhs
->type
->is_vector() || ir
->lhs
->type
->is_scalar());
1156 write_mask
= ir
->write_mask
;
1159 assert(l
.file
!= BAD_FILE
);
1160 assert(r
.file
!= BAD_FILE
);
1162 if (ir
->condition
) {
1163 /* Get the condition bool into the predicate. */
1164 ir
->condition
->accept(this);
1165 inst
= emit(fs_inst(BRW_OPCODE_CMP
, reg_null
, this->result
, fs_reg(0)));
1166 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
1169 for (i
= 0; i
< type_size(ir
->lhs
->type
); i
++) {
1170 if (i
>= 4 || (write_mask
& (1 << i
))) {
1171 inst
= emit(fs_inst(BRW_OPCODE_MOV
, l
, r
));
1173 inst
->predicated
= true;
1181 fs_visitor::visit(ir_texture
*ir
)
1184 fs_inst
*inst
= NULL
;
1185 unsigned int mlen
= 0;
1187 ir
->coordinate
->accept(this);
1188 fs_reg coordinate
= this->result
;
1190 if (ir
->projector
) {
1191 fs_reg inv_proj
= fs_reg(this, glsl_type::float_type
);
1193 ir
->projector
->accept(this);
1194 emit(fs_inst(FS_OPCODE_RCP
, inv_proj
, this->result
));
1196 fs_reg proj_coordinate
= fs_reg(this, ir
->coordinate
->type
);
1197 for (unsigned int i
= 0; i
< ir
->coordinate
->type
->vector_elements
; i
++) {
1198 emit(fs_inst(BRW_OPCODE_MUL
, proj_coordinate
, coordinate
, inv_proj
));
1199 coordinate
.reg_offset
++;
1200 proj_coordinate
.reg_offset
++;
1202 proj_coordinate
.reg_offset
= 0;
1204 coordinate
= proj_coordinate
;
1207 for (mlen
= 0; mlen
< ir
->coordinate
->type
->vector_elements
; mlen
++) {
1208 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, base_mrf
+ mlen
), coordinate
));
1209 coordinate
.reg_offset
++;
1212 /* Pre-Ironlake, the 8-wide sampler always took u,v,r. */
1216 if (ir
->shadow_comparitor
) {
1217 /* For shadow comparisons, we have to supply u,v,r. */
1220 ir
->shadow_comparitor
->accept(this);
1221 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, base_mrf
+ mlen
), this->result
));
1225 /* Do we ever want to handle writemasking on texture samples? Is it
1226 * performance relevant?
1228 fs_reg dst
= fs_reg(this, glsl_type::vec4_type
);
1232 inst
= emit(fs_inst(FS_OPCODE_TEX
, dst
, fs_reg(MRF
, base_mrf
)));
1235 ir
->lod_info
.bias
->accept(this);
1236 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, base_mrf
+ mlen
), this->result
));
1239 inst
= emit(fs_inst(FS_OPCODE_TXB
, dst
, fs_reg(MRF
, base_mrf
)));
1242 ir
->lod_info
.lod
->accept(this);
1243 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, base_mrf
+ mlen
), this->result
));
1246 inst
= emit(fs_inst(FS_OPCODE_TXL
, dst
, fs_reg(MRF
, base_mrf
)));
1250 assert(!"GLSL 1.30 features unsupported");
1255 _mesa_get_sampler_uniform_value(ir
->sampler
,
1256 ctx
->Shader
.CurrentProgram
,
1257 &brw
->fragment_program
->Base
);
1258 inst
->sampler
= c
->fp
->program
.Base
.SamplerUnits
[inst
->sampler
];
1262 if (ir
->shadow_comparitor
)
1263 inst
->shadow_compare
= true;
1268 fs_visitor::visit(ir_swizzle
*ir
)
1270 ir
->val
->accept(this);
1271 fs_reg val
= this->result
;
1273 fs_reg result
= fs_reg(this, ir
->type
);
1274 this->result
= result
;
1276 for (unsigned int i
= 0; i
< ir
->type
->vector_elements
; i
++) {
1277 fs_reg channel
= val
;
1295 channel
.reg_offset
+= swiz
;
1296 emit(fs_inst(BRW_OPCODE_MOV
, result
, channel
));
1297 result
.reg_offset
++;
1302 fs_visitor::visit(ir_discard
*ir
)
1304 fs_reg temp
= fs_reg(this, glsl_type::uint_type
);
1306 assert(ir
->condition
== NULL
); /* FINISHME */
1308 emit(fs_inst(FS_OPCODE_DISCARD
, temp
, temp
));
1312 fs_visitor::visit(ir_constant
*ir
)
1314 fs_reg
reg(this, ir
->type
);
1317 for (unsigned int i
= 0; i
< ir
->type
->vector_elements
; i
++) {
1318 switch (ir
->type
->base_type
) {
1319 case GLSL_TYPE_FLOAT
:
1320 emit(fs_inst(BRW_OPCODE_MOV
, reg
, fs_reg(ir
->value
.f
[i
])));
1322 case GLSL_TYPE_UINT
:
1323 emit(fs_inst(BRW_OPCODE_MOV
, reg
, fs_reg(ir
->value
.u
[i
])));
1326 emit(fs_inst(BRW_OPCODE_MOV
, reg
, fs_reg(ir
->value
.i
[i
])));
1328 case GLSL_TYPE_BOOL
:
1329 emit(fs_inst(BRW_OPCODE_MOV
, reg
, fs_reg((int)ir
->value
.b
[i
])));
1332 assert(!"Non-float/uint/int/bool constant");
1339 fs_visitor::visit(ir_if
*ir
)
1343 /* Don't point the annotation at the if statement, because then it plus
1344 * the then and else blocks get printed.
1346 this->base_ir
= ir
->condition
;
1348 /* Generate the condition into the condition code. */
1349 ir
->condition
->accept(this);
1350 inst
= emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(brw_null_reg()), this->result
));
1351 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
1353 inst
= emit(fs_inst(BRW_OPCODE_IF
));
1354 inst
->predicated
= true;
1356 foreach_iter(exec_list_iterator
, iter
, ir
->then_instructions
) {
1357 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
1363 if (!ir
->else_instructions
.is_empty()) {
1364 emit(fs_inst(BRW_OPCODE_ELSE
));
1366 foreach_iter(exec_list_iterator
, iter
, ir
->else_instructions
) {
1367 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
1374 emit(fs_inst(BRW_OPCODE_ENDIF
));
1378 fs_visitor::visit(ir_loop
*ir
)
1380 fs_reg counter
= reg_undef
;
1383 this->base_ir
= ir
->counter
;
1384 ir
->counter
->accept(this);
1385 counter
= *(variable_storage(ir
->counter
));
1388 this->base_ir
= ir
->from
;
1389 ir
->from
->accept(this);
1391 emit(fs_inst(BRW_OPCODE_MOV
, counter
, this->result
));
1395 /* Start a safety counter. If the user messed up their loop
1396 * counting, we don't want to hang the GPU.
1398 fs_reg max_iter
= fs_reg(this, glsl_type::int_type
);
1399 emit(fs_inst(BRW_OPCODE_MOV
, max_iter
, fs_reg(10000)));
1401 emit(fs_inst(BRW_OPCODE_DO
));
1404 this->base_ir
= ir
->to
;
1405 ir
->to
->accept(this);
1407 fs_inst
*inst
= emit(fs_inst(BRW_OPCODE_CMP
, reg_null
,
1408 counter
, this->result
));
1410 case ir_binop_equal
:
1411 inst
->conditional_mod
= BRW_CONDITIONAL_Z
;
1413 case ir_binop_nequal
:
1414 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
1416 case ir_binop_gequal
:
1417 inst
->conditional_mod
= BRW_CONDITIONAL_GE
;
1419 case ir_binop_lequal
:
1420 inst
->conditional_mod
= BRW_CONDITIONAL_LE
;
1422 case ir_binop_greater
:
1423 inst
->conditional_mod
= BRW_CONDITIONAL_G
;
1426 inst
->conditional_mod
= BRW_CONDITIONAL_L
;
1429 assert(!"not reached: unknown loop condition");
1434 inst
= emit(fs_inst(BRW_OPCODE_BREAK
));
1435 inst
->predicated
= true;
1438 foreach_iter(exec_list_iterator
, iter
, ir
->body_instructions
) {
1439 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
1445 /* Check the maximum loop iters counter. */
1446 inst
= emit(fs_inst(BRW_OPCODE_ADD
, max_iter
, max_iter
, fs_reg(-1)));
1447 inst
->conditional_mod
= BRW_CONDITIONAL_Z
;
1449 inst
= emit(fs_inst(BRW_OPCODE_BREAK
));
1450 inst
->predicated
= true;
1453 if (ir
->increment
) {
1454 this->base_ir
= ir
->increment
;
1455 ir
->increment
->accept(this);
1456 emit(fs_inst(BRW_OPCODE_ADD
, counter
, counter
, this->result
));
1459 emit(fs_inst(BRW_OPCODE_WHILE
));
1463 fs_visitor::visit(ir_loop_jump
*ir
)
1466 case ir_loop_jump::jump_break
:
1467 emit(fs_inst(BRW_OPCODE_BREAK
));
1469 case ir_loop_jump::jump_continue
:
1470 emit(fs_inst(BRW_OPCODE_CONTINUE
));
1476 fs_visitor::visit(ir_call
*ir
)
1478 assert(!"FINISHME");
1482 fs_visitor::visit(ir_return
*ir
)
1484 assert(!"FINISHME");
1488 fs_visitor::visit(ir_function
*ir
)
1490 /* Ignore function bodies other than main() -- we shouldn't see calls to
1491 * them since they should all be inlined before we get to ir_to_mesa.
1493 if (strcmp(ir
->name
, "main") == 0) {
1494 const ir_function_signature
*sig
;
1497 sig
= ir
->matching_signature(&empty
);
1501 foreach_iter(exec_list_iterator
, iter
, sig
->body
) {
1502 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
1511 fs_visitor::visit(ir_function_signature
*ir
)
1513 assert(!"not reached");
1518 fs_visitor::emit(fs_inst inst
)
1520 fs_inst
*list_inst
= new(mem_ctx
) fs_inst
;
1523 list_inst
->annotation
= this->current_annotation
;
1524 list_inst
->ir
= this->base_ir
;
1526 this->instructions
.push_tail(list_inst
);
1531 /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */
1533 fs_visitor::emit_dummy_fs()
1535 /* Everyone's favorite color. */
1536 emit(fs_inst(BRW_OPCODE_MOV
,
1539 emit(fs_inst(BRW_OPCODE_MOV
,
1542 emit(fs_inst(BRW_OPCODE_MOV
,
1545 emit(fs_inst(BRW_OPCODE_MOV
,
1550 write
= emit(fs_inst(FS_OPCODE_FB_WRITE
,
1555 /* The register location here is relative to the start of the URB
1556 * data. It will get adjusted to be a real location before
1557 * generate_code() time.
1560 fs_visitor::interp_reg(int location
, int channel
)
1562 int regnr
= location
* 2 + channel
/ 2;
1563 int stride
= (channel
& 1) * 4;
1565 return brw_vec1_grf(regnr
, stride
);
1568 /** Emits the interpolation for the varying inputs. */
1570 fs_visitor::emit_interpolation_setup()
1572 struct brw_reg g1_uw
= retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW
);
1574 this->current_annotation
= "compute pixel centers";
1575 this->pixel_x
= fs_reg(this, glsl_type::uint_type
);
1576 this->pixel_y
= fs_reg(this, glsl_type::uint_type
);
1577 this->pixel_x
.type
= BRW_REGISTER_TYPE_UW
;
1578 this->pixel_y
.type
= BRW_REGISTER_TYPE_UW
;
1579 emit(fs_inst(BRW_OPCODE_ADD
,
1581 fs_reg(stride(suboffset(g1_uw
, 4), 2, 4, 0)),
1582 fs_reg(brw_imm_v(0x10101010))));
1583 emit(fs_inst(BRW_OPCODE_ADD
,
1585 fs_reg(stride(suboffset(g1_uw
, 5), 2, 4, 0)),
1586 fs_reg(brw_imm_v(0x11001100))));
1588 this->current_annotation
= "compute pixel deltas from v0";
1589 this->delta_x
= fs_reg(this, glsl_type::float_type
);
1590 this->delta_y
= fs_reg(this, glsl_type::float_type
);
1591 emit(fs_inst(BRW_OPCODE_ADD
,
1594 fs_reg(negate(brw_vec1_grf(1, 0)))));
1595 emit(fs_inst(BRW_OPCODE_ADD
,
1598 fs_reg(negate(brw_vec1_grf(1, 1)))));
1600 this->current_annotation
= "compute pos.w and 1/pos.w";
1601 /* Compute wpos.w. It's always in our setup, since it's needed to
1602 * interpolate the other attributes.
1604 this->wpos_w
= fs_reg(this, glsl_type::float_type
);
1605 emit(fs_inst(FS_OPCODE_LINTERP
, wpos_w
, this->delta_x
, this->delta_y
,
1606 interp_reg(FRAG_ATTRIB_WPOS
, 3)));
1607 /* Compute the pixel 1/W value from wpos.w. */
1608 this->pixel_w
= fs_reg(this, glsl_type::float_type
);
1609 emit(fs_inst(FS_OPCODE_RCP
, this->pixel_w
, wpos_w
));
1610 this->current_annotation
= NULL
;
1614 fs_visitor::emit_fb_writes()
1616 this->current_annotation
= "FB write header";
1622 if (c
->key
.aa_dest_stencil_reg
) {
1623 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, nr
++),
1624 fs_reg(brw_vec8_grf(c
->key
.aa_dest_stencil_reg
, 0))));
1627 /* Reserve space for color. It'll be filled in per MRT below. */
1631 if (c
->key
.source_depth_to_render_target
) {
1632 if (c
->key
.computes_depth
) {
1633 /* Hand over gl_FragDepth. */
1634 assert(this->frag_depth
);
1635 fs_reg depth
= *(variable_storage(this->frag_depth
));
1637 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, nr
++), depth
));
1639 /* Pass through the payload depth. */
1640 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, nr
++),
1641 fs_reg(brw_vec8_grf(c
->key
.source_depth_reg
, 0))));
1645 if (c
->key
.dest_depth_reg
) {
1646 emit(fs_inst(BRW_OPCODE_MOV
, fs_reg(MRF
, nr
++),
1647 fs_reg(brw_vec8_grf(c
->key
.dest_depth_reg
, 0))));
1650 fs_reg color
= reg_undef
;
1651 if (this->frag_color
)
1652 color
= *(variable_storage(this->frag_color
));
1653 else if (this->frag_data
)
1654 color
= *(variable_storage(this->frag_data
));
1656 for (int target
= 0; target
< c
->key
.nr_color_regions
; target
++) {
1657 this->current_annotation
= talloc_asprintf(this->mem_ctx
,
1658 "FB write target %d",
1660 if (this->frag_color
|| this->frag_data
) {
1661 for (int i
= 0; i
< 4; i
++) {
1662 emit(fs_inst(BRW_OPCODE_MOV
,
1663 fs_reg(MRF
, color_mrf
+ i
),
1669 if (this->frag_color
)
1670 color
.reg_offset
-= 4;
1672 fs_inst
*inst
= emit(fs_inst(FS_OPCODE_FB_WRITE
,
1673 reg_undef
, reg_undef
));
1674 inst
->target
= target
;
1676 if (target
== c
->key
.nr_color_regions
- 1)
1680 if (c
->key
.nr_color_regions
== 0) {
1681 fs_inst
*inst
= emit(fs_inst(FS_OPCODE_FB_WRITE
,
1682 reg_undef
, reg_undef
));
1687 this->current_annotation
= NULL
;
/* Generates the hardware render-target write for an FS_OPCODE_FB_WRITE
 * instruction: a mask-disabled, uncompressed MOV of g1 into message reg 1
 * (g0 is the implied header), then the data-port FB write itself.
 * NOTE(review): this definition was mangled by extraction -- the brw_MOV
 * destination line and most of the brw_fb_WRITE() argument list (target,
 * message length, response length, eot) are missing; restore from upstream.
 */
1691 fs_visitor::generate_fb_write(fs_inst
*inst
)
/* Whether this is the final write; the fragments show it comes from inst. */
1693 GLboolean eot
= inst
->eot
;
1695 /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied
/* Emit the g1 header move without predication/masking or compression. */
1698 brw_push_insn_state(p
);
1699 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1700 brw_set_compression_control(p
, BRW_COMPRESSION_NONE
);
1703 brw_vec8_grf(1, 0));
1704 brw_pop_insn_state(p
);
/* Fragments of the brw_fb_WRITE() call: 8-wide dispatch, null response
 * register (UW), g0 as the implied message header source. */
1707 8, /* dispatch_width */
1708 retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW
),
1710 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1718 fs_visitor::generate_linterp(fs_inst
*inst
,
1719 struct brw_reg dst
, struct brw_reg
*src
)
1721 struct brw_reg delta_x
= src
[0];
1722 struct brw_reg delta_y
= src
[1];
1723 struct brw_reg interp
= src
[2];
1726 delta_y
.nr
== delta_x
.nr
+ 1 &&
1727 (intel
->gen
>= 6 || (delta_x
.nr
& 1) == 0)) {
1728 brw_PLN(p
, dst
, interp
, delta_x
);
1730 brw_LINE(p
, brw_null_reg(), interp
, delta_x
);
1731 brw_MAC(p
, dst
, suboffset(interp
, 1), delta_y
);
1736 fs_visitor::generate_math(fs_inst
*inst
,
1737 struct brw_reg dst
, struct brw_reg
*src
)
1741 switch (inst
->opcode
) {
1743 op
= BRW_MATH_FUNCTION_INV
;
1746 op
= BRW_MATH_FUNCTION_RSQ
;
1748 case FS_OPCODE_SQRT
:
1749 op
= BRW_MATH_FUNCTION_SQRT
;
1751 case FS_OPCODE_EXP2
:
1752 op
= BRW_MATH_FUNCTION_EXP
;
1754 case FS_OPCODE_LOG2
:
1755 op
= BRW_MATH_FUNCTION_LOG
;
1758 op
= BRW_MATH_FUNCTION_POW
;
1761 op
= BRW_MATH_FUNCTION_SIN
;
1764 op
= BRW_MATH_FUNCTION_COS
;
1767 assert(!"not reached: unknown math function");
1772 if (inst
->opcode
== FS_OPCODE_POW
) {
1773 brw_MOV(p
, brw_message_reg(3), src
[1]);
1778 inst
->saturate
? BRW_MATH_SATURATE_SATURATE
:
1779 BRW_MATH_SATURATE_NONE
,
1781 BRW_MATH_DATA_VECTOR
,
1782 BRW_MATH_PRECISION_FULL
);
/* Generates a sampler message for FS texture opcodes.  Selects the message
 * type by opcode (sample vs. sample-with-bias) and shadow-compare flag, with
 * gen5 using the newer GEN5 message encodings and older parts using the
 * SIMD16 encodings.
 * NOTE(review): this definition was mangled by extraction -- the opcode case
 * labels, else/brace structure, and most brw_SAMPLE() arguments (msg reg,
 * writemask, response/message lengths) are missing; restore from upstream.
 */
1786 fs_visitor::generate_tex(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
)
/* gen5 gets the dedicated GEN5 sampler message encodings. */
1791 if (intel
->gen
== 5) {
1792 switch (inst
->opcode
) {
1794 if (inst
->shadow_compare
) {
1795 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_COMPARE_GEN5
;
1797 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_GEN5
;
/* Bias variant (TXB) on gen5. */
1801 if (inst
->shadow_compare
) {
1802 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE_GEN5
;
1804 msg_type
= BRW_SAMPLER_MESSAGE_SAMPLE_BIAS_GEN5
;
/* Pre-gen5 path: SIMD16 message encodings. */
1809 switch (inst
->opcode
) {
1811 /* Note that G45 and older determines shadow compare and dispatch width
1812 * from message length for most messages.
1814 if (inst
->shadow_compare
) {
1815 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE
;
1817 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE
;
/* TXB pre-gen5: shadow compare with bias is not implemented yet. */
1820 if (inst
->shadow_compare
) {
1821 assert(!"FINISHME: shadow compare with bias.");
1822 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
1824 msg_type
= BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS
;
/* A message type must have been chosen above. */
1830 assert(msg_type
!= -1);
/* Fragments of the brw_SAMPLE() call: UW-typed destination, g0 header,
 * binding table index derived from the sampler, SIMD8 mode. */
1836 retype(dst
, BRW_REGISTER_TYPE_UW
),
1838 retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW
),
1839 SURF_INDEX_TEXTURE(inst
->sampler
),
1847 BRW_SAMPLER_SIMD_MODE_SIMD8
);
1851 /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input
1854 * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br
1856 * and we're trying to produce:
1859 * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl)
1860 * (ss0.tr - ss0.tl) (ss0.tr - ss0.br)
1861 * (ss0.br - ss0.bl) (ss0.tl - ss0.bl)
1862 * (ss0.br - ss0.bl) (ss0.tr - ss0.br)
1863 * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl)
1864 * (ss1.tr - ss1.tl) (ss1.tr - ss1.br)
1865 * (ss1.br - ss1.bl) (ss1.tl - ss1.bl)
1866 * (ss1.br - ss1.bl) (ss1.tr - ss1.br)
1868 * and add another set of two more subspans if in 16-pixel dispatch mode.
1870 * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result
1871 * for each pair, and vertstride = 2 jumps us 2 elements after processing a
1872 * pair. But for DDY, it's harder, as we want to produce the pairs swizzled
1873 * between each other. We could probably do it like ddx and swizzle the right
1874 * order later, but bail for now and just produce
1875 * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4)
1878 fs_visitor::generate_ddx(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
)
1880 struct brw_reg src0
= brw_reg(src
.file
, src
.nr
, 1,
1881 BRW_REGISTER_TYPE_F
,
1882 BRW_VERTICAL_STRIDE_2
,
1884 BRW_HORIZONTAL_STRIDE_0
,
1885 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
1886 struct brw_reg src1
= brw_reg(src
.file
, src
.nr
, 0,
1887 BRW_REGISTER_TYPE_F
,
1888 BRW_VERTICAL_STRIDE_2
,
1890 BRW_HORIZONTAL_STRIDE_0
,
1891 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
1892 brw_ADD(p
, dst
, src0
, negate(src1
));
1896 fs_visitor::generate_ddy(fs_inst
*inst
, struct brw_reg dst
, struct brw_reg src
)
1898 struct brw_reg src0
= brw_reg(src
.file
, src
.nr
, 0,
1899 BRW_REGISTER_TYPE_F
,
1900 BRW_VERTICAL_STRIDE_4
,
1902 BRW_HORIZONTAL_STRIDE_0
,
1903 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
1904 struct brw_reg src1
= brw_reg(src
.file
, src
.nr
, 2,
1905 BRW_REGISTER_TYPE_F
,
1906 BRW_VERTICAL_STRIDE_4
,
1908 BRW_HORIZONTAL_STRIDE_0
,
1909 BRW_SWIZZLE_XYZW
, WRITEMASK_XYZW
);
1910 brw_ADD(p
, dst
, src0
, negate(src1
));
1914 fs_visitor::generate_discard(fs_inst
*inst
, struct brw_reg temp
)
1916 struct brw_reg g0
= retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW
);
1917 temp
= brw_uw1_reg(temp
.file
, temp
.nr
, 0);
1919 brw_push_insn_state(p
);
1920 brw_set_mask_control(p
, BRW_MASK_DISABLE
);
1921 brw_NOT(p
, temp
, brw_mask_reg(1)); /* IMASK */
1922 brw_AND(p
, g0
, temp
, g0
);
1923 brw_pop_insn_state(p
);
1927 fs_visitor::assign_curb_setup()
1929 c
->prog_data
.first_curbe_grf
= c
->key
.nr_payload_regs
;
1930 c
->prog_data
.curb_read_length
= ALIGN(c
->prog_data
.nr_params
, 8) / 8;
1932 if (intel
->gen
== 5 && (c
->prog_data
.first_curbe_grf
+
1933 c
->prog_data
.curb_read_length
) & 1) {
1934 /* Align the start of the interpolation coefficients so that we can use
1935 * the PLN instruction.
1937 c
->prog_data
.first_curbe_grf
++;
1940 /* Map the offsets in the UNIFORM file to fixed HW regs. */
1941 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
1942 fs_inst
*inst
= (fs_inst
*)iter
.get();
1944 for (unsigned int i
= 0; i
< 3; i
++) {
1945 if (inst
->src
[i
].file
== UNIFORM
) {
1946 int constant_nr
= inst
->src
[i
].hw_reg
+ inst
->src
[i
].reg_offset
;
1947 struct brw_reg brw_reg
= brw_vec1_grf(c
->prog_data
.first_curbe_grf
+
1951 inst
->src
[i
].file
= FIXED_HW_REG
;
1952 inst
->src
[i
].fixed_hw_reg
= brw_reg
;
1959 fs_visitor::assign_urb_setup()
1961 int urb_start
= c
->prog_data
.first_curbe_grf
+ c
->prog_data
.curb_read_length
;
1962 int interp_reg_nr
[FRAG_ATTRIB_MAX
];
1964 c
->prog_data
.urb_read_length
= 0;
1966 /* Figure out where each of the incoming setup attributes lands. */
1967 for (unsigned int i
= 0; i
< FRAG_ATTRIB_MAX
; i
++) {
1968 interp_reg_nr
[i
] = -1;
1970 if (i
!= FRAG_ATTRIB_WPOS
&&
1971 !(brw
->fragment_program
->Base
.InputsRead
& BITFIELD64_BIT(i
)))
1974 /* Each attribute is 4 setup channels, each of which is half a reg. */
1975 interp_reg_nr
[i
] = urb_start
+ c
->prog_data
.urb_read_length
;
1976 c
->prog_data
.urb_read_length
+= 2;
1979 /* Map the register numbers for FS_OPCODE_LINTERP so that it uses
1980 * the correct setup input.
1982 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
1983 fs_inst
*inst
= (fs_inst
*)iter
.get();
1985 if (inst
->opcode
!= FS_OPCODE_LINTERP
)
1988 assert(inst
->src
[2].file
== FIXED_HW_REG
);
1990 int location
= inst
->src
[2].fixed_hw_reg
.nr
/ 2;
1991 assert(interp_reg_nr
[location
] != -1);
1992 inst
->src
[2].fixed_hw_reg
.nr
= (interp_reg_nr
[location
] +
1993 (inst
->src
[2].fixed_hw_reg
.nr
& 1));
1996 this->first_non_payload_grf
= urb_start
+ c
->prog_data
.urb_read_length
;
2000 trivial_assign_reg(int *reg_hw_locations
, fs_reg
*reg
)
2002 if (reg
->file
== GRF
&& reg
->reg
!= 0) {
2003 reg
->hw_reg
= reg_hw_locations
[reg
->reg
] + reg
->reg_offset
;
2009 fs_visitor::assign_regs()
2012 int hw_reg_mapping
[this->virtual_grf_next
];
2015 hw_reg_mapping
[0] = 0;
2016 hw_reg_mapping
[1] = this->first_non_payload_grf
;
2017 for (i
= 2; i
< this->virtual_grf_next
; i
++) {
2018 hw_reg_mapping
[i
] = (hw_reg_mapping
[i
- 1] +
2019 this->virtual_grf_sizes
[i
- 1]);
2021 last_grf
= hw_reg_mapping
[i
- 1] + this->virtual_grf_sizes
[i
- 1];
2023 /* FINISHME: trivial assignment of register numbers */
2024 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
2025 fs_inst
*inst
= (fs_inst
*)iter
.get();
2027 trivial_assign_reg(hw_reg_mapping
, &inst
->dst
);
2028 trivial_assign_reg(hw_reg_mapping
, &inst
->src
[0]);
2029 trivial_assign_reg(hw_reg_mapping
, &inst
->src
[1]);
2032 this->grf_used
= last_grf
+ 1;
2035 static struct brw_reg
brw_reg_from_fs_reg(fs_reg
*reg
)
2037 struct brw_reg brw_reg
;
2039 switch (reg
->file
) {
2043 brw_reg
= brw_vec8_reg(reg
->file
,
2045 brw_reg
= retype(brw_reg
, reg
->type
);
2048 switch (reg
->type
) {
2049 case BRW_REGISTER_TYPE_F
:
2050 brw_reg
= brw_imm_f(reg
->imm
.f
);
2052 case BRW_REGISTER_TYPE_D
:
2053 brw_reg
= brw_imm_d(reg
->imm
.i
);
2055 case BRW_REGISTER_TYPE_UD
:
2056 brw_reg
= brw_imm_ud(reg
->imm
.u
);
2059 assert(!"not reached");
2064 brw_reg
= reg
->fixed_hw_reg
;
2067 /* Probably unused. */
2068 brw_reg
= brw_null_reg();
2071 assert(!"not reached");
2072 brw_reg
= brw_null_reg();
2076 brw_reg
= brw_abs(brw_reg
);
2078 brw_reg
= negate(brw_reg
);
2084 fs_visitor::generate_code()
2086 unsigned int annotation_len
= 0;
2087 int last_native_inst
= 0;
2088 struct brw_instruction
*if_stack
[16], *loop_stack
[16];
2089 int if_stack_depth
= 0, loop_stack_depth
= 0;
2090 int if_depth_in_loop
[16];
2092 if_depth_in_loop
[loop_stack_depth
] = 0;
2094 memset(&if_stack
, 0, sizeof(if_stack
));
2095 foreach_iter(exec_list_iterator
, iter
, this->instructions
) {
2096 fs_inst
*inst
= (fs_inst
*)iter
.get();
2097 struct brw_reg src
[3], dst
;
2099 for (unsigned int i
= 0; i
< 3; i
++) {
2100 src
[i
] = brw_reg_from_fs_reg(&inst
->src
[i
]);
2102 dst
= brw_reg_from_fs_reg(&inst
->dst
);
2104 brw_set_conditionalmod(p
, inst
->conditional_mod
);
2105 brw_set_predicate_control(p
, inst
->predicated
);
2107 switch (inst
->opcode
) {
2108 case BRW_OPCODE_MOV
:
2109 brw_MOV(p
, dst
, src
[0]);
2111 case BRW_OPCODE_ADD
:
2112 brw_ADD(p
, dst
, src
[0], src
[1]);
2114 case BRW_OPCODE_MUL
:
2115 brw_MUL(p
, dst
, src
[0], src
[1]);
2118 case BRW_OPCODE_FRC
:
2119 brw_FRC(p
, dst
, src
[0]);
2121 case BRW_OPCODE_RNDD
:
2122 brw_RNDD(p
, dst
, src
[0]);
2124 case BRW_OPCODE_RNDZ
:
2125 brw_RNDZ(p
, dst
, src
[0]);
2128 case BRW_OPCODE_AND
:
2129 brw_AND(p
, dst
, src
[0], src
[1]);
2132 brw_OR(p
, dst
, src
[0], src
[1]);
2134 case BRW_OPCODE_XOR
:
2135 brw_XOR(p
, dst
, src
[0], src
[1]);
2138 case BRW_OPCODE_CMP
:
2139 brw_CMP(p
, dst
, inst
->conditional_mod
, src
[0], src
[1]);
2141 case BRW_OPCODE_SEL
:
2142 brw_SEL(p
, dst
, src
[0], src
[1]);
2146 assert(if_stack_depth
< 16);
2147 if_stack
[if_stack_depth
] = brw_IF(p
, BRW_EXECUTE_8
);
2148 if_depth_in_loop
[loop_stack_depth
]++;
2151 case BRW_OPCODE_ELSE
:
2152 if_stack
[if_stack_depth
- 1] =
2153 brw_ELSE(p
, if_stack
[if_stack_depth
- 1]);
2155 case BRW_OPCODE_ENDIF
:
2157 brw_ENDIF(p
, if_stack
[if_stack_depth
]);
2158 if_depth_in_loop
[loop_stack_depth
]--;
2162 loop_stack
[loop_stack_depth
++] = brw_DO(p
, BRW_EXECUTE_8
);
2163 if_depth_in_loop
[loop_stack_depth
] = 0;
2166 case BRW_OPCODE_BREAK
:
2167 brw_BREAK(p
, if_depth_in_loop
[loop_stack_depth
]);
2168 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2170 case BRW_OPCODE_CONTINUE
:
2171 brw_CONT(p
, if_depth_in_loop
[loop_stack_depth
]);
2172 brw_set_predicate_control(p
, BRW_PREDICATE_NONE
);
2175 case BRW_OPCODE_WHILE
: {
2176 struct brw_instruction
*inst0
, *inst1
;
2179 if (intel
->gen
== 5)
2182 assert(loop_stack_depth
> 0);
2184 inst0
= inst1
= brw_WHILE(p
, loop_stack
[loop_stack_depth
]);
2185 /* patch all the BREAK/CONT instructions from last BGNLOOP */
2186 while (inst0
> loop_stack
[loop_stack_depth
]) {
2188 if (inst0
->header
.opcode
== BRW_OPCODE_BREAK
&&
2189 inst0
->bits3
.if_else
.jump_count
== 0) {
2190 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
+ 1);
2192 else if (inst0
->header
.opcode
== BRW_OPCODE_CONTINUE
&&
2193 inst0
->bits3
.if_else
.jump_count
== 0) {
2194 inst0
->bits3
.if_else
.jump_count
= br
* (inst1
- inst0
);
2202 case FS_OPCODE_SQRT
:
2203 case FS_OPCODE_EXP2
:
2204 case FS_OPCODE_LOG2
:
2208 generate_math(inst
, dst
, src
);
2210 case FS_OPCODE_LINTERP
:
2211 generate_linterp(inst
, dst
, src
);
2216 generate_tex(inst
, dst
, src
[0]);
2218 case FS_OPCODE_DISCARD
:
2219 generate_discard(inst
, dst
/* src0 == dst */);
2222 generate_ddx(inst
, dst
, src
[0]);
2225 generate_ddy(inst
, dst
, src
[0]);
2227 case FS_OPCODE_FB_WRITE
:
2228 generate_fb_write(inst
);
2231 if (inst
->opcode
< (int)ARRAY_SIZE(brw_opcodes
)) {
2232 _mesa_problem(ctx
, "Unsupported opcode `%s' in FS",
2233 brw_opcodes
[inst
->opcode
].name
);
2235 _mesa_problem(ctx
, "Unsupported opcode %d in FS", inst
->opcode
);
2240 if (annotation_len
< p
->nr_insn
) {
2241 annotation_len
*= 2;
2242 if (annotation_len
< 16)
2243 annotation_len
= 16;
2245 this->annotation_string
= talloc_realloc(this->mem_ctx
,
2249 this->annotation_ir
= talloc_realloc(this->mem_ctx
,
2255 for (unsigned int i
= last_native_inst
; i
< p
->nr_insn
; i
++) {
2256 this->annotation_string
[i
] = inst
->annotation
;
2257 this->annotation_ir
[i
] = inst
->ir
;
2259 last_native_inst
= p
->nr_insn
;
2264 brw_wm_fs_emit(struct brw_context
*brw
, struct brw_wm_compile
*c
)
2266 struct brw_compile
*p
= &c
->func
;
2267 struct intel_context
*intel
= &brw
->intel
;
2268 GLcontext
*ctx
= &intel
->ctx
;
2269 struct brw_shader
*shader
= NULL
;
2270 struct gl_shader_program
*prog
= ctx
->Shader
.CurrentProgram
;
2278 for (unsigned int i
= 0; i
< prog
->_NumLinkedShaders
; i
++) {
2279 if (prog
->_LinkedShaders
[i
]->Type
== GL_FRAGMENT_SHADER
) {
2280 shader
= (struct brw_shader
*)prog
->_LinkedShaders
[i
];
2287 /* We always use 8-wide mode, at least for now. For one, flow
2288 * control only works in 8-wide. Also, when we're fragment shader
2289 * bound, we're almost always under register pressure as well, so
2290 * 8-wide would save us from the performance cliff of spilling
2293 c
->dispatch_width
= 8;
2295 if (INTEL_DEBUG
& DEBUG_WM
) {
2296 printf("GLSL IR for native fragment shader %d:\n", prog
->Name
);
2297 _mesa_print_ir(shader
->ir
, NULL
);
2301 /* Now the main event: Visit the shader IR and generate our FS IR for it.
2303 fs_visitor
v(c
, shader
);
2308 v
.emit_interpolation_setup();
2310 /* Generate FS IR for main(). (the visitor only descends into
2311 * functions called "main").
2313 foreach_iter(exec_list_iterator
, iter
, *shader
->ir
) {
2314 ir_instruction
*ir
= (ir_instruction
*)iter
.get();
2320 v
.assign_curb_setup();
2321 v
.assign_urb_setup();
2327 assert(!v
.fail
); /* FINISHME: Cleanly fail, tested at link time, etc. */
2332 if (INTEL_DEBUG
& DEBUG_WM
) {
2333 const char *last_annotation_string
= NULL
;
2334 ir_instruction
*last_annotation_ir
= NULL
;
2336 printf("Native code for fragment shader %d:\n", prog
->Name
);
2337 for (unsigned int i
= 0; i
< p
->nr_insn
; i
++) {
2338 if (last_annotation_ir
!= v
.annotation_ir
[i
]) {
2339 last_annotation_ir
= v
.annotation_ir
[i
];
2340 if (last_annotation_ir
) {
2342 last_annotation_ir
->print();
2346 if (last_annotation_string
!= v
.annotation_string
[i
]) {
2347 last_annotation_string
= v
.annotation_string
[i
];
2348 if (last_annotation_string
)
2349 printf(" %s\n", last_annotation_string
);
2351 brw_disasm(stdout
, &p
->store
[i
], intel
->gen
);
2356 c
->prog_data
.total_grf
= v
.grf_used
;
2357 c
->prog_data
.total_scratch
= 0;