2 * Copyright © 2015 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
26 #include "glsl/ir_uniform.h"
31 vec4_visitor::emit_nir_code()
33 nir_shader
*nir
= prog
->nir
;
35 if (nir
->num_inputs
> 0)
36 nir_setup_inputs(nir
);
38 if (nir
->num_uniforms
> 0)
39 nir_setup_uniforms(nir
);
41 nir_setup_system_values(nir
);
43 /* get the main function and emit it */
44 nir_foreach_overload(nir
, overload
) {
45 assert(strcmp(overload
->function
->name
, "main") == 0);
46 assert(overload
->impl
);
47 nir_emit_impl(overload
->impl
);
52 vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr
*instr
)
56 switch (instr
->intrinsic
) {
57 case nir_intrinsic_load_vertex_id
:
58 unreachable("should be lowered by lower_vertex_id().");
60 case nir_intrinsic_load_vertex_id_zero_base
:
61 reg
= &this->nir_system_values
[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
];
62 if (reg
->file
== BAD_FILE
)
64 *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
,
68 case nir_intrinsic_load_base_vertex
:
69 reg
= &this->nir_system_values
[SYSTEM_VALUE_BASE_VERTEX
];
70 if (reg
->file
== BAD_FILE
)
71 *reg
= *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX
,
75 case nir_intrinsic_load_instance_id
:
76 reg
= &this->nir_system_values
[SYSTEM_VALUE_INSTANCE_ID
];
77 if (reg
->file
== BAD_FILE
)
78 *reg
= *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID
,
88 setup_system_values_block(nir_block
*block
, void *void_visitor
)
90 vec4_visitor
*v
= (vec4_visitor
*)void_visitor
;
92 nir_foreach_instr(block
, instr
) {
93 if (instr
->type
!= nir_instr_type_intrinsic
)
96 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
97 v
->nir_setup_system_value_intrinsic(intrin
);
104 vec4_visitor::nir_setup_system_values(nir_shader
*shader
)
106 nir_system_values
= ralloc_array(mem_ctx
, dst_reg
, SYSTEM_VALUE_MAX
);
108 nir_foreach_overload(shader
, overload
) {
109 assert(strcmp(overload
->function
->name
, "main") == 0);
110 assert(overload
->impl
);
111 nir_foreach_block(overload
->impl
, setup_system_values_block
, this);
116 vec4_visitor::nir_setup_inputs(nir_shader
*shader
)
118 nir_inputs
= ralloc_array(mem_ctx
, src_reg
, shader
->num_inputs
);
120 foreach_list_typed(nir_variable
, var
, node
, &shader
->inputs
) {
121 int offset
= var
->data
.driver_location
;
122 unsigned size
= type_size(var
->type
);
123 for (unsigned i
= 0; i
< size
; i
++) {
124 src_reg src
= src_reg(ATTR
, var
->data
.location
+ i
, var
->type
);
125 nir_inputs
[offset
+ i
] = src
;
131 vec4_visitor::nir_setup_uniforms(nir_shader
*shader
)
135 nir_uniform_driver_location
=
136 rzalloc_array(mem_ctx
, unsigned, this->uniform_array_size
);
139 foreach_list_typed(nir_variable
, var
, node
, &shader
->uniforms
) {
140 /* UBO's, atomics and samplers don't take up space in the
142 if (var
->interface_type
!= NULL
|| var
->type
->contains_atomic() ||
143 type_size(var
->type
) == 0) {
147 assert(uniforms
< uniform_array_size
);
148 this->uniform_size
[uniforms
] = type_size(var
->type
);
150 if (strncmp(var
->name
, "gl_", 3) == 0)
151 nir_setup_builtin_uniform(var
);
153 nir_setup_uniform(var
);
156 /* For ARB_vertex_program, only a single "parameters" variable is
157 * generated to support uniform data.
159 nir_variable
*var
= (nir_variable
*) shader
->uniforms
.get_head();
160 assert(shader
->uniforms
.length() == 1 &&
161 strcmp(var
->name
, "parameters") == 0);
163 assert(uniforms
< uniform_array_size
);
164 this->uniform_size
[uniforms
] = type_size(var
->type
);
166 struct gl_program_parameter_list
*plist
= prog
->Parameters
;
167 for (unsigned p
= 0; p
< plist
->NumParameters
; p
++) {
168 uniform_vector_size
[uniforms
] = plist
->Parameters
[p
].Size
;
170 /* Parameters should be either vec4 uniforms or single component
171 * constants; matrices and other larger types should have been broken
174 assert(uniform_vector_size
[uniforms
] <= 4);
177 for (i
= 0; i
< uniform_vector_size
[uniforms
]; i
++) {
178 stage_prog_data
->param
[uniforms
* 4 + i
] = &plist
->ParameterValues
[p
][i
];
181 static const gl_constant_value zero
= { 0.0 };
182 stage_prog_data
->param
[uniforms
* 4 + i
] = &zero
;
185 nir_uniform_driver_location
[uniforms
] = var
->data
.driver_location
;
192 vec4_visitor::nir_setup_uniform(nir_variable
*var
)
194 int namelen
= strlen(var
->name
);
196 /* The data for our (non-builtin) uniforms is stored in a series of
197 * gl_uniform_driver_storage structs for each subcomponent that
198 * glGetUniformLocation() could name. We know it's been set up in the same
199 * order we'd walk the type, so walk the list of storage and find anything
200 * with our name, or the prefix of a component that starts with our name.
202 for (unsigned u
= 0; u
< shader_prog
->NumUniformStorage
; u
++) {
203 struct gl_uniform_storage
*storage
= &shader_prog
->UniformStorage
[u
];
205 if (storage
->builtin
)
208 if (strncmp(var
->name
, storage
->name
, namelen
) != 0 ||
209 (storage
->name
[namelen
] != 0 &&
210 storage
->name
[namelen
] != '.' &&
211 storage
->name
[namelen
] != '[')) {
215 gl_constant_value
*components
= storage
->storage
;
216 unsigned vector_count
= (MAX2(storage
->array_elements
, 1) *
217 storage
->type
->matrix_columns
);
219 for (unsigned s
= 0; s
< vector_count
; s
++) {
220 assert(uniforms
< uniform_array_size
);
221 uniform_vector_size
[uniforms
] = storage
->type
->vector_elements
;
224 for (i
= 0; i
< uniform_vector_size
[uniforms
]; i
++) {
225 stage_prog_data
->param
[uniforms
* 4 + i
] = components
;
229 static const gl_constant_value zero
= { 0.0 };
230 stage_prog_data
->param
[uniforms
* 4 + i
] = &zero
;
233 nir_uniform_driver_location
[uniforms
] = var
->data
.driver_location
;
240 vec4_visitor::nir_setup_builtin_uniform(nir_variable
*var
)
242 const nir_state_slot
*const slots
= var
->state_slots
;
243 assert(var
->state_slots
!= NULL
);
245 for (unsigned int i
= 0; i
< var
->num_state_slots
; i
++) {
246 /* This state reference has already been setup by ir_to_mesa,
247 * but we'll get the same index back here. We can reference
248 * ParameterValues directly, since unlike brw_fs.cpp, we never
249 * add new state references during compile.
251 int index
= _mesa_add_state_reference(this->prog
->Parameters
,
252 (gl_state_index
*)slots
[i
].tokens
);
253 gl_constant_value
*values
=
254 &this->prog
->Parameters
->ParameterValues
[index
][0];
256 assert(uniforms
< uniform_array_size
);
258 for (unsigned j
= 0; j
< 4; j
++)
259 stage_prog_data
->param
[uniforms
* 4 + j
] =
260 &values
[GET_SWZ(slots
[i
].swizzle
, j
)];
262 this->uniform_vector_size
[uniforms
] =
263 (var
->type
->is_scalar() || var
->type
->is_vector() ||
264 var
->type
->is_matrix() ? var
->type
->vector_elements
: 4);
266 nir_uniform_driver_location
[uniforms
] = var
->data
.driver_location
;
272 vec4_visitor::nir_emit_impl(nir_function_impl
*impl
)
274 nir_locals
= ralloc_array(mem_ctx
, dst_reg
, impl
->reg_alloc
);
276 foreach_list_typed(nir_register
, reg
, node
, &impl
->registers
) {
277 unsigned array_elems
=
278 reg
->num_array_elems
== 0 ? 1 : reg
->num_array_elems
;
280 nir_locals
[reg
->index
] = dst_reg(GRF
, alloc
.allocate(array_elems
));
283 nir_ssa_values
= ralloc_array(mem_ctx
, dst_reg
, impl
->ssa_alloc
);
285 nir_emit_cf_list(&impl
->body
);
289 vec4_visitor::nir_emit_cf_list(exec_list
*list
)
291 exec_list_validate(list
);
292 foreach_list_typed(nir_cf_node
, node
, node
, list
) {
293 switch (node
->type
) {
295 nir_emit_if(nir_cf_node_as_if(node
));
298 case nir_cf_node_loop
:
299 nir_emit_loop(nir_cf_node_as_loop(node
));
302 case nir_cf_node_block
:
303 nir_emit_block(nir_cf_node_as_block(node
));
307 unreachable("Invalid CFG node block");
313 vec4_visitor::nir_emit_if(nir_if
*if_stmt
)
315 /* First, put the condition in f0 */
316 src_reg condition
= get_nir_src(if_stmt
->condition
, BRW_REGISTER_TYPE_D
, 1);
317 vec4_instruction
*inst
= emit(MOV(dst_null_d(), condition
));
318 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
320 emit(IF(BRW_PREDICATE_NORMAL
));
322 nir_emit_cf_list(&if_stmt
->then_list
);
324 /* note: if the else is empty, dead CF elimination will remove it */
325 emit(BRW_OPCODE_ELSE
);
327 nir_emit_cf_list(&if_stmt
->else_list
);
329 emit(BRW_OPCODE_ENDIF
);
333 vec4_visitor::nir_emit_loop(nir_loop
*loop
)
337 nir_emit_cf_list(&loop
->body
);
339 emit(BRW_OPCODE_WHILE
);
343 vec4_visitor::nir_emit_block(nir_block
*block
)
345 nir_foreach_instr(block
, instr
) {
346 nir_emit_instr(instr
);
351 vec4_visitor::nir_emit_instr(nir_instr
*instr
)
353 this->base_ir
= instr
;
355 switch (instr
->type
) {
356 case nir_instr_type_load_const
:
357 nir_emit_load_const(nir_instr_as_load_const(instr
));
360 case nir_instr_type_intrinsic
:
361 nir_emit_intrinsic(nir_instr_as_intrinsic(instr
));
364 case nir_instr_type_alu
:
365 nir_emit_alu(nir_instr_as_alu(instr
));
368 case nir_instr_type_jump
:
369 nir_emit_jump(nir_instr_as_jump(instr
));
372 case nir_instr_type_tex
:
373 nir_emit_texture(nir_instr_as_tex(instr
));
377 fprintf(stderr
, "VS instruction not yet implemented by NIR->vec4\n");
383 dst_reg_for_nir_reg(vec4_visitor
*v
, nir_register
*nir_reg
,
384 unsigned base_offset
, nir_src
*indirect
)
388 reg
= v
->nir_locals
[nir_reg
->index
];
389 reg
= offset(reg
, base_offset
);
392 new(v
->mem_ctx
) src_reg(v
->get_nir_src(*indirect
,
400 vec4_visitor::get_nir_dest(nir_dest dest
)
402 assert(!dest
.is_ssa
);
403 return dst_reg_for_nir_reg(this, dest
.reg
.reg
, dest
.reg
.base_offset
,
408 vec4_visitor::get_nir_dest(nir_dest dest
, enum brw_reg_type type
)
410 return retype(get_nir_dest(dest
), type
);
414 vec4_visitor::get_nir_dest(nir_dest dest
, nir_alu_type type
)
416 return get_nir_dest(dest
, brw_type_for_nir_type(type
));
420 vec4_visitor::get_nir_src(nir_src src
, enum brw_reg_type type
,
421 unsigned num_components
)
426 assert(src
.ssa
!= NULL
);
427 reg
= nir_ssa_values
[src
.ssa
->index
];
430 reg
= dst_reg_for_nir_reg(this, src
.reg
.reg
, src
.reg
.base_offset
,
434 reg
= retype(reg
, type
);
436 src_reg reg_as_src
= src_reg(reg
);
437 reg_as_src
.swizzle
= brw_swizzle_for_size(num_components
);
442 vec4_visitor::get_nir_src(nir_src src
, nir_alu_type type
,
443 unsigned num_components
)
445 return get_nir_src(src
, brw_type_for_nir_type(type
), num_components
);
449 vec4_visitor::get_nir_src(nir_src src
, unsigned num_components
)
451 /* if type is not specified, default to signed int */
452 return get_nir_src(src
, nir_type_int
, num_components
);
456 vec4_visitor::nir_emit_load_const(nir_load_const_instr
*instr
)
458 dst_reg reg
= dst_reg(GRF
, alloc
.allocate(1));
459 reg
.type
= BRW_REGISTER_TYPE_F
;
461 /* @FIXME: consider emitting vector operations to save some MOVs in
462 * cases where the components are representable in 8 bits.
463 * By now, we emit a MOV for each component.
465 for (unsigned i
= 0; i
< instr
->def
.num_components
; ++i
) {
466 reg
.writemask
= 1 << i
;
467 emit(MOV(reg
, src_reg(instr
->value
.f
[i
])));
470 /* Set final writemask */
471 reg
.writemask
= brw_writemask_for_size(instr
->def
.num_components
);
473 nir_ssa_values
[instr
->def
.index
] = reg
;
477 vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr
*instr
)
482 bool has_indirect
= false;
484 switch (instr
->intrinsic
) {
486 case nir_intrinsic_load_input_indirect
:
489 case nir_intrinsic_load_input
: {
490 int offset
= instr
->const_index
[0];
491 src
= nir_inputs
[offset
];
494 dest
.reladdr
= new(mem_ctx
) src_reg(get_nir_src(instr
->src
[0],
498 dest
= get_nir_dest(instr
->dest
, src
.type
);
499 dest
.writemask
= brw_writemask_for_size(instr
->num_components
);
501 emit(MOV(dest
, src
));
505 case nir_intrinsic_store_output_indirect
:
508 case nir_intrinsic_store_output
: {
509 int varying
= instr
->const_index
[0];
511 src
= get_nir_src(instr
->src
[0], BRW_REGISTER_TYPE_F
,
512 instr
->num_components
);
516 dest
.reladdr
= new(mem_ctx
) src_reg(get_nir_src(instr
->src
[1],
520 output_reg
[varying
] = dest
;
524 case nir_intrinsic_load_vertex_id
:
525 unreachable("should be lowered by lower_vertex_id()");
527 case nir_intrinsic_load_vertex_id_zero_base
: {
529 src_reg(nir_system_values
[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
]);
530 assert(vertex_id
.file
!= BAD_FILE
);
531 dest
= get_nir_dest(instr
->dest
, vertex_id
.type
);
532 emit(MOV(dest
, vertex_id
));
536 case nir_intrinsic_load_base_vertex
: {
537 src_reg base_vertex
=
538 src_reg(nir_system_values
[SYSTEM_VALUE_BASE_VERTEX
]);
539 assert(base_vertex
.file
!= BAD_FILE
);
540 dest
= get_nir_dest(instr
->dest
, base_vertex
.type
);
541 emit(MOV(dest
, base_vertex
));
545 case nir_intrinsic_load_instance_id
: {
546 src_reg instance_id
=
547 src_reg(nir_system_values
[SYSTEM_VALUE_INSTANCE_ID
]);
548 assert(instance_id
.file
!= BAD_FILE
);
549 dest
= get_nir_dest(instr
->dest
, instance_id
.type
);
550 emit(MOV(dest
, instance_id
));
554 case nir_intrinsic_load_uniform_indirect
:
557 case nir_intrinsic_load_uniform
: {
558 int uniform
= instr
->const_index
[0];
560 dest
= get_nir_dest(instr
->dest
);
563 /* Split addressing into uniform and offset */
564 int offset
= uniform
- nir_uniform_driver_location
[uniform
];
568 assert(uniform
>= 0);
570 src
= src_reg(dst_reg(UNIFORM
, uniform
));
571 src
.reg_offset
= offset
;
572 src_reg tmp
= get_nir_src(instr
->src
[0], BRW_REGISTER_TYPE_D
, 1);
573 src
.reladdr
= new(mem_ctx
) src_reg(tmp
);
575 src
= src_reg(dst_reg(UNIFORM
, uniform
));
578 emit(MOV(dest
, src
));
582 case nir_intrinsic_atomic_counter_read
:
583 case nir_intrinsic_atomic_counter_inc
:
584 case nir_intrinsic_atomic_counter_dec
: {
585 unsigned surf_index
= prog_data
->base
.binding_table
.abo_start
+
586 (unsigned) instr
->const_index
[0];
587 src_reg offset
= get_nir_src(instr
->src
[0], nir_type_int
,
588 instr
->num_components
);
589 dest
= get_nir_dest(instr
->dest
);
591 switch (instr
->intrinsic
) {
592 case nir_intrinsic_atomic_counter_inc
:
593 emit_untyped_atomic(BRW_AOP_INC
, surf_index
, dest
, offset
,
594 src_reg(), src_reg());
596 case nir_intrinsic_atomic_counter_dec
:
597 emit_untyped_atomic(BRW_AOP_PREDEC
, surf_index
, dest
, offset
,
598 src_reg(), src_reg());
600 case nir_intrinsic_atomic_counter_read
:
601 emit_untyped_surface_read(surf_index
, dest
, offset
);
604 unreachable("Unreachable");
607 brw_mark_surface_used(stage_prog_data
, surf_index
);
611 case nir_intrinsic_load_ubo_indirect
:
614 case nir_intrinsic_load_ubo
: {
615 nir_const_value
*const_block_index
= nir_src_as_const_value(instr
->src
[0]);
618 dest
= get_nir_dest(instr
->dest
);
620 if (const_block_index
) {
621 /* The block index is a constant, so just emit the binding table entry
624 surf_index
= src_reg(prog_data
->base
.binding_table
.ubo_start
+
625 const_block_index
->u
[0]);
627 /* The block index is not a constant. Evaluate the index expression
628 * per-channel and add the base UBO index; we have to select a value
629 * from any live channel.
631 surf_index
= src_reg(this, glsl_type::uint_type
);
632 emit(ADD(dst_reg(surf_index
), get_nir_src(instr
->src
[0], nir_type_int
,
633 instr
->num_components
),
634 src_reg(prog_data
->base
.binding_table
.ubo_start
)));
635 surf_index
= emit_uniformize(surf_index
);
637 /* Assume this may touch any UBO. It would be nice to provide
638 * a tighter bound, but the array information is already lowered away.
640 brw_mark_surface_used(&prog_data
->base
,
641 prog_data
->base
.binding_table
.ubo_start
+
642 shader_prog
->NumUniformBlocks
- 1);
645 unsigned const_offset
= instr
->const_index
[0];
649 offset
= src_reg(const_offset
/ 16);
651 offset
= src_reg(this, glsl_type::uint_type
);
652 emit(SHR(dst_reg(offset
), get_nir_src(instr
->src
[1], nir_type_int
, 1),
656 src_reg packed_consts
= src_reg(this, glsl_type::vec4_type
);
657 packed_consts
.type
= dest
.type
;
659 emit_pull_constant_load_reg(dst_reg(packed_consts
),
662 NULL
, NULL
/* before_block/inst */);
664 packed_consts
.swizzle
= brw_swizzle_for_size(instr
->num_components
);
665 packed_consts
.swizzle
+= BRW_SWIZZLE4(const_offset
% 16 / 4,
666 const_offset
% 16 / 4,
667 const_offset
% 16 / 4,
668 const_offset
% 16 / 4);
670 emit(MOV(dest
, packed_consts
));
675 unreachable("Unknown intrinsic");
680 brw_swizzle_for_nir_swizzle(uint8_t swizzle
[4])
682 return BRW_SWIZZLE4(swizzle
[0], swizzle
[1], swizzle
[2], swizzle
[3]);
685 static enum brw_conditional_mod
686 brw_conditional_for_nir_comparison(nir_op op
)
692 return BRW_CONDITIONAL_L
;
697 return BRW_CONDITIONAL_GE
;
701 case nir_op_ball_fequal2
:
702 case nir_op_ball_iequal2
:
703 case nir_op_ball_fequal3
:
704 case nir_op_ball_iequal3
:
705 case nir_op_ball_fequal4
:
706 case nir_op_ball_iequal4
:
707 return BRW_CONDITIONAL_Z
;
711 case nir_op_bany_fnequal2
:
712 case nir_op_bany_inequal2
:
713 case nir_op_bany_fnequal3
:
714 case nir_op_bany_inequal3
:
715 case nir_op_bany_fnequal4
:
716 case nir_op_bany_inequal4
:
717 return BRW_CONDITIONAL_NZ
;
720 unreachable("not reached: bad operation for comparison");
725 vec4_visitor::nir_emit_alu(nir_alu_instr
*instr
)
727 vec4_instruction
*inst
;
729 dst_reg dst
= get_nir_dest(instr
->dest
.dest
,
730 nir_op_infos
[instr
->op
].output_type
);
731 dst
.writemask
= instr
->dest
.write_mask
;
734 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++) {
735 op
[i
] = get_nir_src(instr
->src
[i
].src
,
736 nir_op_infos
[instr
->op
].input_types
[i
], 4);
737 op
[i
].swizzle
= brw_swizzle_for_nir_swizzle(instr
->src
[i
].swizzle
);
738 op
[i
].abs
= instr
->src
[i
].abs
;
739 op
[i
].negate
= instr
->src
[i
].negate
;
745 inst
= emit(MOV(dst
, op
[0]));
746 inst
->saturate
= instr
->dest
.saturate
;
752 unreachable("not reached: should be handled by lower_vec_to_movs()");
756 inst
= emit(MOV(dst
, op
[0]));
757 inst
->saturate
= instr
->dest
.saturate
;
762 inst
= emit(MOV(dst
, op
[0]));
768 inst
= emit(ADD(dst
, op
[0], op
[1]));
769 inst
->saturate
= instr
->dest
.saturate
;
773 inst
= emit(MUL(dst
, op
[0], op
[1]));
774 inst
->saturate
= instr
->dest
.saturate
;
778 nir_const_value
*value0
= nir_src_as_const_value(instr
->src
[0].src
);
779 nir_const_value
*value1
= nir_src_as_const_value(instr
->src
[1].src
);
781 /* For integer multiplication, the MUL uses the low 16 bits of one of
782 * the operands (src0 through SNB, src1 on IVB and later). The MACH
783 * accumulates in the contribution of the upper 16 bits of that
784 * operand. If we can determine that one of the args is in the low
785 * 16 bits, though, we can just emit a single MUL.
787 if (value0
&& value0
->u
[0] < (1 << 16)) {
788 if (devinfo
->gen
< 7)
789 emit(MUL(dst
, op
[0], op
[1]));
791 emit(MUL(dst
, op
[1], op
[0]));
792 } else if (value1
&& value1
->u
[0] < (1 << 16)) {
793 if (devinfo
->gen
< 7)
794 emit(MUL(dst
, op
[1], op
[0]));
796 emit(MUL(dst
, op
[0], op
[1]));
798 struct brw_reg acc
= retype(brw_acc_reg(8), dst
.type
);
800 emit(MUL(acc
, op
[0], op
[1]));
801 emit(MACH(dst_null_d(), op
[0], op
[1]));
802 emit(MOV(dst
, src_reg(acc
)));
807 case nir_op_imul_high
:
808 case nir_op_umul_high
: {
809 struct brw_reg acc
= retype(brw_acc_reg(8), dst
.type
);
811 emit(MUL(acc
, op
[0], op
[1]));
812 emit(MACH(dst
, op
[0], op
[1]));
817 inst
= emit_math(SHADER_OPCODE_RCP
, dst
, op
[0]);
818 inst
->saturate
= instr
->dest
.saturate
;
822 inst
= emit_math(SHADER_OPCODE_EXP2
, dst
, op
[0]);
823 inst
->saturate
= instr
->dest
.saturate
;
827 inst
= emit_math(SHADER_OPCODE_LOG2
, dst
, op
[0]);
828 inst
->saturate
= instr
->dest
.saturate
;
832 inst
= emit_math(SHADER_OPCODE_SIN
, dst
, op
[0]);
833 inst
->saturate
= instr
->dest
.saturate
;
837 inst
= emit_math(SHADER_OPCODE_COS
, dst
, op
[0]);
838 inst
->saturate
= instr
->dest
.saturate
;
843 emit_math(SHADER_OPCODE_INT_QUOTIENT
, dst
, op
[0], op
[1]);
847 emit_math(SHADER_OPCODE_INT_REMAINDER
, dst
, op
[0], op
[1]);
851 unreachable("not reached: should be handled by ldexp_to_arith()");
854 inst
= emit_math(SHADER_OPCODE_SQRT
, dst
, op
[0]);
855 inst
->saturate
= instr
->dest
.saturate
;
859 inst
= emit_math(SHADER_OPCODE_RSQ
, dst
, op
[0]);
860 inst
->saturate
= instr
->dest
.saturate
;
864 inst
= emit_math(SHADER_OPCODE_POW
, dst
, op
[0], op
[1]);
865 inst
->saturate
= instr
->dest
.saturate
;
868 case nir_op_uadd_carry
: {
869 struct brw_reg acc
= retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD
);
871 emit(ADDC(dst_null_ud(), op
[0], op
[1]));
872 emit(MOV(dst
, src_reg(acc
)));
876 case nir_op_usub_borrow
: {
877 struct brw_reg acc
= retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD
);
879 emit(SUBB(dst_null_ud(), op
[0], op
[1]));
880 emit(MOV(dst
, src_reg(acc
)));
885 inst
= emit(RNDZ(dst
, op
[0]));
886 inst
->saturate
= instr
->dest
.saturate
;
890 src_reg tmp
= src_reg(this, glsl_type::float_type
);
892 brw_swizzle_for_size(instr
->src
[0].src
.is_ssa
?
893 instr
->src
[0].src
.ssa
->num_components
:
894 instr
->src
[0].src
.reg
.reg
->num_components
);
896 op
[0].negate
= !op
[0].negate
;
897 emit(RNDD(dst_reg(tmp
), op
[0]));
899 inst
= emit(MOV(dst
, tmp
));
900 inst
->saturate
= instr
->dest
.saturate
;
905 inst
= emit(RNDD(dst
, op
[0]));
906 inst
->saturate
= instr
->dest
.saturate
;
910 inst
= emit(FRC(dst
, op
[0]));
911 inst
->saturate
= instr
->dest
.saturate
;
914 case nir_op_fround_even
:
915 inst
= emit(RNDE(dst
, op
[0]));
916 inst
->saturate
= instr
->dest
.saturate
;
922 inst
= emit_minmax(BRW_CONDITIONAL_L
, dst
, op
[0], op
[1]);
923 inst
->saturate
= instr
->dest
.saturate
;
929 inst
= emit_minmax(BRW_CONDITIONAL_GE
, dst
, op
[0], op
[1]);
930 inst
->saturate
= instr
->dest
.saturate
;
934 case nir_op_fddx_coarse
:
935 case nir_op_fddx_fine
:
937 case nir_op_fddy_coarse
:
938 case nir_op_fddy_fine
:
939 unreachable("derivatives are not valid in vertex shaders");
951 emit(CMP(dst
, op
[0], op
[1],
952 brw_conditional_for_nir_comparison(instr
->op
)));
955 case nir_op_ball_fequal2
:
956 case nir_op_ball_iequal2
:
957 case nir_op_ball_fequal3
:
958 case nir_op_ball_iequal3
:
959 case nir_op_ball_fequal4
:
960 case nir_op_ball_iequal4
: {
961 dst_reg tmp
= dst_reg(this, glsl_type::bool_type
);
964 case nir_op_ball_fequal2
:
965 case nir_op_ball_iequal2
:
966 tmp
.writemask
= WRITEMASK_XY
;
968 case nir_op_ball_fequal3
:
969 case nir_op_ball_iequal3
:
970 tmp
.writemask
= WRITEMASK_XYZ
;
972 case nir_op_ball_fequal4
:
973 case nir_op_ball_iequal4
:
974 tmp
.writemask
= WRITEMASK_XYZW
;
977 unreachable("not reached");
980 emit(CMP(tmp
, op
[0], op
[1],
981 brw_conditional_for_nir_comparison(instr
->op
)));
982 emit(MOV(dst
, src_reg(0)));
983 inst
= emit(MOV(dst
, src_reg(~0)));
984 inst
->predicate
= BRW_PREDICATE_ALIGN16_ALL4H
;
988 case nir_op_bany_fnequal2
:
989 case nir_op_bany_inequal2
:
990 case nir_op_bany_fnequal3
:
991 case nir_op_bany_inequal3
:
992 case nir_op_bany_fnequal4
:
993 case nir_op_bany_inequal4
: {
994 dst_reg tmp
= dst_reg(this, glsl_type::bool_type
);
997 case nir_op_bany_fnequal2
:
998 case nir_op_bany_inequal2
:
999 tmp
.writemask
= WRITEMASK_XY
;
1001 case nir_op_bany_fnequal3
:
1002 case nir_op_bany_inequal3
:
1003 tmp
.writemask
= WRITEMASK_XYZ
;
1005 case nir_op_bany_fnequal4
:
1006 case nir_op_bany_inequal4
:
1007 tmp
.writemask
= WRITEMASK_XYZW
;
1010 unreachable("not reached");
1013 emit(CMP(tmp
, op
[0], op
[1],
1014 brw_conditional_for_nir_comparison(instr
->op
)));
1016 emit(MOV(dst
, src_reg(0)));
1017 inst
= emit(MOV(dst
, src_reg(~0)));
1018 inst
->predicate
= BRW_PREDICATE_ALIGN16_ANY4H
;
1023 emit(NOT(dst
, op
[0]));
1027 emit(XOR(dst
, op
[0], op
[1]));
1031 emit(OR(dst
, op
[0], op
[1]));
1035 emit(AND(dst
, op
[0], op
[1]));
1039 emit(AND(dst
, op
[0], src_reg(1)));
1043 op
[0].type
= BRW_REGISTER_TYPE_D
;
1044 dst
.type
= BRW_REGISTER_TYPE_D
;
1045 emit(AND(dst
, op
[0], src_reg(0x3f800000u
)));
1046 dst
.type
= BRW_REGISTER_TYPE_F
;
1050 emit(CMP(dst
, op
[0], src_reg(0.0f
), BRW_CONDITIONAL_NZ
));
1054 emit(CMP(dst
, op
[0], src_reg(0), BRW_CONDITIONAL_NZ
));
1057 case nir_op_fnoise1_1
:
1058 case nir_op_fnoise1_2
:
1059 case nir_op_fnoise1_3
:
1060 case nir_op_fnoise1_4
:
1061 case nir_op_fnoise2_1
:
1062 case nir_op_fnoise2_2
:
1063 case nir_op_fnoise2_3
:
1064 case nir_op_fnoise2_4
:
1065 case nir_op_fnoise3_1
:
1066 case nir_op_fnoise3_2
:
1067 case nir_op_fnoise3_3
:
1068 case nir_op_fnoise3_4
:
1069 case nir_op_fnoise4_1
:
1070 case nir_op_fnoise4_2
:
1071 case nir_op_fnoise4_3
:
1072 case nir_op_fnoise4_4
:
1073 unreachable("not reached: should be handled by lower_noise");
1075 case nir_op_unpack_half_2x16_split_x
:
1076 case nir_op_unpack_half_2x16_split_y
:
1077 case nir_op_pack_half_2x16_split
:
1078 unreachable("not reached: should not occur in vertex shader");
1080 case nir_op_unpack_snorm_2x16
:
1081 case nir_op_unpack_unorm_2x16
:
1082 case nir_op_pack_snorm_2x16
:
1083 case nir_op_pack_unorm_2x16
:
1084 unreachable("not reached: should be handled by lower_packing_builtins");
1086 case nir_op_unpack_half_2x16
:
1087 /* As NIR does not guarantee that we have a correct swizzle outside the
1088 * boundaries of a vector, and the implementation of emit_unpack_half_2x16
1089 * uses the source operand in an operation with WRITEMASK_Y while our
1090 * source operand has only size 1, it accessed incorrect data producing
1091 * regressions in Piglit. We repeat the swizzle of the first component on the
1092 * rest of components to avoid regressions. In the vec4_visitor IR code path
1093 * this is not needed because the operand has already the correct swizzle.
1095 op
[0].swizzle
= brw_compose_swizzle(BRW_SWIZZLE_XXXX
, op
[0].swizzle
);
1096 emit_unpack_half_2x16(dst
, op
[0]);
1099 case nir_op_pack_half_2x16
:
1100 emit_pack_half_2x16(dst
, op
[0]);
1103 case nir_op_unpack_unorm_4x8
:
1104 emit_unpack_unorm_4x8(dst
, op
[0]);
1107 case nir_op_pack_unorm_4x8
:
1108 emit_pack_unorm_4x8(dst
, op
[0]);
1111 case nir_op_unpack_snorm_4x8
:
1112 emit_unpack_snorm_4x8(dst
, op
[0]);
1115 case nir_op_pack_snorm_4x8
:
1116 emit_pack_snorm_4x8(dst
, op
[0]);
1119 case nir_op_bitfield_reverse
:
1120 emit(BFREV(dst
, op
[0]));
1123 case nir_op_bit_count
:
1124 emit(CBIT(dst
, op
[0]));
1127 case nir_op_ufind_msb
:
1128 case nir_op_ifind_msb
: {
1129 src_reg temp
= src_reg(this, glsl_type::uint_type
);
1131 inst
= emit(FBH(dst_reg(temp
), op
[0]));
1132 inst
->dst
.writemask
= WRITEMASK_XYZW
;
1134 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1135 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1136 * subtract the result from 31 to convert the MSB count into an LSB count.
1139 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1140 temp
.swizzle
= BRW_SWIZZLE_NOOP
;
1141 emit(MOV(dst
, temp
));
1143 src_reg src_tmp
= src_reg(dst
);
1144 emit(CMP(dst_null_d(), src_tmp
, src_reg(-1), BRW_CONDITIONAL_NZ
));
1146 src_tmp
.negate
= true;
1147 inst
= emit(ADD(dst
, src_tmp
, src_reg(31)));
1148 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1152 case nir_op_find_lsb
:
1153 emit(FBL(dst
, op
[0]));
1156 case nir_op_ubitfield_extract
:
1157 case nir_op_ibitfield_extract
:
1158 op
[0] = fix_3src_operand(op
[0]);
1159 op
[1] = fix_3src_operand(op
[1]);
1160 op
[2] = fix_3src_operand(op
[2]);
1162 emit(BFE(dst
, op
[2], op
[1], op
[0]));
1166 emit(BFI1(dst
, op
[0], op
[1]));
1170 op
[0] = fix_3src_operand(op
[0]);
1171 op
[1] = fix_3src_operand(op
[1]);
1172 op
[2] = fix_3src_operand(op
[2]);
1174 emit(BFI2(dst
, op
[0], op
[1], op
[2]));
1177 case nir_op_bitfield_insert
:
1178 unreachable("not reached: should be handled by "
1179 "lower_instructions::bitfield_insert_to_bfm_bfi");
1182 /* AND(val, 0x80000000) gives the sign bit.
1184 * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
1187 emit(CMP(dst_null_f(), op
[0], src_reg(0.0f
), BRW_CONDITIONAL_NZ
));
1189 op
[0].type
= BRW_REGISTER_TYPE_UD
;
1190 dst
.type
= BRW_REGISTER_TYPE_UD
;
1191 emit(AND(dst
, op
[0], src_reg(0x80000000u
)));
1193 inst
= emit(OR(dst
, src_reg(dst
), src_reg(0x3f800000u
)));
1194 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1195 dst
.type
= BRW_REGISTER_TYPE_F
;
1197 if (instr
->dest
.saturate
) {
1198 inst
= emit(MOV(dst
, src_reg(dst
)));
1199 inst
->saturate
= true;
1204 /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
1205 * -> non-negative val generates 0x00000000.
1206 * Predicated OR sets 1 if val is positive.
1208 emit(CMP(dst_null_d(), op
[0], src_reg(0), BRW_CONDITIONAL_G
));
1209 emit(ASR(dst
, op
[0], src_reg(31)));
1210 inst
= emit(OR(dst
, src_reg(dst
), src_reg(1)));
1211 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1215 emit(SHL(dst
, op
[0], op
[1]));
1219 emit(ASR(dst
, op
[0], op
[1]));
1223 emit(SHR(dst
, op
[0], op
[1]));
1227 op
[0] = fix_3src_operand(op
[0]);
1228 op
[1] = fix_3src_operand(op
[1]);
1229 op
[2] = fix_3src_operand(op
[2]);
1231 inst
= emit(MAD(dst
, op
[2], op
[1], op
[0]));
1232 inst
->saturate
= instr
->dest
.saturate
;
1236 inst
= emit_lrp(dst
, op
[0], op
[1], op
[2]);
1237 inst
->saturate
= instr
->dest
.saturate
;
1241 emit(CMP(dst_null_d(), op
[0], src_reg(0), BRW_CONDITIONAL_NZ
));
1242 inst
= emit(BRW_OPCODE_SEL
, dst
, op
[1], op
[2]);
1243 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1247 inst
= emit(BRW_OPCODE_DP2
, dst
, op
[0], op
[1]);
1248 inst
->saturate
= instr
->dest
.saturate
;
1252 inst
= emit(BRW_OPCODE_DP3
, dst
, op
[0], op
[1]);
1253 inst
->saturate
= instr
->dest
.saturate
;
1257 inst
= emit(BRW_OPCODE_DP4
, dst
, op
[0], op
[1]);
1258 inst
->saturate
= instr
->dest
.saturate
;
1263 case nir_op_bany4
: {
1264 dst_reg tmp
= dst_reg(this, glsl_type::bool_type
);
1265 tmp
.writemask
= brw_writemask_for_size(nir_op_infos
[instr
->op
].input_sizes
[0]);
1267 emit(CMP(tmp
, op
[0], src_reg(0), BRW_CONDITIONAL_NZ
));
1269 emit(MOV(dst
, src_reg(0)));
1270 inst
= emit(MOV(dst
, src_reg(~0)));
1271 inst
->predicate
= BRW_PREDICATE_ALIGN16_ANY4H
;
1280 unreachable("not reached: should be lowered by lower_source mods");
1283 unreachable("not reached: should be lowered by DIV_TO_MUL_RCP in the compiler");
1286 unreachable("not reached: should be lowered by MOD_TO_FLOOR in the compiler");
1290 unreachable("not reached: should be handled by ir_sub_to_add_neg");
1293 unreachable("Unimplemented ALU operation");
1298 vec4_visitor::nir_emit_jump(nir_jump_instr
*instr
)
1300 switch (instr
->type
) {
1301 case nir_jump_break
:
1302 emit(BRW_OPCODE_BREAK
);
1305 case nir_jump_continue
:
1306 emit(BRW_OPCODE_CONTINUE
);
1309 case nir_jump_return
:
1312 unreachable("unknown jump");
1316 enum ir_texture_opcode
1317 ir_texture_opcode_for_nir_texop(nir_texop texop
)
1319 enum ir_texture_opcode op
;
1322 case nir_texop_lod
: op
= ir_lod
; break;
1323 case nir_texop_query_levels
: op
= ir_query_levels
; break;
1324 case nir_texop_tex
: op
= ir_tex
; break;
1325 case nir_texop_tg4
: op
= ir_tg4
; break;
1326 case nir_texop_txb
: op
= ir_txb
; break;
1327 case nir_texop_txd
: op
= ir_txd
; break;
1328 case nir_texop_txf
: op
= ir_txf
; break;
1329 case nir_texop_txf_ms
: op
= ir_txf_ms
; break;
1330 case nir_texop_txl
: op
= ir_txl
; break;
1331 case nir_texop_txs
: op
= ir_txs
; break;
1333 unreachable("unknown texture opcode");
1339 glsl_type_for_nir_alu_type(nir_alu_type alu_type
,
1340 unsigned components
)
1343 case nir_type_float
:
1344 return glsl_type::vec(components
);
1346 return glsl_type::ivec(components
);
1347 case nir_type_unsigned
:
1348 return glsl_type::uvec(components
);
1350 return glsl_type::bvec(components
);
1352 return glsl_type::error_type
;
1355 return glsl_type::error_type
;
/* Emit vec4 IR for a NIR texture instruction.
 *
 * Walks instr's source list, loading each operand (coordinate, comparator,
 * gradients, LOD, sample index, offsets, dynamic sampler index) into the
 * registers that emit_texture() expects, then hands off to emit_texture()
 * to build the actual sampler message.
 */
void
vec4_visitor::nir_emit_texture(nir_tex_instr *instr)
{
   unsigned sampler = instr->sampler_index;
   src_reg sampler_reg = src_reg(sampler);
   src_reg coordinate;
   const glsl_type *coord_type = NULL;
   src_reg shadow_comparitor;
   src_reg offset_value;
   src_reg lod, lod2;
   src_reg sample_index;
   src_reg mcs;

   /* The destination type is derived from the NIR ALU type, sized to the
    * instruction's destination component count.
    */
   const glsl_type *dest_type =
      glsl_type_for_nir_alu_type(instr->dest_type,
                                 nir_tex_instr_dest_size(instr));
   dst_reg dest = get_nir_dest(instr->dest, instr->dest_type);

   /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
    * emitting anything other than setting up the constant result.
    */
   if (instr->op == nir_texop_tg4) {
      int swiz = GET_SWZ(key->tex.swizzles[sampler], instr->component);
      if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
         emit(MOV(dest, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
         return;
      }
   }

   /* Load the texture operation sources */
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_comparitor:
         /* Shadow comparison reference value: always a single float. */
         shadow_comparitor = get_nir_src(instr->src[i].src,
                                         BRW_REGISTER_TYPE_F, 1);
         break;

      case nir_tex_src_coord: {
         unsigned src_size = nir_tex_instr_src_size(instr, i);

         switch (instr->op) {
         case nir_texop_txf:
         case nir_texop_txf_ms:
            /* Texel-fetch operations take integer texel coordinates ... */
            coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D,
                                     src_size);
            coord_type = glsl_type::ivec(src_size);
            break;

         default:
            /* ... everything else takes normalized float coordinates. */
            coordinate = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
                                     src_size);
            coord_type = glsl_type::vec(src_size);
            break;
         }
         break;
      }

      case nir_tex_src_ddx:
         /* lod/lod2 double as the ddx/ddy gradient pair for txd. */
         lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
                           nir_tex_instr_src_size(instr, i));
         break;

      case nir_tex_src_ddy:
         lod2 = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F,
                            nir_tex_instr_src_size(instr, i));
         break;

      case nir_tex_src_lod:
         switch (instr->op) {
         case nir_texop_txs:
         case nir_texop_txf:
            /* txs/txf take an integer mip level. */
            lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
            break;

         default:
            lod = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_F, 1);
            break;
         }
         break;

      case nir_tex_src_ms_index: {
         sample_index = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 1);
         /* The coordinate must already have been seen: coord_type is set by
          * the nir_tex_src_coord case above and is consumed here.
          */
         assert(coord_type != NULL);
         if (devinfo->gen >= 7 &&
             key->tex.compressed_multisample_layout_mask & (1 << sampler)) {
            /* Gen7+ compressed multisample surfaces need an explicit MCS
             * fetch before the actual texel fetch.
             */
            mcs = emit_mcs_fetch(coord_type, coordinate, sampler_reg);
         } else {
            mcs = src_reg(0u);
         }
         mcs = retype(mcs, BRW_REGISTER_TYPE_UD);
         break;
      }

      case nir_tex_src_offset:
         /* Non-constant texel offset (two components loaded here). */
         offset_value = get_nir_src(instr->src[i].src, BRW_REGISTER_TYPE_D, 2);
         break;

      case nir_tex_src_sampler_offset: {
         /* The highest sampler which may be used by this operation is
          * the last element of the array. Mark it here, because the generator
          * doesn't have enough information to determine the bound.
          */
         uint32_t array_size = instr->sampler_array_size;
         uint32_t max_used = sampler + array_size - 1;
         if (instr->op == nir_texop_tg4) {
            /* Gather ops use a separate binding-table segment. */
            max_used += prog_data->base.binding_table.gather_texture_start;
         } else {
            max_used += prog_data->base.binding_table.texture_start;
         }

         brw_mark_surface_used(&prog_data->base, max_used);

         /* Emit code to evaluate the actual indexing expression */
         src_reg src = get_nir_src(instr->src[i].src, 1);
         src_reg temp(this, glsl_type::uint_type);
         emit(ADD(dst_reg(temp), src, src_reg(sampler)));
         /* Indexing must be uniform across the SIMD channels. */
         sampler_reg = emit_uniformize(temp);
         break;
      }

      case nir_tex_src_projector:
         unreachable("Should be lowered by do_lower_texture_projection");

      case nir_tex_src_bias:
         unreachable("LOD bias is not valid for vertex shaders.\n");

      default:
         unreachable("unknown texture source");
      }
   }

   /* Any nonzero component of the constant offset means the whole offset
    * has to be encoded into the message; brw_texture_offset packs all three
    * components at once.
    */
   uint32_t constant_offset = 0;
   for (unsigned i = 0; i < 3; i++) {
      if (instr->const_offset[i] != 0) {
         constant_offset = brw_texture_offset(instr->const_offset, 3);
         break;
      }
   }

   /* Stuff the channel select bits in the top of the texture offset */
   if (instr->op == nir_texop_tg4)
      constant_offset |= gather_channel(instr->component, sampler) << 16;

   ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op);

   /* txs on a cube array needs special handling downstream (depth is in
    * array slices of 6).
    */
   bool is_cube_array =
      instr->op == nir_texop_txs &&
      instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
      instr->is_array;

   emit_texture(op, dest, dest_type, coordinate, instr->coord_components,
                shadow_comparitor,
                lod, lod2, sample_index,
                constant_offset, offset_value,
                mcs, is_cube_array, sampler, sampler_reg);
}