/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
26 #include "glsl/ir_uniform.h"
31 vec4_visitor::emit_nir_code()
33 nir_shader
*nir
= prog
->nir
;
35 if (nir
->num_inputs
> 0)
36 nir_setup_inputs(nir
);
38 if (nir
->num_uniforms
> 0)
39 nir_setup_uniforms(nir
);
41 nir_setup_system_values(nir
);
43 /* get the main function and emit it */
44 nir_foreach_overload(nir
, overload
) {
45 assert(strcmp(overload
->function
->name
, "main") == 0);
46 assert(overload
->impl
);
47 nir_emit_impl(overload
->impl
);
52 vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr
*instr
)
56 switch (instr
->intrinsic
) {
57 case nir_intrinsic_load_vertex_id
:
58 unreachable("should be lowered by lower_vertex_id().");
60 case nir_intrinsic_load_vertex_id_zero_base
:
61 reg
= &this->nir_system_values
[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
];
62 if (reg
->file
== BAD_FILE
)
64 *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
,
68 case nir_intrinsic_load_base_vertex
:
69 reg
= &this->nir_system_values
[SYSTEM_VALUE_BASE_VERTEX
];
70 if (reg
->file
== BAD_FILE
)
71 *reg
= *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX
,
75 case nir_intrinsic_load_instance_id
:
76 reg
= &this->nir_system_values
[SYSTEM_VALUE_INSTANCE_ID
];
77 if (reg
->file
== BAD_FILE
)
78 *reg
= *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID
,
88 setup_system_values_block(nir_block
*block
, void *void_visitor
)
90 vec4_visitor
*v
= (vec4_visitor
*)void_visitor
;
92 nir_foreach_instr(block
, instr
) {
93 if (instr
->type
!= nir_instr_type_intrinsic
)
96 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
97 v
->nir_setup_system_value_intrinsic(intrin
);
104 vec4_visitor::nir_setup_system_values(nir_shader
*shader
)
106 nir_system_values
= ralloc_array(mem_ctx
, dst_reg
, SYSTEM_VALUE_MAX
);
108 nir_foreach_overload(shader
, overload
) {
109 assert(strcmp(overload
->function
->name
, "main") == 0);
110 assert(overload
->impl
);
111 nir_foreach_block(overload
->impl
, setup_system_values_block
, this);
116 vec4_visitor::nir_setup_inputs(nir_shader
*shader
)
118 nir_inputs
= ralloc_array(mem_ctx
, src_reg
, shader
->num_inputs
);
120 foreach_list_typed(nir_variable
, var
, node
, &shader
->inputs
) {
121 int offset
= var
->data
.driver_location
;
122 unsigned size
= type_size(var
->type
);
123 for (unsigned i
= 0; i
< size
; i
++) {
124 src_reg src
= src_reg(ATTR
, var
->data
.location
+ i
, var
->type
);
125 nir_inputs
[offset
+ i
] = src
;
131 vec4_visitor::nir_setup_uniforms(nir_shader
*shader
)
135 nir_uniform_driver_location
=
136 rzalloc_array(mem_ctx
, unsigned, this->uniform_array_size
);
139 foreach_list_typed(nir_variable
, var
, node
, &shader
->uniforms
) {
140 /* UBO's, atomics and samplers don't take up space in the
142 if (var
->interface_type
!= NULL
|| var
->type
->contains_atomic() ||
143 type_size(var
->type
) == 0) {
147 assert(uniforms
< uniform_array_size
);
148 this->uniform_size
[uniforms
] = type_size(var
->type
);
150 if (strncmp(var
->name
, "gl_", 3) == 0)
151 nir_setup_builtin_uniform(var
);
153 nir_setup_uniform(var
);
156 /* ARB_vertex_program is not supported yet */
157 assert("Not implemented");
162 vec4_visitor::nir_setup_uniform(nir_variable
*var
)
164 int namelen
= strlen(var
->name
);
166 /* The data for our (non-builtin) uniforms is stored in a series of
167 * gl_uniform_driver_storage structs for each subcomponent that
168 * glGetUniformLocation() could name. We know it's been set up in the same
169 * order we'd walk the type, so walk the list of storage and find anything
170 * with our name, or the prefix of a component that starts with our name.
172 for (unsigned u
= 0; u
< shader_prog
->NumUniformStorage
; u
++) {
173 struct gl_uniform_storage
*storage
= &shader_prog
->UniformStorage
[u
];
175 if (storage
->builtin
)
178 if (strncmp(var
->name
, storage
->name
, namelen
) != 0 ||
179 (storage
->name
[namelen
] != 0 &&
180 storage
->name
[namelen
] != '.' &&
181 storage
->name
[namelen
] != '[')) {
185 gl_constant_value
*components
= storage
->storage
;
186 unsigned vector_count
= (MAX2(storage
->array_elements
, 1) *
187 storage
->type
->matrix_columns
);
189 for (unsigned s
= 0; s
< vector_count
; s
++) {
190 assert(uniforms
< uniform_array_size
);
191 uniform_vector_size
[uniforms
] = storage
->type
->vector_elements
;
194 for (i
= 0; i
< uniform_vector_size
[uniforms
]; i
++) {
195 stage_prog_data
->param
[uniforms
* 4 + i
] = components
;
199 static const gl_constant_value zero
= { 0.0 };
200 stage_prog_data
->param
[uniforms
* 4 + i
] = &zero
;
203 nir_uniform_driver_location
[uniforms
] = var
->data
.driver_location
;
210 vec4_visitor::nir_setup_builtin_uniform(nir_variable
*var
)
212 const nir_state_slot
*const slots
= var
->state_slots
;
213 assert(var
->state_slots
!= NULL
);
215 for (unsigned int i
= 0; i
< var
->num_state_slots
; i
++) {
216 /* This state reference has already been setup by ir_to_mesa,
217 * but we'll get the same index back here. We can reference
218 * ParameterValues directly, since unlike brw_fs.cpp, we never
219 * add new state references during compile.
221 int index
= _mesa_add_state_reference(this->prog
->Parameters
,
222 (gl_state_index
*)slots
[i
].tokens
);
223 gl_constant_value
*values
=
224 &this->prog
->Parameters
->ParameterValues
[index
][0];
226 assert(uniforms
< uniform_array_size
);
228 for (unsigned j
= 0; j
< 4; j
++)
229 stage_prog_data
->param
[uniforms
* 4 + j
] =
230 &values
[GET_SWZ(slots
[i
].swizzle
, j
)];
232 this->uniform_vector_size
[uniforms
] =
233 (var
->type
->is_scalar() || var
->type
->is_vector() ||
234 var
->type
->is_matrix() ? var
->type
->vector_elements
: 4);
236 nir_uniform_driver_location
[uniforms
] = var
->data
.driver_location
;
242 vec4_visitor::nir_emit_impl(nir_function_impl
*impl
)
244 nir_locals
= ralloc_array(mem_ctx
, dst_reg
, impl
->reg_alloc
);
246 foreach_list_typed(nir_register
, reg
, node
, &impl
->registers
) {
247 unsigned array_elems
=
248 reg
->num_array_elems
== 0 ? 1 : reg
->num_array_elems
;
250 nir_locals
[reg
->index
] = dst_reg(GRF
, alloc
.allocate(array_elems
));
253 nir_ssa_values
= ralloc_array(mem_ctx
, dst_reg
, impl
->ssa_alloc
);
255 nir_emit_cf_list(&impl
->body
);
259 vec4_visitor::nir_emit_cf_list(exec_list
*list
)
261 exec_list_validate(list
);
262 foreach_list_typed(nir_cf_node
, node
, node
, list
) {
263 switch (node
->type
) {
265 nir_emit_if(nir_cf_node_as_if(node
));
268 case nir_cf_node_loop
:
269 nir_emit_loop(nir_cf_node_as_loop(node
));
272 case nir_cf_node_block
:
273 nir_emit_block(nir_cf_node_as_block(node
));
277 unreachable("Invalid CFG node block");
283 vec4_visitor::nir_emit_if(nir_if
*if_stmt
)
285 /* First, put the condition in f0 */
286 src_reg condition
= get_nir_src(if_stmt
->condition
, BRW_REGISTER_TYPE_D
, 1);
287 vec4_instruction
*inst
= emit(MOV(dst_null_d(), condition
));
288 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
290 emit(IF(BRW_PREDICATE_NORMAL
));
292 nir_emit_cf_list(&if_stmt
->then_list
);
294 /* note: if the else is empty, dead CF elimination will remove it */
295 emit(BRW_OPCODE_ELSE
);
297 nir_emit_cf_list(&if_stmt
->else_list
);
299 emit(BRW_OPCODE_ENDIF
);
303 vec4_visitor::nir_emit_loop(nir_loop
*loop
)
307 nir_emit_cf_list(&loop
->body
);
309 emit(BRW_OPCODE_WHILE
);
313 vec4_visitor::nir_emit_block(nir_block
*block
)
315 nir_foreach_instr(block
, instr
) {
316 nir_emit_instr(instr
);
321 vec4_visitor::nir_emit_instr(nir_instr
*instr
)
323 this->base_ir
= instr
;
325 switch (instr
->type
) {
326 case nir_instr_type_load_const
:
327 nir_emit_load_const(nir_instr_as_load_const(instr
));
330 case nir_instr_type_intrinsic
:
331 nir_emit_intrinsic(nir_instr_as_intrinsic(instr
));
334 case nir_instr_type_alu
:
335 nir_emit_alu(nir_instr_as_alu(instr
));
338 case nir_instr_type_jump
:
339 nir_emit_jump(nir_instr_as_jump(instr
));
342 case nir_instr_type_tex
:
343 nir_emit_texture(nir_instr_as_tex(instr
));
347 fprintf(stderr
, "VS instruction not yet implemented by NIR->vec4\n");
353 dst_reg_for_nir_reg(vec4_visitor
*v
, nir_register
*nir_reg
,
354 unsigned base_offset
, nir_src
*indirect
)
358 reg
= v
->nir_locals
[nir_reg
->index
];
359 reg
= offset(reg
, base_offset
);
362 new(v
->mem_ctx
) src_reg(v
->get_nir_src(*indirect
,
370 vec4_visitor::get_nir_dest(nir_dest dest
)
372 assert(!dest
.is_ssa
);
373 return dst_reg_for_nir_reg(this, dest
.reg
.reg
, dest
.reg
.base_offset
,
378 vec4_visitor::get_nir_dest(nir_dest dest
, enum brw_reg_type type
)
380 return retype(get_nir_dest(dest
), type
);
384 vec4_visitor::get_nir_dest(nir_dest dest
, nir_alu_type type
)
386 return get_nir_dest(dest
, brw_type_for_nir_type(type
));
390 vec4_visitor::get_nir_src(nir_src src
, enum brw_reg_type type
,
391 unsigned num_components
)
396 assert(src
.ssa
!= NULL
);
397 reg
= nir_ssa_values
[src
.ssa
->index
];
400 reg
= dst_reg_for_nir_reg(this, src
.reg
.reg
, src
.reg
.base_offset
,
404 reg
= retype(reg
, type
);
406 src_reg reg_as_src
= src_reg(reg
);
407 reg_as_src
.swizzle
= brw_swizzle_for_size(num_components
);
412 vec4_visitor::get_nir_src(nir_src src
, nir_alu_type type
,
413 unsigned num_components
)
415 return get_nir_src(src
, brw_type_for_nir_type(type
), num_components
);
419 vec4_visitor::get_nir_src(nir_src src
, unsigned num_components
)
421 /* if type is not specified, default to signed int */
422 return get_nir_src(src
, nir_type_int
, num_components
);
426 vec4_visitor::nir_emit_load_const(nir_load_const_instr
*instr
)
428 dst_reg reg
= dst_reg(GRF
, alloc
.allocate(1));
429 reg
.type
= BRW_REGISTER_TYPE_F
;
431 /* @FIXME: consider emitting vector operations to save some MOVs in
432 * cases where the components are representable in 8 bits.
433 * By now, we emit a MOV for each component.
435 for (unsigned i
= 0; i
< instr
->def
.num_components
; ++i
) {
436 reg
.writemask
= 1 << i
;
437 emit(MOV(reg
, src_reg(instr
->value
.f
[i
])));
440 /* Set final writemask */
441 reg
.writemask
= brw_writemask_for_size(instr
->def
.num_components
);
443 nir_ssa_values
[instr
->def
.index
] = reg
;
447 vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr
*instr
)
452 bool has_indirect
= false;
454 switch (instr
->intrinsic
) {
456 case nir_intrinsic_load_input_indirect
:
459 case nir_intrinsic_load_input
: {
460 int offset
= instr
->const_index
[0];
461 src
= nir_inputs
[offset
];
464 dest
.reladdr
= new(mem_ctx
) src_reg(get_nir_src(instr
->src
[0],
468 dest
= get_nir_dest(instr
->dest
, src
.type
);
469 dest
.writemask
= brw_writemask_for_size(instr
->num_components
);
471 emit(MOV(dest
, src
));
475 case nir_intrinsic_store_output_indirect
:
478 case nir_intrinsic_store_output
: {
479 int varying
= instr
->const_index
[0];
481 src
= get_nir_src(instr
->src
[0], BRW_REGISTER_TYPE_F
,
482 instr
->num_components
);
486 dest
.reladdr
= new(mem_ctx
) src_reg(get_nir_src(instr
->src
[1],
490 output_reg
[varying
] = dest
;
494 case nir_intrinsic_load_vertex_id
:
495 unreachable("should be lowered by lower_vertex_id()");
497 case nir_intrinsic_load_vertex_id_zero_base
: {
499 src_reg(nir_system_values
[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
]);
500 assert(vertex_id
.file
!= BAD_FILE
);
501 dest
= get_nir_dest(instr
->dest
, vertex_id
.type
);
502 emit(MOV(dest
, vertex_id
));
506 case nir_intrinsic_load_base_vertex
: {
507 src_reg base_vertex
=
508 src_reg(nir_system_values
[SYSTEM_VALUE_BASE_VERTEX
]);
509 assert(base_vertex
.file
!= BAD_FILE
);
510 dest
= get_nir_dest(instr
->dest
, base_vertex
.type
);
511 emit(MOV(dest
, base_vertex
));
515 case nir_intrinsic_load_instance_id
: {
516 src_reg instance_id
=
517 src_reg(nir_system_values
[SYSTEM_VALUE_INSTANCE_ID
]);
518 assert(instance_id
.file
!= BAD_FILE
);
519 dest
= get_nir_dest(instr
->dest
, instance_id
.type
);
520 emit(MOV(dest
, instance_id
));
524 case nir_intrinsic_load_uniform_indirect
:
527 case nir_intrinsic_load_uniform
: {
528 int uniform
= instr
->const_index
[0];
530 dest
= get_nir_dest(instr
->dest
);
533 /* Split addressing into uniform and offset */
534 int offset
= uniform
- nir_uniform_driver_location
[uniform
];
538 assert(uniform
>= 0);
540 src
= src_reg(dst_reg(UNIFORM
, uniform
));
541 src
.reg_offset
= offset
;
542 src_reg tmp
= get_nir_src(instr
->src
[0], BRW_REGISTER_TYPE_D
, 1);
543 src
.reladdr
= new(mem_ctx
) src_reg(tmp
);
545 src
= src_reg(dst_reg(UNIFORM
, uniform
));
548 emit(MOV(dest
, src
));
552 case nir_intrinsic_atomic_counter_read
:
553 case nir_intrinsic_atomic_counter_inc
:
554 case nir_intrinsic_atomic_counter_dec
: {
555 unsigned surf_index
= prog_data
->base
.binding_table
.abo_start
+
556 (unsigned) instr
->const_index
[0];
557 src_reg offset
= get_nir_src(instr
->src
[0], nir_type_int
,
558 instr
->num_components
);
559 dest
= get_nir_dest(instr
->dest
);
561 switch (instr
->intrinsic
) {
562 case nir_intrinsic_atomic_counter_inc
:
563 emit_untyped_atomic(BRW_AOP_INC
, surf_index
, dest
, offset
,
564 src_reg(), src_reg());
566 case nir_intrinsic_atomic_counter_dec
:
567 emit_untyped_atomic(BRW_AOP_PREDEC
, surf_index
, dest
, offset
,
568 src_reg(), src_reg());
570 case nir_intrinsic_atomic_counter_read
:
571 emit_untyped_surface_read(surf_index
, dest
, offset
);
574 unreachable("Unreachable");
577 brw_mark_surface_used(stage_prog_data
, surf_index
);
581 case nir_intrinsic_load_ubo_indirect
:
584 case nir_intrinsic_load_ubo
: {
585 nir_const_value
*const_block_index
= nir_src_as_const_value(instr
->src
[0]);
588 dest
= get_nir_dest(instr
->dest
);
590 if (const_block_index
) {
591 /* The block index is a constant, so just emit the binding table entry
594 surf_index
= src_reg(prog_data
->base
.binding_table
.ubo_start
+
595 const_block_index
->u
[0]);
597 /* The block index is not a constant. Evaluate the index expression
598 * per-channel and add the base UBO index; we have to select a value
599 * from any live channel.
601 surf_index
= src_reg(this, glsl_type::uint_type
);
602 emit(ADD(dst_reg(surf_index
), get_nir_src(instr
->src
[0], nir_type_int
,
603 instr
->num_components
),
604 src_reg(prog_data
->base
.binding_table
.ubo_start
)));
605 surf_index
= emit_uniformize(surf_index
);
607 /* Assume this may touch any UBO. It would be nice to provide
608 * a tighter bound, but the array information is already lowered away.
610 brw_mark_surface_used(&prog_data
->base
,
611 prog_data
->base
.binding_table
.ubo_start
+
612 shader_prog
->NumUniformBlocks
- 1);
615 unsigned const_offset
= instr
->const_index
[0];
619 offset
= src_reg(const_offset
/ 16);
621 offset
= src_reg(this, glsl_type::uint_type
);
622 emit(SHR(dst_reg(offset
), get_nir_src(instr
->src
[1], nir_type_int
, 1),
626 src_reg packed_consts
= src_reg(this, glsl_type::vec4_type
);
627 packed_consts
.type
= dest
.type
;
629 emit_pull_constant_load_reg(dst_reg(packed_consts
),
632 NULL
, NULL
/* before_block/inst */);
634 packed_consts
.swizzle
= brw_swizzle_for_size(instr
->num_components
);
635 packed_consts
.swizzle
+= BRW_SWIZZLE4(const_offset
% 16 / 4,
636 const_offset
% 16 / 4,
637 const_offset
% 16 / 4,
638 const_offset
% 16 / 4);
640 emit(MOV(dest
, packed_consts
));
645 unreachable("Unknown intrinsic");
650 brw_swizzle_for_nir_swizzle(uint8_t swizzle
[4])
652 return BRW_SWIZZLE4(swizzle
[0], swizzle
[1], swizzle
[2], swizzle
[3]);
655 static enum brw_conditional_mod
656 brw_conditional_for_nir_comparison(nir_op op
)
662 return BRW_CONDITIONAL_L
;
667 return BRW_CONDITIONAL_GE
;
671 case nir_op_ball_fequal2
:
672 case nir_op_ball_iequal2
:
673 case nir_op_ball_fequal3
:
674 case nir_op_ball_iequal3
:
675 case nir_op_ball_fequal4
:
676 case nir_op_ball_iequal4
:
677 return BRW_CONDITIONAL_Z
;
681 case nir_op_bany_fnequal2
:
682 case nir_op_bany_inequal2
:
683 case nir_op_bany_fnequal3
:
684 case nir_op_bany_inequal3
:
685 case nir_op_bany_fnequal4
:
686 case nir_op_bany_inequal4
:
687 return BRW_CONDITIONAL_NZ
;
690 unreachable("not reached: bad operation for comparison");
695 vec4_visitor::nir_emit_alu(nir_alu_instr
*instr
)
697 vec4_instruction
*inst
;
699 dst_reg dst
= get_nir_dest(instr
->dest
.dest
,
700 nir_op_infos
[instr
->op
].output_type
);
701 dst
.writemask
= instr
->dest
.write_mask
;
704 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++) {
705 op
[i
] = get_nir_src(instr
->src
[i
].src
,
706 nir_op_infos
[instr
->op
].input_types
[i
], 4);
707 op
[i
].swizzle
= brw_swizzle_for_nir_swizzle(instr
->src
[i
].swizzle
);
708 op
[i
].abs
= instr
->src
[i
].abs
;
709 op
[i
].negate
= instr
->src
[i
].negate
;
715 inst
= emit(MOV(dst
, op
[0]));
716 inst
->saturate
= instr
->dest
.saturate
;
722 unreachable("not reached: should be handled by lower_vec_to_movs()");
726 inst
= emit(MOV(dst
, op
[0]));
727 inst
->saturate
= instr
->dest
.saturate
;
732 inst
= emit(MOV(dst
, op
[0]));
738 inst
= emit(ADD(dst
, op
[0], op
[1]));
739 inst
->saturate
= instr
->dest
.saturate
;
743 inst
= emit(MUL(dst
, op
[0], op
[1]));
744 inst
->saturate
= instr
->dest
.saturate
;
748 nir_const_value
*value0
= nir_src_as_const_value(instr
->src
[0].src
);
749 nir_const_value
*value1
= nir_src_as_const_value(instr
->src
[1].src
);
751 /* For integer multiplication, the MUL uses the low 16 bits of one of
752 * the operands (src0 through SNB, src1 on IVB and later). The MACH
753 * accumulates in the contribution of the upper 16 bits of that
754 * operand. If we can determine that one of the args is in the low
755 * 16 bits, though, we can just emit a single MUL.
757 if (value0
&& value0
->u
[0] < (1 << 16)) {
758 if (devinfo
->gen
< 7)
759 emit(MUL(dst
, op
[0], op
[1]));
761 emit(MUL(dst
, op
[1], op
[0]));
762 } else if (value1
&& value1
->u
[0] < (1 << 16)) {
763 if (devinfo
->gen
< 7)
764 emit(MUL(dst
, op
[1], op
[0]));
766 emit(MUL(dst
, op
[0], op
[1]));
768 struct brw_reg acc
= retype(brw_acc_reg(8), dst
.type
);
770 emit(MUL(acc
, op
[0], op
[1]));
771 emit(MACH(dst_null_d(), op
[0], op
[1]));
772 emit(MOV(dst
, src_reg(acc
)));
777 case nir_op_imul_high
:
778 case nir_op_umul_high
: {
779 struct brw_reg acc
= retype(brw_acc_reg(8), dst
.type
);
781 emit(MUL(acc
, op
[0], op
[1]));
782 emit(MACH(dst
, op
[0], op
[1]));
787 inst
= emit_math(SHADER_OPCODE_RCP
, dst
, op
[0]);
788 inst
->saturate
= instr
->dest
.saturate
;
792 inst
= emit_math(SHADER_OPCODE_EXP2
, dst
, op
[0]);
793 inst
->saturate
= instr
->dest
.saturate
;
797 inst
= emit_math(SHADER_OPCODE_LOG2
, dst
, op
[0]);
798 inst
->saturate
= instr
->dest
.saturate
;
802 inst
= emit_math(SHADER_OPCODE_SIN
, dst
, op
[0]);
803 inst
->saturate
= instr
->dest
.saturate
;
807 inst
= emit_math(SHADER_OPCODE_COS
, dst
, op
[0]);
808 inst
->saturate
= instr
->dest
.saturate
;
813 emit_math(SHADER_OPCODE_INT_QUOTIENT
, dst
, op
[0], op
[1]);
817 emit_math(SHADER_OPCODE_INT_REMAINDER
, dst
, op
[0], op
[1]);
821 unreachable("not reached: should be handled by ldexp_to_arith()");
824 inst
= emit_math(SHADER_OPCODE_SQRT
, dst
, op
[0]);
825 inst
->saturate
= instr
->dest
.saturate
;
829 inst
= emit_math(SHADER_OPCODE_RSQ
, dst
, op
[0]);
830 inst
->saturate
= instr
->dest
.saturate
;
834 inst
= emit_math(SHADER_OPCODE_POW
, dst
, op
[0], op
[1]);
835 inst
->saturate
= instr
->dest
.saturate
;
838 case nir_op_uadd_carry
: {
839 struct brw_reg acc
= retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD
);
841 emit(ADDC(dst_null_ud(), op
[0], op
[1]));
842 emit(MOV(dst
, src_reg(acc
)));
846 case nir_op_usub_borrow
: {
847 struct brw_reg acc
= retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD
);
849 emit(SUBB(dst_null_ud(), op
[0], op
[1]));
850 emit(MOV(dst
, src_reg(acc
)));
855 inst
= emit(RNDZ(dst
, op
[0]));
856 inst
->saturate
= instr
->dest
.saturate
;
860 src_reg tmp
= src_reg(this, glsl_type::float_type
);
862 brw_swizzle_for_size(instr
->src
[0].src
.is_ssa
?
863 instr
->src
[0].src
.ssa
->num_components
:
864 instr
->src
[0].src
.reg
.reg
->num_components
);
866 op
[0].negate
= !op
[0].negate
;
867 emit(RNDD(dst_reg(tmp
), op
[0]));
869 inst
= emit(MOV(dst
, tmp
));
870 inst
->saturate
= instr
->dest
.saturate
;
875 inst
= emit(RNDD(dst
, op
[0]));
876 inst
->saturate
= instr
->dest
.saturate
;
880 inst
= emit(FRC(dst
, op
[0]));
881 inst
->saturate
= instr
->dest
.saturate
;
884 case nir_op_fround_even
:
885 inst
= emit(RNDE(dst
, op
[0]));
886 inst
->saturate
= instr
->dest
.saturate
;
892 inst
= emit_minmax(BRW_CONDITIONAL_L
, dst
, op
[0], op
[1]);
893 inst
->saturate
= instr
->dest
.saturate
;
899 inst
= emit_minmax(BRW_CONDITIONAL_GE
, dst
, op
[0], op
[1]);
900 inst
->saturate
= instr
->dest
.saturate
;
904 case nir_op_fddx_coarse
:
905 case nir_op_fddx_fine
:
907 case nir_op_fddy_coarse
:
908 case nir_op_fddy_fine
:
909 unreachable("derivatives are not valid in vertex shaders");
921 emit(CMP(dst
, op
[0], op
[1],
922 brw_conditional_for_nir_comparison(instr
->op
)));
925 case nir_op_ball_fequal2
:
926 case nir_op_ball_iequal2
:
927 case nir_op_ball_fequal3
:
928 case nir_op_ball_iequal3
:
929 case nir_op_ball_fequal4
:
930 case nir_op_ball_iequal4
: {
931 dst_reg tmp
= dst_reg(this, glsl_type::bool_type
);
934 case nir_op_ball_fequal2
:
935 case nir_op_ball_iequal2
:
936 tmp
.writemask
= WRITEMASK_XY
;
938 case nir_op_ball_fequal3
:
939 case nir_op_ball_iequal3
:
940 tmp
.writemask
= WRITEMASK_XYZ
;
942 case nir_op_ball_fequal4
:
943 case nir_op_ball_iequal4
:
944 tmp
.writemask
= WRITEMASK_XYZW
;
947 unreachable("not reached");
950 emit(CMP(tmp
, op
[0], op
[1],
951 brw_conditional_for_nir_comparison(instr
->op
)));
952 emit(MOV(dst
, src_reg(0)));
953 inst
= emit(MOV(dst
, src_reg(~0)));
954 inst
->predicate
= BRW_PREDICATE_ALIGN16_ALL4H
;
958 case nir_op_bany_fnequal2
:
959 case nir_op_bany_inequal2
:
960 case nir_op_bany_fnequal3
:
961 case nir_op_bany_inequal3
:
962 case nir_op_bany_fnequal4
:
963 case nir_op_bany_inequal4
: {
964 dst_reg tmp
= dst_reg(this, glsl_type::bool_type
);
967 case nir_op_bany_fnequal2
:
968 case nir_op_bany_inequal2
:
969 tmp
.writemask
= WRITEMASK_XY
;
971 case nir_op_bany_fnequal3
:
972 case nir_op_bany_inequal3
:
973 tmp
.writemask
= WRITEMASK_XYZ
;
975 case nir_op_bany_fnequal4
:
976 case nir_op_bany_inequal4
:
977 tmp
.writemask
= WRITEMASK_XYZW
;
980 unreachable("not reached");
983 emit(CMP(tmp
, op
[0], op
[1],
984 brw_conditional_for_nir_comparison(instr
->op
)));
986 emit(MOV(dst
, src_reg(0)));
987 inst
= emit(MOV(dst
, src_reg(~0)));
988 inst
->predicate
= BRW_PREDICATE_ALIGN16_ANY4H
;
993 emit(NOT(dst
, op
[0]));
997 emit(XOR(dst
, op
[0], op
[1]));
1001 emit(OR(dst
, op
[0], op
[1]));
1005 emit(AND(dst
, op
[0], op
[1]));
1009 emit(AND(dst
, op
[0], src_reg(1)));
1013 op
[0].type
= BRW_REGISTER_TYPE_D
;
1014 dst
.type
= BRW_REGISTER_TYPE_D
;
1015 emit(AND(dst
, op
[0], src_reg(0x3f800000u
)));
1016 dst
.type
= BRW_REGISTER_TYPE_F
;
1020 emit(CMP(dst
, op
[0], src_reg(0.0f
), BRW_CONDITIONAL_NZ
));
1024 emit(CMP(dst
, op
[0], src_reg(0), BRW_CONDITIONAL_NZ
));
1027 case nir_op_fnoise1_1
:
1028 case nir_op_fnoise1_2
:
1029 case nir_op_fnoise1_3
:
1030 case nir_op_fnoise1_4
:
1031 case nir_op_fnoise2_1
:
1032 case nir_op_fnoise2_2
:
1033 case nir_op_fnoise2_3
:
1034 case nir_op_fnoise2_4
:
1035 case nir_op_fnoise3_1
:
1036 case nir_op_fnoise3_2
:
1037 case nir_op_fnoise3_3
:
1038 case nir_op_fnoise3_4
:
1039 case nir_op_fnoise4_1
:
1040 case nir_op_fnoise4_2
:
1041 case nir_op_fnoise4_3
:
1042 case nir_op_fnoise4_4
:
1043 unreachable("not reached: should be handled by lower_noise");
1045 case nir_op_unpack_half_2x16_split_x
:
1046 case nir_op_unpack_half_2x16_split_y
:
1047 case nir_op_pack_half_2x16_split
:
1048 unreachable("not reached: should not occur in vertex shader");
1050 case nir_op_unpack_snorm_2x16
:
1051 case nir_op_unpack_unorm_2x16
:
1052 case nir_op_pack_snorm_2x16
:
1053 case nir_op_pack_unorm_2x16
:
1054 unreachable("not reached: should be handled by lower_packing_builtins");
1056 case nir_op_unpack_half_2x16
:
1057 /* As NIR does not guarantee that we have a correct swizzle outside the
1058 * boundaries of a vector, and the implementation of emit_unpack_half_2x16
1059 * uses the source operand in an operation with WRITEMASK_Y while our
1060 * source operand has only size 1, it accessed incorrect data producing
1061 * regressions in Piglit. We repeat the swizzle of the first component on the
1062 * rest of components to avoid regressions. In the vec4_visitor IR code path
1063 * this is not needed because the operand has already the correct swizzle.
1065 op
[0].swizzle
= brw_compose_swizzle(BRW_SWIZZLE_XXXX
, op
[0].swizzle
);
1066 emit_unpack_half_2x16(dst
, op
[0]);
1069 case nir_op_pack_half_2x16
:
1070 emit_pack_half_2x16(dst
, op
[0]);
1073 case nir_op_unpack_unorm_4x8
:
1074 emit_unpack_unorm_4x8(dst
, op
[0]);
1077 case nir_op_pack_unorm_4x8
:
1078 emit_pack_unorm_4x8(dst
, op
[0]);
1081 case nir_op_unpack_snorm_4x8
:
1082 emit_unpack_snorm_4x8(dst
, op
[0]);
1085 case nir_op_pack_snorm_4x8
:
1086 emit_pack_snorm_4x8(dst
, op
[0]);
1089 case nir_op_bitfield_reverse
:
1090 emit(BFREV(dst
, op
[0]));
1093 case nir_op_bit_count
:
1094 emit(CBIT(dst
, op
[0]));
1097 case nir_op_ufind_msb
:
1098 case nir_op_ifind_msb
: {
1099 src_reg temp
= src_reg(this, glsl_type::uint_type
);
1101 inst
= emit(FBH(dst_reg(temp
), op
[0]));
1102 inst
->dst
.writemask
= WRITEMASK_XYZW
;
1104 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1105 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1106 * subtract the result from 31 to convert the MSB count into an LSB count.
1109 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1110 temp
.swizzle
= BRW_SWIZZLE_NOOP
;
1111 emit(MOV(dst
, temp
));
1113 src_reg src_tmp
= src_reg(dst
);
1114 emit(CMP(dst_null_d(), src_tmp
, src_reg(-1), BRW_CONDITIONAL_NZ
));
1116 src_tmp
.negate
= true;
1117 inst
= emit(ADD(dst
, src_tmp
, src_reg(31)));
1118 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1122 case nir_op_find_lsb
:
1123 emit(FBL(dst
, op
[0]));
1126 case nir_op_ubitfield_extract
:
1127 case nir_op_ibitfield_extract
:
1128 op
[0] = fix_3src_operand(op
[0]);
1129 op
[1] = fix_3src_operand(op
[1]);
1130 op
[2] = fix_3src_operand(op
[2]);
1132 emit(BFE(dst
, op
[2], op
[1], op
[0]));
1136 emit(BFI1(dst
, op
[0], op
[1]));
1140 op
[0] = fix_3src_operand(op
[0]);
1141 op
[1] = fix_3src_operand(op
[1]);
1142 op
[2] = fix_3src_operand(op
[2]);
1144 emit(BFI2(dst
, op
[0], op
[1], op
[2]));
1147 case nir_op_bitfield_insert
:
1148 unreachable("not reached: should be handled by "
1149 "lower_instructions::bitfield_insert_to_bfm_bfi");
1152 /* AND(val, 0x80000000) gives the sign bit.
1154 * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
1157 emit(CMP(dst_null_f(), op
[0], src_reg(0.0f
), BRW_CONDITIONAL_NZ
));
1159 op
[0].type
= BRW_REGISTER_TYPE_UD
;
1160 dst
.type
= BRW_REGISTER_TYPE_UD
;
1161 emit(AND(dst
, op
[0], src_reg(0x80000000u
)));
1163 inst
= emit(OR(dst
, src_reg(dst
), src_reg(0x3f800000u
)));
1164 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1165 dst
.type
= BRW_REGISTER_TYPE_F
;
1167 if (instr
->dest
.saturate
) {
1168 inst
= emit(MOV(dst
, src_reg(dst
)));
1169 inst
->saturate
= true;
1174 /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
1175 * -> non-negative val generates 0x00000000.
1176 * Predicated OR sets 1 if val is positive.
1178 emit(CMP(dst_null_d(), op
[0], src_reg(0), BRW_CONDITIONAL_G
));
1179 emit(ASR(dst
, op
[0], src_reg(31)));
1180 inst
= emit(OR(dst
, src_reg(dst
), src_reg(1)));
1181 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1185 emit(SHL(dst
, op
[0], op
[1]));
1189 emit(ASR(dst
, op
[0], op
[1]));
1193 emit(SHR(dst
, op
[0], op
[1]));
1197 op
[0] = fix_3src_operand(op
[0]);
1198 op
[1] = fix_3src_operand(op
[1]);
1199 op
[2] = fix_3src_operand(op
[2]);
1201 inst
= emit(MAD(dst
, op
[2], op
[1], op
[0]));
1202 inst
->saturate
= instr
->dest
.saturate
;
1206 inst
= emit_lrp(dst
, op
[0], op
[1], op
[2]);
1207 inst
->saturate
= instr
->dest
.saturate
;
1211 emit(CMP(dst_null_d(), op
[0], src_reg(0), BRW_CONDITIONAL_NZ
));
1212 inst
= emit(BRW_OPCODE_SEL
, dst
, op
[1], op
[2]);
1213 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1217 unreachable("Unimplemented ALU operation");
1222 vec4_visitor::nir_emit_jump(nir_jump_instr
*instr
)
1224 /* @TODO: Not yet implemented */
1228 vec4_visitor::nir_emit_texture(nir_tex_instr
*instr
)
1230 /* @TODO: Not yet implemented */