* allocating them. With ARB_enhanced_layouts, multiple output variables
* may occupy the same slot, but have different type sizes.
*/
- nir_foreach_variable(var, &nir->outputs) {
+ nir_foreach_shader_out_variable(var, nir) {
const int loc = var->data.driver_location;
const unsigned var_vec4s =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
uniforms = nir->num_uniforms / 4;
- if (stage == MESA_SHADER_COMPUTE) {
+ if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL) {
/* Add uniforms for builtins after regular NIR uniforms. */
assert(uniforms == prog_data->nr_params);
uint32_t *param;
- if (nir->info.cs.local_size_variable) {
+ if (nir->info.cs.local_size_variable &&
+ compiler->lower_variable_group_size) {
param = brw_stage_prog_data_add_params(prog_data, 3);
for (unsigned i = 0; i < 3; i++) {
param[i] = (BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i);
break;
case nir_intrinsic_load_work_group_id:
- assert(v->stage == MESA_SHADER_COMPUTE);
+ assert(v->stage == MESA_SHADER_COMPUTE ||
+ v->stage == MESA_SHADER_KERNEL);
reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
if (reg->file == BAD_FILE)
*reg = *v->emit_cs_work_group_id_setup();
nir_emit_fs_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_KERNEL:
nir_emit_cs_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
default:
case nir_op_flt32:
case nir_op_fge32:
case nir_op_feq32:
- case nir_op_fne32: {
+ case nir_op_fneu32: {
fs_reg dest = result;
const uint32_t bit_size = nir_src_bit_size(instr->src[0].src);
alu->op != nir_op_bcsel &&
(devinfo->gen > 5 ||
(alu->instr.pass_flags & BRW_NIR_BOOLEAN_MASK) != BRW_NIR_BOOLEAN_NEEDS_RESOLVE ||
- alu->op == nir_op_fne32 || alu->op == nir_op_feq32 ||
+ alu->op == nir_op_fneu32 || alu->op == nir_op_feq32 ||
alu->op == nir_op_flt32 || alu->op == nir_op_fge32 ||
alu->op == nir_op_ine32 || alu->op == nir_op_ieq32 ||
alu->op == nir_op_ilt32 || alu->op == nir_op_ige32 ||
cmp->predicate = BRW_PREDICATE_NORMAL;
cmp->flag_subreg = sample_mask_flag_subreg(this);
- if (devinfo->gen >= 6) {
- /* Due to the way we implement discard, the jump will only happen
- * when the whole quad is discarded. So we can do this even for
- * demote as it won't break its uniformity promises.
- */
- emit_discard_jump();
- }
+ emit_discard_jump();
if (devinfo->gen < 7)
limit_dispatch_width(
fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
{
- assert(stage == MESA_SHADER_COMPUTE);
+ assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL);
struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
fs_reg dest;
case nir_intrinsic_load_shared: {
assert(devinfo->gen >= 7);
- assert(stage == MESA_SHADER_COMPUTE);
+ assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL);
const unsigned bit_size = nir_dest_bit_size(instr->dest);
fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS];
case nir_intrinsic_store_shared: {
assert(devinfo->gen >= 7);
- assert(stage == MESA_SHADER_COMPUTE);
+ assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL);
const unsigned bit_size = nir_src_bit_size(instr->src[0]);
fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS];
}
case nir_intrinsic_load_local_group_size: {
+ assert(compiler->lower_variable_group_size);
+ assert(nir->info.cs.local_size_variable);
for (unsigned i = 0; i < 3; i++) {
bld.MOV(retype(offset(dest, bld, i), BRW_REGISTER_TYPE_UD),
group_size[i]);
case nir_intrinsic_bindless_image_atomic_xor:
case nir_intrinsic_bindless_image_atomic_exchange:
case nir_intrinsic_bindless_image_atomic_comp_swap: {
- if (stage == MESA_SHADER_FRAGMENT &&
- instr->intrinsic != nir_intrinsic_image_load)
- brw_wm_prog_data(prog_data)->has_side_effects = true;
-
/* Get some metadata from the image intrinsic. */
const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
BRW_REGISTER_TYPE_UD);
image = bld.emit_uniformize(image);
+ assert(nir_src_as_uint(instr->src[1]) == 0);
+
fs_reg srcs[TEX_LOGICAL_NUM_SRCS];
if (instr->intrinsic == nir_intrinsic_image_size)
srcs[TEX_LOGICAL_SRC_SURFACE] = image;
}
case nir_intrinsic_image_store_raw_intel: {
- if (stage == MESA_SHADER_FRAGMENT)
- brw_wm_prog_data(prog_data)->has_side_effects = true;
-
fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS];
srcs[SURFACE_LOGICAL_SRC_SURFACE] =
get_nir_image_intrinsic_image(bld, instr);
break;
}
- case nir_intrinsic_scoped_memory_barrier:
+ case nir_intrinsic_scoped_barrier:
+ assert(nir_intrinsic_execution_scope(instr) == NIR_SCOPE_NONE);
+ /* Fall through. */
case nir_intrinsic_group_memory_barrier:
case nir_intrinsic_memory_barrier_shared:
case nir_intrinsic_memory_barrier_buffer:
SHADER_OPCODE_INTERLOCK : SHADER_OPCODE_MEMORY_FENCE;
switch (instr->intrinsic) {
- case nir_intrinsic_scoped_memory_barrier: {
+ case nir_intrinsic_scoped_barrier: {
nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
l3_fence = modes & (nir_var_shader_out |
nir_var_mem_ssbo |
break;
}
- if (stage != MESA_SHADER_COMPUTE)
+ if (stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL)
slm_fence = false;
/* If the workgroup fits in a single HW thread, the messages for SLM are
case nir_intrinsic_store_global:
assert(devinfo->gen >= 8);
- if (stage == MESA_SHADER_FRAGMENT)
- brw_wm_prog_data(prog_data)->has_side_effects = true;
-
assert(nir_src_bit_size(instr->src[0]) <= 32);
assert(nir_intrinsic_write_mask(instr) ==
(1u << instr->num_components) - 1);
case nir_intrinsic_store_ssbo: {
assert(devinfo->gen >= 7);
- if (stage == MESA_SHADER_FRAGMENT)
- brw_wm_prog_data(prog_data)->has_side_effects = true;
-
const unsigned bit_size = nir_src_bit_size(instr->src[0]);
fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS];
srcs[SURFACE_LOGICAL_SRC_SURFACE] =
/* Read the vector */
assert(nir_dest_num_components(instr->dest) == 1);
assert(nir_dest_bit_size(instr->dest) <= 32);
- assert(nir_intrinsic_align(instr) > 1);
+ assert(nir_intrinsic_align(instr) > 0);
if (nir_dest_bit_size(instr->dest) >= 4 &&
nir_intrinsic_align(instr) >= 4) {
/* The offset for a DWORD scattered message is in dwords. */
assert(nir_src_num_components(instr->src[0]) == 1);
assert(nir_src_bit_size(instr->src[0]) <= 32);
assert(nir_intrinsic_write_mask(instr) == 1);
- assert(nir_intrinsic_align(instr) > 1);
+ assert(nir_intrinsic_align(instr) > 0);
if (nir_src_bit_size(instr->src[0]) == 32 &&
nir_intrinsic_align(instr) >= 4) {
srcs[SURFACE_LOGICAL_SRC_DATA] = data;
fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld,
int op, nir_intrinsic_instr *instr)
{
- if (stage == MESA_SHADER_FRAGMENT)
- brw_wm_prog_data(prog_data)->has_side_effects = true;
-
/* The BTI untyped atomic messages only support 32-bit atomics. If you
* just look at the big table of messages in the Vol 7 of the SKL PRM, they
* appear to exist. However, if you look at Vol 2a, there are no message
fs_visitor::nir_emit_ssbo_atomic_float(const fs_builder &bld,
int op, nir_intrinsic_instr *instr)
{
- if (stage == MESA_SHADER_FRAGMENT)
- brw_wm_prog_data(prog_data)->has_side_effects = true;
-
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
dest = get_nir_dest(instr->dest);
fs_visitor::nir_emit_global_atomic(const fs_builder &bld,
int op, nir_intrinsic_instr *instr)
{
- if (stage == MESA_SHADER_FRAGMENT)
- brw_wm_prog_data(prog_data)->has_side_effects = true;
-
fs_reg dest;
if (nir_intrinsic_infos[instr->intrinsic].has_dest)
dest = get_nir_dest(instr->dest);
fs_visitor::nir_emit_global_atomic_float(const fs_builder &bld,
int op, nir_intrinsic_instr *instr)
{
- if (stage == MESA_SHADER_FRAGMENT)
- brw_wm_prog_data(prog_data)->has_side_effects = true;
-
assert(nir_intrinsic_infos[instr->intrinsic].has_dest);
fs_reg dest = get_nir_dest(instr->dest);
data = tmp;
}
- bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
+ bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
dest, addr, data, brw_imm_ud(op));
}