* allocating them. With ARB_enhanced_layouts, multiple output variables
* may occupy the same slot, but have different type sizes.
*/
- nir_foreach_variable(var, &nir->outputs) {
+ nir_foreach_shader_out_variable(var, nir) {
const int loc = var->data.driver_location;
const unsigned var_vec4s =
var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
uniforms = nir->num_uniforms / 4;
- if (stage == MESA_SHADER_COMPUTE) {
+ if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL) {
/* Add uniforms for builtins after regular NIR uniforms. */
assert(uniforms == prog_data->nr_params);
break;
case nir_intrinsic_load_work_group_id:
- assert(v->stage == MESA_SHADER_COMPUTE);
+ assert(v->stage == MESA_SHADER_COMPUTE ||
+ v->stage == MESA_SHADER_KERNEL);
reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
if (reg->file == BAD_FILE)
*reg = *v->emit_cs_work_group_id_setup();
nir_emit_fs_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
case MESA_SHADER_COMPUTE:
+ case MESA_SHADER_KERNEL:
nir_emit_cs_intrinsic(abld, nir_instr_as_intrinsic(instr));
break;
default:
fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
{
- assert(stage == MESA_SHADER_COMPUTE);
+ assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL);
struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data);
fs_reg dest;
case nir_intrinsic_load_shared: {
assert(devinfo->gen >= 7);
- assert(stage == MESA_SHADER_COMPUTE);
+ assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL);
const unsigned bit_size = nir_dest_bit_size(instr->dest);
fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS];
case nir_intrinsic_store_shared: {
assert(devinfo->gen >= 7);
- assert(stage == MESA_SHADER_COMPUTE);
+ assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL);
const unsigned bit_size = nir_src_bit_size(instr->src[0]);
fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS];
break;
}
- if (stage != MESA_SHADER_COMPUTE)
+ if (stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL)
slm_fence = false;
/* If the workgroup fits in a single HW thread, the messages for SLM are
data = tmp;
}
- bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL,
+ bld.emit(SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL,
dest, addr, data, brw_imm_ud(op));
}