assert(uniforms == prog_data->nr_params);
uint32_t *param;
- if (brw_cs_prog_data(prog_data)->uses_variable_group_size) {
+ if (nir->info.cs.local_size_variable &&
+ compiler->lower_variable_group_size) {
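+ /* Add three builtin params so the driver can supply the X/Y/Z workgroup
+  * size at dispatch time, since it is not known at compile time for a
+  * variable-group-size shader. */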
param = brw_stage_prog_data_add_params(prog_data, 3);
for (unsigned i = 0; i < 3; i++) {
param[i] = (BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i);
* invocations are already executed lock-step. Instead of an actual
* barrier just emit a scheduling fence, which will generate no code.
*/
- if (!cs_prog_data->uses_variable_group_size &&
+ if (!nir->info.cs.local_size_variable &&
workgroup_size() <= dispatch_width) {
bld.exec_all().group(1, 0).emit(FS_OPCODE_SCHEDULING_FENCE);
break;
}
case nir_intrinsic_load_local_group_size: {
+ assert(compiler->lower_variable_group_size);
+ assert(nir->info.cs.local_size_variable);
for (unsigned i = 0; i < 3; i++) {
bld.MOV(retype(offset(dest, bld, i), BRW_REGISTER_TYPE_UD),
group_size[i]);
   }
   break;
}
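+ /* Expose the SIMD width this CS is compiled for; NIR lowering that runs
+  * before the width is chosen (e.g. brw_nir_lower_cs_intrinsics) can use
+  * the intrinsic, and the backend resolves it to the immediate below. */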
+ case nir_intrinsic_load_simd_width_intel: {
+ bld.MOV(dest, brw_imm_ud(cs_prog_data->simd_size));
+ break;
+ }
+
default:
nir_emit_intrinsic(bld, instr);
break;
*
* TODO: Check if this applies for many HW threads sharing the same Data Port.
*/
- if (!brw_cs_prog_data(prog_data)->uses_variable_group_size &&
+ if (!nir->info.cs.local_size_variable &&
slm_fence && workgroup_size() <= dispatch_width)
slm_fence = false;
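
For context, a minimal sketch of the driver-side counterpart these params imply, assuming only what the diff shows (prog_data->param/nr_params and the consecutive BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_* values); the real iris/anv dispatch code differs:

#include <stdint.h>
#include "brw_compiler.h"   /* brw_stage_prog_data, BRW_PARAM_BUILTIN_* (assumed header) */

/* Illustrative only: copy the dispatch-time group size into whichever
 * push-constant slots the compiler marked with the workgroup-size builtins. */
static void
fill_variable_group_size_params(const struct brw_stage_prog_data *prog_data,
                                const uint32_t group_size[3],
                                uint32_t *push_constants)
{
   for (unsigned i = 0; i < prog_data->nr_params; i++) {
      const uint32_t p = prog_data->param[i];
      if (p >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X &&
          p <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + 2)
         push_constants[i] = group_size[p - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X];
   }
}

The range check relies on the X/Y/Z builtins being consecutive, which is exactly what the `+ i` loop in the first hunk assumes.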