intel/fs: Add and use a new load_simd_width_intel intrinsic
[mesa.git] / src / intel / compiler / brw_fs_nir.cpp
index e3149f6254c28208abcfbbbd73675367dacd221c..383c99c9f45e2e2c1f6a249adcacbc1b9c99f5da 100644 (file)
@@ -105,7 +105,8 @@ fs_visitor::nir_setup_uniforms()
       assert(uniforms == prog_data->nr_params);
 
       uint32_t *param;
-      if (brw_cs_prog_data(prog_data)->uses_variable_group_size) {
+      if (nir->info.cs.local_size_variable &&
+          compiler->lower_variable_group_size) {
          param = brw_stage_prog_data_add_params(prog_data, 3);
          for (unsigned i = 0; i < 3; i++) {
             param[i] = (BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X + i);
@@ -3732,7 +3733,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
        * invocations are already executed lock-step.  Instead of an actual
        * barrier just emit a scheduling fence, that will generate no code.
        */
-      if (!cs_prog_data->uses_variable_group_size &&
+      if (!nir->info.cs.local_size_variable &&
           workgroup_size() <= dispatch_width) {
          bld.exec_all().group(1, 0).emit(FS_OPCODE_SCHEDULING_FENCE);
          break;
@@ -3869,6 +3870,8 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
    }
 
    case nir_intrinsic_load_local_group_size: {
+      assert(compiler->lower_variable_group_size);
+      assert(nir->info.cs.local_size_variable);
       for (unsigned i = 0; i < 3; i++) {
          bld.MOV(retype(offset(dest, bld, i), BRW_REGISTER_TYPE_UD),
             group_size[i]);
@@ -3876,6 +3879,11 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
       break;
    }
 
+   case nir_intrinsic_load_simd_width_intel: {
+      bld.MOV(dest, brw_imm_ud(cs_prog_data->simd_size));
+      break;
+   };
+
    default:
       nir_emit_intrinsic(bld, instr);
       break;
@@ -4297,7 +4305,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
        *
        * TODO: Check if applies for many HW threads sharing same Data Port.
        */
-      if (!brw_cs_prog_data(prog_data)->uses_variable_group_size &&
+      if (!nir->info.cs.local_size_variable &&
           slm_fence && workgroup_size() <= dispatch_width)
          slm_fence = false;