this->pull_constant_loc = v->pull_constant_loc;
this->uniforms = v->uniforms;
this->subgroup_id = v->subgroup_id;
+ for (unsigned i = 0; i < ARRAY_SIZE(this->group_size); i++)
+ this->group_size[i] = v->group_size[i];
}
void
return reg;
}
+/**
+ * Return the combined push constant size for a compute shader dispatched
+ * with \p threads threads: one copy of the cross-thread block plus one copy
+ * of the per-thread block for every thread.
+ *
+ * Both block sizes are asserted to be multiples of REG_SIZE.  The result is
+ * in the same unit as the blocks' \c size field (presumably bytes -- confirm
+ * against the struct brw_push_const_block definition).
+ */
+unsigned
+brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
+                             unsigned threads)
+{
+   const struct brw_push_const_block *per_thread =
+      &cs_prog_data->push.per_thread;
+   const struct brw_push_const_block *cross_thread =
+      &cs_prog_data->push.cross_thread;
+
+   /* Each block must cover a whole number of registers. */
+   assert(per_thread->size % REG_SIZE == 0);
+   assert(cross_thread->size % REG_SIZE == 0);
+
+   return cross_thread->size + threads * per_thread->size;
+}
+
static void
fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
{
fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);
- unsigned total_dwords =
- (cs_prog_data->push.per_thread.size * cs_prog_data->threads +
- cs_prog_data->push.cross_thread.size) / 4;
- fill_push_const_block_info(&cs_prog_data->push.total, total_dwords);
-
assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
cs_prog_data->push.per_thread.size == 0);
assert(cs_prog_data->push.cross_thread.dwords +
prog_data->nr_params);
}
-static void
-cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size)
-{
- cs_prog_data->simd_size = size;
- unsigned group_size = cs_prog_data->local_size[0] *
- cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
- cs_prog_data->threads = (group_size + size - 1) / size;
-}
-
static nir_shader *
compile_cs_to_nir(const struct brw_compiler *compiler,
void *mem_ctx,
char **error_str)
{
prog_data->base.total_shared = src_shader->info.cs.shared_size;
- prog_data->local_size[0] = src_shader->info.cs.local_size[0];
- prog_data->local_size[1] = src_shader->info.cs.local_size[1];
- prog_data->local_size[2] = src_shader->info.cs.local_size[2];
prog_data->slm_size = src_shader->num_shared;
- unsigned local_workgroup_size =
- src_shader->info.cs.local_size[0] * src_shader->info.cs.local_size[1] *
- src_shader->info.cs.local_size[2];
+
+ unsigned local_workgroup_size;
+ if (prog_data->uses_variable_group_size) {
+ prog_data->max_variable_local_size =
+ src_shader->info.cs.max_variable_local_size;
+ local_workgroup_size = src_shader->info.cs.max_variable_local_size;
+ } else {
+ prog_data->local_size[0] = src_shader->info.cs.local_size[0];
+ prog_data->local_size[1] = src_shader->info.cs.local_size[1];
+ prog_data->local_size[2] = src_shader->info.cs.local_size[2];
+ local_workgroup_size = src_shader->info.cs.local_size[0] *
+ src_shader->info.cs.local_size[1] * src_shader->info.cs.local_size[2];
+ }
/* Limit max_threads to 64 for the GPGPU_WALKER command */
const uint32_t max_threads = MIN2(64, compiler->devinfo->max_cs_threads);
assert(v8->max_dispatch_width >= 32);
v = v8;
- cs_set_simd_size(prog_data, 8);
+ prog_data->simd_size = 8;
cs_fill_push_const_info(compiler->devinfo, prog_data);
}
}
assert(v16->max_dispatch_width >= 32);
v = v16;
- cs_set_simd_size(prog_data, 16);
+ prog_data->simd_size = 16;
cs_fill_push_const_info(compiler->devinfo, prog_data);
}
}
}
} else {
v = v32;
- cs_set_simd_size(prog_data, 32);
+ prog_data->simd_size = 32;
cs_fill_push_const_info(compiler->devinfo, prog_data);
}
}