Based on code written by Dave Airlie.
Signed-off-by: Bas Nieuwenhuizen <basni@google.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
void ac_shader_binary_read_config(struct ac_shader_binary *binary,
struct ac_shader_config *conf,
- unsigned symbol_offset)
+ unsigned symbol_offset,
+ bool supports_spill)
{
unsigned i;
const unsigned char *config =
ac_shader_binary_config_start(binary, symbol_offset);
bool really_needs_scratch = false;
-
+ uint32_t wavesize = 0;
/* LLVM adds SGPR spills to the scratch size.
* Find out if we really need the scratch buffer.
*/
- for (i = 0; i < binary->reloc_count; i++) {
- const struct ac_shader_reloc *reloc = &binary->relocs[i];
+ if (supports_spill) {
+ really_needs_scratch = true;
+ } else {
+ for (i = 0; i < binary->reloc_count; i++) {
+ const struct ac_shader_reloc *reloc = &binary->relocs[i];
- if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
- !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
- really_needs_scratch = true;
- break;
+ if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name) ||
+ !strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+ really_needs_scratch = true;
+ break;
+ }
}
}
case R_0286E8_SPI_TMPRING_SIZE:
case R_00B860_COMPUTE_TMPRING_SIZE:
/* WAVESIZE is in units of 256 dwords. */
- if (really_needs_scratch)
- conf->scratch_bytes_per_wave =
- G_00B860_WAVESIZE(value) * 256 * 4;
+ wavesize = value;
break;
case SPILLED_SGPRS:
conf->spilled_sgprs = value;
if (!conf->spi_ps_input_addr)
conf->spi_ps_input_addr = conf->spi_ps_input_ena;
}
+
+ if (really_needs_scratch) {
+ /* SGPR spills alone do not need scratch memory. */
+ conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(wavesize) * 256 * 4;
+ }
}
#pragma once
#include <stdint.h>
+#include <stdbool.h>
struct ac_shader_reloc {
char name[32];
void ac_shader_binary_read_config(struct ac_shader_binary *binary,
struct ac_shader_config *conf,
- unsigned symbol_offset);
+ unsigned symbol_offset,
+ bool supports_spill);
}
}
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family)
+LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill)
{
assert(family >= CHIP_TAHITI);
- const char *triple = "amdgcn--";
+ const char *triple = supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--";
LLVMTargetRef target = ac_get_llvm_target(triple);
LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
target,
LLVMValueRef fpmath_md_2p5_ulp;
};
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family);
+LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill);
void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
bool ac_is_sgpr_param(LLVMValueRef param);
arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
set_llvm_calling_convention(ctx->main_function, ctx->stage);
-
ctx->shader_info->num_input_sgprs = 0;
ctx->shader_info->num_input_vgprs = 0;
+ ctx->shader_info->num_user_sgprs = ctx->options->supports_spill ? 2 : 0;
for (i = 0; i < user_sgpr_count; i++)
ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;
arg_idx = 0;
user_sgpr_idx = 0;
+
+ if (ctx->options->supports_spill) {
+ set_userdata_location_shader(ctx, AC_UD_SCRATCH, user_sgpr_idx, 2);
+ user_sgpr_idx += 2;
+ }
+
for (unsigned i = 0; i < num_sets; ++i) {
if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
memset(shader_info, 0, sizeof(*shader_info));
- LLVMSetTarget(ctx.module, "amdgcn--");
+ LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
setup_types(&ctx);
ctx.builder = LLVMCreateBuilderInContext(ctx.context);
struct ac_shader_config *config,
struct ac_shader_variant_info *shader_info,
gl_shader_stage stage,
- bool dump_shader)
+ bool dump_shader, bool supports_spill)
{
if (dump_shader)
ac_dump_module(llvm_module);
if (dump_shader)
fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
- ac_shader_binary_read_config(binary, config, 0);
+ ac_shader_binary_read_config(binary, config, 0, supports_spill);
LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
LLVMDisposeModule(llvm_module);
LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
options);
- ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader);
+ ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader, options->supports_spill);
switch (nir->stage) {
case MESA_SHADER_COMPUTE:
for (int i = 0; i < 3; ++i)
struct radv_pipeline_layout *layout;
union ac_shader_variant_key key;
bool unsafe_math;
+ bool supports_spill;
enum radeon_family family;
enum chip_class chip_class;
};
};
enum ac_ud_index {
- AC_UD_PUSH_CONSTANTS = 0,
- AC_UD_SHADER_START = 1,
+ AC_UD_SCRATCH = 0,
+ AC_UD_PUSH_CONSTANTS = 1,
+ AC_UD_SHADER_START = 2,
AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
AC_UD_VS_BASE_VERTEX_START_INSTANCE,
AC_UD_VS_MAX_UD,
options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH);
options.family = chip_family;
options.chip_class = device->physical_device->rad_info.chip_class;
- tm = ac_create_target_machine(chip_family);
+ options.supports_spill = false;
+ tm = ac_create_target_machine(chip_family, false);
ac_compile_nir_shader(tm, &binary, &variant->config,
&variant->info, shader, &options, dump);
LLVMDisposeTargetMachine(tm);