args, 9, 0);
}
+static void si_export_mrt_color(struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef *color, unsigned index,
+ bool is_last)
+{
+ struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
+ struct lp_build_context *base = &bld_base->base;
+ LLVMValueRef args[9];
+ int i;
+ /* Emit the llvm.SI.export call(s) for one fragment-shader color output.
+  *
+  * color:   the four channel values (color[3] is alpha); entries are
+  *          overwritten in place by the steps below.
+  * index:   color output index; it is added to V_008DFC_SQ_EXP_MRT to
+  *          form the export target.
+  * is_last: when true, the final export of this call gets the
+  *          "EXEC mask is valid" and DONE arguments set to one.
+  *
+  * Every step is gated by a field of the shader key (shader->key.ps).
+  */
+ /* Clamp color: pass all four channels through radeon_llvm_saturate(). */
+ if (si_shader_ctx->shader->key.ps.clamp_color)
+ for (i = 0; i < 4; i++)
+ color[i] = radeon_llvm_saturate(bld_base, color[i]);
+
+ /* Alpha to one: replace alpha with base->one. */
+ if (si_shader_ctx->shader->key.ps.alpha_to_one)
+ color[3] = base->one;
+
+ /* Alpha test: only for color output 0, and skipped entirely when
+  * alpha_func is PIPE_FUNC_ALWAYS. It sees the alpha value as left by
+  * the steps above. */
+ if (index == 0 &&
+ si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
+ si_alpha_test(bld_base, color[3]);
+
+ /* Line & polygon smoothing: alpha becomes the result of
+  * si_scale_alpha_by_sample_mask(). This runs after the alpha test. */
+ if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
+ color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);
+
+ /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true.
+  * Color 0 is then also exported to targets MRT 1 .. MRT last_cbuf.
+  * These extra exports use the arguments exactly as initialized by
+  * si_llvm_init_export_args(); the is_last override is applied only to
+  * the final export further down. */
+ if (index == 0 &&
+ si_shader_ctx->shader->key.ps.last_cbuf > 0) {
+ for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
+ si_llvm_init_export_args(bld_base, color,
+ V_008DFC_SQ_EXP_MRT + c, args);
+ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9, 0);
+ }
+ }
+
+ /* Export to the target selected by index. args is (re)initialized
+  * first, then args[1] and args[2] are overridden when is_last is set. */
+ si_llvm_init_export_args(bld_base, color, V_008DFC_SQ_EXP_MRT + index,
+ args);
+ if (is_last) {
+ args[1] = bld_base->uint_bld.one; /* whether the EXEC mask is valid */
+ args[2] = bld_base->uint_bld.one; /* DONE bit */
+ }
+ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9, 0);
+}
+
+static void si_export_null(struct lp_build_tgsi_context *bld_base)
+{
+ struct lp_build_context *base = &bld_base->base;
+ struct lp_build_context *uint = &bld_base->uint_bld;
+ LLVMValueRef args[9];
+ /* Emit a single llvm.SI.export call aimed at the V_008DFC_SQ_EXP_NULL
+  * target: no channels enabled, "EXEC mask is valid" and DONE both set
+  * to one, and undefined values in the four data slots.
+  * si_llvm_emit_fs_epilogue() calls this when the shader has no outputs.
+  */
+ args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
+ args[1] = uint->one; /* whether the EXEC mask is valid */
+ args[2] = uint->one; /* DONE bit */
+ args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_NULL);
+ args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
+ args[5] = uint->undef; /* R */
+ args[6] = uint->undef; /* G */
+ args[7] = uint->undef; /* B */
+ args[8] = uint->undef; /* A */
+
+ lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
+ LLVMVoidTypeInContext(base->gallivm->context),
+ args, 9, 0);
+}
+
static void si_llvm_emit_fs_epilogue(struct lp_build_tgsi_context * bld_base)
{
struct si_shader_context * si_shader_ctx = si_shader_context(bld_base);
struct si_shader * shader = si_shader_ctx->shader;
struct lp_build_context * base = &bld_base->base;
- struct lp_build_context * uint = &bld_base->uint_bld;
struct tgsi_shader_info *info = &shader->selector->info;
LLVMBuilderRef builder = base->gallivm->builder;
- LLVMValueRef args[9];
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
int last_color_export = -1;
int i;
/* If there are no outputs, add a dummy export. */
if (!info->num_outputs) {
- args[0] = lp_build_const_int32(base->gallivm, 0x0); /* enabled channels */
- args[1] = uint->one; /* whether the EXEC mask is valid */
- args[2] = uint->one; /* DONE bit */
- args[3] = lp_build_const_int32(base->gallivm, V_008DFC_SQ_EXP_MRT);
- args[4] = uint->zero; /* COMPR flag (0 = 32-bit export) */
- args[5] = uint->undef; /* R */
- args[6] = uint->undef; /* G */
- args[7] = uint->undef; /* B */
- args[8] = uint->undef; /* A */
-
- lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 9, 0);
+ si_export_null(bld_base);
return;
}
for (i = 0; i < info->num_outputs; i++) {
unsigned semantic_name = info->output_semantic_name[i];
unsigned semantic_index = info->output_semantic_index[i];
- unsigned target, j;
+ unsigned j;
LLVMValueRef color[4] = {};
/* Select the correct target */
case TGSI_SEMANTIC_POSITION:
depth = LLVMBuildLoad(builder,
si_shader_ctx->radeon_bld.soa.outputs[i][2], "");
- continue;
+ break;
case TGSI_SEMANTIC_STENCIL:
stencil = LLVMBuildLoad(builder,
si_shader_ctx->radeon_bld.soa.outputs[i][1], "");
- continue;
+ break;
case TGSI_SEMANTIC_SAMPLEMASK:
samplemask = LLVMBuildLoad(builder,
si_shader_ctx->radeon_bld.soa.outputs[i][0], "");
- continue;
+ break;
case TGSI_SEMANTIC_COLOR:
- target = V_008DFC_SQ_EXP_MRT + semantic_index;
-
for (j = 0; j < 4; j++)
color[j] = LLVMBuildLoad(builder,
si_shader_ctx->radeon_bld.soa.outputs[i][j], "");
- if (si_shader_ctx->shader->key.ps.clamp_color)
- for (j = 0; j < 4; j++)
- color[j] = radeon_llvm_saturate(bld_base, color[j]);
-
- if (si_shader_ctx->shader->key.ps.alpha_to_one)
- color[3] = base->one;
-
- if (semantic_index == 0 &&
- si_shader_ctx->shader->key.ps.alpha_func != PIPE_FUNC_ALWAYS)
- si_alpha_test(bld_base, color[3]);
-
- if (si_shader_ctx->shader->key.ps.poly_line_smoothing)
- color[3] = si_scale_alpha_by_sample_mask(bld_base, color[3]);
+ si_export_mrt_color(bld_base, color, semantic_index,
+ last_color_export == i);
break;
default:
fprintf(stderr,
"Warning: SI unhandled fs output type:%d\n",
semantic_name);
- continue;
- }
-
- /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
- if (semantic_index == 0 &&
- si_shader_ctx->shader->key.ps.last_cbuf > 0) {
- for (int c = 1; c <= si_shader_ctx->shader->key.ps.last_cbuf; c++) {
- si_llvm_init_export_args(bld_base, color,
- V_008DFC_SQ_EXP_MRT + c, args);
- lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 9, 0);
- }
}
-
- si_llvm_init_export_args(bld_base, color, target, args);
- if (last_color_export == i) {
- args[1] = uint->one; /* whether the EXEC mask is valid */
- args[2] = uint->one; /* DONE bit */
- }
- lp_build_intrinsic(base->gallivm->builder, "llvm.SI.export",
- LLVMVoidTypeInContext(base->gallivm->context),
- args, 9, 0);
}
if (depth || stencil || samplemask)
}
}
-void si_shader_binary_read_config(struct si_shader *shader,
+void si_shader_binary_read_config(struct radeon_shader_binary *binary,
+ struct si_shader_config *conf,
unsigned symbol_offset)
{
unsigned i;
const unsigned char *config =
- radeon_shader_binary_config_start(&shader->binary,
- symbol_offset);
+ radeon_shader_binary_config_start(binary, symbol_offset);
/* XXX: We may be able to emit some of these values directly rather than
* extracting fields to be emitted later.
*/
- for (i = 0; i < shader->binary.config_size_per_symbol; i+= 8) {
+ for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
switch (reg) {
case R_00B128_SPI_SHADER_PGM_RSRC1_VS:
case R_00B228_SPI_SHADER_PGM_RSRC1_GS:
case R_00B848_COMPUTE_PGM_RSRC1:
- shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
- shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
- shader->float_mode = G_00B028_FLOAT_MODE(value);
- shader->rsrc1 = value;
+ conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
+ conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
+ conf->float_mode = G_00B028_FLOAT_MODE(value);
+ conf->rsrc1 = value;
break;
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
- shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
+ conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
break;
case R_00B84C_COMPUTE_PGM_RSRC2:
- shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
- shader->rsrc2 = value;
+ conf->lds_size = MAX2(conf->lds_size, G_00B84C_LDS_SIZE(value));
+ conf->rsrc2 = value;
break;
case R_0286CC_SPI_PS_INPUT_ENA:
- shader->spi_ps_input_ena = value;
+ conf->spi_ps_input_ena = value;
break;
case R_0286E8_SPI_TMPRING_SIZE:
case R_00B860_COMPUTE_TMPRING_SIZE:
/* WAVESIZE is in units of 256 dwords. */
- shader->scratch_bytes_per_wave =
+ conf->scratch_bytes_per_wave =
G_00B860_WAVESIZE(value) * 256 * 4 * 1;
break;
default:
uint32_t scratch_rsrc_dword0 = scratch_va;
uint32_t scratch_rsrc_dword1 =
S_008F04_BASE_ADDRESS_HI(scratch_va >> 32)
- | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64);
+ | S_008F04_STRIDE(shader->config.scratch_bytes_per_wave / 64);
for (i = 0 ; i < shader->binary.reloc_count; i++) {
const struct radeon_shader_reloc *reloc =
}
}
-void si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader,
- struct pipe_debug_callback *debug, unsigned processor)
+void si_shader_binary_read(struct si_screen *sscreen,
+ struct radeon_shader_binary *binary,
+ struct si_shader_config *conf,
+ struct pipe_debug_callback *debug,
+ unsigned processor)
{
- const struct radeon_shader_binary *binary = &shader->binary;
-
- si_shader_binary_read_config(shader, 0);
+ si_shader_binary_read_config(binary, conf, 0); /* Update *conf from the
+  * config data stored in binary (symbol offset 0). The statistics
+  * reported below are read back from conf, with the code size taken
+  * from binary. */
if (r600_can_dump_shader(&sscreen->b, processor)) {
if (!(sscreen->b.debug_flags & DBG_NO_ASM))
fprintf(stderr, "*** SHADER STATS ***\n"
"SGPRS: %d\nVGPRS: %d\nCode Size: %d bytes\nLDS: %d blocks\n"
"Scratch: %d bytes per wave\n********************\n",
- shader->num_sgprs, shader->num_vgprs, binary->code_size,
- shader->lds_size, shader->scratch_bytes_per_wave);
+ conf->num_sgprs, conf->num_vgprs, binary->code_size,
+ conf->lds_size, conf->scratch_bytes_per_wave);
}
pipe_debug_message(debug, SHADER_INFO,
"Shader Stats: SGPRS: %d VGPRS: %d Code Size: %d LDS: %d Scratch: %d",
- shader->num_sgprs, shader->num_vgprs, binary->code_size,
- shader->lds_size, shader->scratch_bytes_per_wave);
+ conf->num_sgprs, conf->num_vgprs, binary->code_size,
+ conf->lds_size, conf->scratch_bytes_per_wave);
}
int si_compile_llvm(struct si_screen *sscreen, struct si_shader *shader,
return r;
}
- si_shader_binary_read(sscreen, shader, debug, processor);
-
- r = si_shader_binary_upload(sscreen, shader);
- if (r)
- return r;
+ si_shader_binary_read(sscreen, &shader->binary, &shader->config,
+ debug, processor);
FREE(shader->binary.config);
- FREE(shader->binary.rodata);
FREE(shader->binary.global_symbol_offsets);
- if (shader->scratch_bytes_per_wave == 0) {
- FREE(shader->binary.code);
- FREE(shader->binary.relocs);
- memset(&shader->binary, 0,
- offsetof(struct radeon_shader_binary, disasm_string));
- }
+ shader->binary.config = NULL;
+ shader->binary.global_symbol_offsets = NULL;
return r;
}
r = si_compile_llvm(sscreen, si_shader_ctx->shader,
si_shader_ctx->tm, bld_base->base.gallivm->module,
debug, TGSI_PROCESSOR_GEOMETRY);
+ if (!r)
+ r = si_shader_binary_upload(sscreen, si_shader_ctx->shader);
radeon_llvm_dispose(&si_shader_ctx->radeon_bld);
goto out;
}
+ r = si_shader_binary_upload(sscreen, shader);
+ if (r) {
+ fprintf(stderr, "LLVM failed to upload shader\n");
+ goto out;
+ }
+
radeon_llvm_dispose(&si_shader_ctx.radeon_bld);
if (si_shader_ctx.type == TGSI_PROCESSOR_GEOMETRY) {
r600_resource_reference(&shader->bo, NULL);
FREE(shader->binary.code);
+ FREE(shader->binary.rodata);
FREE(shader->binary.relocs);
FREE(shader->binary.disasm_string);
}