#include "gallivm/lp_bld_logic.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_flow.h"
+#include "radeon/r600_cs.h"
#include "radeon/radeon_llvm.h"
#include "radeon/radeon_elf_util.h"
#include "radeon/radeon_llvm_emit.h"
#include <errno.h>
+static const char *scratch_rsrc_dword0_symbol = /* reloc name patched with the low 32 bits of the scratch address */
+ "SCRATCH_RSRC_DWORD0";
+
+static const char *scratch_rsrc_dword1_symbol = /* reloc name patched with the high address bits and the stride field */
+ "SCRATCH_RSRC_DWORD1";
+
struct si_shader_output_values
{
LLVMValueRef values[4];
interp_param = LLVMGetParam(main_fn, SI_PARAM_LINEAR_CENTER);
break;
case TGSI_INTERPOLATE_COLOR:
- if (si_shader_ctx->shader->key.ps.flatshade) {
- interp_param = 0;
- break;
- }
- /* fall through to perspective */
case TGSI_INTERPOLATE_PERSPECTIVE:
if (decl->Interp.Location == TGSI_INTERPOLATE_LOC_SAMPLE)
interp_param = LLVMGetParam(main_fn, SI_PARAM_PERSP_SAMPLE);
return;
}
+ /* fs.constant returns the param from the middle vertex, so it's not
+ * really useful for flat shading. It's meant to be used for custom
+ * interpolation (but the intrinsic can't fetch from the other two
+ * vertices).
+ *
+ * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
+ * to do the right thing. The only reason we use fs.constant is that
+ * fs.interp cannot be used on integers, because they can be equal
+ * to NaN.
+ */
intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
- /* XXX: Could there be more than TGSI_NUM_CHANNELS (4) ? */
if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
si_shader_ctx->shader->key.ps.color_two_side) {
LLVMValueRef args[4];
radeon_llvm_create_func(&si_shader_ctx->radeon_bld, params, num_params);
radeon_llvm_shader_type(si_shader_ctx->radeon_bld.main_fn, si_shader_ctx->type);
+ if (shader->dx10_clamp_mode)
+ LLVMAddTargetDependentFunctionAttr(si_shader_ctx->radeon_bld.main_fn,
+ "enable-no-nans-fp-math", "true");
+
for (i = 0; i <= last_sgpr; ++i) {
LLVMValueRef P = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, i);
}
}
-void si_shader_binary_read_config(const struct radeon_shader_binary *binary,
+void si_shader_binary_read_config(const struct si_screen *sscreen,
struct si_shader *shader,
unsigned symbol_offset)
{
unsigned i;
const unsigned char *config =
- radeon_shader_binary_config_start(binary, symbol_offset);
+ radeon_shader_binary_config_start(&shader->binary,
+ symbol_offset);
/* XXX: We may be able to emit some of these values directly rather than
* extracting fields to be emitted later.
*/
- for (i = 0; i < binary->config_size_per_symbol; i+= 8) {
+ for (i = 0; i < shader->binary.config_size_per_symbol; i+= 8) {
unsigned reg = util_le32_to_cpu(*(uint32_t*)(config + i));
unsigned value = util_le32_to_cpu(*(uint32_t*)(config + i + 4));
switch (reg) {
case R_0286CC_SPI_PS_INPUT_ENA:
shader->spi_ps_input_ena = value;
break;
+ case R_0286E8_SPI_TMPRING_SIZE:
case R_00B860_COMPUTE_TMPRING_SIZE:
/* WAVESIZE is in units of 256 dwords. */
shader->scratch_bytes_per_wave =
}
}
+void si_shader_apply_scratch_relocs(struct si_context *sctx,
+ struct si_shader *shader,
+ uint64_t scratch_va)
+{ /* Patch the scratch-buffer resource words into the shader's code.
+   *
+   * Walks shader->binary.relocs and, for every relocation named
+   * SCRATCH_RSRC_DWORD0 or SCRATCH_RSRC_DWORD1, overwrites the 4 bytes
+   * at shader->binary.code + reloc->offset with the matching value
+   * computed below. Relocations with any other name are left alone.
+   *
+   * sctx:       not referenced by this function body.
+   * shader:     supplies scratch_bytes_per_wave and the binary
+   *             (code, relocs, reloc_count) that is modified in place.
+   * scratch_va: 64-bit address of the scratch buffer.
+   *
+   * Only the in-memory copy of the code is changed here; nothing in
+   * this function uploads it.
+   * NOTE(review): reloc->offset is not range-checked against the code
+   * size here - presumably the relocation table is trusted; confirm.
+   */
+ unsigned i;
+ uint32_t scratch_rsrc_dword0 = scratch_va & 0xffffffff; /* low 32 bits of the address */
+ uint32_t scratch_rsrc_dword1 =
+ S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) /* upper address bits */
+ | S_008F04_STRIDE(shader->scratch_bytes_per_wave / 64); /* NOTE(review): presumably per-thread stride for a 64-thread wave - confirm */
+
+ for (i = 0 ; i < shader->binary.reloc_count; i++) {
+ const struct radeon_shader_reloc *reloc =
+ &shader->binary.relocs[i];
+ if (!strcmp(scratch_rsrc_dword0_symbol, reloc->name)) {
+ util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset, /* stored little-endian, per the helper's name */
+ &scratch_rsrc_dword0, 4);
+ } else if (!strcmp(scratch_rsrc_dword1_symbol, reloc->name)) {
+ util_memcpy_cpu_to_le32(shader->binary.code + reloc->offset,
+ &scratch_rsrc_dword1, 4);
+ }
+ }
+}
+
int si_shader_binary_read(struct si_screen *sscreen,
struct si_shader *shader,
const struct radeon_shader_binary *binary)
}
}
- si_shader_binary_read_config(binary, shader, 0);
+ si_shader_binary_read_config(sscreen, shader, 0);
/* copy new shader */
code_size = binary->code_size + binary->rodata_size;
LLVMModuleRef mod)
{
int r = 0;
- struct radeon_shader_binary binary;
bool dump = r600_can_dump_shader(&sscreen->b,
shader->selector ? shader->selector->tokens : NULL);
- memset(&binary, 0, sizeof(binary));
- r = radeon_llvm_compile(mod, &binary,
+ r = radeon_llvm_compile(mod, &shader->binary,
r600_get_llvm_processor_name(sscreen->b.family), dump, sscreen->tm);
if (r) {
return r;
}
- r = si_shader_binary_read(sscreen, shader, &binary);
- FREE(binary.code);
- FREE(binary.config);
- FREE(binary.rodata);
+ r = si_shader_binary_read(sscreen, shader, &shader->binary);
+
+ FREE(shader->binary.config);
+ FREE(shader->binary.rodata);
+ FREE(shader->binary.global_symbol_offsets);
+ if (shader->scratch_bytes_per_wave == 0) {
+ FREE(shader->binary.code);
+ FREE(shader->binary.relocs);
+ memset(&shader->binary, 0, sizeof(shader->binary));
+ }
return r;
}
radeon_llvm_context_init(&si_shader_ctx.radeon_bld);
bld_base = &si_shader_ctx.radeon_bld.soa.bld_base;
+ if (sel->type != PIPE_SHADER_COMPUTE)
+ shader->dx10_clamp_mode = true;
+
if (sel->info.uses_kill)
shader->db_shader_control |= S_02880C_KILL_ENABLE(1);
if (shader->gs_copy_shader)
si_shader_destroy(ctx, shader->gs_copy_shader);
+ if (shader->scratch_bo)
+ r600_resource_reference(&shader->scratch_bo, NULL);
+
r600_resource_reference(&shader->bo, NULL);
- r600_resource_reference(&shader->scratch_bo, NULL);
+
+ FREE(shader->binary.code);
+ FREE(shader->binary.relocs);
}