void fix_exports(asm_context& ctx, std::vector<uint32_t>& out, Program* program)
{
+ bool exported = false;
for (Block& block : program->blocks) {
if (!(block.kind & block_kind_export_end))
continue;
std::vector<aco_ptr<Instruction>>::reverse_iterator it = block.instructions.rbegin();
- bool exported = false;
while ( it != block.instructions.rend())
{
if ((*it)->format == Format::EXP) {
break;
++it;
}
- if (exported)
- continue;
- /* we didn't find an Export instruction and have to insert a null export */
- aco_ptr<Export_instruction> exp{create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)};
- for (unsigned i = 0; i < 4; i++)
- exp->operands[i] = Operand(v1);
- exp->enabled_mask = 0;
- exp->compressed = false;
- exp->done = true;
- exp->valid_mask = (program->stage & hw_fs) || program->chip_class >= GFX10;
- if (program->stage & hw_fs)
- exp->dest = 9; /* NULL */
- else
- exp->dest = V_008DFC_SQ_EXP_POS;
- /* insert the null export 1 instruction before branch/endpgm */
- block.instructions.insert(block.instructions.end() - 1, std::move(exp));
+ }
+
+ if (!exported) {
+ /* Abort in order to avoid a GPU hang. */
+ fprintf(stderr, "Missing export in %s shader:\n", (program->stage & hw_vs) ? "vertex" : "fragment");
+ aco_print_program(program, stderr);
+ abort();
}
}
return false;
}
-static void export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *next_pos)
+static void create_null_export(isel_context *ctx)
+{
+ /* Some shader stages always need to have exports.
+ * So when there is none, we need to add a null export.
+ *
+ * The export built here enables no channels (enabled_mask 0), passes four
+ * Operand(v1) placeholders as its data operands and has the done flag set.
+ *  - hw_fs stages:  target 9 (NULL), vm always true.
+ *  - other stages:  target V_008DFC_SQ_EXP_POS, vm true only when
+ *                   chip_class >= GFX10.
+ * It is emitted through a Builder bound to ctx->block (presumably appended
+ * at the block's current end -- confirm against Builder).
+ */
+
+ unsigned dest = (ctx->program->stage & hw_fs) ? 9 /* NULL */ : V_008DFC_SQ_EXP_POS;
+ bool vm = (ctx->program->stage & hw_fs) || ctx->program->chip_class >= GFX10; /* last bld.exp() argument; presumably the valid-mask bit -- TODO confirm */
+ Builder bld(ctx->program, ctx->block);
+ bld.exp(aco_opcode::exp, Operand(v1), Operand(v1), Operand(v1), Operand(v1),
+ /* enabled_mask */ 0, dest, /* compr */ false, /* done */ true, vm);
+}
+
+static bool export_vs_varying(isel_context *ctx, int slot, bool is_pos, int *next_pos)
{
assert(ctx->stage == vertex_vs ||
ctx->stage == tess_eval_vs ||
: ctx->program->info->vs.outinfo.vs_output_param_offset[slot];
uint64_t mask = ctx->outputs.mask[slot];
if (!is_pos && !mask)
- return;
+ return false;
if (!is_pos && offset == AC_EXP_PARAM_UNDEFINED)
- return;
+ return false;
aco_ptr<Export_instruction> exp{create_instruction<Export_instruction>(aco_opcode::exp, Format::EXP, 4, 0)};
exp->enabled_mask = mask;
for (unsigned i = 0; i < 4; ++i) {
else
exp->dest = V_008DFC_SQ_EXP_PARAM + offset;
ctx->block->instructions.emplace_back(std::move(exp));
+
+ return true;
}
static void export_vs_psiz_layer_viewport(isel_context *ctx, int *next_pos)
/* the order these position exports are created is important */
int next_pos = 0;
- export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
+ bool exported_pos = export_vs_varying(ctx, VARYING_SLOT_POS, true, &next_pos);
if (outinfo->writes_pointsize || outinfo->writes_layer || outinfo->writes_viewport_index) {
export_vs_psiz_layer_viewport(ctx, &next_pos);
+ exported_pos = true;
}
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
- export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, true, &next_pos);
+ exported_pos |= export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST0, true, &next_pos);
if (ctx->num_clip_distances + ctx->num_cull_distances > 4)
- export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, &next_pos);
+ exported_pos |= export_vs_varying(ctx, VARYING_SLOT_CLIP_DIST1, true, &next_pos);
if (ctx->export_clip_dists) {
if (ctx->num_clip_distances + ctx->num_cull_distances > 0)
export_vs_varying(ctx, i, false, NULL);
}
+
+ if (!exported_pos)
+ create_null_export(ctx);
}
-static void export_fs_mrt_z(isel_context *ctx)
+static bool export_fs_mrt_z(isel_context *ctx)
{
Builder bld(ctx->program, ctx->block);
unsigned enabled_channels = 0;
bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3],
enabled_channels, V_008DFC_SQ_EXP_MRTZ, compr);
+
+ return true;
}
-static void export_fs_mrt_color(isel_context *ctx, int slot)
+static bool export_fs_mrt_color(isel_context *ctx, int slot)
{
Builder bld(ctx->program, ctx->block);
unsigned write_mask = ctx->outputs.mask[slot];
}
if (target == V_008DFC_SQ_EXP_NULL)
- return;
+ return false;
if ((bool) compr_op) {
for (int i = 0; i < 2; i++) {
bld.exp(aco_opcode::exp, values[0], values[1], values[2], values[3],
enabled_channels, target, (bool) compr_op);
+ return true;
}
static void create_fs_exports(isel_context *ctx)
{
+ bool exported = false; /* becomes true once any export helper below returns true */
+
/* Export depth, stencil and sample mask. */
if (ctx->outputs.mask[FRAG_RESULT_DEPTH] ||
ctx->outputs.mask[FRAG_RESULT_STENCIL] ||
- ctx->outputs.mask[FRAG_RESULT_SAMPLE_MASK]) {
- export_fs_mrt_z(ctx);
- }
+ ctx->outputs.mask[FRAG_RESULT_SAMPLE_MASK])
+ exported |= export_fs_mrt_z(ctx);
/* Export all color render targets. */
- for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i) {
+ for (unsigned i = FRAG_RESULT_DATA0; i < FRAG_RESULT_DATA7 + 1; ++i)
if (ctx->outputs.mask[i])
- export_fs_mrt_color(ctx, i);
- }
+ exported |= export_fs_mrt_color(ctx, i); /* returns false when the slot's target is V_008DFC_SQ_EXP_NULL */
+
+ if (!exported) /* neither the depth/stencil/sample-mask path nor any color slot produced an export */
+ create_null_export(ctx);
}
static void write_tcs_tess_factors(isel_context *ctx)
}
case aco_opcode::p_exit_early_if:
{
- /* don't bother with an early exit at the end of the program */
- if (block->instructions[j + 1]->opcode == aco_opcode::p_logical_end &&
- block->instructions[j + 2]->opcode == aco_opcode::s_endpgm) {
- break;
+ /* don't bother with an early exit near the end of the program */
+ if ((block->instructions.size() - 1 - j) <= 4 &&
+ block->instructions.back()->opcode == aco_opcode::s_endpgm) {
+ unsigned null_exp_dest = (ctx.program->stage & hw_fs) ? 9 /* NULL */ : V_008DFC_SQ_EXP_POS;
+ bool ignore_early_exit = true;
+
+ for (unsigned k = j + 1; k < block->instructions.size(); ++k) {
+ const aco_ptr<Instruction> &instr = block->instructions[k];
+ if (instr->opcode == aco_opcode::s_endpgm ||
+ instr->opcode == aco_opcode::p_logical_end)
+ continue;
+ else if (instr->opcode == aco_opcode::exp &&
+ static_cast<Export_instruction *>(instr.get())->dest == null_exp_dest)
+ continue;
+ else if (instr->opcode == aco_opcode::p_parallelcopy &&
+ instr->definitions[0].isFixed() &&
+ instr->definitions[0].physReg() == exec)
+ continue;
+
+ ignore_early_exit = false;
+ }
+
+ if (ignore_early_exit)
+ break;
}
if (!discard_block) {