}
void
-fs_visitor::allocate_registers(unsigned min_dispatch_width, bool allow_spilling)
+fs_visitor::allocate_registers(bool allow_spilling)
{
bool allocated;
}
-
- /* We only allow spilling for the last schedule mode and only if the
- * allow_spilling parameter and dispatch width work out ok.
- */
+ /* We only allow spilling for the last schedule mode, and only if the
+ * caller allows it.
+ */
bool can_spill = allow_spilling &&
- (i == ARRAY_SIZE(pre_modes) - 1) &&
- dispatch_width == min_dispatch_width;
+ (i == ARRAY_SIZE(pre_modes) - 1);
/* We should only spill registers on the last scheduling. */
assert(!spilled_any_registers);
}
if (!allocated) {
- if (!allow_spilling)
- fail("Failure to register allocate and spilling is not allowed.");
-
- /* We assume that any spilling is worse than just dropping back to
- * SIMD8. There's probably actually some intermediate point where
- * SIMD16 with a couple of spills is still better.
- */
- if (dispatch_width > min_dispatch_width) {
- fail("Failure to register allocate. Reduce number of "
- "live scalar values to avoid this.");
- }
-
- /* If we failed to allocate, we must have a reason */
- assert(failed);
+ fail("Failure to register allocate. Reduce number of "
+ "live scalar values to avoid this.");
} else if (spilled_any_registers) {
compiler->shader_perf_log(log_data,
"%s shader triggered register spilling. "
assign_vs_urb_setup();
fixup_3src_null_dest();
- allocate_registers(8, true);
+ allocate_registers(true /* allow_spilling */);
return !failed;
}
assign_tcs_urb_setup();
fixup_3src_null_dest();
- allocate_registers(8, true);
+ allocate_registers(true /* allow_spilling */);
return !failed;
}
assign_tes_urb_setup();
fixup_3src_null_dest();
- allocate_registers(8, true);
+ allocate_registers(true /* allow_spilling */);
return !failed;
}
assign_gs_urb_setup();
fixup_3src_null_dest();
- allocate_registers(8, true);
+ allocate_registers(true /* allow_spilling */);
return !failed;
}
assign_urb_setup();
fixup_3src_null_dest();
- allocate_registers(8, allow_spilling);
+
+ allocate_registers(allow_spilling);
if (failed)
return false;
}
bool
-fs_visitor::run_cs(unsigned min_dispatch_width)
+fs_visitor::run_cs(bool allow_spilling)
{
assert(stage == MESA_SHADER_COMPUTE);
- assert(dispatch_width >= min_dispatch_width);
setup_cs_payload();
assign_curb_setup();
fixup_3src_null_dest();
- allocate_registers(min_dispatch_width, true);
+ allocate_registers(allow_spilling);
if (failed)
return false;
fs_visitor *v8 = NULL, *v16 = NULL, *v32 = NULL;
cfg_t *simd8_cfg = NULL, *simd16_cfg = NULL, *simd32_cfg = NULL;
float throughput = 0;
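+ /* Track whether any compile has spilled; once a narrower SIMD variant
+ * spills, wider variants are assumed to spill even more and are skipped.
+ */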
+ bool has_spilled = false;
v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
&prog_data->base, shader, 8, shader_time_index8);
prog_data->reg_blocks_8 = brw_register_blocks(v8->grf_used);
const performance &perf = v8->performance_analysis.require();
throughput = MAX2(throughput, perf.throughput);
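+ /* Only the first (narrowest) compile is allowed to spill; record whether
+ * SIMD8 spilled and disable spilling for the wider compiles below.
+ */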
+ has_spilled = v8->spilled_any_registers;
+ allow_spilling = false;
}
/* Limit dispatch width to simd8 with dual source blending on gen8.
"using SIMD8 when dual src blending.\n");
}
- if (v8->max_dispatch_width >= 16 &&
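+ /* Don't try SIMD16 if SIMD8 spilled; spilling at a wider width is
+ * assumed to be worse than simply staying at SIMD8.
+ */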
+ if (!has_spilled &&
+ v8->max_dispatch_width >= 16 &&
likely(!(INTEL_DEBUG & DEBUG_NO16) || use_rep_send)) {
/* Try a SIMD16 compile */
v16 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
prog_data->reg_blocks_16 = brw_register_blocks(v16->grf_used);
const performance &perf = v16->performance_analysis.require();
throughput = MAX2(throughput, perf.throughput);
+ has_spilled = v16->spilled_any_registers;
+ allow_spilling = false;
}
}
/* Currently, the compiler only supports SIMD32 on SNB+ */
- if (v8->max_dispatch_width >= 32 && !use_rep_send &&
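+ /* Likewise, skip the SIMD32 compile if a narrower variant spilled. */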
+ if (!has_spilled &&
+ v8->max_dispatch_width >= 32 && !use_rep_send &&
devinfo->gen >= 6 && simd16_cfg &&
!(INTEL_DEBUG & DEBUG_NO32)) {
/* Try a SIMD32 compile */
v8 = new fs_visitor(compiler, log_data, mem_ctx, &key->base,
&prog_data->base,
nir8, 8, shader_time_index);
- if (!v8->run_cs(min_dispatch_width)) {
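+ /* SIMD8 is the narrowest variant we can fall back to, so it is always
+ * allowed to spill.
+ */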
+ if (!v8->run_cs(true /* allow_spilling */)) {
fail_msg = v8->fail_msg;
} else {
/* We should always be able to do SIMD32 for compute shaders */
}
}
- if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
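+ /* As in the FS path, don't compile wider dispatch widths once the
+ * variant we already have has spilled.
+ */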
+ if ((!v || !v->spilled_any_registers) &&
+ likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && min_dispatch_width <= 16 && max_dispatch_width >= 16) {
/* Try a SIMD16 compile */
nir_shader *nir16 = compile_cs_to_nir(compiler, mem_ctx, key,
if (v8)
v16->import_uniforms(v8);
- if (!v16->run_cs(min_dispatch_width)) {
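+ /* Spilling is only allowed when SIMD16 is the narrowest variant we
+ * compile, i.e. when there is no SIMD8 fallback (v == NULL).
+ */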
+ if (!v16->run_cs(v == NULL /* allow_spilling */)) {
compiler->shader_perf_log(log_data,
"SIMD16 shader failed to compile: %s",
v16->fail_msg);
}
}
- if (!fail_msg && (min_dispatch_width > 16 || (INTEL_DEBUG & DEBUG_DO32)) &&
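+ /* Again, skip SIMD32 if the variant we already have spilled. */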
+ if ((!v || !v->spilled_any_registers) &&
+ !fail_msg && (min_dispatch_width > 16 || (INTEL_DEBUG & DEBUG_DO32)) &&
max_dispatch_width >= 32) {
/* Try a SIMD32 compile */
nir_shader *nir32 = compile_cs_to_nir(compiler, mem_ctx, key,
else if (v16)
v32->import_uniforms(v16);
- if (!v32->run_cs(min_dispatch_width)) {
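+ /* Allow spilling only when SIMD32 is the sole variant (v == NULL). */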
+ if (!v32->run_cs(v == NULL /* allow_spilling */)) {
compiler->shader_perf_log(log_data,
"SIMD32 shader failed to compile: %s",
v32->fail_msg);