conf->num_vgprs = MAX2(conf->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
conf->num_sgprs = MAX2(conf->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
+ /* TODO: LLVM doesn't set FLOAT_MODE for non-compute shaders */
conf->float_mode = G_00B028_FLOAT_MODE(value);
conf->rsrc1 = value;
break;
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
conf->lds_size = MAX2(conf->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
+ /* TODO: LLVM doesn't set SHARED_VGPR_CNT for all shader types */
conf->num_shared_vgprs = G_00B02C_SHARED_VGPR_CNT(value);
conf->rsrc2 = value;
break;
/* sgprs spills aren't spilling */
conf->scratch_bytes_per_wave = G_00B860_WAVESIZE(scratch_size) * 256 * 4;
}
+
+ /* Enable 64-bit and 16-bit denormals, because there is no performance
+ * cost.
+ *
+ * Don't enable denormals for 32-bit floats, because:
+ * - denormals disable output modifiers
+ * - denormals break v_mad_f32
+ * - GFX6 & GFX7 would be very slow
+ */
+ conf->float_mode &= ~V_00B028_FP_ALL_DENORMS;
+ conf->float_mode |= V_00B028_FP_64_DENORMS;
}
return NULL;
}
- /* Enable 64-bit and 16-bit denormals, because there is no performance
- * cost.
- *
- * If denormals are enabled, all floating-point output modifiers are
- * ignored.
- *
- * Don't enable denormals for 32-bit floats, because:
- * - Floating-point output modifiers would be ignored by the hw.
- * - Some opcodes don't support denormals, such as v_mad_f32. We would
- * have to stop using those.
- * - GFX6 & GFX7 would be very slow.
- */
- config.float_mode |= V_00B028_FP_64_DENORMS;
-
if (rtld_binary.lds_size > 0) {
unsigned alloc_granularity = device->physical_device->rad_info.chip_class >= GFX7 ? 512 : 256;
config.lds_size = align(rtld_binary.lds_size, alloc_granularity) / alloc_granularity;
bool ok = ac_rtld_read_config(&rtld, conf);
ac_rtld_close(&rtld);
- if (!ok)
- return false;
-
- /* Enable 64-bit and 16-bit denormals, because there is no performance
- * cost.
- *
- * If denormals are enabled, all floating-point output modifiers are
- * ignored.
- *
- * Don't enable denormals for 32-bit floats, because:
- * - Floating-point output modifiers would be ignored by the hw.
- * - Some opcodes don't support denormals, such as v_mad_f32. We would
- * have to stop using those.
- * - GFX6 & GFX7 would be very slow.
- */
- conf->float_mode |= V_00B028_FP_64_DENORMS;
-
- return true;
+ return ok;
}
void si_llvm_context_init(struct si_shader_context *ctx,