#include "util/u_resource.h"
#include "util/u_upload_mgr.h"
-struct gfx10_format {
- unsigned img_format : 9;
-
- /* Various formats are only supported with workarounds for vertex fetch,
- * and some 32_32_32 formats are supported natively, but only for buffers
- * (possibly with some image support, actually, but no filtering). */
- bool buffers_only : 1;
-};
-
#include "gfx10_format_table.h"
static unsigned si_map_swizzle(unsigned swizzle)
sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
}
break;
+
+ case V_028C70_COLOR_5_9_9_9:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);
+ break;
}
}
unsigned initial_cdw = sctx->gfx_cs->current.cdw;
unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | clipdist_mask |
- (culldist_mask << 8);
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+ S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
+ clipdist_mask | (culldist_mask << 8);
if (sctx->chip_class >= GFX10) {
radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
radeon_opt_set_context_reg(
sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
- S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
- S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
+ S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
+ S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
+ S_028010_CENTROID_COMPUTATION_MODE_GFX103(sctx->chip_class >= GFX10_3 ? 2 : 0));
db_shader_control = sctx->ps_db_shader_control;
/*
* format translation
*/
-static uint32_t si_translate_colorformat(enum pipe_format format)
+static uint32_t si_translate_colorformat(enum chip_class chip_class,
+ enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
if (!desc)
if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
return V_028C70_COLOR_10_11_11;
+ if (chip_class >= GFX10_3 &&
+ format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */
+ return V_028C70_COLOR_5_9_9_9;
+
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
return V_028C70_COLOR_INVALID;
return usage;
}
-static bool si_is_colorbuffer_format_supported(enum pipe_format format)
+static bool si_is_colorbuffer_format_supported(enum chip_class chip_class,
+ enum pipe_format format)
{
- return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
+ return si_translate_colorformat(chip_class, format) != V_028C70_COLOR_INVALID &&
si_translate_colorswap(format, false) != ~0U;
}
if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) &&
- si_is_colorbuffer_format_supported(format)) {
+ si_is_colorbuffer_format_supported(sscreen->info.chip_class, format)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format))
case V_028C70_COLOR_4_4_4_4:
case V_028C70_COLOR_10_11_11:
case V_028C70_COLOR_11_11_10:
+ case V_028C70_COLOR_5_9_9_9:
case V_028C70_COLOR_8:
case V_028C70_COLOR_8_8:
case V_028C70_COLOR_8_8_8_8:
}
}
- format = si_translate_colorformat(surf->base.format);
+ format = si_translate_colorformat(sctx->chip_class, surf->base.format);
if (format == V_028C70_COLOR_INVALID) {
PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
}
sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
- S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples);
+ S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
+ S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(sctx->chip_class >= GFX10_3);
if (sctx->framebuffer.nr_samples > 1) {
db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) |
S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) |
S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type);
- /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
- * to know the total number of layers.
- */
- state[4] =
- S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) |
- S_00A010_BASE_ARRAY(first_layer);
+
+ if (res->target == PIPE_TEXTURE_1D ||
+ res->target == PIPE_TEXTURE_2D) {
+ /* 1D, 2D, and 2D_MSAA can set a custom pitch for shader resources
+ * starting with gfx10.3 (ignored if pitch <= width). Other texture
+ * targets can't. CB and DB can't set a custom pitch for any target.
+ */
+ if (screen->info.chip_class >= GFX10_3)
+ state[4] = S_00A010_DEPTH(tex->surface.u.gfx9.surf_pitch - 1);
+ else
+ state[4] = 0;
+ } else {
+ /* Depth is the last accessible layer on gfx9+. The hw doesn't need
+ * to know the total number of layers.
+ */
+ state[4] = S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ?
+ depth - 1 : last_layer) |
+ S_00A010_BASE_ARRAY(first_layer);
+ }
+
state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) |
S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples)
: tex->buffer.b.b.last_level) |
unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy;
unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);
bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
- state->mag_img_filter == PIPE_TEX_FILTER_NEAREST;
+ state->mag_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ state->compare_mode == PIPE_TEX_COMPARE_NONE;
union pipe_color_union clamped_border_color;
if (!rstate) {
* into account would complicate the fast path (where everything
* is nicely aligned).
*/
- bool check_alignment = log_hw_load_size >= 1 && (sscreen->info.chip_class == GFX6 ||
- sscreen->info.chip_class == GFX10);
+ bool check_alignment =
+ log_hw_load_size >= 1 &&
+ (sscreen->info.chip_class == GFX6 || sscreen->info.chip_class >= GFX10);
bool opencode = sscreen->options.vs_fetch_always_opencode;
if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0)
/* Compute LATE_ALLOC_VS.LIMIT. */
unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa;
- unsigned late_alloc_wave64 = 0; /* The limit is per SH. */
+ unsigned late_alloc_wave64 = 0; /* The limit is per SA. */
unsigned cu_mask_vs = 0xffff;
unsigned cu_mask_gs = 0xffff;
if (!sscreen->info.use_late_alloc) {
late_alloc_wave64 = 0;
} else if (num_cu_per_sh <= 4) {
- /* Too few available compute units per SH. Disallowing
+ /* Too few available compute units per SA. Disallowing
* VS to run on one CU could hurt us more than late VS
* allocation would help.
*
* a single primitive shader subgroup.
*/
si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
+ /* Vertex reuse applies to the legacy (non-NGG) pipeline only. */
si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
if (!has_clear_state) {
S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
}
+ if (sctx->chip_class >= GFX10_3) {
+ si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff);
+ }
if (sctx->chip_class >= GFX9) {
si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,