#include "util/u_resource.h"
#include "util/u_upload_mgr.h"
-struct gfx10_format {
- unsigned img_format : 9;
-
- /* Various formats are only supported with workarounds for vertex fetch,
- * and some 32_32_32 formats are supported natively, but only for buffers
- * (possibly with some image support, actually, but no filtering). */
- bool buffers_only : 1;
-};
-
#include "gfx10_format_table.h"
static unsigned si_map_swizzle(unsigned swizzle)
sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
}
break;
+
+ case V_028C70_COLOR_5_9_9_9:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);
+ break;
}
}
unsigned initial_cdw = sctx->gfx_cs->current.cdw;
unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | clipdist_mask |
- (culldist_mask << 8);
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+ S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
+ clipdist_mask | (culldist_mask << 8);
if (sctx->chip_class >= GFX10) {
radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
if (sctx->chip_class >= GFX7) {
unsigned log_sample_rate = sctx->framebuffer.log_samples;
- /* Stoney doesn't increment occlusion query counters
- * if the sample rate is 16x. Use 8x sample rate instead.
- */
- if (sctx->family == CHIP_STONEY)
- log_sample_rate = MIN2(log_sample_rate, 3);
-
db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) |
S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) |
radeon_opt_set_context_reg(
sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
- S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
- S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
+ S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
+ S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
+ S_028010_CENTROID_COMPUTATION_MODE_GFX103(sctx->chip_class >= GFX10_3 ? 2 : 0));
db_shader_control = sctx->ps_db_shader_control;
/*
* format translation
*/
-static uint32_t si_translate_colorformat(enum pipe_format format)
+static uint32_t si_translate_colorformat(enum chip_class chip_class,
+ enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
if (!desc)
if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
return V_028C70_COLOR_10_11_11;
+ if (chip_class >= GFX10_3 &&
+ format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */
+ return V_028C70_COLOR_5_9_9_9;
+
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
return V_028C70_COLOR_INVALID;
return usage;
}
-static bool si_is_colorbuffer_format_supported(enum pipe_format format)
+static bool si_is_colorbuffer_format_supported(enum chip_class chip_class,
+ enum pipe_format format)
{
- return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
+ return si_translate_colorformat(chip_class, format) != V_028C70_COLOR_INVALID &&
si_translate_colorswap(format, false) != ~0U;
}
!util_is_power_of_two_or_zero(storage_sample_count))
return false;
+ /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate,
+ * so don't expose 16 samples there.
+ */
+ const unsigned max_eqaa_samples = sscreen->info.num_render_backends == 1 ? 8 : 16;
+ const unsigned max_samples = 8;
+
/* MSAA support without framebuffer attachments. */
- if (format == PIPE_FORMAT_NONE && sample_count <= 16)
+ if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples)
return true;
if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) {
/* Color without EQAA or depth/stencil. */
- if (sample_count > 8 || sample_count != storage_sample_count)
+ if (sample_count > max_samples || sample_count != storage_sample_count)
return false;
} else {
/* Color with EQAA. */
- if (sample_count > 16 || storage_sample_count > 8)
+ if (sample_count > max_eqaa_samples || storage_sample_count > max_samples)
return false;
}
}
if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) &&
- si_is_colorbuffer_format_supported(format)) {
+ si_is_colorbuffer_format_supported(sscreen->info.chip_class, format)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format))
case V_028C70_COLOR_4_4_4_4:
case V_028C70_COLOR_10_11_11:
case V_028C70_COLOR_11_11_10:
+ case V_028C70_COLOR_5_9_9_9:
case V_028C70_COLOR_8:
case V_028C70_COLOR_8_8:
case V_028C70_COLOR_8_8_8_8:
}
}
- format = si_translate_colorformat(surf->base.format);
+ format = si_translate_colorformat(sctx->chip_class, surf->base.format);
if (format == V_028C70_COLOR_INVALID) {
PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
}
surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.htile_offset) >> 8;
surf->db_htile_surface =
- S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(tex->surface.u.gfx9.htile.pipe_aligned);
+ S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
if (sctx->chip_class == GFX9) {
- surf->db_htile_surface |= S_028ABC_RB_ALIGNED(tex->surface.u.gfx9.htile.rb_aligned);
+ surf->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
}
}
} else {
else
sctx->framebuffer.uncompressed_cb_mask |= 1 << i;
- if (tex->surface.dcc_offset)
+ if (tex->surface.display_dcc_offset)
sctx->framebuffer.displayable_dcc_cb_mask |= 1 << i;
/* Don't update nr_color_samples for non-AA buffers.
sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
- S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples);
+ S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
+ S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(sctx->chip_class >= GFX10_3);
if (sctx->framebuffer.nr_samples > 1) {
db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) |
S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) |
S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type);
- /* Depth is the the last accessible layer on gfx9+. The hw doesn't need
- * to know the total number of layers.
- */
- state[4] =
- S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) |
- S_00A010_BASE_ARRAY(first_layer);
+
+ if (res->target == PIPE_TEXTURE_1D ||
+ res->target == PIPE_TEXTURE_2D) {
+ /* 1D, 2D, and 2D_MSAA can set a custom pitch for shader resources
+ * starting with gfx10.3 (ignored if pitch <= width). Other texture
+ * targets can't. CB and DB can't set a custom pitch for any target.
+ */
+ if (screen->info.chip_class >= GFX10_3)
+ state[4] = S_00A010_DEPTH(tex->surface.u.gfx9.surf_pitch - 1);
+ else
+ state[4] = 0;
+ } else {
+ /* Depth is the last accessible layer on gfx9+. The hw doesn't need
+ * to know the total number of layers.
+ */
+ state[4] = S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ?
+ depth - 1 : last_layer) |
+ S_00A010_BASE_ARRAY(first_layer);
+ }
+
state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) |
S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples)
: tex->buffer.b.b.last_level) |
state[6] = 0;
state[7] = 0;
- if (tex->surface.dcc_offset) {
+ if (vi_dcc_enabled(tex, first_level)) {
state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) |
S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
state[5] |= S_008F24_LAST_ARRAY(last_layer);
}
- if (tex->surface.dcc_offset) {
+ if (vi_dcc_enabled(tex, first_level)) {
state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
} else {
/* The last dword is unused by hw. The shader uses it to clear
depth = u_minify(depth, force_level);
}
- /* This is not needed if state trackers set last_layer correctly. */
+ /* This is not needed if gallium frontends set last_layer correctly. */
if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D ||
state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE)
last_layer = state->u.tex.first_layer;
struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy;
unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);
+ bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ state->mag_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ state->compare_mode == PIPE_TEX_COMPARE_NONE;
union pipe_color_union clamped_border_color;
if (!rstate) {
S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
+ S_008F30_TRUNC_COORD(trunc_coord) |
S_008F30_COMPAT_MODE(sctx->chip_class == GFX8 || sctx->chip_class == GFX9));
rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
unsigned mask = sctx->sample_mask;
/* Needed for line and polygon smoothing as well as for the Polaris
- * small primitive filter. We expect the state tracker to take care of
+ * small primitive filter. We expect the gallium frontend to take care of
* this for us.
*/
assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
* into account would complicate the fast path (where everything
* is nicely aligned).
*/
- bool check_alignment = log_hw_load_size >= 1 && (sscreen->info.chip_class == GFX6 ||
- sscreen->info.chip_class == GFX10);
+ bool check_alignment =
+ log_hw_load_size >= 1 &&
+ (sscreen->info.chip_class == GFX6 || sscreen->info.chip_class >= GFX10);
bool opencode = sscreen->options.vs_fetch_always_opencode;
if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0)
return;
si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
- si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
- si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
+ si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1));
+ si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1));
si_pm4_cmd_end(pm4, false);
if (has_clear_state) {
}
/* Compute LATE_ALLOC_VS.LIMIT. */
- unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh;
- unsigned late_alloc_wave64 = 0; /* The limit is per SH. */
+ unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa;
+ unsigned late_alloc_wave64 = 0; /* The limit is per SA. */
unsigned cu_mask_vs = 0xffff;
unsigned cu_mask_gs = 0xffff;
if (!sscreen->info.use_late_alloc) {
late_alloc_wave64 = 0;
} else if (num_cu_per_sh <= 4) {
- /* Too few available compute units per SH. Disallowing
+ /* Too few available compute units per SA. Disallowing
* VS to run on one CU could hurt us more than late VS
* allocation would help.
*
* a single primitive shader subgroup.
*/
si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
+ /* Reuse for legacy (non-NGG) only. */
si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
if (!has_clear_state) {
S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
}
+ if (sctx->chip_class >= GFX10_3) {
+ si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff);
+ }
if (sctx->chip_class >= GFX9) {
si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
}
- si_pm4_upload_indirect_buffer(sctx, pm4);
sctx->init_config = pm4;
}