#include "util/u_memory.h"
#include "util/u_resource.h"
#include "util/u_upload_mgr.h"
-
-struct gfx10_format {
- unsigned img_format : 9;
-
- /* Various formats are only supported with workarounds for vertex fetch,
- * and some 32_32_32 formats are supported natively, but only for buffers
- * (possibly with some image support, actually, but no filtering). */
- bool buffers_only : 1;
-};
+#include "util/u_blend.h"
#include "gfx10_format_table.h"
sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
}
break;
+
+ case V_028C70_COLOR_5_9_9_9:
+ if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
+ sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);
+ break;
}
}
}
}
-static bool si_blend_factor_uses_dst(unsigned factor)
-{
- return factor == PIPE_BLENDFACTOR_DST_COLOR || factor == PIPE_BLENDFACTOR_DST_ALPHA ||
- factor == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE ||
- factor == PIPE_BLENDFACTOR_INV_DST_ALPHA || factor == PIPE_BLENDFACTOR_INV_DST_COLOR;
-}
-
static void *si_create_blend_state_mode(struct pipe_context *ctx,
const struct pipe_blend_state *state, unsigned mode)
{
dstA_opt = si_translate_blend_opt_factor(dstA, true);
/* Handle interdependencies. */
- if (si_blend_factor_uses_dst(srcRGB))
+ if (util_blend_factor_uses_dest(srcRGB, false))
dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
- if (si_blend_factor_uses_dst(srcA))
+ if (util_blend_factor_uses_dest(srcA, false))
dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE;
if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE &&
unsigned initial_cdw = sctx->gfx_cs->current.cdw;
unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
- S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | clipdist_mask |
- (culldist_mask << 8);
+ S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
+ S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
+ S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->chip_class >= GFX10_3) |
+ clipdist_mask | (culldist_mask << 8);
if (sctx->chip_class >= GFX10) {
radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL,
if (sctx->chip_class >= GFX7) {
unsigned log_sample_rate = sctx->framebuffer.log_samples;
- /* Stoney doesn't increment occlusion query counters
- * if the sample rate is 16x. Use 8x sample rate instead.
- */
- if (sctx->family == CHIP_STONEY)
- log_sample_rate = MIN2(log_sample_rate, 3);
-
db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) |
S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) |
radeon_opt_set_context_reg(
sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2,
S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) |
- S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
- S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4));
+ S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) |
+ S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) |
+ S_028010_CENTROID_COMPUTATION_MODE(sctx->chip_class >= GFX10_3 ? 2 : 0));
db_shader_control = sctx->ps_db_shader_control;
/*
* format translation
*/
-static uint32_t si_translate_colorformat(enum pipe_format format)
+static uint32_t si_translate_colorformat(enum chip_class chip_class,
+ enum pipe_format format)
{
const struct util_format_description *desc = util_format_description(format);
if (!desc)
if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */
return V_028C70_COLOR_10_11_11;
+ if (chip_class >= GFX10_3 &&
+ format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */
+ return V_028C70_COLOR_5_9_9_9;
+
if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
return V_028C70_COLOR_INVALID;
if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
(sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA10 ||
- sscreen->info.family == CHIP_RAVEN)) {
+ sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2)) {
switch (format) {
case PIPE_FORMAT_ETC1_RGB8:
case PIPE_FORMAT_ETC2_RGB8:
case 4:
switch (desc->nr_channels) {
#if 0 /* Not supported for render targets */
- case 2:
- return V_008F14_IMG_DATA_FORMAT_4_4;
+ case 2:
+ return V_008F14_IMG_DATA_FORMAT_4_4;
#endif
case 4:
return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
case 2:
return V_008F14_IMG_DATA_FORMAT_32_32;
#if 0 /* Not supported for render targets */
- case 3:
- return V_008F14_IMG_DATA_FORMAT_32_32_32;
+ case 3:
+ return V_008F14_IMG_DATA_FORMAT_32_32_32;
#endif
case 4:
return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
return usage;
}
-static bool si_is_colorbuffer_format_supported(enum pipe_format format)
+static bool si_is_colorbuffer_format_supported(enum chip_class chip_class,
+ enum pipe_format format)
{
- return si_translate_colorformat(format) != V_028C70_COLOR_INVALID &&
+ return si_translate_colorformat(chip_class, format) != V_028C70_COLOR_INVALID &&
si_translate_colorswap(format, false) != ~0U;
}
!util_is_power_of_two_or_zero(storage_sample_count))
return false;
+ /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate,
+ * so don't expose 16 samples there.
+ */
+ const unsigned max_eqaa_samples = sscreen->info.num_render_backends == 1 ? 8 : 16;
+ const unsigned max_samples = 8;
+
/* MSAA support without framebuffer attachments. */
- if (format == PIPE_FORMAT_NONE && sample_count <= 16)
+ if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples)
return true;
if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) {
/* Color without EQAA or depth/stencil. */
- if (sample_count > 8 || sample_count != storage_sample_count)
+ if (sample_count > max_samples || sample_count != storage_sample_count)
return false;
} else {
/* Color with EQAA. */
- if (sample_count > 16 || storage_sample_count > 8)
+ if (sample_count > max_eqaa_samples || storage_sample_count > max_samples)
return false;
}
}
if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) &&
- si_is_colorbuffer_format_supported(format)) {
+ si_is_colorbuffer_format_supported(sscreen->info.chip_class, format)) {
retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
PIPE_BIND_SHARED);
if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format))
static void si_choose_spi_color_formats(struct si_surface *surf, unsigned format, unsigned swap,
unsigned ntype, bool is_depth)
{
- /* Alpha is needed for alpha-to-coverage.
- * Blending may be with or without alpha.
- */
- unsigned normal = 0; /* most optimal, may not support blending or export alpha */
- unsigned alpha = 0; /* exports alpha, but may not support blending */
- unsigned blend = 0; /* supports blending, but may not export alpha */
- unsigned blend_alpha = 0; /* least optimal, supports blending and exports alpha */
-
- /* Choose the SPI color formats. These are required values for RB+.
- * Other chips have multiple choices, though they are not necessarily better.
- */
- switch (format) {
- case V_028C70_COLOR_5_6_5:
- case V_028C70_COLOR_1_5_5_5:
- case V_028C70_COLOR_5_5_5_1:
- case V_028C70_COLOR_4_4_4_4:
- case V_028C70_COLOR_10_11_11:
- case V_028C70_COLOR_11_11_10:
- case V_028C70_COLOR_8:
- case V_028C70_COLOR_8_8:
- case V_028C70_COLOR_8_8_8_8:
- case V_028C70_COLOR_10_10_10_2:
- case V_028C70_COLOR_2_10_10_10:
- if (ntype == V_028C70_NUMBER_UINT)
- alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
- else if (ntype == V_028C70_NUMBER_SINT)
- alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
- else
- alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
- break;
-
- case V_028C70_COLOR_16:
- case V_028C70_COLOR_16_16:
- case V_028C70_COLOR_16_16_16_16:
- if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) {
- /* UNORM16 and SNORM16 don't support blending */
- if (ntype == V_028C70_NUMBER_UNORM)
- normal = alpha = V_028714_SPI_SHADER_UNORM16_ABGR;
- else
- normal = alpha = V_028714_SPI_SHADER_SNORM16_ABGR;
-
- /* Use 32 bits per channel for blending. */
- if (format == V_028C70_COLOR_16) {
- if (swap == V_028C70_SWAP_STD) { /* R */
- blend = V_028714_SPI_SHADER_32_R;
- blend_alpha = V_028714_SPI_SHADER_32_AR;
- } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
- blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
- else
- assert(0);
- } else if (format == V_028C70_COLOR_16_16) {
- if (swap == V_028C70_SWAP_STD) { /* RG */
- blend = V_028714_SPI_SHADER_32_GR;
- blend_alpha = V_028714_SPI_SHADER_32_ABGR;
- } else if (swap == V_028C70_SWAP_ALT) /* RA */
- blend = blend_alpha = V_028714_SPI_SHADER_32_AR;
- else
- assert(0);
- } else /* 16_16_16_16 */
- blend = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
- } else if (ntype == V_028C70_NUMBER_UINT)
- alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_UINT16_ABGR;
- else if (ntype == V_028C70_NUMBER_SINT)
- alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_SINT16_ABGR;
- else if (ntype == V_028C70_NUMBER_FLOAT)
- alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_FP16_ABGR;
- else
- assert(0);
- break;
+ struct ac_spi_color_formats formats = {};
- case V_028C70_COLOR_32:
- if (swap == V_028C70_SWAP_STD) { /* R */
- blend = normal = V_028714_SPI_SHADER_32_R;
- alpha = blend_alpha = V_028714_SPI_SHADER_32_AR;
- } else if (swap == V_028C70_SWAP_ALT_REV) /* A */
- alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
- else
- assert(0);
- break;
-
- case V_028C70_COLOR_32_32:
- if (swap == V_028C70_SWAP_STD) { /* RG */
- blend = normal = V_028714_SPI_SHADER_32_GR;
- alpha = blend_alpha = V_028714_SPI_SHADER_32_ABGR;
- } else if (swap == V_028C70_SWAP_ALT) /* RA */
- alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_AR;
- else
- assert(0);
- break;
-
- case V_028C70_COLOR_32_32_32_32:
- case V_028C70_COLOR_8_24:
- case V_028C70_COLOR_24_8:
- case V_028C70_COLOR_X24_8_32_FLOAT:
- alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
- break;
-
- default:
- assert(0);
- return;
- }
+ ac_choose_spi_color_formats(format, swap, ntype, is_depth, &formats);
- /* The DB->CB copy needs 32_ABGR. */
- if (is_depth)
- alpha = blend = blend_alpha = normal = V_028714_SPI_SHADER_32_ABGR;
-
- surf->spi_shader_col_format = normal;
- surf->spi_shader_col_format_alpha = alpha;
- surf->spi_shader_col_format_blend = blend;
- surf->spi_shader_col_format_blend_alpha = blend_alpha;
+ surf->spi_shader_col_format = formats.normal;
+ surf->spi_shader_col_format_alpha = formats.alpha;
+ surf->spi_shader_col_format_blend = formats.blend;
+ surf->spi_shader_col_format_blend_alpha = formats.blend_alpha;
}
static void si_initialize_color_surface(struct si_context *sctx, struct si_surface *surf)
}
}
- format = si_translate_colorformat(surf->base.format);
+ format = si_translate_colorformat(sctx->chip_class, surf->base.format);
if (format == V_028C70_COLOR_INVALID) {
PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
}
sctx->framebuffer.DB_has_shader_readable_metadata = false;
sctx->framebuffer.all_DCC_pipe_aligned = true;
sctx->framebuffer.min_bytes_per_pixel = 0;
+ sctx->framebuffer.color_big_page = true;
+ sctx->framebuffer.zs_big_page = true;
for (i = 0; i < state->nr_cbufs; i++) {
if (!state->cbufs[i])
sctx->framebuffer.spi_shader_col_format_blend_alpha |= surf->spi_shader_col_format_blend_alpha
<< (i * 4);
+ sctx->framebuffer.color_big_page &=
+ tex->buffer.bo_alignment % (64 * 1024) == 0;
+
if (surf->color_is_int8)
sctx->framebuffer.color_is_int8 |= 1 << i;
if (surf->color_is_int10)
else
sctx->framebuffer.uncompressed_cb_mask |= 1 << i;
- if (tex->surface.dcc_offset)
+ if (tex->surface.display_dcc_offset)
sctx->framebuffer.displayable_dcc_cb_mask |= 1 << i;
/* Don't update nr_color_samples for non-AA buffers.
si_init_depth_surface(sctx, surf);
}
+ sctx->framebuffer.zs_big_page = zstex->buffer.bo_alignment % (64 * 1024) == 0;
+
if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level, PIPE_MASK_ZS))
sctx->framebuffer.DB_has_shader_readable_metadata = true;
struct si_surface *cb = NULL;
unsigned cb_color_info = 0;
+ /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */
+ unsigned meta_write_policy, meta_read_policy;
+ /* TODO: investigate whether LRU improves performance on other chips too */
+ if (sctx->screen->info.num_render_backends <= 4) {
+ meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
+ meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */
+ } else {
+ meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */
+ meta_read_policy = V_02807C_CACHE_NOA; /* don't cache reads */
+ }
+
/* Colorbuffers. */
for (i = 0; i < nr_cbufs; i++) {
uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base;
}
if (sctx->chip_class >= GFX10) {
+ bool zs_big_page = sctx->chip_class >= GFX10_3 &&
+ sctx->framebuffer.zs_big_page;
+
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
radeon_set_context_reg(cs, R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size);
radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */
radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */
- radeon_set_context_reg_seq(cs, R_028068_DB_Z_READ_BASE_HI, 5);
+ radeon_set_context_reg_seq(cs, R_028068_DB_Z_READ_BASE_HI, 6);
radeon_emit(cs, zb->db_depth_base >> 32); /* DB_Z_READ_BASE_HI */
radeon_emit(cs, zb->db_stencil_base >> 32); /* DB_STENCIL_READ_BASE_HI */
radeon_emit(cs, zb->db_depth_base >> 32); /* DB_Z_WRITE_BASE_HI */
radeon_emit(cs, zb->db_stencil_base >> 32); /* DB_STENCIL_WRITE_BASE_HI */
radeon_emit(cs, zb->db_htile_data_base >> 32); /* DB_HTILE_DATA_BASE_HI */
+ radeon_emit(cs, /* DB_RMI_L2_CACHE_CONTROL */
+ S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) |
+ S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) |
+ S_02807C_HTILE_WR_POLICY(meta_write_policy) |
+ S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) |
+ S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA) |
+ S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA) |
+ S_02807C_HTILE_RD_POLICY(meta_read_policy) |
+ S_02807C_Z_BIG_PAGE(zs_big_page) |
+ S_02807C_S_BIG_PAGE(zs_big_page));
} else if (sctx->chip_class == GFX9) {
radeon_set_context_reg_seq(cs, R_028014_DB_HTILE_DATA_BASE, 3);
radeon_emit(cs, zb->db_htile_data_base); /* DB_HTILE_DATA_BASE */
}
/* Framebuffer dimensions. */
- /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_config() */
+ /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_cs_preamble_state */
radeon_set_context_reg(cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
S_028208_BR_X(state->width) | S_028208_BR_Y(state->height));
+ if (nr_cbufs) {
+ bool color_big_page = sctx->chip_class >= GFX10_3 &&
+ sctx->framebuffer.color_big_page;
+ radeon_set_context_reg(cs, R_028410_CB_RMI_GL2_CACHE_CONTROL,
+ S_028410_CMASK_WR_POLICY(meta_write_policy) |
+ S_028410_FMASK_WR_POLICY(meta_write_policy) |
+ S_028410_DCC_WR_POLICY(meta_write_policy) |
+ S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM) |
+ S_028410_CMASK_RD_POLICY(meta_read_policy) |
+ S_028410_FMASK_RD_POLICY(meta_read_policy) |
+ S_028410_DCC_RD_POLICY(meta_read_policy) |
+ S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA) |
+ S_028410_FMASK_BIG_PAGE(color_big_page) |
+ S_028410_COLOR_BIG_PAGE(color_big_page));
+ }
+
if (sctx->screen->dfsm_allowed) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1);
sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) |
S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) |
- S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples);
+ S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) |
+ S_028BE0_COVERED_CENTROID_IS_CENTER(sctx->chip_class >= GFX10_3);
if (sctx->framebuffer.nr_samples > 1) {
db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) |
state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) |
S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples)
: tex->buffer.b.b.last_level) |
- S_00A014_PERF_MOD(4);
+ S_00A014_PERF_MOD(4) |
+ S_00A014_BIG_PAGE(screen->info.chip_class >= GFX10_3 &&
+ tex->buffer.bo_alignment % (64 * 1024) == 0);
state[6] = 0;
state[7] = 0;
- if (tex->surface.dcc_offset) {
+ if (vi_dcc_enabled(tex, first_level)) {
state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) |
S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
state[5] |= S_008F24_LAST_ARRAY(last_layer);
}
- if (tex->surface.dcc_offset) {
+ if (vi_dcc_enabled(tex, first_level)) {
state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
} else {
/* The last dword is unused by hw. The shader uses it to clear
data_format = V_008F14_IMG_DATA_FORMAT_FMASK;
switch (FMASK(res->nr_samples, res->nr_storage_samples)) {
case FMASK(2, 1):
- num_format = V_008F14_IMG_FMASK_8_2_1;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1;
break;
case FMASK(2, 2):
- num_format = V_008F14_IMG_FMASK_8_2_2;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
break;
case FMASK(4, 1):
- num_format = V_008F14_IMG_FMASK_8_4_1;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1;
break;
case FMASK(4, 2):
- num_format = V_008F14_IMG_FMASK_8_4_2;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2;
break;
case FMASK(4, 4):
- num_format = V_008F14_IMG_FMASK_8_4_4;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
break;
case FMASK(8, 1):
- num_format = V_008F14_IMG_FMASK_8_8_1;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1;
break;
case FMASK(8, 2):
- num_format = V_008F14_IMG_FMASK_16_8_2;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2;
break;
case FMASK(8, 4):
- num_format = V_008F14_IMG_FMASK_32_8_4;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4;
break;
case FMASK(8, 8):
- num_format = V_008F14_IMG_FMASK_32_8_8;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
break;
case FMASK(16, 1):
- num_format = V_008F14_IMG_FMASK_16_16_1;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1;
break;
case FMASK(16, 2):
- num_format = V_008F14_IMG_FMASK_32_16_2;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2;
break;
case FMASK(16, 4):
- num_format = V_008F14_IMG_FMASK_64_16_4;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4;
break;
case FMASK(16, 8):
- num_format = V_008F14_IMG_FMASK_64_16_8;
+ num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8;
break;
default:
unreachable("invalid nr_samples");
depth = u_minify(depth, force_level);
}
- /* This is not needed if state trackers set last_layer correctly. */
+ /* This is not needed if gallium frontends set last_layer correctly. */
if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D ||
state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE)
last_layer = state->u.tex.first_layer;
struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy;
unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);
+ bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ state->mag_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ state->compare_mode == PIPE_TEX_COMPARE_NONE;
union pipe_color_union clamped_border_color;
if (!rstate) {
S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
+ S_008F30_TRUNC_COORD(trunc_coord) |
S_008F30_COMPAT_MODE(sctx->chip_class == GFX8 || sctx->chip_class == GFX9));
rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
} else {
rstate->val[2] |= S_008F38_DISABLE_LSB_CEIL(sctx->chip_class <= GFX8) |
S_008F38_FILTER_PREC_FIX(1) |
- S_008F38_ANISO_OVERRIDE_GFX6(sctx->chip_class >= GFX8);
+ S_008F38_ANISO_OVERRIDE_GFX8(sctx->chip_class >= GFX8);
}
/* Create sampler resource for integer textures. */
unsigned mask = sctx->sample_mask;
/* Needed for line and polygon smoothing as well as for the Polaris
- * small primitive filter. We expect the state tracker to take care of
+ * small primitive filter. We expect the gallium frontend to take care of
* this for us.
*/
assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
* into account would complicate the fast path (where everything
* is nicely aligned).
*/
- bool check_alignment = log_hw_load_size >= 1 && (sscreen->info.chip_class == GFX6 ||
- sscreen->info.chip_class == GFX10);
+ bool check_alignment =
+ log_hw_load_size >= 1 &&
+ (sscreen->info.chip_class == GFX6 || sscreen->info.chip_class >= GFX10);
bool opencode = sscreen->options.vs_fetch_always_opencode;
if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0)
return si_create_blend_state_mode(&sctx->b, &blend, mode);
}
-static void si_init_config(struct si_context *sctx);
-
void si_init_state_compute_functions(struct si_context *sctx)
{
sctx->b.create_sampler_state = si_create_sampler_state;
sctx->b.set_tess_state = si_set_tess_state;
sctx->b.set_active_query_state = si_set_active_query_state;
-
- si_init_config(sctx);
}
void si_init_screen_state_functions(struct si_screen *sscreen)
}
}
-static void si_init_config(struct si_context *sctx)
+void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
{
struct si_screen *sscreen = sctx->screen;
uint64_t border_color_va = sctx->border_color_buffer->gpu_address;
if (!pm4)
return;
- si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
- si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
- si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
- si_pm4_cmd_end(pm4, false);
-
- if (has_clear_state) {
- si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE);
- si_pm4_cmd_add(pm4, 0);
- si_pm4_cmd_end(pm4, false);
- }
-
- if (sctx->chip_class <= GFX8)
- si_set_raster_config(sctx, pm4);
-
- si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
- if (!has_clear_state)
- si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
-
- /* FIXME calculate these values somehow ??? */
- if (sctx->chip_class <= GFX8) {
- si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
- si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
- }
+ if (!uses_reg_shadowing) {
+ si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
+ si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1));
+ si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1));
- if (!has_clear_state) {
- si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
- si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
- si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
+ if (has_clear_state) {
+ si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0));
+ si_pm4_cmd_add(pm4, 0);
+ }
}
- if (sscreen->info.chip_class <= GFX9)
- si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
- if (!has_clear_state)
- si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
- if (sctx->chip_class < GFX7)
- si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE,
- S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
-
/* CLEAR_STATE doesn't restore these correctly. */
si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1));
si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR,
S_028244_BR_X(16384) | S_028244_BR_Y(16384));
- /* CLEAR_STATE doesn't clear these correctly on certain generations.
- * I don't know why. Deduced by trial and error.
- */
- if (sctx->chip_class <= GFX7 || !has_clear_state) {
- si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
- si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
- si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
- si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
- S_028034_BR_X(16384) | S_028034_BR_Y(16384));
- }
+ si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64));
+ if (!has_clear_state)
+ si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
if (!has_clear_state) {
si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE,
si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0);
si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0);
si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0);
+ si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
+ si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
+ si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
+ si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
}
- if (sctx->chip_class >= GFX10) {
- si_pm4_set_reg(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);
- si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0);
- si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0);
- si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0);
- si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0);
- si_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0);
- } else if (sctx->chip_class == GFX9) {
- si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
- si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
- si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
- } else {
- /* These registers, when written, also overwrite the CLEAR_STATE
- * context, so we can't rely on CLEAR_STATE setting them.
- * It would be an issue if there was another UMD changing them.
- */
- si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
- si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
- si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
+ si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
+ if (sctx->chip_class >= GFX7)
+ si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40));
+
+ if (sctx->chip_class == GFX6) {
+ si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE,
+ S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1));
}
- if (sctx->chip_class >= GFX7) {
- if (sctx->chip_class >= GFX10) {
- /* Logical CUs 16 - 31 */
- si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff));
- si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff));
- si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(0xffff));
- }
+ if (sctx->chip_class <= GFX7 || !has_clear_state) {
+ si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
+ si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
- if (sctx->chip_class >= GFX9) {
- si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
- S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
- } else {
- si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
- S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
- si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
- si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
- S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
-
- /* If this is 0, Bonaire can hang even if GS isn't being used.
- * Other chips are unaffected. These are suboptimal values,
- * but we don't use on-chip GS.
- */
- si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
- }
+ /* CLEAR_STATE doesn't clear these correctly on certain generations.
+ * I don't know why. Deduced by trial and error.
+ */
+ si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
+ si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
+ si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0);
+ si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR,
+ S_028034_BR_X(16384) | S_028034_BR_Y(16384));
+ }
+ if (sctx->chip_class >= GFX7) {
/* Compute LATE_ALLOC_VS.LIMIT. */
- unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh;
- unsigned late_alloc_wave64 = 0; /* The limit is per SH. */
+ unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa;
+ unsigned late_alloc_wave64 = 0; /* The limit is per SA. */
unsigned cu_mask_vs = 0xffff;
unsigned cu_mask_gs = 0xffff;
if (!sscreen->info.use_late_alloc) {
late_alloc_wave64 = 0;
} else if (num_cu_per_sh <= 4) {
- /* Too few available compute units per SH. Disallowing
+ /* Too few available compute units per SA. Disallowing
* VS to run on one CU could hurt us more than late VS
* allocation would help.
*
si_pm4_set_reg(pm4, R_00B118_SPI_SHADER_PGM_RSRC3_VS,
S_00B118_CU_EN(cu_mask_vs) | S_00B118_WAVE_LIMIT(0x3F));
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64));
-
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
S_00B21C_CU_EN(cu_mask_gs) | S_00B21C_WAVE_LIMIT(0x3F));
-
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
}
- if (sctx->chip_class >= GFX10) {
- /* Break up a pixel wave if it contains deallocs for more than
- * half the parameter cache.
- *
- * To avoid a deadlock where pixel waves aren't launched
- * because they're waiting for more pixels while the frontend
- * is stuck waiting for PC space, the maximum allowed value is
- * the size of the PC minus the largest possible allocation for
- * a single primitive shader subgroup.
- */
- si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
- si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
+ if (sctx->chip_class <= GFX8) {
+ si_set_raster_config(sctx, pm4);
- if (!has_clear_state) {
- si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
- sscreen->info.pa_sc_tile_steering_override);
- }
+ /* FIXME calculate these values somehow ??? */
+ si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
+ si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
- /* Enable CMASK/FMASK/HTILE/DCC caching in L2 for small chips. */
- unsigned meta_write_policy, meta_read_policy;
- /* TODO: investigate whether LRU improves performance on other chips too */
- if (sscreen->info.num_render_backends <= 4) {
- meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */
- meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */
- } else {
- meta_write_policy = V_02807C_CACHE_STREAM_WR; /* write combine */
- meta_read_policy = V_02807C_CACHE_NOA_RD; /* don't cache reads */
- }
+ /* These registers, when written, also overwrite the CLEAR_STATE
+ * context, so we can't rely on CLEAR_STATE setting them.
+ * It would be an issue if there was another UMD changing them.
+ */
+ si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0);
+ si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0);
+ si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0);
+ }
- si_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL,
- S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
- S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
- S_02807C_HTILE_WR_POLICY(meta_write_policy) |
- S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM_WR) |
- S_02807C_Z_RD_POLICY(V_02807C_CACHE_NOA_RD) |
- S_02807C_S_RD_POLICY(V_02807C_CACHE_NOA_RD) |
- S_02807C_HTILE_RD_POLICY(meta_read_policy));
-
- si_pm4_set_reg(
- pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL,
- S_028410_CMASK_WR_POLICY(meta_write_policy) | S_028410_FMASK_WR_POLICY(meta_write_policy) |
- S_028410_DCC_WR_POLICY(meta_write_policy) |
- S_028410_COLOR_WR_POLICY(V_028410_CACHE_STREAM_WR) |
- S_028410_CMASK_RD_POLICY(meta_read_policy) |
- S_028410_FMASK_RD_POLICY(meta_read_policy) | S_028410_DCC_RD_POLICY(meta_read_policy) |
- S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_RD));
- si_pm4_set_reg(pm4, R_028428_CB_COVERAGE_OUT_CONTROL, 0);
+ if (sctx->chip_class >= GFX7 && sctx->chip_class <= GFX8) {
+ si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS,
+ S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F));
+ si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F));
+ si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES,
+ S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F));
- si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
- S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
- si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
+ /* If this is 0, Bonaire can hang even if GS isn't being used.
+ * Other chips are unaffected. These are suboptimal values,
+ * but we don't use on-chip GS.
+ */
+ si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4));
}
- if (sctx->chip_class >= GFX9) {
- si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
- S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) |
- S_028B50_DONUT_SPLIT(24) | S_028B50_TRAP_SPLIT(6));
- } else if (sctx->chip_class >= GFX8) {
+ if (sctx->chip_class == GFX8) {
unsigned vgt_tess_distribution;
vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | S_028B50_ACCUM_TRI(11) |
vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3);
si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution);
- } else if (!has_clear_state) {
- si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
- si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
}
- si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8);
- if (sctx->chip_class >= GFX7) {
- si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40));
+ if (sscreen->info.chip_class <= GFX9) {
+ si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
+ }
+
+ if (sctx->chip_class == GFX9) {
+ si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0);
+ si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0);
+ si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0);
}
- si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ, RADEON_PRIO_BORDER_COLORS);
if (sctx->chip_class >= GFX9) {
+ si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS,
+ S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F));
+
+ si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION,
+ S_028B50_ACCUM_ISOLINE(40) | S_028B50_ACCUM_TRI(30) | S_028B50_ACCUM_QUAD(24) |
+ S_028B50_DONUT_SPLIT(24) | S_028B50_TRAP_SPLIT(6));
si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) |
- S_028C48_MAX_PRIM_PER_BATCH(1023));
+ S_028C48_MAX_PRIM_PER_BATCH(1023));
si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
+
si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
+ si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY,
+ sctx->chip_class >= GFX10 ? 0x20 : 0);
+ }
+
+ if (sctx->chip_class >= GFX10) {
+ /* Logical CUs 16 - 31 */
+ si_pm4_set_reg(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(0xffff));
+ si_pm4_set_reg(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff));
+ si_pm4_set_reg(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff));
+
+ si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
+ si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
+ si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
+ si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);
+ si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
+ si_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
+ si_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
+ si_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);
+ si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
+ si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
+ si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
+ si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
+ si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
+ si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
+ si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
+ si_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
+
+ si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
+ S_00B0C0_SOFT_GROUPING_EN(1) |
+ S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
+ si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
+
+ si_pm4_set_reg(pm4, R_028428_CB_COVERAGE_OUT_CONTROL, 0);
+ si_pm4_set_reg(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0);
+
+ /* Break up a pixel wave if it contains deallocs for more than
+ * half the parameter cache.
+ *
+ * To avoid a deadlock where pixel waves aren't launched
+ * because they're waiting for more pixels while the frontend
+ * is stuck waiting for PC space, the maximum allowed value is
+ * the size of the PC minus the largest possible allocation for
+ * a single primitive shader subgroup.
+ */
+ si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512));
+ /* Reuse for legacy (non-NGG) only. */
+ si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14);
+
+ if (!has_clear_state) {
+ si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE,
+ sscreen->info.pa_sc_tile_steering_override);
+ }
+
+
+ si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0);
+ si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0);
+ si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0);
+ si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0);
+ si_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0);
+ }
+
+ if (sctx->chip_class >= GFX10_3) {
+ si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff);
+ si_pm4_set_reg(pm4, 0x28848, 1 << 9); /* This fixes sample shading. */
}
- si_pm4_upload_indirect_buffer(sctx, pm4);
- sctx->init_config = pm4;
+ sctx->cs_preamble_state = pm4;
}