bool buffers_only : 1;
};
-#include "gfx10_format_table.h"
+#include "amd/common/gfx10_format_table.h"
static unsigned si_map_swizzle(unsigned swizzle)
{
blend->dual_src_blend = util_blend_state_is_dual(state, 0);
blend->logicop_enable = logicop_enable;
+ unsigned num_shader_outputs = state->max_rt + 1; /* estimate */
+ if (blend->dual_src_blend)
+ num_shader_outputs = MAX2(num_shader_outputs, 2);
+
if (logicop_enable) {
color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
} else {
blend->cb_target_mask = 0;
blend->cb_target_enabled_4bit = 0;
- for (int i = 0; i < 8; i++) {
+ for (int i = 0; i < num_shader_outputs; i++) {
/* state->rt entries > 0 only written if independent blending */
const int j = state->independent_blend_enable ? i : 0;
* Vulkan does this.
*/
if (blend->dual_src_blend) {
- for (int i = 0; i < 8; i++) {
+ for (int i = 0; i < num_shader_outputs; i++) {
sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
}
}
- for (int i = 0; i < 8; i++)
+ for (int i = 0; i < num_shader_outputs; i++)
si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]);
/* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */
if (sctx->chip_class >= GFX7) {
unsigned log_sample_rate = sctx->framebuffer.log_samples;
- /* Stoney doesn't increment occlusion query counters
- * if the sample rate is 16x. Use 8x sample rate instead.
- */
- if (sctx->family == CHIP_STONEY)
- log_sample_rate = MIN2(log_sample_rate, 3);
-
db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) |
S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) |
!util_is_power_of_two_or_zero(storage_sample_count))
return false;
+ /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate,
+ * so don't expose 16 samples there.
+ */
+ const unsigned max_eqaa_samples = sscreen->info.num_render_backends == 1 ? 8 : 16;
+ const unsigned max_samples = 8;
+
/* MSAA support without framebuffer attachments. */
- if (format == PIPE_FORMAT_NONE && sample_count <= 16)
+ if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples)
return true;
if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) {
/* Color without EQAA or depth/stencil. */
- if (sample_count > 8 || sample_count != storage_sample_count)
+ if (sample_count > max_samples || sample_count != storage_sample_count)
return false;
} else {
/* Color with EQAA. */
- if (sample_count > 16 || storage_sample_count > 8)
+ if (sample_count > max_eqaa_samples || storage_sample_count > max_samples)
return false;
}
}
min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
- S_028C78_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
+ S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) |
S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
- S_028C78_INDEPENDENT_64B_BLOCKS(0) |
- S_028C78_INDEPENDENT_128B_BLOCKS(1);
+ S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.dcc.independent_64B_blocks) |
+ S_028C78_INDEPENDENT_128B_BLOCKS(tex->surface.u.gfx9.dcc.independent_128B_blocks);
} else if (sctx->chip_class >= GFX8) {
unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) {
z_info |= S_028038_TILE_SURFACE_ENABLE(1) | S_028038_ALLOW_EXPCLEAR(1);
- if (tex->tc_compatible_htile) {
- unsigned max_zplanes = 4;
-
- if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1)
- max_zplanes = 2;
-
- z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1);
-
- if (sctx->chip_class >= GFX10) {
- z_info |= S_028040_ITERATE_FLUSH(1);
- s_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled);
- } else {
- z_info |= S_028038_ITERATE_FLUSH(1);
- s_info |= S_02803C_ITERATE_FLUSH(1);
- }
- }
-
if (tex->surface.has_stencil && !tex->htile_stencil_disabled) {
/* Stencil buffer workaround ported from the GFX6-GFX8 code.
* See that for explanation.
surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.htile_offset) >> 8;
surf->db_htile_surface =
- S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(tex->surface.u.gfx9.htile.pipe_aligned);
+ S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
if (sctx->chip_class == GFX9) {
- surf->db_htile_surface |= S_028ABC_RB_ALIGNED(tex->surface.u.gfx9.htile.rb_aligned);
+ surf->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
}
}
} else {
z_info =
S_028040_FORMAT(format) | S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples));
s_info = S_028044_FORMAT(stencil_format);
- surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile);
+ surf->db_depth_info = 0;
if (sctx->chip_class >= GFX7) {
struct radeon_info *info = &sctx->screen->info;
*/
if (tex->buffer.b.b.nr_samples <= 1)
s_info |= S_028044_ALLOW_EXPCLEAR(1);
- } else if (!tex->tc_compatible_htile) {
- /* Use all of the htile_buffer for depth if there's no stencil.
- * This must not be set when TC-compatible HTILE is enabled
- * due to a hw bug.
- */
- s_info |= S_028044_TILE_STENCIL_DISABLE(1);
}
surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.htile_offset) >> 8;
surf->db_htile_surface = S_028ABC_FULL_CACHE(1);
-
- if (tex->tc_compatible_htile) {
- surf->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
-
- /* 0 = full compression. N = only compress up to N-1 Z planes. */
- if (tex->buffer.b.b.nr_samples <= 1)
- z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
- else if (tex->buffer.b.b.nr_samples <= 4)
- z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
- else
- z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
- }
}
}
else
sctx->framebuffer.uncompressed_cb_mask |= 1 << i;
- if (tex->surface.dcc_offset)
+ if (tex->surface.display_dcc_offset)
sctx->framebuffer.displayable_dcc_cb_mask |= 1 << i;
/* Don't update nr_color_samples for non-AA buffers.
cb_color_attrib3 = cb->cb_color_attrib3 |
S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) |
S_028EE0_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) |
- S_028EE0_CMASK_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned) |
+ S_028EE0_CMASK_PIPE_ALIGNED(1) |
S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.dcc.pipe_aligned);
radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 14);
radeon_set_context_reg(cs, R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2);
radeon_set_context_reg(cs, R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3);
} else if (sctx->chip_class == GFX9) {
- struct gfx9_surf_meta_flags meta;
+ struct gfx9_surf_meta_flags meta = {
+ .rb_aligned = 1,
+ .pipe_aligned = 1,
+ };
if (tex->surface.dcc_offset)
meta = tex->surface.u.gfx9.dcc;
- else
- meta = tex->surface.u.gfx9.cmask;
/* Set mutable surface parameters. */
cb_color_base += tex->surface.u.gfx9.surf_offset >> 8;
if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) {
struct si_surface *zb = (struct si_surface *)state->zsbuf;
struct si_texture *tex = (struct si_texture *)zb->base.texture;
+ unsigned db_z_info = zb->db_z_info;
+ unsigned db_stencil_info = zb->db_stencil_info;
+ unsigned db_htile_surface = zb->db_htile_surface;
radeon_add_to_buffer_list(sctx, sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE,
zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA
: RADEON_PRIO_DEPTH_BUFFER);
+ /* Set fields dependent on tc_compatile_htile. */
+ if (sctx->chip_class >= GFX9 &&
+ vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) {
+ unsigned max_zplanes = 4;
+
+ if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1)
+ max_zplanes = 2;
+
+ db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1);
+
+ if (sctx->chip_class >= GFX10) {
+ db_z_info |= S_028040_ITERATE_FLUSH(1);
+ db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled);
+ } else {
+ db_z_info |= S_028038_ITERATE_FLUSH(1);
+ db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
+ }
+ }
+
if (sctx->chip_class >= GFX10) {
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
radeon_set_context_reg(cs, R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size);
radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 7);
radeon_emit(cs, S_02803C_RESOURCE_LEVEL(1)); /* DB_DEPTH_INFO */
- radeon_emit(cs, zb->db_z_info | /* DB_Z_INFO */
+ radeon_emit(cs, db_z_info | /* DB_Z_INFO */
S_028038_ZRANGE_PRECISION(tex->depth_clear_value != 0));
- radeon_emit(cs, zb->db_stencil_info); /* DB_STENCIL_INFO */
+ radeon_emit(cs, db_stencil_info); /* DB_STENCIL_INFO */
radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */
radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */
radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */
radeon_emit(cs, zb->db_depth_size); /* DB_DEPTH_SIZE */
radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10);
- radeon_emit(cs, zb->db_z_info | /* DB_Z_INFO */
+ radeon_emit(cs, db_z_info | /* DB_Z_INFO */
S_028038_ZRANGE_PRECISION(tex->depth_clear_value != 0));
- radeon_emit(cs, zb->db_stencil_info); /* DB_STENCIL_INFO */
+ radeon_emit(cs, db_stencil_info); /* DB_STENCIL_INFO */
radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */
radeon_emit(cs, S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */
radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */
radeon_emit(cs, zb->db_z_info2); /* DB_Z_INFO2 */
radeon_emit(cs, zb->db_stencil_info2); /* DB_STENCIL_INFO2 */
} else {
+ /* GFX6-GFX8 */
+ /* Set fields dependent on tc_compatile_htile. */
+ if (si_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) {
+ if (!tex->surface.has_stencil && !tex->tc_compatible_htile) {
+ /* Use all of the htile_buffer for depth if there's no stencil.
+ * This must not be set when TC-compatible HTILE is enabled
+ * due to a hw bug.
+ */
+ db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
+ }
+
+ if (tex->tc_compatible_htile) {
+ db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
+
+ /* 0 = full compression. N = only compress up to N-1 Z planes. */
+ if (tex->buffer.b.b.nr_samples <= 1)
+ db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5);
+ else if (tex->buffer.b.b.nr_samples <= 4)
+ db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3);
+ else
+ db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2);
+ }
+ }
+
radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base);
radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9);
- radeon_emit(cs, zb->db_depth_info); /* DB_DEPTH_INFO */
- radeon_emit(cs, zb->db_z_info | /* DB_Z_INFO */
+ radeon_emit(cs, zb->db_depth_info | /* DB_DEPTH_INFO */
+ S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile));
+ radeon_emit(cs, db_z_info | /* DB_Z_INFO */
S_028040_ZRANGE_PRECISION(tex->depth_clear_value != 0));
- radeon_emit(cs, zb->db_stencil_info); /* DB_STENCIL_INFO */
+ radeon_emit(cs, db_stencil_info); /* DB_STENCIL_INFO */
radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */
radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */
radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */
radeon_emit(cs, fui(tex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */
radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view);
- radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface);
+ radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, db_htile_surface);
} else if (sctx->framebuffer.dirty_zsbuf) {
if (sctx->chip_class == GFX9)
radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 2);
state[6] = 0;
state[7] = 0;
- if (tex->surface.dcc_offset) {
+ if (vi_dcc_enabled(tex, first_level)) {
state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
- S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
+ S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) |
S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
}
S_00A00C_TYPE(si_tex_dim(screen, tex, target, 0));
fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
fmask_state[5] = 0;
- fmask_state[6] = S_00A018_META_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned);
+ fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
fmask_state[7] = 0;
}
}
}
/* S8 with Z32 HTILE needs a special format. */
- if (screen->info.chip_class == GFX9 && pipe_format == PIPE_FORMAT_S8_UINT &&
- tex->tc_compatible_htile)
+ if (screen->info.chip_class == GFX9 && pipe_format == PIPE_FORMAT_S8_UINT)
data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY ||
state[5] |= S_008F24_LAST_ARRAY(last_layer);
}
- if (tex->surface.dcc_offset) {
+ if (vi_dcc_enabled(tex, first_level)) {
state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
} else {
/* The last dword is unused by hw. The shader uses it to clear
fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode);
fmask_state[4] |=
S_008F20_DEPTH(last_layer) | S_008F20_PITCH(tex->surface.u.gfx9.fmask.epitch);
- fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned) |
- S_008F24_META_RB_ALIGNED(tex->surface.u.gfx9.cmask.rb_aligned);
+ fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) |
+ S_008F24_META_RB_ALIGNED(1);
} else {
fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.fmask.tiling_index);
fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
depth = u_minify(depth, force_level);
}
- /* This is not needed if state trackers set last_layer correctly. */
+ /* This is not needed if gallium frontends set last_layer correctly. */
if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D ||
state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE)
last_layer = state->u.tex.first_layer;
struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state);
unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy;
unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso);
+ bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ state->mag_img_filter == PIPE_TEX_FILTER_NEAREST &&
+ state->compare_mode == PIPE_TEX_COMPARE_NONE;
union pipe_color_union clamped_border_color;
if (!rstate) {
S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) |
+ S_008F30_TRUNC_COORD(trunc_coord) |
S_008F30_COMPAT_MODE(sctx->chip_class == GFX8 || sctx->chip_class == GFX9));
rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) |
unsigned mask = sctx->sample_mask;
/* Needed for line and polygon smoothing as well as for the Polaris
- * small primitive filter. We expect the state tracker to take care of
+ * small primitive filter. We expect the gallium frontend to take care of
* this for us.
*/
assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 ||
return;
si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL);
- si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1));
- si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1));
+ si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1));
+ si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1));
si_pm4_cmd_end(pm4, false);
if (has_clear_state) {
}
/* Compute LATE_ALLOC_VS.LIMIT. */
- unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh;
- unsigned late_alloc_wave64 = 0; /* The limit is per SH. */
+ unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa;
+ unsigned late_alloc_wave64 = 0; /* The limit is per SA. */
unsigned cu_mask_vs = 0xffff;
unsigned cu_mask_gs = 0xffff;
if (!sscreen->info.use_late_alloc) {
late_alloc_wave64 = 0;
} else if (num_cu_per_sh <= 4) {
- /* Too few available compute units per SH. Disallowing
+ /* Too few available compute units per SA. Disallowing
* VS to run on one CU could hurt us more than late VS
* allocation would help.
*
si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
}
- si_pm4_upload_indirect_buffer(sctx, pm4);
sctx->init_config = pm4;
}