X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_state.c;h=2f156a0885f493210314fa3264a56c2834381615;hb=788696c7b29a5e7de8659ae4a3faedf94fe2b9cd;hp=2efafe6b61af1bd43ecb1502731e40e48ea210ae;hpb=04085bedc26535e47d81e333ee8969b7755421b2;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 2efafe6b61a..2f156a0885f 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -33,15 +33,6 @@ #include "util/u_resource.h" #include "util/u_upload_mgr.h" -struct gfx10_format { - unsigned img_format : 9; - - /* Various formats are only supported with workarounds for vertex fetch, - * and some 32_32_32 formats are supported natively, but only for buffers - * (possibly with some image support, actually, but no filtering). */ - bool buffers_only : 1; -}; - #include "gfx10_format_table.h" static unsigned si_map_swizzle(unsigned swizzle) @@ -246,6 +237,11 @@ static void si_emit_cb_render_state(struct si_context *sctx) sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); } break; + + case V_028C70_COLOR_5_9_9_9: + if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) + sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4); + break; } } @@ -766,8 +762,9 @@ static void si_emit_clip_regs(struct si_context *sctx) unsigned initial_cdw = sctx->gfx_cs->current.cdw; unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | clipdist_mask | - (culldist_mask << 8); + S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | + S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) | + clipdist_mask | (culldist_mask << 8); if (sctx->chip_class >= GFX10) { radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, @@ -1369,12 +1366,6 @@ static void si_emit_db_render_state(struct si_context *sctx) if (sctx->chip_class >= GFX7) { unsigned log_sample_rate = sctx->framebuffer.log_samples; - /* Stoney doesn't increment occlusion query counters - * if the sample rate is 16x. Use 8x sample rate instead. - */ - if (sctx->family == CHIP_STONEY) - log_sample_rate = MIN2(log_sample_rate, 3); - db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) | S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) | S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) | @@ -1399,8 +1390,9 @@ static void si_emit_db_render_state(struct si_context *sctx) radeon_opt_set_context_reg( sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2, S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | - S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | - S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4)); + S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | + S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) | + S_028010_CENTROID_COMPUTATION_MODE_GFX103(sctx->chip_class >= GFX10_3 ? 2 : 0)); db_shader_control = sctx->ps_db_shader_control; @@ -1427,7 +1419,8 @@ static void si_emit_db_render_state(struct si_context *sctx) /* * format translation */ -static uint32_t si_translate_colorformat(enum pipe_format format) +static uint32_t si_translate_colorformat(enum chip_class chip_class, + enum pipe_format format) { const struct util_format_description *desc = util_format_description(format); if (!desc) @@ -1440,6 +1433,10 @@ static uint32_t si_translate_colorformat(enum pipe_format format) if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ return V_028C70_COLOR_10_11_11; + if (chip_class >= GFX10_3 && + format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */ + return V_028C70_COLOR_5_9_9_9; + if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) return V_028C70_COLOR_INVALID; @@ -2105,9 +2102,10 @@ static unsigned si_is_vertex_format_supported(struct pipe_screen *screen, enum p return usage; } -static bool si_is_colorbuffer_format_supported(enum pipe_format format) +static bool si_is_colorbuffer_format_supported(enum chip_class chip_class, + enum pipe_format format) { - return si_translate_colorformat(format) != V_028C70_COLOR_INVALID && + return si_translate_colorformat(chip_class, format) != V_028C70_COLOR_INVALID && si_translate_colorswap(format, false) != ~0U; } @@ -2140,17 +2138,23 @@ static bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format !util_is_power_of_two_or_zero(storage_sample_count)) return false; + /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate, + * so don't expose 16 samples there. + */ + const unsigned max_eqaa_samples = sscreen->info.num_render_backends == 1 ? 8 : 16; + const unsigned max_samples = 8; + /* MSAA support without framebuffer attachments. */ - if (format == PIPE_FORMAT_NONE && sample_count <= 16) + if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples) return true; if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) { /* Color without EQAA or depth/stencil. */ - if (sample_count > 8 || sample_count != storage_sample_count) + if (sample_count > max_samples || sample_count != storage_sample_count) return false; } else { /* Color with EQAA. */ - if (sample_count > 16 || storage_sample_count > 8) + if (sample_count > max_eqaa_samples || storage_sample_count > max_samples) return false; } } @@ -2167,7 +2171,7 @@ static bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) && - si_is_colorbuffer_format_supported(format)) { + si_is_colorbuffer_format_supported(sscreen->info.chip_class, format)) { retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | PIPE_BIND_SHARED); if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format)) @@ -2214,6 +2218,7 @@ static void si_choose_spi_color_formats(struct si_surface *surf, unsigned format case V_028C70_COLOR_4_4_4_4: case V_028C70_COLOR_10_11_11: case V_028C70_COLOR_11_11_10: + case V_028C70_COLOR_5_9_9_9: case V_028C70_COLOR_8: case V_028C70_COLOR_8_8: case V_028C70_COLOR_8_8_8_8: @@ -2346,7 +2351,7 @@ static void si_initialize_color_surface(struct si_context *sctx, struct si_surfa } } - format = si_translate_colorformat(surf->base.format); + format = si_translate_colorformat(sctx->chip_class, surf->base.format); if (format == V_028C70_COLOR_INVALID) { PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); } @@ -2528,23 +2533,6 @@ static void si_init_depth_surface(struct si_context *sctx, struct si_surface *su if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) { z_info |= S_028038_TILE_SURFACE_ENABLE(1) | S_028038_ALLOW_EXPCLEAR(1); - if (tex->tc_compatible_htile) { - unsigned max_zplanes = 4; - - if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1) - max_zplanes = 2; - - z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1); - - if (sctx->chip_class >= GFX10) { - z_info |= S_028040_ITERATE_FLUSH(1); - s_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled); - } else { - z_info |= S_028038_ITERATE_FLUSH(1); - s_info |= S_02803C_ITERATE_FLUSH(1); - } - } - if (tex->surface.has_stencil && !tex->htile_stencil_disabled) { /* Stencil buffer workaround ported from the GFX6-GFX8 code. * See that for explanation. @@ -2557,9 +2545,9 @@ static void si_init_depth_surface(struct si_context *sctx, struct si_surface *su surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.htile_offset) >> 8; surf->db_htile_surface = - S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(tex->surface.u.gfx9.htile.pipe_aligned); + S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1); if (sctx->chip_class == GFX9) { - surf->db_htile_surface |= S_028ABC_RB_ALIGNED(tex->surface.u.gfx9.htile.rb_aligned); + surf->db_htile_surface |= S_028ABC_RB_ALIGNED(1); } } } else { @@ -2576,7 +2564,7 @@ static void si_init_depth_surface(struct si_context *sctx, struct si_surface *su z_info = S_028040_FORMAT(format) | S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)); s_info = S_028044_FORMAT(stencil_format); - surf->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile); + surf->db_depth_info = 0; if (sctx->chip_class >= GFX7) { struct radeon_info *info = &sctx->screen->info; @@ -2624,28 +2612,10 @@ static void si_init_depth_surface(struct si_context *sctx, struct si_surface *su */ if (tex->buffer.b.b.nr_samples <= 1) s_info |= S_028044_ALLOW_EXPCLEAR(1); - } else if (!tex->tc_compatible_htile) { - /* Use all of the htile_buffer for depth if there's no stencil. - * This must not be set when TC-compatible HTILE is enabled - * due to a hw bug. - */ - s_info |= S_028044_TILE_STENCIL_DISABLE(1); } surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.htile_offset) >> 8; surf->db_htile_surface = S_028ABC_FULL_CACHE(1); - - if (tex->tc_compatible_htile) { - surf->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); - - /* 0 = full compression. N = only compress up to N-1 Z planes. */ - if (tex->buffer.b.b.nr_samples <= 1) - z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); - else if (tex->buffer.b.b.nr_samples <= 4) - z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); - else - z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); - } } } @@ -2865,7 +2835,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, else sctx->framebuffer.uncompressed_cb_mask |= 1 << i; - if (tex->surface.dcc_offset) + if (tex->surface.display_dcc_offset) sctx->framebuffer.displayable_dcc_cb_mask |= 1 << i; /* Don't update nr_color_samples for non-AA buffers. @@ -3084,7 +3054,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) cb_color_attrib3 = cb->cb_color_attrib3 | S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.surf.swizzle_mode) | S_028EE0_FMASK_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode) | - S_028EE0_CMASK_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned) | + S_028EE0_CMASK_PIPE_ALIGNED(1) | S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.dcc.pipe_aligned); radeon_set_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 14); @@ -3112,12 +3082,13 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_set_context_reg(cs, R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2); radeon_set_context_reg(cs, R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3); } else if (sctx->chip_class == GFX9) { - struct gfx9_surf_meta_flags meta; + struct gfx9_surf_meta_flags meta = { + .rb_aligned = 1, + .pipe_aligned = 1, + }; if (tex->surface.dcc_offset) meta = tex->surface.u.gfx9.dcc; - else - meta = tex->surface.u.gfx9.cmask; /* Set mutable surface parameters. */ cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; @@ -3220,20 +3191,42 @@ static void si_emit_framebuffer_state(struct si_context *sctx) if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { struct si_surface *zb = (struct si_surface *)state->zsbuf; struct si_texture *tex = (struct si_texture *)zb->base.texture; + unsigned db_z_info = zb->db_z_info; + unsigned db_stencil_info = zb->db_stencil_info; + unsigned db_htile_surface = zb->db_htile_surface; radeon_add_to_buffer_list(sctx, sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE, zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA : RADEON_PRIO_DEPTH_BUFFER); + /* Set fields dependent on tc_compatile_htile. */ + if (sctx->chip_class >= GFX9 && + vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) { + unsigned max_zplanes = 4; + + if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1) + max_zplanes = 2; + + db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1); + + if (sctx->chip_class >= GFX10) { + db_z_info |= S_028040_ITERATE_FLUSH(1); + db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled); + } else { + db_z_info |= S_028038_ITERATE_FLUSH(1); + db_stencil_info |= S_02803C_ITERATE_FLUSH(1); + } + } + if (sctx->chip_class >= GFX10) { radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); radeon_set_context_reg(cs, R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size); radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 7); radeon_emit(cs, S_02803C_RESOURCE_LEVEL(1)); /* DB_DEPTH_INFO */ - radeon_emit(cs, zb->db_z_info | /* DB_Z_INFO */ + radeon_emit(cs, db_z_info | /* DB_Z_INFO */ S_028038_ZRANGE_PRECISION(tex->depth_clear_value != 0)); - radeon_emit(cs, zb->db_stencil_info); /* DB_STENCIL_INFO */ + radeon_emit(cs, db_stencil_info); /* DB_STENCIL_INFO */ radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */ radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */ @@ -3253,9 +3246,9 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_emit(cs, zb->db_depth_size); /* DB_DEPTH_SIZE */ radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 10); - radeon_emit(cs, zb->db_z_info | /* DB_Z_INFO */ + radeon_emit(cs, db_z_info | /* DB_Z_INFO */ S_028038_ZRANGE_PRECISION(tex->depth_clear_value != 0)); - radeon_emit(cs, zb->db_stencil_info); /* DB_STENCIL_INFO */ + radeon_emit(cs, db_stencil_info); /* DB_STENCIL_INFO */ radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */ radeon_emit(cs, S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */ radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ @@ -3270,13 +3263,38 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_emit(cs, zb->db_z_info2); /* DB_Z_INFO2 */ radeon_emit(cs, zb->db_stencil_info2); /* DB_STENCIL_INFO2 */ } else { + /* GFX6-GFX8 */ + /* Set fields dependent on tc_compatile_htile. */ + if (si_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) { + if (!tex->surface.has_stencil && !tex->tc_compatible_htile) { + /* Use all of the htile_buffer for depth if there's no stencil. + * This must not be set when TC-compatible HTILE is enabled + * due to a hw bug. + */ + db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); + } + + if (tex->tc_compatible_htile) { + db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); + + /* 0 = full compression. N = only compress up to N-1 Z planes. */ + if (tex->buffer.b.b.nr_samples <= 1) + db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); + else if (tex->buffer.b.b.nr_samples <= 4) + db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); + else + db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); + } + } + radeon_set_context_reg(cs, R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); radeon_set_context_reg_seq(cs, R_02803C_DB_DEPTH_INFO, 9); - radeon_emit(cs, zb->db_depth_info); /* DB_DEPTH_INFO */ - radeon_emit(cs, zb->db_z_info | /* DB_Z_INFO */ + radeon_emit(cs, zb->db_depth_info | /* DB_DEPTH_INFO */ + S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile)); + radeon_emit(cs, db_z_info | /* DB_Z_INFO */ S_028040_ZRANGE_PRECISION(tex->depth_clear_value != 0)); - radeon_emit(cs, zb->db_stencil_info); /* DB_STENCIL_INFO */ + radeon_emit(cs, db_stencil_info); /* DB_STENCIL_INFO */ radeon_emit(cs, zb->db_depth_base); /* DB_Z_READ_BASE */ radeon_emit(cs, zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ radeon_emit(cs, zb->db_depth_base); /* DB_Z_WRITE_BASE */ @@ -3290,7 +3308,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) radeon_emit(cs, fui(tex->depth_clear_value)); /* R_02802C_DB_DEPTH_CLEAR */ radeon_set_context_reg(cs, R_028008_DB_DEPTH_VIEW, zb->db_depth_view); - radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, zb->db_htile_surface); + radeon_set_context_reg(cs, R_028ABC_DB_HTILE_SURFACE, db_htile_surface); } else if (sctx->framebuffer.dirty_zsbuf) { if (sctx->chip_class == GFX9) radeon_set_context_reg_seq(cs, R_028038_DB_Z_INFO, 2); @@ -3531,7 +3549,8 @@ static void si_emit_msaa_config(struct si_context *sctx) sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) | S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | - S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples); + S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | + S_028BE0_COVERED_CENTROID_IS_CENTER_GFX103(sctx->chip_class >= GFX10_3); if (sctx->framebuffer.nr_samples > 1) { db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | @@ -3784,12 +3803,26 @@ static void gfx10_make_texture_descriptor( S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) | S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) | S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type); - /* Depth is the the last accessible layer on gfx9+. The hw doesn't need - * to know the total number of layers. - */ - state[4] = - S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) | - S_00A010_BASE_ARRAY(first_layer); + + if (res->target == PIPE_TEXTURE_1D || + res->target == PIPE_TEXTURE_2D) { + /* 1D, 2D, and 2D_MSAA can set a custom pitch for shader resources + * starting with gfx10.3 (ignored if pitch <= width). Other texture + * targets can't. CB and DB can't set a custom pitch for any target. + */ + if (screen->info.chip_class >= GFX10_3) + state[4] = S_00A010_DEPTH(tex->surface.u.gfx9.surf_pitch - 1); + else + state[4] = 0; + } else { + /* Depth is the last accessible layer on gfx9+. The hw doesn't need + * to know the total number of layers. + */ + state[4] = S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? + depth - 1 : last_layer) | + S_00A010_BASE_ARRAY(first_layer); + } + state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) | S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : tex->buffer.b.b.last_level) | @@ -3797,7 +3830,7 @@ static void gfx10_make_texture_descriptor( state[6] = 0; state[7] = 0; - if (tex->surface.dcc_offset) { + if (vi_dcc_enabled(tex, first_level)) { state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) | S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); @@ -3866,7 +3899,7 @@ static void gfx10_make_texture_descriptor( S_00A00C_TYPE(si_tex_dim(screen, tex, target, 0)); fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer); fmask_state[5] = 0; - fmask_state[6] = S_00A018_META_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned); + fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1); fmask_state[7] = 0; } } @@ -4058,7 +4091,7 @@ static void si_make_texture_descriptor(struct si_screen *screen, struct si_textu state[5] |= S_008F24_LAST_ARRAY(last_layer); } - if (tex->surface.dcc_offset) { + if (vi_dcc_enabled(tex, first_level)) { state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); } else { /* The last dword is unused by hw. The shader uses it to clear @@ -4189,8 +4222,8 @@ static void si_make_texture_descriptor(struct si_screen *screen, struct si_textu fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.fmask.swizzle_mode); fmask_state[4] |= S_008F20_DEPTH(last_layer) | S_008F20_PITCH(tex->surface.u.gfx9.fmask.epitch); - fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(tex->surface.u.gfx9.cmask.pipe_aligned) | - S_008F24_META_RB_ALIGNED(tex->surface.u.gfx9.cmask.rb_aligned); + fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | + S_008F24_META_RB_ALIGNED(1); } else { fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.fmask.tiling_index); fmask_state[4] |= S_008F20_DEPTH(depth - 1) | @@ -4271,7 +4304,7 @@ struct pipe_sampler_view *si_create_sampler_view_custom(struct pipe_context *ctx depth = u_minify(depth, force_level); } - /* This is not needed if state trackers set last_layer correctly. */ + /* This is not needed if gallium frontends set last_layer correctly. */ if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D || state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE) last_layer = state->u.tex.first_layer; @@ -4465,6 +4498,9 @@ static void *si_create_sampler_state(struct pipe_context *ctx, struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy; unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso); + bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST && + state->mag_img_filter == PIPE_TEX_FILTER_NEAREST && + state->compare_mode == PIPE_TEX_COMPARE_NONE; union pipe_color_union clamped_border_color; if (!rstate) { @@ -4481,6 +4517,7 @@ static void *si_create_sampler_state(struct pipe_context *ctx, S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) | S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | + S_008F30_TRUNC_COORD(trunc_coord) | S_008F30_COMPAT_MODE(sctx->chip_class == GFX8 || sctx->chip_class == GFX9)); rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | @@ -4541,7 +4578,7 @@ static void si_emit_sample_mask(struct si_context *sctx) unsigned mask = sctx->sample_mask; /* Needed for line and polygon smoothing as well as for the Polaris - * small primitive filter. We expect the state tracker to take care of + * small primitive filter. We expect the gallium frontend to take care of * this for us. */ assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || @@ -4737,8 +4774,9 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count, * into account would complicate the fast path (where everything * is nicely aligned). */ - bool check_alignment = log_hw_load_size >= 1 && (sscreen->info.chip_class == GFX6 || - sscreen->info.chip_class == GFX10); + bool check_alignment = + log_hw_load_size >= 1 && + (sscreen->info.chip_class == GFX6 || sscreen->info.chip_class >= GFX10); bool opencode = sscreen->options.vs_fetch_always_opencode; if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0) @@ -5139,8 +5177,8 @@ static void si_init_config(struct si_context *sctx) return; si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL); - si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1)); - si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); + si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1)); + si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1)); si_pm4_cmd_end(pm4, false); if (has_clear_state) { @@ -5253,8 +5291,8 @@ static void si_init_config(struct si_context *sctx) } /* Compute LATE_ALLOC_VS.LIMIT. */ - unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh; - unsigned late_alloc_wave64 = 0; /* The limit is per SH. */ + unsigned num_cu_per_sh = sscreen->info.min_good_cu_per_sa; + unsigned late_alloc_wave64 = 0; /* The limit is per SA. */ unsigned cu_mask_vs = 0xffff; unsigned cu_mask_gs = 0xffff; @@ -5278,7 +5316,7 @@ static void si_init_config(struct si_context *sctx) if (!sscreen->info.use_late_alloc) { late_alloc_wave64 = 0; } else if (num_cu_per_sh <= 4) { - /* Too few available compute units per SH. Disallowing + /* Too few available compute units per SA. Disallowing * VS to run on one CU could hurt us more than late VS * allocation would help. * @@ -5320,6 +5358,7 @@ static void si_init_config(struct si_context *sctx) * a single primitive shader subgroup. */ si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, S_028C50_MAX_DEALLOCS_IN_WAVE(512)); + /* Reuse for legacy (non-NGG) only. */ si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); if (!has_clear_state) { @@ -5361,6 +5400,9 @@ static void si_init_config(struct si_context *sctx) S_00B0C0_SOFT_GROUPING_EN(1) | S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); } + if (sctx->chip_class >= GFX10_3) { + si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff); + } if (sctx->chip_class >= GFX9) { si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, @@ -5399,6 +5441,5 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); } - si_pm4_upload_indirect_buffer(sctx, pm4); sctx->init_config = pm4; }