X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fradeonsi%2Fsi_state.c;h=9390206f72f46a13a174a6ed4f94473d573d64bf;hb=27cc7703d3da25656f7b953b11b69719d9df8d94;hp=cd26f5af9d2e98996b09134f0afdaa5a7cb8d3f5;hpb=44a46c09deb96040ec25903e58e1ffc297b841c9;p=mesa.git diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index cd26f5af9d2..9390206f72f 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -27,8 +27,8 @@ #include "si_query.h" #include "util/u_dual_blend.h" -#include "util/u_format.h" -#include "util/u_format_s3tc.h" +#include "util/format/u_format.h" +#include "util/format/u_format_s3tc.h" #include "util/u_memory.h" #include "util/u_resource.h" #include "util/u_upload_mgr.h" @@ -131,7 +131,7 @@ static void si_emit_cb_render_state(struct si_context *sctx) } /* RB+ register settings. */ - if (sctx->screen->rbplus_allowed) { + if (sctx->screen->info.rbplus_allowed) { unsigned spi_shader_col_format = sctx->ps_shader.cso ? sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0; @@ -640,7 +640,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx, color_control |= S_028808_MODE(V_028808_CB_DISABLE); } - if (sctx->screen->rbplus_allowed) { + if (sctx->screen->info.rbplus_allowed) { /* Disable RB+ blend optimizations for dual source blending. * Vulkan does this. */ @@ -715,6 +715,10 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state) static void si_delete_blend_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; + + if (sctx->queued.named.blend == state) + si_bind_blend_state(ctx, sctx->noop_blend); + si_pm4_delete_state(sctx, blend, (struct si_state_blend *)state); } @@ -775,7 +779,7 @@ static void si_emit_clip_regs(struct si_context *sctx) { struct si_shader *vs = si_get_vs_state(sctx); struct si_shader_selector *vs_sel = vs->selector; - struct tgsi_shader_info *info = &vs_sel->info; + struct si_shader_info *info = &vs_sel->info; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned window_space = info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION]; @@ -801,12 +805,20 @@ static void si_emit_clip_regs(struct si_context *sctx) culldist_mask |= clipdist_mask; unsigned initial_cdw = sctx->gfx_cs->current.cdw; - radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, - SI_TRACKED_PA_CL_VS_OUT_CNTL, - vs_sel->pa_cl_vs_out_cntl | - S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | - S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | - clipdist_mask | (culldist_mask << 8)); + unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) | + S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) | + clipdist_mask | (culldist_mask << 8); + + if (sctx->chip_class >= GFX10) { + radeon_opt_set_context_reg_rmw(sctx, R_02881C_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, + pa_cl_cntl, + ~SI_TRACKED_PA_CL_VS_OUT_CNTL__VS_MASK); + } else { + radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, + SI_TRACKED_PA_CL_VS_OUT_CNTL__CL, + vs_sel->pa_cl_vs_out_cntl | pa_cl_cntl); + } radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, rs->pa_cl_clip_cntl | @@ -907,6 +919,14 @@ static void *si_create_rs_state(struct pipe_context *ctx, rs->flatshade_first = state->flatshade_first; rs->sprite_coord_enable = state->sprite_coord_enable; rs->rasterizer_discard = state->rasterizer_discard; + rs->polygon_mode_enabled = (state->fill_front != PIPE_POLYGON_MODE_FILL && + !(state->cull_face & PIPE_FACE_FRONT)) || + (state->fill_back != PIPE_POLYGON_MODE_FILL && + !(state->cull_face & PIPE_FACE_BACK)); + rs->polygon_mode_is_lines = (state->fill_front == PIPE_POLYGON_MODE_LINE && + !(state->cull_face & PIPE_FACE_FRONT)) || + (state->fill_back == PIPE_POLYGON_MODE_LINE && + !(state->cull_face & PIPE_FACE_BACK)); rs->pa_sc_line_stipple = state->line_stipple_enable ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0; @@ -964,8 +984,7 @@ static void *si_create_rs_state(struct pipe_context *ctx, S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | - S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL || - state->fill_back != PIPE_POLYGON_MODE_FILL) | + S_028814_POLY_MODE(rs->polygon_mode_enabled) | S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back))); @@ -1034,7 +1053,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); /* Update the small primitive filter workaround if necessary. */ - if (sctx->screen->has_msaa_sample_loc_bug && + if (sctx->screen->info.has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1) si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); } @@ -1060,9 +1079,6 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state) old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl) si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); - sctx->ia_multi_vgt_param_key.u.line_stipple_enabled = - rs->line_stipple_enable; - if (old_rs->clip_plane_enable != rs->clip_plane_enable || old_rs->rasterizer_discard != rs->rasterizer_discard || old_rs->sprite_coord_enable != rs->sprite_coord_enable || @@ -1083,7 +1099,7 @@ static void si_delete_rs_state(struct pipe_context *ctx, void *state) struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; if (sctx->queued.named.rasterizer == state) - si_pm4_bind_state(sctx, poly_offset, NULL); + si_bind_rs_state(ctx, sctx->discard_rasterizer_state); FREE(rs->pm4_poly_offset); si_pm4_delete_state(sctx, rasterizer, rs); @@ -1327,6 +1343,10 @@ static void si_bind_dsa_state(struct pipe_context *ctx, void *state) static void si_delete_dsa_state(struct pipe_context *ctx, void *state) { struct si_context *sctx = (struct si_context *)ctx; + + if (sctx->queued.named.dsa == state) + si_bind_dsa_state(ctx, sctx->noop_dsa); + si_pm4_delete_state(sctx, dsa, (struct si_state_dsa *)state); } @@ -1482,8 +1502,8 @@ static void si_emit_db_render_state(struct si_context *sctx) if (!rs->multisample_enable) db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; - if (sctx->screen->has_rbplus && - !sctx->screen->rbplus_allowed) + if (sctx->screen->info.has_rbplus && + !sctx->screen->info.rbplus_allowed) db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, @@ -2222,6 +2242,13 @@ static bool si_is_format_supported(struct pipe_screen *screen, return false; } + if (util_format_get_num_planes(format) >= 2) { + return util_format_planar_is_supported(screen, format, target, + sample_count, + storage_sample_count, + usage); + } + if (MAX2(1, sample_count) < MAX2(1, storage_sample_count)) return false; @@ -2229,9 +2256,6 @@ static bool si_is_format_supported(struct pipe_screen *screen, if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) return false; - if (usage & PIPE_BIND_SHADER_IMAGE) - return false; - /* Only power-of-two sample counts are supported. */ if (!util_is_power_of_two_or_zero(sample_count) || !util_is_power_of_two_or_zero(storage_sample_count)) @@ -2519,7 +2543,7 @@ static void si_initialize_color_surface(struct si_context *sctx, color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_fragments); - if (tex->fmask_offset) { + if (tex->surface.fmask_offset) { color_info |= S_028C70_COMPRESSION(1); unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.fmask.bankh); @@ -2687,7 +2711,7 @@ static void si_init_depth_surface(struct si_context *sctx, } surf->db_htile_data_base = (tex->buffer.gpu_address + - tex->htile_offset) >> 8; + tex->surface.htile_offset) >> 8; surf->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(tex->surface.u.gfx9.htile.pipe_aligned); if (sctx->chip_class == GFX9) { @@ -2768,7 +2792,7 @@ static void si_init_depth_surface(struct si_context *sctx, } surf->db_htile_data_base = (tex->buffer.gpu_address + - tex->htile_offset) >> 8; + tex->surface.htile_offset) >> 8; surf->db_htile_surface = S_028ABC_FULL_CACHE(1); if (tex->tc_compatible_htile) { @@ -2812,8 +2836,10 @@ void si_update_fb_dirtiness_after_rendering(struct si_context *sctx) struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; struct si_texture *tex = (struct si_texture*)surf->texture; - if (tex->fmask_offset) + if (tex->surface.fmask_offset) { tex->dirty_level_mask |= 1 << surf->u.tex.level; + tex->fmask_is_identity = false; + } if (tex->dcc_gather_statistics) tex->separate_dcc_dirty = true; } @@ -2962,6 +2988,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, sctx->framebuffer.compressed_cb_mask = 0; sctx->framebuffer.uncompressed_cb_mask = 0; + sctx->framebuffer.displayable_dcc_cb_mask = 0; sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples; sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); @@ -2997,11 +3024,14 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, if (surf->color_is_int10) sctx->framebuffer.color_is_int10 |= 1 << i; - if (tex->fmask_offset) + if (tex->surface.fmask_offset) sctx->framebuffer.compressed_cb_mask |= 1 << i; else sctx->framebuffer.uncompressed_cb_mask |= 1 << i; + if (tex->surface.dcc_offset) + sctx->framebuffer.displayable_dcc_cb_mask |= 1 << i; + /* Don't update nr_color_samples for non-AA buffers. * (e.g. destination of MSAA resolve) */ @@ -3190,8 +3220,8 @@ static void si_emit_framebuffer_state(struct si_context *sctx) if (cb->base.u.tex.level > 0) cb_color_info &= C_028C70_FAST_CLEAR; - if (tex->fmask_offset) { - cb_color_fmask = (tex->buffer.gpu_address + tex->fmask_offset) >> 8; + if (tex->surface.fmask_offset) { + cb_color_fmask = (tex->buffer.gpu_address + tex->surface.fmask_offset) >> 8; cb_color_fmask |= tex->surface.fmask_tile_swizzle; } @@ -3206,7 +3236,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) cb_color_info |= S_028C70_DCC_ENABLE(1); cb_dcc_base = ((!tex->dcc_separate_buffer ? tex->buffer.gpu_address : 0) + - tex->dcc_offset) >> 8; + tex->surface.dcc_offset) >> 8; unsigned dcc_tile_swizzle = tex->surface.tile_swizzle; dcc_tile_swizzle &= (tex->surface.dcc_alignment - 1) >> 8; @@ -3219,7 +3249,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) /* Set mutable surface parameters. */ cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; cb_color_base |= tex->surface.tile_swizzle; - if (!tex->fmask_offset) + if (!tex->surface.fmask_offset) cb_color_fmask = cb_color_base; if (cb->base.u.tex.level > 0) cb_color_cmask = cb_color_base; @@ -3261,7 +3291,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) } else if (sctx->chip_class == GFX9) { struct gfx9_surf_meta_flags meta; - if (tex->dcc_offset) + if (tex->surface.dcc_offset) meta = tex->surface.u.gfx9.dcc; else meta = tex->surface.u.gfx9.cmask; @@ -3269,7 +3299,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) /* Set mutable surface parameters. */ cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; cb_color_base |= tex->surface.tile_swizzle; - if (!tex->fmask_offset) + if (!tex->surface.fmask_offset) cb_color_fmask = cb_color_base; if (cb->base.u.tex.level > 0) cb_color_cmask = cb_color_base; @@ -3309,7 +3339,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) if (level_info->mode == RADEON_SURF_MODE_2D) cb_color_base |= tex->surface.tile_swizzle; - if (!tex->fmask_offset) + if (!tex->surface.fmask_offset) cb_color_fmask = cb_color_base; if (cb->base.u.tex.level > 0) cb_color_cmask = cb_color_base; @@ -3325,7 +3355,7 @@ static void si_emit_framebuffer_state(struct si_context *sctx) cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); - if (tex->fmask_offset) { + if (tex->surface.fmask_offset) { if (sctx->chip_class >= GFX7) cb_color_pitch |= S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.fmask.pitch_in_pixels / 8 - 1); cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.fmask.tiling_index); @@ -3466,7 +3496,7 @@ static void si_emit_msaa_sample_locs(struct si_context *sctx) struct radeon_cmdbuf *cs = sctx->gfx_cs; struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; unsigned nr_samples = sctx->framebuffer.nr_samples; - bool has_msaa_sample_loc_bug = sctx->screen->has_msaa_sample_loc_bug; + bool has_msaa_sample_loc_bug = sctx->screen->info.has_msaa_sample_loc_bug; /* Smoothing (only possible with nr_samples == 1) uses the same * sample locations as the MSAA it simulates. @@ -3823,7 +3853,7 @@ si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, * else: swizzle_address >= NUM_RECORDS */ state[7] |= S_008F0C_FORMAT(fmt->img_format) | - S_008F0C_OOB_SELECT(0) | + S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) | S_008F0C_RESOURCE_LEVEL(1); } else { int first_non_void; @@ -3980,17 +4010,17 @@ gfx10_make_texture_descriptor(struct si_screen *screen, state[6] = 0; state[7] = 0; - if (tex->dcc_offset) { + if (tex->surface.dcc_offset) { state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) | S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); } /* Initialize the sampler view for FMASK. */ - if (tex->fmask_offset) { + if (tex->surface.fmask_offset) { uint32_t format; - va = tex->buffer.gpu_address + tex->fmask_offset; + va = tex->buffer.gpu_address + tex->surface.fmask_offset; #define FMASK(s,f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) switch (FMASK(res->nr_samples, res->nr_storage_samples)) { @@ -4263,7 +4293,7 @@ si_make_texture_descriptor(struct si_screen *screen, state[5] |= S_008F24_LAST_ARRAY(last_layer); } - if (tex->dcc_offset) { + if (tex->surface.dcc_offset) { state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); } else { /* The last dword is unused by hw. The shader uses it to clear @@ -4278,10 +4308,10 @@ si_make_texture_descriptor(struct si_screen *screen, } /* Initialize the sampler view for FMASK. */ - if (tex->fmask_offset) { + if (tex->surface.fmask_offset) { uint32_t data_format, num_format; - va = tex->buffer.gpu_address + tex->fmask_offset; + va = tex->buffer.gpu_address + tex->surface.fmask_offset; #define FMASK(s,f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) if (screen->info.chip_class == GFX9) { @@ -4843,7 +4873,10 @@ static void *si_create_vertex_elements(struct pipe_context *ctx, return NULL; v->count = count; - v->desc_list_byte_size = align(count * 16, SI_CPDMA_ALIGNMENT); + + unsigned alloc_count = count > sscreen->num_vbos_in_user_sgprs ? + count - sscreen->num_vbos_in_user_sgprs : 0; + v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT); for (i = 0; i < count; ++i) { const struct util_format_description *desc; @@ -5044,7 +5077,14 @@ static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) struct si_vertex_elements *v = (struct si_vertex_elements*)state; sctx->vertex_elements = v; - sctx->vertex_buffers_dirty = true; + sctx->num_vertex_elements = v ? v->count : 0; + + if (sctx->num_vertex_elements) { + sctx->vertex_buffers_dirty = true; + } else { + sctx->vertex_buffer_pointer_dirty = false; + sctx->vertex_buffer_user_sgprs_dirty = false; + } if (v && (!old || @@ -5080,8 +5120,10 @@ static void si_delete_vertex_element(struct pipe_context *ctx, void *state) struct si_context *sctx = (struct si_context *)ctx; struct si_vertex_elements *v = (struct si_vertex_elements*)state; - if (sctx->vertex_elements == state) + if (sctx->vertex_elements == state) { sctx->vertex_elements = NULL; + sctx->num_vertex_elements = 0; + } si_resource_reference(&v->instance_divisor_factor_buffer, NULL); FREE(state); } @@ -5092,8 +5134,9 @@ static void si_set_vertex_buffers(struct pipe_context *ctx, { struct si_context *sctx = (struct si_context *)ctx; struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; + unsigned updated_mask = u_bit_consecutive(start_slot, count); uint32_t orig_unaligned = sctx->vertex_buffer_unaligned; - uint32_t unaligned = orig_unaligned; + uint32_t unaligned = 0; int i; assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer)); @@ -5103,14 +5146,14 @@ static void si_set_vertex_buffers(struct pipe_context *ctx, const struct pipe_vertex_buffer *src = buffers + i; struct pipe_vertex_buffer *dsti = dst + i; struct pipe_resource *buf = src->buffer.resource; + unsigned slot_bit = 1 << (start_slot + i); pipe_resource_reference(&dsti->buffer.resource, buf); dsti->buffer_offset = src->buffer_offset; dsti->stride = src->stride; + if (dsti->buffer_offset & 3 || dsti->stride & 3) - unaligned |= 1 << (start_slot + i); - else - unaligned &= ~(1 << (start_slot + i)); + unaligned |= slot_bit; si_context_add_resource_size(sctx, buf); if (buf) @@ -5120,10 +5163,10 @@ static void si_set_vertex_buffers(struct pipe_context *ctx, for (i = 0; i < count; i++) { pipe_resource_reference(&dst[i].buffer.resource, NULL); } - unaligned &= ~u_bit_consecutive(start_slot, count); + unaligned &= ~updated_mask; } sctx->vertex_buffers_dirty = true; - sctx->vertex_buffer_unaligned = unaligned; + sctx->vertex_buffer_unaligned = (orig_unaligned & ~updated_mask) | unaligned; /* Check whether alignment may have changed in a way that requires * shader changes. This check is conservative: a vertex buffer can only @@ -5134,7 +5177,7 @@ static void si_set_vertex_buffers(struct pipe_context *ctx, */ if (sctx->vertex_elements && (sctx->vertex_elements->vb_alignment_check_mask & - (unaligned | orig_unaligned) & u_bit_consecutive(start_slot, count))) + (unaligned | orig_unaligned) & updated_mask)) sctx->do_update_shaders = true; } @@ -5396,13 +5439,10 @@ static void si_init_config(struct si_context *sctx) if (!pm4) return; - /* Since amdgpu version 3.6.0, CONTEXT_CONTROL is emitted by the kernel */ - if (!sscreen->info.is_amdgpu || sscreen->info.drm_minor < 6) { - si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL); - si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1)); - si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); - si_pm4_cmd_end(pm4, false); - } + si_pm4_cmd_begin(pm4, PKT3_CONTEXT_CONTROL); + si_pm4_cmd_add(pm4, CONTEXT_CONTROL_LOAD_ENABLE(1)); + si_pm4_cmd_add(pm4, CONTEXT_CONTROL_SHADOW_ENABLE(1)); + si_pm4_cmd_end(pm4, false); if (has_clear_state) { si_pm4_cmd_begin(pm4, PKT3_CLEAR_STATE); @@ -5429,22 +5469,25 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); } - si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); + if (sscreen->info.chip_class <= GFX9) + si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); if (!has_clear_state) si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); if (sctx->chip_class < GFX7) si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); + /* CLEAR_STATE doesn't restore these correctly. */ + si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); + si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, + S_028244_BR_X(16384) | S_028244_BR_Y(16384)); + /* CLEAR_STATE doesn't clear these correctly on certain generations. * I don't know why. Deduced by trial and error. */ if (sctx->chip_class <= GFX7 || !has_clear_state) { si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); - si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); - si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, - S_028244_BR_X(16384) | S_028244_BR_Y(16384)); si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, S_028034_BR_X(16384) | S_028034_BR_Y(16384)); @@ -5638,7 +5681,7 @@ static void si_init_config(struct si_context *sctx) /* TODO: For culling, replace 128 with 256. */ si_pm4_set_reg(pm4, R_030980_GE_PC_ALLOC, S_030980_OVERSUB_EN(1) | - S_030980_NUM_PC_LINES(128 * sscreen->info.max_se - 1)); + S_030980_NUM_PC_LINES(sscreen->info.pc_lines / 4 - 1)); } if (sctx->chip_class >= GFX8) { @@ -5672,38 +5715,8 @@ static void si_init_config(struct si_context *sctx) RADEON_PRIO_BORDER_COLORS); if (sctx->chip_class >= GFX9) { - unsigned num_se = sscreen->info.max_se; - unsigned pc_lines = 0; - unsigned max_alloc_count = 0; - - switch (sctx->family) { - case CHIP_VEGA10: - case CHIP_VEGA12: - case CHIP_VEGA20: - pc_lines = 2048; - break; - case CHIP_RAVEN: - case CHIP_RAVEN2: - case CHIP_RENOIR: - case CHIP_NAVI10: - case CHIP_NAVI12: - pc_lines = 1024; - break; - case CHIP_NAVI14: - pc_lines = 512; - break; - default: - assert(0); - } - - if (sctx->chip_class >= GFX10) { - max_alloc_count = pc_lines / 3; - } else { - max_alloc_count = MIN2(128, pc_lines / (4 * num_se)); - } - si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, - S_028C48_MAX_ALLOC_COUNT(max_alloc_count - 1) | + S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) | S_028C48_MAX_PRIM_PER_BATCH(1023)); si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));