{
struct si_shader *vs = si_get_vs_state(sctx);
struct si_shader_selector *vs_sel = vs->selector;
- struct tgsi_shader_info *info = &vs_sel->info;
+ struct si_shader_info *info = &vs_sel->info;
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
unsigned window_space =
info->properties[TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION];
rs->flatshade_first = state->flatshade_first;
rs->sprite_coord_enable = state->sprite_coord_enable;
rs->rasterizer_discard = state->rasterizer_discard;
+ rs->polygon_mode_enabled = (state->fill_front != PIPE_POLYGON_MODE_FILL &&
+ !(state->cull_face & PIPE_FACE_FRONT)) ||
+ (state->fill_back != PIPE_POLYGON_MODE_FILL &&
+ !(state->cull_face & PIPE_FACE_BACK));
+ rs->polygon_mode_is_lines = (state->fill_front == PIPE_POLYGON_MODE_LINE &&
+ !(state->cull_face & PIPE_FACE_FRONT)) ||
+ (state->fill_back == PIPE_POLYGON_MODE_LINE &&
+ !(state->cull_face & PIPE_FACE_BACK));
rs->pa_sc_line_stipple = state->line_stipple_enable ?
S_028A0C_LINE_PATTERN(state->line_stipple_pattern) |
S_028A0C_REPEAT_COUNT(state->line_stipple_factor) : 0;
S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
- S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
- state->fill_back != PIPE_POLYGON_MODE_FILL) |
+ S_028814_POLY_MODE(rs->polygon_mode_enabled) |
S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)));
old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl)
si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs);
- sctx->ia_multi_vgt_param_key.u.line_stipple_enabled =
- rs->line_stipple_enable;
-
if (old_rs->clip_plane_enable != rs->clip_plane_enable ||
old_rs->rasterizer_discard != rs->rasterizer_discard ||
old_rs->sprite_coord_enable != rs->sprite_coord_enable ||
return false;
}
- if (util_format_get_num_planes(format) >= 2) {
- return util_format_planar_is_supported(screen, format, target,
- sample_count,
- storage_sample_count,
- usage);
- }
-
if (MAX2(1, sample_count) < MAX2(1, storage_sample_count))
return false;
if (tex->surface.fmask_offset) {
tex->dirty_level_mask |= 1 << surf->u.tex.level;
- tex->fmask_is_not_identity = true;
+ tex->fmask_is_identity = false;
}
if (tex->dcc_gather_statistics)
tex->separate_dcc_dirty = true;
si_update_ps_colorbuf0_slot(sctx);
si_update_poly_offset_state(sctx);
+ si_update_ngg_small_prim_precision(sctx);
si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
* else: swizzle_address >= NUM_RECORDS
*/
state[7] |= S_008F0C_FORMAT(fmt->img_format) |
- S_008F0C_OOB_SELECT(0) |
+ S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
S_008F0C_RESOURCE_LEVEL(1);
} else {
int first_non_void;
return NULL;
v->count = count;
- v->desc_list_byte_size = align(count * 16, SI_CPDMA_ALIGNMENT);
+
+ unsigned alloc_count = count > sscreen->num_vbos_in_user_sgprs ?
+ count - sscreen->num_vbos_in_user_sgprs : 0;
+ v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT);
for (i = 0; i < count; ++i) {
const struct util_format_description *desc;
struct si_vertex_elements *v = (struct si_vertex_elements*)state;
sctx->vertex_elements = v;
- sctx->vertex_buffers_dirty = true;
+ sctx->num_vertex_elements = v ? v->count : 0;
+
+ if (sctx->num_vertex_elements) {
+ sctx->vertex_buffers_dirty = true;
+ } else {
+ sctx->vertex_buffer_pointer_dirty = false;
+ sctx->vertex_buffer_user_sgprs_dirty = false;
+ }
if (v &&
(!old ||
struct si_context *sctx = (struct si_context *)ctx;
struct si_vertex_elements *v = (struct si_vertex_elements*)state;
- if (sctx->vertex_elements == state)
+ if (sctx->vertex_elements == state) {
sctx->vertex_elements = NULL;
+ sctx->num_vertex_elements = 0;
+ }
si_resource_reference(&v->instance_divisor_factor_buffer, NULL);
FREE(state);
}
{
struct si_context *sctx = (struct si_context *)ctx;
struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot;
+ unsigned updated_mask = u_bit_consecutive(start_slot, count);
uint32_t orig_unaligned = sctx->vertex_buffer_unaligned;
- uint32_t unaligned = orig_unaligned;
+ uint32_t unaligned = 0;
int i;
assert(start_slot + count <= ARRAY_SIZE(sctx->vertex_buffer));
const struct pipe_vertex_buffer *src = buffers + i;
struct pipe_vertex_buffer *dsti = dst + i;
struct pipe_resource *buf = src->buffer.resource;
+ unsigned slot_bit = 1 << (start_slot + i);
pipe_resource_reference(&dsti->buffer.resource, buf);
dsti->buffer_offset = src->buffer_offset;
dsti->stride = src->stride;
+
if (dsti->buffer_offset & 3 || dsti->stride & 3)
- unaligned |= 1 << (start_slot + i);
- else
- unaligned &= ~(1 << (start_slot + i));
+ unaligned |= slot_bit;
si_context_add_resource_size(sctx, buf);
if (buf)
for (i = 0; i < count; i++) {
pipe_resource_reference(&dst[i].buffer.resource, NULL);
}
- unaligned &= ~u_bit_consecutive(start_slot, count);
+ unaligned &= ~updated_mask;
}
sctx->vertex_buffers_dirty = true;
- sctx->vertex_buffer_unaligned = unaligned;
+ sctx->vertex_buffer_unaligned = (orig_unaligned & ~updated_mask) | unaligned;
/* Check whether alignment may have changed in a way that requires
* shader changes. This check is conservative: a vertex buffer can only
*/
if (sctx->vertex_elements &&
(sctx->vertex_elements->vb_alignment_check_mask &
- (unaligned | orig_unaligned) & u_bit_consecutive(start_slot, count)))
+ (unaligned | orig_unaligned) & updated_mask))
sctx->do_update_shaders = true;
}
/* Compute LATE_ALLOC_VS.LIMIT. */
unsigned num_cu_per_sh = sscreen->info.num_good_cu_per_sh;
- unsigned late_alloc_limit; /* The limit is per SH. */
-
- if (sctx->family == CHIP_KABINI) {
- late_alloc_limit = 0; /* Potential hang on Kabini. */
- } else if (num_cu_per_sh <= 4) {
- /* Too few available compute units per SH. Disallowing
- * VS to run on one CU could hurt us more than late VS
- * allocation would help.
- *
- * 2 is the highest safe number that allows us to keep
- * all CUs enabled.
- */
- late_alloc_limit = 2;
- } else {
- /* This is a good initial value, allowing 1 late_alloc
- * wave per SIMD on num_cu - 2.
- */
- late_alloc_limit = (num_cu_per_sh - 2) * 4;
- }
-
- unsigned late_alloc_limit_gs = late_alloc_limit;
+ unsigned late_alloc_wave64 = 0; /* The limit is per SH. */
unsigned cu_mask_vs = 0xffff;
unsigned cu_mask_gs = 0xffff;
- if (late_alloc_limit > 2) {
- if (sctx->chip_class >= GFX10) {
+ if (sctx->chip_class >= GFX10) {
+ /* For Wave32, the hw will launch twice the number of late
+ * alloc waves, so 1 == 2x wave32.
+ */
+ if (num_cu_per_sh <= 6) {
+ late_alloc_wave64 = num_cu_per_sh - 2;
+ } else {
+ late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
+
/* CU2 & CU3 disabled because of the dual CU design */
+ /* Late alloc is not used for NGG on Navi14 due to a hw bug. */
cu_mask_vs = 0xfff3;
- cu_mask_gs = 0xfff3; /* NGG only */
+ cu_mask_gs = sscreen->use_ngg &&
+ sctx->family != CHIP_NAVI14 ? 0xfff3 : 0xffff;
+ }
+ } else {
+ if (sctx->family == CHIP_KABINI) {
+ late_alloc_wave64 = 0; /* Potential hang on Kabini. */
+ } else if (num_cu_per_sh <= 4) {
+ /* Too few available compute units per SH. Disallowing
+ * VS to run on one CU could hurt us more than late VS
+ * allocation would help.
+ *
+ * 2 is the highest safe number that allows us to keep
+ * all CUs enabled.
+ */
+ late_alloc_wave64 = 2;
} else {
- cu_mask_vs = 0xfffe; /* 1 CU disabled */
+ /* This is a good initial value, allowing 1 late_alloc
+ * wave per SIMD on num_cu - 2.
+ */
+ late_alloc_wave64 = (num_cu_per_sh - 2) * 4;
}
- }
- /* Don't use late alloc for NGG on Navi14 due to a hw bug.
- * If NGG is never used, enable all CUs.
- */
- if (!sscreen->use_ngg || sctx->family == CHIP_NAVI14) {
- late_alloc_limit_gs = 0;
- cu_mask_gs = 0xffff;
+ if (late_alloc_wave64 > 2)
+ cu_mask_vs = 0xfffe; /* 1 CU disabled */
}
/* VS can't execute on one CU if the limit is > 2. */
S_00B118_CU_EN(cu_mask_vs) |
S_00B118_WAVE_LIMIT(0x3F));
si_pm4_set_reg(pm4, R_00B11C_SPI_SHADER_LATE_ALLOC_VS,
- S_00B11C_LIMIT(late_alloc_limit));
+ S_00B11C_LIMIT(late_alloc_wave64));
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS,
S_00B21C_CU_EN(cu_mask_gs) | S_00B21C_WAVE_LIMIT(0x3F));
- if (sctx->chip_class >= GFX10) {
- si_pm4_set_reg(pm4, R_00B204_SPI_SHADER_PGM_RSRC4_GS,
- S_00B204_CU_EN(0xffff) |
- S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_limit_gs));
- }
-
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS,
S_00B01C_CU_EN(0xffff) | S_00B01C_WAVE_LIMIT(0x3F));
}
S_00B0C0_SOFT_GROUPING_EN(1) |
S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0);
-
- if (sctx->family == CHIP_NAVI10 ||
- sctx->family == CHIP_NAVI12 ||
- sctx->family == CHIP_NAVI14) {
- /* SQ_NON_EVENT must be emitted before GE_PC_ALLOC is written. */
- si_pm4_cmd_begin(pm4, PKT3_EVENT_WRITE);
- si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_SQ_NON_EVENT) | EVENT_INDEX(0));
- si_pm4_cmd_end(pm4, false);
- }
- /* TODO: For culling, replace 128 with 256. */
- si_pm4_set_reg(pm4, R_030980_GE_PC_ALLOC,
- S_030980_OVERSUB_EN(1) |
- S_030980_NUM_PC_LINES(128 * sscreen->info.max_se - 1));
}
if (sctx->chip_class >= GFX8) {