sscreen->info.family == CHIP_FIJI ||
sscreen->info.family == CHIP_POLARIS10 ||
sscreen->info.family == CHIP_POLARIS11 ||
- sscreen->info.family == CHIP_POLARIS12)
+ sscreen->info.family == CHIP_POLARIS12 ||
+ sscreen->info.family == CHIP_VEGAM)
partial_vs_wave = true;
} else {
partial_vs_wave = true;
/* Emit state atoms. */
unsigned mask = sctx->dirty_atoms & ~skip_atom_mask;
while (mask) {
- struct r600_atom *atom = sctx->atoms.array[u_bit_scan(&mask)];
+ struct si_atom *atom = &sctx->atoms.array[u_bit_scan(&mask)];
atom->emit(sctx, atom);
}
sctx->framebuffer.dirty_cbufs |=
((1 << sctx->framebuffer.state.nr_cbufs) - 1);
sctx->framebuffer.dirty_zsbuf = true;
- si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
si_update_all_texture_descriptors(sctx);
}
bool new_is_poly = rast_prim >= PIPE_PRIM_TRIANGLES;
if (old_is_poly != new_is_poly) {
sctx->scissors.dirty_mask = (1 << SI_MAX_VIEWPORTS) - 1;
- si_mark_atom_dirty(sctx, &sctx->scissors.atom);
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors);
}
sctx->current_rast_prim = rast_prim;
* more involved alternative workaround.
*/
if ((sctx->family == CHIP_VEGA10 || sctx->family == CHIP_RAVEN) &&
- si_is_atom_dirty(sctx, &sctx->scissors.atom)) {
+ si_is_atom_dirty(sctx, &sctx->atoms.s.scissors)) {
sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
si_emit_cache_flush(sctx);
}
SI_CONTEXT_CS_PARTIAL_FLUSH))) {
/* If we have to wait for idle, set all states first, so that all
* SET packets are processed in parallel with previous draw calls.
- * Then upload descriptors, set shader pointers, and draw, and
- * prefetch at the end. This ensures that the time the CUs
- * are idle is very short. (there are only SET_SH packets between
- * the wait and the draw)
+ * Then draw and prefetch at the end. This ensures that the time
+ * the CUs are idle is very short.
*/
- struct r600_atom *shader_pointers = &sctx->shader_pointers.atom;
- unsigned masked_atoms = 1u << shader_pointers->id;
+ unsigned masked_atoms = 0;
if (unlikely(sctx->flags & SI_CONTEXT_FLUSH_FOR_RENDER_COND))
- masked_atoms |= 1u << sctx->render_cond_atom.id;
+ masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
- /* Emit all states except shader pointers and render condition. */
+ if (!si_upload_graphics_shader_descriptors(sctx))
+ return;
+
+ /* Emit all states except possibly render condition. */
si_emit_all_states(sctx, info, masked_atoms);
si_emit_cache_flush(sctx);
-
/* <-- CUs are idle here. */
- if (!si_upload_graphics_shader_descriptors(sctx))
- return;
- /* Set shader pointers after descriptors are uploaded. */
- if (si_is_atom_dirty(sctx, shader_pointers))
- shader_pointers->emit(sctx, NULL);
- if (si_is_atom_dirty(sctx, &sctx->render_cond_atom))
- sctx->render_cond_atom.emit(sctx, NULL);
+ if (si_is_atom_dirty(sctx, &sctx->atoms.s.render_cond))
+ sctx->atoms.s.render_cond.emit(sctx, NULL);
sctx->dirty_atoms = 0;
si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
* in parallel, but starting the draw first is more important.
*/
if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
- cik_emit_prefetch_L2(sctx);
+ cik_emit_prefetch_L2(sctx, false);
} else {
/* If we don't wait for idle, start prefetches first, then set
* states, and draw at the end.
if (sctx->flags)
si_emit_cache_flush(sctx);
+ /* Only prefetch the API VS and VBO descriptors. */
if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
- cik_emit_prefetch_L2(sctx);
+ cik_emit_prefetch_L2(sctx, true);
if (!si_upload_graphics_shader_descriptors(sctx))
return;
si_emit_all_states(sctx, info, 0);
si_emit_draw_packets(sctx, info, indexbuf, index_size, index_offset);
+
+ /* Prefetch the remaining shaders after the draw has been
+ * started. */
+ if (sctx->chip_class >= CIK && sctx->prefetch_L2_mask)
+ cik_emit_prefetch_L2(sctx, false);
}
if (unlikely(sctx->current_saved_cs)) {