#include "si_pipe.h"
#include "radeon/r600_cs.h"
#include "sid.h"
+#include "gfx9d.h"
#include "util/u_index_modify.h"
#include "util/u_upload_mgr.h"
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_WD_SWITCH_ON_EOP(sscreen->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->b.chip_class >= VI ?
- max_primgroup_in_wave : 0);
+ max_primgroup_in_wave : 0) |
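+ /* EN_INST_OPT_* only exists on GFX9, where IA_MULTI_VGT_PARAM
+  * moved to UCONFIG space (0x30960); both bits stay zero on
+  * older chips. */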
+ S_030960_EN_INST_OPT_BASIC(sscreen->b.chip_class >= GFX9) |
+ S_030960_EN_INST_OPT_ADV(sscreen->b.chip_class >= GFX9);
}
void si_init_ia_multi_vgt_param_table(struct si_context *sctx)
/* Draw state. */
if (ia_multi_vgt_param != sctx->last_multi_vgt_param) {
- if (sctx->b.chip_class >= CIK)
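+ /* On GFX9, IA_MULTI_VGT_PARAM is a UCONFIG register. The _idx
+  * variant encodes an index (here 4) in the packet so the CP can
+  * apply its register-specific handling. */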
+ if (sctx->b.chip_class >= GFX9)
+ radeon_set_uconfig_reg_idx(cs, R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
+ else if (sctx->b.chip_class >= CIK)
radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
else
radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
/* Primitive restart. */
if (info->primitive_restart != sctx->last_primitive_restart_en) {
- radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, info->primitive_restart);
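+ /* VGT_MULTI_PRIM_IB_RESET_EN also moved from context space to
+  * UCONFIG space on GFX9. */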
+ if (sctx->b.chip_class >= GFX9)
+ radeon_set_uconfig_reg(cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
+ info->primitive_restart);
+ else
+ radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
+ info->primitive_restart);
+
sctx->last_primitive_restart_en = info->primitive_restart;
}
/* draw packet */
if (info->indexed) {
if (ib->index_size != sctx->last_index_size) {
- radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
+ unsigned index_type;
/* index type */
switch (ib->index_size) {
case 1:
- radeon_emit(cs, V_028A7C_VGT_INDEX_8);
+ index_type = V_028A7C_VGT_INDEX_8;
break;
case 2:
- radeon_emit(cs, V_028A7C_VGT_INDEX_16 |
- (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
- V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+ index_type = V_028A7C_VGT_INDEX_16 |
+ (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+ V_028A7C_VGT_DMA_SWAP_16_BIT : 0);
break;
case 4:
- radeon_emit(cs, V_028A7C_VGT_INDEX_32 |
- (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
- V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
+ index_type = V_028A7C_VGT_INDEX_32 |
+ (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+ V_028A7C_VGT_DMA_SWAP_32_BIT : 0);
break;
default:
assert(!"unreachable");
return;
}
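+ /* GFX9 drops the INDEX_TYPE packet; the index type is written
+  * to the VGT_INDEX_TYPE UCONFIG register instead. */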
+ if (sctx->b.chip_class >= GFX9) {
+ radeon_set_uconfig_reg_idx(cs, R_03090C_VGT_INDEX_TYPE,
+ 2, index_type);
+ } else {
+ radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
+ radeon_emit(cs, index_type);
+ }
+
sctx->last_index_size = ib->index_size;
}
radeon_emit(cs, PKT3(PKT3_DRAW_INDEX_2, 4, render_cond_bit));
radeon_emit(cs, index_max_size);
radeon_emit(cs, index_va);
- radeon_emit(cs, (index_va >> 32UL) & 0xFF);
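+ /* Don't mask the high address dword: GFX9 uses 48-bit virtual
+  * addresses, so more than 8 high bits can be set. */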
+ radeon_emit(cs, index_va >> 32);
radeon_emit(cs, info->count);
radeon_emit(cs, V_0287F0_DI_SRC_SEL_DMA);
} else {
static void si_emit_surface_sync(struct r600_common_context *rctx,
				 unsigned cp_coher_cntl)
{
struct radeon_winsys_cs *cs = rctx->gfx.cs;
- /* ACQUIRE_MEM is only required on a compute ring. */
- radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
- radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
- radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
- radeon_emit(cs, 0); /* CP_COHER_BASE */
- radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ if (rctx->chip_class >= GFX9) {
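+ /* GFX9 removed SURFACE_SYNC from the gfx ring; ACQUIRE_MEM
+  * replaces it and widens COHER_SIZE/COHER_BASE to 64 bits. */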
+ /* Flush caches and wait for the caches to assert idle. */
+ radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0));
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0xffffff); /* CP_COHER_SIZE_HI */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ } else {
+ /* ACQUIRE_MEM is only required on a compute ring. */
+ radeon_emit(cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
+ radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
+ radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
+ radeon_emit(cs, 0); /* CP_COHER_BASE */
+ radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
+ }
}
void si_emit_cache_flush(struct si_context *sctx)
static void si_get_draw_start_count(struct si_context *sctx,
				    const struct pipe_draw_info *info,
				    unsigned *start, unsigned *count)
{
if (info->indirect) {
- struct r600_resource *indirect =
- (struct r600_resource*)info->indirect;
- int *data = r600_buffer_map_sync_with_rings(&sctx->b,
- indirect, PIPE_TRANSFER_READ);
- data += info->indirect_offset/sizeof(int);
- *start = data[2];
- *count = data[0];
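+ /* The indirect buffer holds indirect_count draw records. Scan
+  * all of them and return the union of their vertex ranges, so
+  * the caller sees every vertex the GPU may fetch. */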
+ unsigned indirect_count;
+ struct pipe_transfer *transfer;
+ unsigned begin, end;
+ unsigned map_size;
+ unsigned *data;
+
+ if (info->indirect_params) {
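+ /* The draw count itself comes from a buffer
+  * (ARB_indirect_parameters). */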
+ data = pipe_buffer_map_range(&sctx->b.b,
+ info->indirect_params,
+ info->indirect_params_offset,
+ sizeof(unsigned),
+ PIPE_TRANSFER_READ, &transfer);
+
+ indirect_count = *data;
+
+ pipe_buffer_unmap(&sctx->b.b, transfer);
+ } else {
+ indirect_count = info->indirect_count;
+ }
+
+ if (!indirect_count) {
+ *start = *count = 0;
+ return;
+ }
+
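+ /* Only the first 3 dwords (count, instance_count, start) of the
+  * last record are read, hence the size computation. */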
+ map_size = (indirect_count - 1) * info->indirect_stride + 3 * sizeof(unsigned);
+ data = pipe_buffer_map_range(&sctx->b.b, info->indirect,
+ info->indirect_offset, map_size,
+ PIPE_TRANSFER_READ, &transfer);
+
+ begin = UINT_MAX;
+ end = 0;
+
+ for (unsigned i = 0; i < indirect_count; ++i) {
+ unsigned count = data[0];
+ unsigned start = data[2];
+
+ if (count > 0) {
+ begin = MIN2(begin, start);
+ end = MAX2(end, start + count);
+ }
+
+ data += info->indirect_stride / sizeof(unsigned);
+ }
+
+ pipe_buffer_unmap(&sctx->b.b, transfer);
+
+ if (begin < end) {
+ *start = begin;
+ *count = end - begin;
+ } else {
+ *start = *count = 0;
+ }
} else {
*start = info->start;
*count = info->count;
if (!si_upload_vertex_buffer_descriptors(sctx))
return;
+ /* GFX9 scissor bug workaround. There is also a more efficient but
+ * more involved alternative workaround. */
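+ /* The simple workaround: force a PS partial flush whenever the
+  * scissor state is about to be re-emitted. */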
+ if (sctx->b.chip_class == GFX9 &&
+ si_is_atom_dirty(sctx, &sctx->b.scissors.atom))
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+
/* Flush caches before the first state atom, which does L2 prefetches. */
if (sctx->b.flags)
si_emit_cache_flush(sctx);