ucp_mask |
S_028810_CLIP_DISABLE(window_space));
- /* reuse needs to be set off if we write oViewport */
- radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
- S_028AB4_REUSE_OFF(info->writes_viewport_index));
+ if (sctx->b.chip_class <= VI) {
+ /* reuse needs to be set off if we write oViewport */
+ radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
+ S_028AB4_REUSE_OFF(info->writes_viewport_index));
+ }
}
/*
num_records = size / stride;
num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
- if (screen->b.chip_class == VI)
+ /* The NUM_RECORDS field has a different meaning depending on the chip,
+ * instruction type, STRIDE, and SWIZZLE_ENABLE.
+ *
+ * SI-CIK:
+ * - If STRIDE == 0, it's in byte units.
+ * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.
+ *
+ * VI:
+ * - For SMEM and STRIDE == 0, it's in byte units.
+ * - For SMEM and STRIDE != 0, it's in units of STRIDE.
+ * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.
+ * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE.
+ * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_-
+ * ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when
+ * using SMEM. This can be done in the shader by clearing STRIDE with s_and.
+ * That way the same descriptor can be used by both SMEM and VMEM.
+ *
+ * GFX9:
+ * - For SMEM and STRIDE == 0, it's in byte units.
+ * - For SMEM and STRIDE != 0, it's in units of STRIDE.
+ * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.
+ * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.
+ */
+ if (screen->b.chip_class >= GFX9)
+ /* When vindex == 0, LLVM sets IDXEN = 0, thus changing units
+ * from STRIDE to bytes. This works around it by setting
+ * NUM_RECORDS to at least the size of one element, so that
+ * the first element is readable when IDXEN == 0.
+ *
+ * TODO: Fix this in LLVM, but do we need a new intrinsic where
+ * IDXEN is enforced?
+ */
+ num_records = num_records ? MAX2(num_records, stride) : 0;
+ else if (screen->b.chip_class == VI)
num_records *= stride;
state[4] = 0;
if (!sampler &&
(res->target == PIPE_TEXTURE_CUBE ||
res->target == PIPE_TEXTURE_CUBE_ARRAY ||
- res->target == PIPE_TEXTURE_3D)) {
+ (screen->b.chip_class <= VI &&
+ res->target == PIPE_TEXTURE_3D))) {
/* For the purpose of shader images, treat cube maps and 3D
* textures as 2D arrays. For 3D textures, the address
* calculations for mipmaps are different, so we rely on the
const struct pipe_vertex_buffer *src = buffers + i;
struct pipe_vertex_buffer *dsti = dst + i;
- if (unlikely(src->user_buffer)) {
+ if (unlikely(src->is_user_buffer)) {
/* Zero-stride attribs only. */
assert(src->stride == 0);
* Use const_uploader to upload into VRAM directly.
*/
u_upload_data(sctx->b.b.const_uploader, 0, 32, 32,
- src->user_buffer,
+ src->buffer.user,
&dsti->buffer_offset,
- &dsti->buffer);
+ &dsti->buffer.resource);
dsti->stride = 0;
} else {
- struct pipe_resource *buf = src->buffer;
+ struct pipe_resource *buf = src->buffer.resource;
- pipe_resource_reference(&dsti->buffer, buf);
+ pipe_resource_reference(&dsti->buffer.resource, buf);
dsti->buffer_offset = src->buffer_offset;
dsti->stride = src->stride;
r600_context_add_resource_size(ctx, buf);
}
} else {
for (i = 0; i < count; i++) {
- pipe_resource_reference(&dst[i].buffer, NULL);
+ pipe_resource_reference(&dst[i].buffer.resource, NULL);
}
}
sctx->vertex_buffers_dirty = true;
}
-static void si_set_index_buffer(struct pipe_context *ctx,
- const struct pipe_index_buffer *ib)
-{
- struct si_context *sctx = (struct si_context *)ctx;
-
- if (ib) {
- struct pipe_resource *buf = ib->buffer;
-
- pipe_resource_reference(&sctx->index_buffer.buffer, buf);
- memcpy(&sctx->index_buffer, ib, sizeof(*ib));
- r600_context_add_resource_size(ctx, buf);
- if (buf)
- r600_resource(buf)->bind_history |= PIPE_BIND_INDEX_BUFFER;
- } else {
- pipe_resource_reference(&sctx->index_buffer.buffer, NULL);
- }
-}
-
/*
* Misc
*/
sctx->b.b.bind_vertex_elements_state = si_bind_vertex_elements;
sctx->b.b.delete_vertex_elements_state = si_delete_vertex_element;
sctx->b.b.set_vertex_buffers = si_set_vertex_buffers;
- sctx->b.b.set_index_buffer = si_set_index_buffer;
sctx->b.b.texture_barrier = si_texture_barrier;
sctx->b.b.memory_barrier = si_memory_barrier;
si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0));
/* FIXME calculate these values somehow ??? */
- si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
- si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
+ if (sctx->b.chip_class <= VI) {
+ si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES);
+ si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40);
+ }
si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2);
si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0);
si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
+ si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
+ if (sctx->b.chip_class >= GFX9)
+ si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (sctx->b.chip_class < CIK)
si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
}
if (sctx->b.chip_class >= CIK) {
- /* If this is 0, Bonaire can hang even if GS isn't being used.
- * Other chips are unaffected. These are suboptimal values,
- * but we don't use on-chip GS.
- */
- si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
- S_028A44_ES_VERTS_PER_SUBGRP(64) |
- S_028A44_GS_PRIMS_PER_SUBGRP(4));
-
if (sctx->b.chip_class >= GFX9) {
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff));
} else {
si_pm4_set_reg(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
si_pm4_set_reg(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
+
+ /* If this is 0, Bonaire can hang even if GS isn't being used.
+ * Other chips are unaffected. These are suboptimal values,
+ * but we don't use on-chip GS.
+ */
+ si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL,
+ S_028A44_ES_VERTS_PER_SUBGRP(64) |
+ S_028A44_GS_PRIMS_PER_SUBGRP(4));
}
si_pm4_set_reg(pm4, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
RADEON_PRIO_BORDER_COLORS);
if (sctx->b.chip_class >= GFX9) {
- si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, 0);
+ unsigned num_se = sscreen->b.info.max_se;
+ unsigned pc_lines = 0;
+
+ switch (sctx->b.family) {
+ case CHIP_VEGA10:
+ pc_lines = 4096;
+ break;
+ default:
+ assert(0);
+ }
+
+ si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL,
+ S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
si_pm4_set_reg(pm4, R_028064_DB_RENDER_FILTER, 0);
/* TODO: We can use this to disable RBs for rendering to GART: */
si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 0);
si_pm4_set_reg(pm4, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
/* TODO: Enable the binner: */
si_pm4_set_reg(pm4, R_028C44_PA_SC_BINNER_CNTL_0,
- S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC));
- si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, 0);
+ S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
+ S_028C44_DISABLE_START_OF_PRIM(1));
+ si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
+ S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
+ S_028C48_MAX_PRIM_PER_BATCH(1023));
si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);