case CHIP_KABINI: return "AMD KABINI";
case CHIP_HAWAII: return "AMD HAWAII";
case CHIP_MULLINS: return "AMD MULLINS";
+ case CHIP_TONGA: return "AMD TONGA";
+ case CHIP_ICELAND: return "AMD ICELAND";
+ case CHIP_CARRIZO: return "AMD CARRIZO";
default: return "AMD unknown";
}
}
#else
return "kabini";
#endif
+ case CHIP_TONGA: return "tonga";
+ case CHIP_ICELAND: return "iceland";
+ case CHIP_CARRIZO: return "carrizo";
default: return "";
}
}
unsigned cb_color_pitch; /* EG and later */
unsigned cb_color_slice; /* EG and later */
unsigned cb_color_attrib; /* EG and later */
+ unsigned cb_dcc_control; /* VI and later */
unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
unsigned cb_color_fmask_slice; /* EG and later */
unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */
desc[0] = va & 0xFFFFFFFF;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(vb->stride);
- if (vb->stride)
+
+ if (sctx->b.chip_class <= CIK && vb->stride)
/* Round up by rounding down and adding 1 */
desc[2] = (vb->buffer->width0 - offset -
sctx->vertex_elements->format_size[i]) /
break;
}
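+ /* On VI, NUM_RECORDS is in bytes rather than in stride-sized
+ * elements; the TXQ lowering below converts the byte size back
+ * to elements for the same reason. */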
+ if (sctx->b.chip_class >= VI && stride)
+ num_records *= stride;
+
/* Set the descriptor. */
uint32_t *desc = buffers->desc.list + slot*4;
desc[0] = va;
struct pipe_resource *buffer = targets[i]->buffer;
uint64_t va = r600_resource(buffer)->gpu_address;
- /* Set the descriptor. */
+ /* Set the descriptor.
+ *
+ * On VI, the format must be non-INVALID, otherwise
+ * the buffer is considered unbound and store
+ * instructions become no-ops.
+ */
uint32_t *desc = buffers->desc.list + bufidx*4;
desc[0] = va;
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
- S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+ S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+ S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
/* Set the resource. */
pipe_resource_reference(&buffers->buffers[bufidx],
r600_target = radeon_llvm_get_r600_target(triple);
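+ /* Assumption: VGPR spilling is kept off on VI because the LLVM
+ * backend did not support it for VI yet. */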
sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
r600_get_llvm_processor_name(sscreen->b.family),
- "+DumpCode,+vgpr-spilling",
+ sctx->b.chip_class >= VI ?
+ "+DumpCode" :
+ "+DumpCode,+vgpr-spilling",
LLVMCodeGenLevelDefault,
LLVMRelocDefault,
LLVMCodeModelDefault);
struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
const struct tgsi_full_instruction *inst = emit_data->inst;
struct gallivm_state *gallivm = bld_base->base.gallivm;
+ LLVMBuilderRef builder = gallivm->builder;
unsigned target = inst->Texture.Texture;
LLVMValueRef res_ptr;
LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
/* Read the size from the buffer descriptor directly. */
- LLVMValueRef size = res_ptr;
- size = LLVMBuildBitCast(gallivm->builder, size, v8i32, "");
- size = LLVMBuildExtractElement(gallivm->builder, size,
- lp_build_const_int32(gallivm, 6), "");
+ LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+ LLVMValueRef size = LLVMBuildExtractElement(builder, res,
+ lp_build_const_int32(gallivm, 6), "");
+
+ if (si_shader_ctx->screen->b.chip_class >= VI) {
+ /* On VI, the descriptor contains the size in bytes,
+ * but TXQ must return the size in elements.
+ * The stride is always non-zero for resources using TXQ.
+ */
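+ /* The 14-bit stride field sits in bits [29:16] of descriptor
+ * word 1 (see S_008F04_STRIDE), hence the shift by 16 and the
+ * 0x3FFF mask below; e.g. a 1024-byte buffer with 16-byte
+ * elements yields 1024/16 = 64. */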
+ LLVMValueRef stride =
+ LLVMBuildExtractElement(builder, res,
+ lp_build_const_int32(gallivm, 5), "");
+ stride = LLVMBuildLShr(builder, stride,
+ lp_build_const_int32(gallivm, 16), "");
+ stride = LLVMBuildAnd(builder, stride,
+ lp_build_const_int32(gallivm, 0x3FFF), "");
+
+ size = LLVMBuildUDiv(builder, size, stride, "");
+ }
+
emit_data->args[0] = size;
return;
}
uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
{
- if (sscreen->b.chip_class == CIK &&
+ if (sscreen->b.chip_class >= CIK &&
sscreen->b.info.cik_macrotile_mode_array_valid) {
unsigned index, tileb;
surf->cb_color_info = color_info;
surf->cb_color_attrib = color_attrib;
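+ /* DCC is not used yet (the DCC base address is emitted as 0), so
+ * keeping the overwrite combiner disabled is assumed to be the
+ * safe setting here. */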
+ if (sctx->b.chip_class >= VI)
+ surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1);
+
if (rtex->fmask.size) {
surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
db_htile_surface = 0;
}
+ /* Bug workaround: disable stencil tiling on VI. */
+ if (sctx->b.chip_class >= VI)
+ s_info |= S_028044_TILE_STENCIL_DISABLE(1);
+
assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
si_update_fb_rs_state(sctx);
si_update_fb_blend_state(sctx);
- sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
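+ /* 16 dwords per colorbuffer on VI: 14 registers plus the 2-dword
+ * SET_CONTEXT_REG header. SI/CIK emit only 15, so this
+ * over-reserves one dword per colorbuffer there. */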
+ sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
RADEON_PRIO_COLOR_META);
}
- r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
+ r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
+ sctx->b.chip_class >= VI ? 14 : 13);
radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */
radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */
radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */
radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */
radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */
- radeon_emit(cs, 0); /* R_028C78 unused */
+ radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */
radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */
radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */
radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */
radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */
radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
+
+ if (sctx->b.chip_class >= VI)
+ radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE (DCC not enabled yet) */
}
/* set CB_COLOR1_INFO for possible dual-src blending */
if (i == 1 && state->cbufs[0]) {
/* Buffer resource. */
if (texture->target == PIPE_BUFFER) {
- unsigned stride;
+ unsigned stride, num_records;
desc = util_format_description(state->format);
first_non_void = util_format_get_first_non_void_channel(state->format);
format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
+ num_records = state->u.buf.last_element + 1 - state->u.buf.first_element;
+ num_records = MIN2(num_records, texture->width0 / stride);
+
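+ /* As above, VI expects NUM_RECORDS in bytes. */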
+ if (sctx->b.chip_class >= VI)
+ num_records *= stride;
+
view->state[4] = va;
view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
S_008F04_STRIDE(stride);
- view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element;
+ view->state[6] = num_records;
view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
break;
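+ /* Per-chip raster configuration (assumption: golden values for
+ * the new chips, matching how earlier families are handled). */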
+ case CHIP_TONGA:
+ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
+ si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002a);
+ break;
+ case CHIP_ICELAND:
+ case CHIP_CARRIZO:
+ si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000002);
+ si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
+ break;
case CHIP_KAVERI:
/* XXX todo */
case CHIP_KABINI:
si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
}
+ if (sctx->b.chip_class >= VI) {
+ si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
+ S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1));
+ si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
+ si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
+ }
+
sctx->init_config = pm4;
}
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
- S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0);
+ S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
+ S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0);
}
static unsigned si_get_ls_hs_config(struct si_context *sctx,
if (info->indexed) {
radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
- if (ib->index_size == 4) {
- radeon_emit(cs, V_028A7C_VGT_INDEX_32 | (SI_BIG_ENDIAN ?
- V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
- } else {
- radeon_emit(cs, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ?
- V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+ /* index type */
+ switch (ib->index_size) {
+ case 1:
+ radeon_emit(cs, V_028A7C_VGT_INDEX_8);
+ break;
+ case 2:
+ radeon_emit(cs, V_028A7C_VGT_INDEX_16 |
+ (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+ V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+ break;
+ case 4:
+ radeon_emit(cs, V_028A7C_VGT_INDEX_32 |
+ (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+ V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
+ break;
+ default:
+ assert(!"unreachable");
+ return;
}
}
if (sctx->flags & SI_CONTEXT_INV_TC_L1)
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
- if (sctx->flags & SI_CONTEXT_INV_TC_L2)
+ if (sctx->flags & SI_CONTEXT_INV_TC_L2) {
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
+ /* TODO: this might not be needed. */
+ if (sctx->chip_class >= VI)
+ cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
+ }
+
if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
S_0085F0_CB0_DEST_BASE_ENA(1) |
ib.offset = sctx->index_buffer.offset;
/* Translate or upload, if needed. */
- if (ib.index_size == 1) {
+ /* 8-bit indices are supported on VI. */
+ if (sctx->b.chip_class <= CIK && ib.index_size == 1) {
struct pipe_resource *out_buffer = NULL;
unsigned out_offset, start, count, start_offset;
void *ptr;
}
}
+ /* TODO: VI should read index buffers through TC, so this shouldn't be
+ * needed on VI. */
if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) {
sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
r600_resource(ib.buffer)->TC_L2_dirty = false;
/* Workaround for a VGT hang when streamout is enabled.
* It must be done after drawing. */
- if (sctx->b.family == CHIP_HAWAII &&
+ if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
(sctx->b.streamout.streamout_enabled ||
sctx->b.streamout.prims_gen_query_enabled)) {
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
static void si_init_gs_rings(struct si_context *sctx)
{
unsigned esgs_ring_size = 128 * 1024;
- unsigned gsvs_ring_size = 64 * 1024 * 1024;
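+ /* 60 MB stays under the VI limit asserted below:
+ * 64 MB / 256 = 2^18 would overflow the 18-bit ring-size fields. */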
+ unsigned gsvs_ring_size = 60 * 1024 * 1024;
assert(!sctx->gs_rings);
sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
PIPE_USAGE_DEFAULT, gsvs_ring_size);
if (sctx->b.chip_class >= CIK) {
+ if (sctx->b.chip_class >= VI) {
+ /* The maximum sizes are just under 64 MB on VI: the ring-size
+ * fields are in units of 256 bytes and only 18 bits wide, i.e.
+ * at most (2^18 - 1) * 256 bytes. */
+ assert(esgs_ring_size / 256 < (1 << 18));
+ assert(gsvs_ring_size / 256 < (1 << 18));
+ }
si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
esgs_ring_size / 256);
si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,