From 2d1952e2a5abd273983374b420371d263388bb20 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Thu, 16 Apr 2015 20:44:54 +0200 Subject: [PATCH] radeonsi: add VI hardware support --- src/gallium/drivers/radeon/r600_pipe_common.c | 6 +++ src/gallium/drivers/radeon/r600_pipe_common.h | 1 + src/gallium/drivers/radeonsi/si_descriptors.c | 16 +++++-- src/gallium/drivers/radeonsi/si_pipe.c | 4 +- src/gallium/drivers/radeonsi/si_shader.c | 25 +++++++++-- src/gallium/drivers/radeonsi/si_state.c | 45 ++++++++++++++++--- src/gallium/drivers/radeonsi/si_state_draw.c | 41 ++++++++++++----- .../drivers/radeonsi/si_state_shaders.c | 8 +++- 8 files changed, 121 insertions(+), 25 deletions(-) diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c index 51c72cc882f..c982a4d9bad 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.c +++ b/src/gallium/drivers/radeon/r600_pipe_common.c @@ -408,6 +408,9 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen) case CHIP_KABINI: return "AMD KABINI"; case CHIP_HAWAII: return "AMD HAWAII"; case CHIP_MULLINS: return "AMD MULLINS"; + case CHIP_TONGA: return "AMD TONGA"; + case CHIP_ICELAND: return "AMD ICELAND"; + case CHIP_CARRIZO: return "AMD CARRIZO"; default: return "AMD unknown"; } } @@ -532,6 +535,9 @@ const char *r600_get_llvm_processor_name(enum radeon_family family) #else return "kabini"; #endif + case CHIP_TONGA: return "tonga"; + case CHIP_ICELAND: return "iceland"; + case CHIP_CARRIZO: return "carrizo"; default: return ""; } } diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h index 768fe282981..29db1cc4e07 100644 --- a/src/gallium/drivers/radeon/r600_pipe_common.h +++ b/src/gallium/drivers/radeon/r600_pipe_common.h @@ -242,6 +242,7 @@ struct r600_surface { unsigned cb_color_pitch; /* EG and later */ unsigned cb_color_slice; /* EG and later */ unsigned cb_color_attrib; /* EG and later */ + unsigned cb_dcc_control; /* VI and later */ unsigned cb_color_fmask; /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */ unsigned cb_color_fmask_slice; /* EG and later */ unsigned cb_color_cmask; /* CB_COLORn_TILE (r600 only) */ diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 8d9f8f71dc5..890be071596 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -429,7 +429,8 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx) desc[0] = va & 0xFFFFFFFF; desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(vb->stride); - if (vb->stride) + + if (sctx->b.chip_class <= CIK && vb->stride) /* Round up by rounding down and adding 1 */ desc[2] = (vb->buffer->width0 - offset - sctx->vertex_elements->format_size[i]) / @@ -593,6 +594,9 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, break; } + if (sctx->b.chip_class >= VI && stride) + num_records *= stride; + /* Set the descriptor. */ uint32_t *desc = buffers->desc.list + slot*4; desc[0] = va; @@ -682,7 +686,12 @@ static void si_set_streamout_targets(struct pipe_context *ctx, struct pipe_resource *buffer = targets[i]->buffer; uint64_t va = r600_resource(buffer)->gpu_address; - /* Set the descriptor. */ + /* Set the descriptor. + * + * On VI, the format must be non-INVALID, otherwise + * the buffer will be considered not bound and store + * instructions will be no-ops. + */ uint32_t *desc = buffers->desc.list + bufidx*4; desc[0] = va; desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); @@ -690,7 +699,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx, desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | - S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); /* Set the resource. */ pipe_resource_reference(&buffers->buffers[bufidx], diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 9b5cdd8dc12..97cf24bf715 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -184,7 +184,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void * r600_target = radeon_llvm_get_r600_target(triple); sctx->tm = LLVMCreateTargetMachine(r600_target, triple, r600_get_llvm_processor_name(sscreen->b.family), - "+DumpCode,+vgpr-spilling", + sctx->b.chip_class >= VI ? + "+DumpCode" : + "+DumpCode,+vgpr-spilling", LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault); diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index ac1c1be7a79..4288e9b2ab1 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -2787,6 +2787,7 @@ static void txq_fetch_args( struct si_shader_context *si_shader_ctx = si_shader_context(bld_base); const struct tgsi_full_instruction *inst = emit_data->inst; struct gallivm_state *gallivm = bld_base->base.gallivm; + LLVMBuilderRef builder = gallivm->builder; unsigned target = inst->Texture.Texture; LLVMValueRef res_ptr; @@ -2807,10 +2808,26 @@ static void txq_fetch_args( LLVMTypeRef v8i32 = LLVMVectorType(i32, 8); /* Read the size from the buffer descriptor directly. */ - LLVMValueRef size = res_ptr; - size = LLVMBuildBitCast(gallivm->builder, size, v8i32, ""); - size = LLVMBuildExtractElement(gallivm->builder, size, - lp_build_const_int32(gallivm, 6), ""); + LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, ""); + LLVMValueRef size = LLVMBuildExtractElement(builder, res, + lp_build_const_int32(gallivm, 6), ""); + + if (si_shader_ctx->screen->b.chip_class >= VI) { + /* On VI, the descriptor contains the size in bytes, + * but TXQ must return the size in elements. + * The stride is always non-zero for resources using TXQ. + */ + LLVMValueRef stride = + LLVMBuildExtractElement(builder, res, + lp_build_const_int32(gallivm, 5), ""); + stride = LLVMBuildLShr(builder, stride, + lp_build_const_int32(gallivm, 16), ""); + stride = LLVMBuildAnd(builder, stride, + lp_build_const_int32(gallivm, 0x3FFF), ""); + + size = LLVMBuildUDiv(builder, size, stride, ""); + } + emit_data->args[0] = size; return; } diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 51ade5248a4..6a8d786282c 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -61,7 +61,7 @@ unsigned si_array_mode(unsigned mode) uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex) { - if (sscreen->b.chip_class == CIK && + if (sscreen->b.chip_class >= CIK && sscreen->b.info.cik_macrotile_mode_array_valid) { unsigned index, tileb; @@ -1846,6 +1846,9 @@ static void si_initialize_color_surface(struct si_context *sctx, surf->cb_color_info = color_info; surf->cb_color_attrib = color_attrib; + if (sctx->b.chip_class >= VI) + surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1); + if (rtex->fmask.size) { surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8; surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max); @@ -1991,6 +1994,10 @@ static void si_init_depth_surface(struct si_context *sctx, db_htile_surface = 0; } + /* Bug workaround. */ + if (sctx->b.chip_class >= VI) + s_info |= S_028044_TILE_STENCIL_DISABLE(1); + assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | @@ -2084,7 +2091,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, si_update_fb_rs_state(sctx); si_update_fb_blend_state(sctx); - sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3; + sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3; sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4; sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */ sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */ @@ -2163,20 +2170,24 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom RADEON_PRIO_COLOR_META); } - r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13); + r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, + sctx->b.chip_class >= VI ? 14 : 13); radeon_emit(cs, cb->cb_color_base); /* R_028C60_CB_COLOR0_BASE */ radeon_emit(cs, cb->cb_color_pitch); /* R_028C64_CB_COLOR0_PITCH */ radeon_emit(cs, cb->cb_color_slice); /* R_028C68_CB_COLOR0_SLICE */ radeon_emit(cs, cb->cb_color_view); /* R_028C6C_CB_COLOR0_VIEW */ radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */ radeon_emit(cs, cb->cb_color_attrib); /* R_028C74_CB_COLOR0_ATTRIB */ - radeon_emit(cs, 0); /* R_028C78 unused */ + radeon_emit(cs, cb->cb_dcc_control); /* R_028C78_CB_COLOR0_DCC_CONTROL */ radeon_emit(cs, tex->cmask.base_address_reg); /* R_028C7C_CB_COLOR0_CMASK */ radeon_emit(cs, tex->cmask.slice_tile_max); /* R_028C80_CB_COLOR0_CMASK_SLICE */ radeon_emit(cs, cb->cb_color_fmask); /* R_028C84_CB_COLOR0_FMASK */ radeon_emit(cs, cb->cb_color_fmask_slice); /* R_028C88_CB_COLOR0_FMASK_SLICE */ radeon_emit(cs, tex->color_clear_value[0]); /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */ radeon_emit(cs, tex->color_clear_value[1]); /* R_028C90_CB_COLOR0_CLEAR_WORD1 */ + + if (sctx->b.chip_class >= VI) + radeon_emit(cs, 0); /* R_028C94_CB_COLOR0_DCC_BASE */ } /* set CB_COLOR1_INFO for possible dual-src blending */ if (i == 1 && state->cbufs[0]) { @@ -2332,7 +2343,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx, /* Buffer resource. */ if (texture->target == PIPE_BUFFER) { - unsigned stride; + unsigned stride, num_records; desc = util_format_description(state->format); first_non_void = util_format_get_first_non_void_channel(state->format); @@ -2341,10 +2352,16 @@ si_create_sampler_view_custom(struct pipe_context *ctx, format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); + num_records = state->u.buf.last_element + 1 - state->u.buf.first_element; + num_records = MIN2(num_records, texture->width0 / stride); + + if (sctx->b.chip_class >= VI) + num_records *= stride; + view->state[4] = va; view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); - view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element; + view->state[6] = num_records; view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | @@ -3167,6 +3184,15 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a); si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e); break; + case CHIP_TONGA: + si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012); + si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002a); + break; + case CHIP_ICELAND: + case CHIP_CARRIZO: + si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000002); + si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000); + break; case CHIP_KAVERI: /* XXX todo */ case CHIP_KABINI: @@ -3261,5 +3287,12 @@ static void si_init_config(struct si_context *sctx) si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff)); } + if (sctx->b.chip_class >= VI) { + si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL, + S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1)); + si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30); + si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32); + } + sctx->init_config = pm4; } diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index f136a1c94d8..4c21655596c 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -318,7 +318,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx, S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) | S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) | S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) | - S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0); + S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) | + S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0); } static unsigned si_get_ls_hs_config(struct si_context *sctx, @@ -473,12 +474,24 @@ static void si_emit_draw_packets(struct si_context *sctx, if (info->indexed) { radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); - if (ib->index_size == 4) { - radeon_emit(cs, V_028A7C_VGT_INDEX_32 | (SI_BIG_ENDIAN ? - V_028A7C_VGT_DMA_SWAP_32_BIT : 0)); - } else { - radeon_emit(cs, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ? - V_028A7C_VGT_DMA_SWAP_16_BIT : 0)); + /* index type */ + switch (ib->index_size) { + case 1: + radeon_emit(cs, V_028A7C_VGT_INDEX_8); + break; + case 2: + radeon_emit(cs, V_028A7C_VGT_INDEX_16 | + (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ? + V_028A7C_VGT_DMA_SWAP_16_BIT : 0)); + break; + case 4: + radeon_emit(cs, V_028A7C_VGT_INDEX_32 | + (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ? + V_028A7C_VGT_DMA_SWAP_32_BIT : 0)); + break; + default: + assert(!"unreachable"); + return; } } @@ -604,9 +617,14 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato if (sctx->flags & SI_CONTEXT_INV_TC_L1) cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1); - if (sctx->flags & SI_CONTEXT_INV_TC_L2) + if (sctx->flags & SI_CONTEXT_INV_TC_L2) { cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1); + /* TODO: this might not be needed. */ + if (sctx->chip_class >= VI) + cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1); + } + if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) { cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) | S_0085F0_CB0_DEST_BASE_ENA(1) | @@ -754,7 +772,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) ib.offset = sctx->index_buffer.offset; /* Translate or upload, if needed. */ - if (ib.index_size == 1) { + /* 8-bit indices are supported on VI. */ + if (sctx->b.chip_class <= CIK && ib.index_size == 1) { struct pipe_resource *out_buffer = NULL; unsigned out_offset, start, count, start_offset; void *ptr; @@ -789,6 +808,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) } } + /* TODO: VI should read index buffers through TC, so this shouldn't be + * needed on VI. */ if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) { sctx->b.flags |= SI_CONTEXT_INV_TC_L2; r600_resource(ib.buffer)->TC_L2_dirty = false; @@ -822,7 +843,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) /* Workaround for a VGT hang when streamout is enabled. * It must be done after drawing. */ - if (sctx->b.family == CHIP_HAWAII && + if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) && (sctx->b.streamout.streamout_enabled || sctx->b.streamout.prims_gen_query_enabled)) { sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC; diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c index 044acd8fcea..0347014948d 100644 --- a/src/gallium/drivers/radeonsi/si_state_shaders.c +++ b/src/gallium/drivers/radeonsi/si_state_shaders.c @@ -1016,7 +1016,7 @@ bcolor: static void si_init_gs_rings(struct si_context *sctx) { unsigned esgs_ring_size = 128 * 1024; - unsigned gsvs_ring_size = 64 * 1024 * 1024; + unsigned gsvs_ring_size = 60 * 1024 * 1024; assert(!sctx->gs_rings); sctx->gs_rings = CALLOC_STRUCT(si_pm4_state); @@ -1028,6 +1028,12 @@ static void si_init_gs_rings(struct si_context *sctx) PIPE_USAGE_DEFAULT, gsvs_ring_size); if (sctx->b.chip_class >= CIK) { + if (sctx->b.chip_class >= VI) { + /* The maximum sizes are 63.999 MB on VI, because + * the register fields only have 18 bits. */ + assert(esgs_ring_size / 256 < (1 << 18)); + assert(gsvs_ring_size / 256 < (1 << 18)); + } si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE, esgs_ring_size / 256); si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE, -- 2.30.2