radeonsi: add VI hardware support
author: Marek Olšák <marek.olsak@amd.com>
Thu, 16 Apr 2015 18:44:54 +0000 (20:44 +0200)
committer: Marek Olšák <marek.olsak@amd.com>
Fri, 14 Aug 2015 13:02:29 +0000 (15:02 +0200)
src/gallium/drivers/radeon/r600_pipe_common.c
src/gallium/drivers/radeon/r600_pipe_common.h
src/gallium/drivers/radeonsi/si_descriptors.c
src/gallium/drivers/radeonsi/si_pipe.c
src/gallium/drivers/radeonsi/si_shader.c
src/gallium/drivers/radeonsi/si_state.c
src/gallium/drivers/radeonsi/si_state_draw.c
src/gallium/drivers/radeonsi/si_state_shaders.c

index 51c72cc882f60ef44d3f40472b3d8e0cadcb68d2..c982a4d9bad9d271908529cbe5d438d014262ecc 100644 (file)
@@ -408,6 +408,9 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
        case CHIP_KABINI: return "AMD KABINI";
        case CHIP_HAWAII: return "AMD HAWAII";
        case CHIP_MULLINS: return "AMD MULLINS";
+       case CHIP_TONGA: return "AMD TONGA";
+       case CHIP_ICELAND: return "AMD ICELAND";
+       case CHIP_CARRIZO: return "AMD CARRIZO";
        default: return "AMD unknown";
        }
 }
@@ -532,6 +535,9 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
 #else
                return "kabini";
 #endif
+       case CHIP_TONGA: return "tonga";
+       case CHIP_ICELAND: return "iceland";
+       case CHIP_CARRIZO: return "carrizo";
        default: return "";
        }
 }
index 768fe2829816017e16265f3680e65221627015bd..29db1cc4e07b09b35fabc3b82a46882f75a36b24 100644 (file)
@@ -242,6 +242,7 @@ struct r600_surface {
        unsigned cb_color_pitch;        /* EG and later */
        unsigned cb_color_slice;        /* EG and later */
        unsigned cb_color_attrib;       /* EG and later */
+       unsigned cb_dcc_control;        /* VI and later */
        unsigned cb_color_fmask;        /* CB_COLORn_FMASK (EG and later) or CB_COLORn_FRAG (r600) */
        unsigned cb_color_fmask_slice;  /* EG and later */
        unsigned cb_color_cmask;        /* CB_COLORn_TILE (r600 only) */
index 8d9f8f71dc55dc6db139e890f04f82159dd5e11d..890be071596eb13ee6696f70469d499876bc54aa 100644 (file)
@@ -429,7 +429,8 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
                desc[0] = va & 0xFFFFFFFF;
                desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
                          S_008F04_STRIDE(vb->stride);
-               if (vb->stride)
+
+               if (sctx->b.chip_class <= CIK && vb->stride)
                        /* Round up by rounding down and adding 1 */
                        desc[2] = (vb->buffer->width0 - offset -
                                   sctx->vertex_elements->format_size[i]) /
@@ -593,6 +594,9 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
                        break;
                }
 
+               if (sctx->b.chip_class >= VI && stride)
+                       num_records *= stride;
+
                /* Set the descriptor. */
                uint32_t *desc = buffers->desc.list + slot*4;
                desc[0] = va;
@@ -682,7 +686,12 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
                        struct pipe_resource *buffer = targets[i]->buffer;
                        uint64_t va = r600_resource(buffer)->gpu_address;
 
-                       /* Set the descriptor. */
+                       /* Set the descriptor.
+                        *
+                        * On VI, the format must be non-INVALID, otherwise
+                        * the buffer will be considered not bound and store
+                        * instructions will be no-ops.
+                        */
                        uint32_t *desc = buffers->desc.list + bufidx*4;
                        desc[0] = va;
                        desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
@@ -690,7 +699,8 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
                        desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                                  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-                                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
+                                 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+                                 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
 
                        /* Set the resource. */
                        pipe_resource_reference(&buffers->buffers[bufidx],
index 9b5cdd8dc12bf1d9205d4783889688f9b1c33d4d..97cf24bf715724af4f33a11f48a3b37d84a8de57 100644 (file)
@@ -184,7 +184,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
        r600_target = radeon_llvm_get_r600_target(triple);
        sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
                                           r600_get_llvm_processor_name(sscreen->b.family),
-                                          "+DumpCode,+vgpr-spilling",
+                                          sctx->b.chip_class >= VI ?
+                                                  "+DumpCode" :
+                                                  "+DumpCode,+vgpr-spilling",
                                           LLVMCodeGenLevelDefault,
                                           LLVMRelocDefault,
                                           LLVMCodeModelDefault);
index ac1c1be7a79cf447413776c5f172a0ec4094531a..4288e9b2ab17b8da83eaa7655fe6ffa06a96c5e5 100644 (file)
@@ -2787,6 +2787,7 @@ static void txq_fetch_args(
        struct si_shader_context *si_shader_ctx = si_shader_context(bld_base);
        const struct tgsi_full_instruction *inst = emit_data->inst;
        struct gallivm_state *gallivm = bld_base->base.gallivm;
+       LLVMBuilderRef builder = gallivm->builder;
        unsigned target = inst->Texture.Texture;
        LLVMValueRef res_ptr;
 
@@ -2807,10 +2808,26 @@ static void txq_fetch_args(
                LLVMTypeRef v8i32 = LLVMVectorType(i32, 8);
 
                /* Read the size from the buffer descriptor directly. */
-               LLVMValueRef size = res_ptr;
-               size = LLVMBuildBitCast(gallivm->builder, size, v8i32, "");
-               size = LLVMBuildExtractElement(gallivm->builder, size,
-                                             lp_build_const_int32(gallivm, 6), "");
+               LLVMValueRef res = LLVMBuildBitCast(builder, res_ptr, v8i32, "");
+               LLVMValueRef size = LLVMBuildExtractElement(builder, res,
+                                               lp_build_const_int32(gallivm, 6), "");
+
+               if (si_shader_ctx->screen->b.chip_class >= VI) {
+                       /* On VI, the descriptor contains the size in bytes,
+                        * but TXQ must return the size in elements.
+                        * The stride is always non-zero for resources using TXQ.
+                        */
+                       LLVMValueRef stride =
+                               LLVMBuildExtractElement(builder, res,
+                                                       lp_build_const_int32(gallivm, 5), "");
+                       stride = LLVMBuildLShr(builder, stride,
+                                              lp_build_const_int32(gallivm, 16), "");
+                       stride = LLVMBuildAnd(builder, stride,
+                                             lp_build_const_int32(gallivm, 0x3FFF), "");
+
+                       size = LLVMBuildUDiv(builder, size, stride, "");
+               }
+
                emit_data->args[0] = size;
                return;
        }
index 51ade5248a45930c51b3450b2eb83c5991b299e8..6a8d786282cb943cf50044d113538f89ba029804 100644 (file)
@@ -61,7 +61,7 @@ unsigned si_array_mode(unsigned mode)
 
 uint32_t si_num_banks(struct si_screen *sscreen, struct r600_texture *tex)
 {
-       if (sscreen->b.chip_class == CIK &&
+       if (sscreen->b.chip_class >= CIK &&
            sscreen->b.info.cik_macrotile_mode_array_valid) {
                unsigned index, tileb;
 
@@ -1846,6 +1846,9 @@ static void si_initialize_color_surface(struct si_context *sctx,
        surf->cb_color_info = color_info;
        surf->cb_color_attrib = color_attrib;
 
+       if (sctx->b.chip_class >= VI)
+               surf->cb_dcc_control = S_028C78_OVERWRITE_COMBINER_DISABLE(1);
+
        if (rtex->fmask.size) {
                surf->cb_color_fmask = (offset + rtex->fmask.offset) >> 8;
                surf->cb_color_fmask_slice = S_028C88_TILE_MAX(rtex->fmask.slice_tile_max);
@@ -1991,6 +1994,10 @@ static void si_init_depth_surface(struct si_context *sctx,
                db_htile_surface = 0;
        }
 
+       /* Bug workaround. */
+       if (sctx->b.chip_class >= VI)
+               s_info |= S_028044_TILE_STENCIL_DISABLE(1);
+
        assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0);
 
        surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) |
@@ -2084,7 +2091,7 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
        si_update_fb_rs_state(sctx);
        si_update_fb_blend_state(sctx);
 
-       sctx->framebuffer.atom.num_dw = state->nr_cbufs*15 + (8 - state->nr_cbufs)*3;
+       sctx->framebuffer.atom.num_dw = state->nr_cbufs*16 + (8 - state->nr_cbufs)*3;
        sctx->framebuffer.atom.num_dw += state->zsbuf ? 26 : 4;
        sctx->framebuffer.atom.num_dw += 3; /* WINDOW_SCISSOR_BR */
        sctx->framebuffer.atom.num_dw += 18; /* MSAA sample locations */
@@ -2163,20 +2170,24 @@ static void si_emit_framebuffer_state(struct si_context *sctx, struct r600_atom
                                RADEON_PRIO_COLOR_META);
                }
 
-               r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C, 13);
+               r600_write_context_reg_seq(cs, R_028C60_CB_COLOR0_BASE + i * 0x3C,
+                                          sctx->b.chip_class >= VI ? 14 : 13);
                radeon_emit(cs, cb->cb_color_base);     /* R_028C60_CB_COLOR0_BASE */
                radeon_emit(cs, cb->cb_color_pitch);    /* R_028C64_CB_COLOR0_PITCH */
                radeon_emit(cs, cb->cb_color_slice);    /* R_028C68_CB_COLOR0_SLICE */
                radeon_emit(cs, cb->cb_color_view);     /* R_028C6C_CB_COLOR0_VIEW */
                radeon_emit(cs, cb->cb_color_info | tex->cb_color_info); /* R_028C70_CB_COLOR0_INFO */
                radeon_emit(cs, cb->cb_color_attrib);   /* R_028C74_CB_COLOR0_ATTRIB */
-               radeon_emit(cs, 0);                     /* R_028C78 unused */
+               radeon_emit(cs, cb->cb_dcc_control);    /* R_028C78_CB_COLOR0_DCC_CONTROL */
                radeon_emit(cs, tex->cmask.base_address_reg);   /* R_028C7C_CB_COLOR0_CMASK */
                radeon_emit(cs, tex->cmask.slice_tile_max);     /* R_028C80_CB_COLOR0_CMASK_SLICE */
                radeon_emit(cs, cb->cb_color_fmask);            /* R_028C84_CB_COLOR0_FMASK */
                radeon_emit(cs, cb->cb_color_fmask_slice);      /* R_028C88_CB_COLOR0_FMASK_SLICE */
                radeon_emit(cs, tex->color_clear_value[0]);     /* R_028C8C_CB_COLOR0_CLEAR_WORD0 */
                radeon_emit(cs, tex->color_clear_value[1]);     /* R_028C90_CB_COLOR0_CLEAR_WORD1 */
+
+               if (sctx->b.chip_class >= VI)
+                       radeon_emit(cs, 0);     /* R_028C94_CB_COLOR0_DCC_BASE */
        }
        /* set CB_COLOR1_INFO for possible dual-src blending */
        if (i == 1 && state->cbufs[0]) {
@@ -2332,7 +2343,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 
        /* Buffer resource. */
        if (texture->target == PIPE_BUFFER) {
-               unsigned stride;
+               unsigned stride, num_records;
 
                desc = util_format_description(state->format);
                first_non_void = util_format_get_first_non_void_channel(state->format);
@@ -2341,10 +2352,16 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
                format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
                num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
 
+               num_records = state->u.buf.last_element + 1 - state->u.buf.first_element;
+               num_records = MIN2(num_records, texture->width0 / stride);
+
+               if (sctx->b.chip_class >= VI)
+                       num_records *= stride;
+
                view->state[4] = va;
                view->state[5] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
                                 S_008F04_STRIDE(stride);
-               view->state[6] = state->u.buf.last_element + 1 - state->u.buf.first_element;
+               view->state[6] = num_records;
                view->state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
                                 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
                                 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) |
@@ -3167,6 +3184,15 @@ static void si_init_config(struct si_context *sctx)
                        si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x3a00161a);
                        si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002e);
                        break;
+               case CHIP_TONGA:
+                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x16000012);
+                       si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x0000002a);
+                       break;
+               case CHIP_ICELAND:
+               case CHIP_CARRIZO:
+                       si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, 0x00000002);
+                       si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, 0x00000000);
+                       break;
                case CHIP_KAVERI:
                        /* XXX todo */
                case CHIP_KABINI:
@@ -3261,5 +3287,12 @@ static void si_init_config(struct si_context *sctx)
                si_pm4_set_reg(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, S_00B01C_CU_EN(0xffff));
        }
 
+       if (sctx->b.chip_class >= VI) {
+               si_pm4_set_reg(pm4, R_028424_CB_DCC_CONTROL,
+                              S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1));
+               si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 30);
+               si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 32);
+       }
+
        sctx->init_config = pm4;
 }
index f136a1c94d83d79e4cee02f84b2a278f98a87fb8..4c21655596c4f8af947af35148c5a7c0ffc7d2df 100644 (file)
@@ -318,7 +318,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
                S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
                S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
                S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
-               S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0);
+               S_028AA8_WD_SWITCH_ON_EOP(sctx->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
+               S_028AA8_MAX_PRIMGRP_IN_WAVE(sctx->b.chip_class >= VI ? 2 : 0);
 }
 
 static unsigned si_get_ls_hs_config(struct si_context *sctx,
@@ -473,12 +474,24 @@ static void si_emit_draw_packets(struct si_context *sctx,
        if (info->indexed) {
                radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
 
-               if (ib->index_size == 4) {
-                       radeon_emit(cs, V_028A7C_VGT_INDEX_32 | (SI_BIG_ENDIAN ?
-                                       V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
-               } else {
-                       radeon_emit(cs, V_028A7C_VGT_INDEX_16 | (SI_BIG_ENDIAN ?
-                                       V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+               /* index type */
+               switch (ib->index_size) {
+               case 1:
+                       radeon_emit(cs, V_028A7C_VGT_INDEX_8);
+                       break;
+               case 2:
+                       radeon_emit(cs, V_028A7C_VGT_INDEX_16 |
+                                   (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+                                            V_028A7C_VGT_DMA_SWAP_16_BIT : 0));
+                       break;
+               case 4:
+                       radeon_emit(cs, V_028A7C_VGT_INDEX_32 |
+                                   (SI_BIG_ENDIAN && sctx->b.chip_class <= CIK ?
+                                            V_028A7C_VGT_DMA_SWAP_32_BIT : 0));
+                       break;
+               default:
+                       assert(!"unreachable");
+                       return;
                }
        }
 
@@ -604,9 +617,14 @@ void si_emit_cache_flush(struct r600_common_context *sctx, struct r600_atom *ato
 
        if (sctx->flags & SI_CONTEXT_INV_TC_L1)
                cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
-       if (sctx->flags & SI_CONTEXT_INV_TC_L2)
+       if (sctx->flags & SI_CONTEXT_INV_TC_L2) {
                cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
 
+               /* TODO: this might not be needed. */
+               if (sctx->chip_class >= VI)
+                       cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
+       }
+
        if (sctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
                cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
                                 S_0085F0_CB0_DEST_BASE_ENA(1) |
@@ -754,7 +772,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                ib.offset = sctx->index_buffer.offset;
 
                /* Translate or upload, if needed. */
-               if (ib.index_size == 1) {
+               /* 8-bit indices are supported on VI. */
+               if (sctx->b.chip_class <= CIK && ib.index_size == 1) {
                        struct pipe_resource *out_buffer = NULL;
                        unsigned out_offset, start, count, start_offset;
                        void *ptr;
@@ -789,6 +808,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
                }
        }
 
+       /* TODO: VI should read index buffers through TC, so this shouldn't be
+        * needed on VI. */
        if (info->indexed && r600_resource(ib.buffer)->TC_L2_dirty) {
                sctx->b.flags |= SI_CONTEXT_INV_TC_L2;
                r600_resource(ib.buffer)->TC_L2_dirty = false;
@@ -822,7 +843,7 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
        /* Workaround for a VGT hang when streamout is enabled.
         * It must be done after drawing. */
-       if (sctx->b.family == CHIP_HAWAII &&
+       if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
            (sctx->b.streamout.streamout_enabled ||
             sctx->b.streamout.prims_gen_query_enabled)) {
                sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
index 044acd8fceae098ec145d1fbaab0f8c6bdf6d217..0347014948d968310a2100181816e0e1b6d61da6 100644 (file)
@@ -1016,7 +1016,7 @@ bcolor:
 static void si_init_gs_rings(struct si_context *sctx)
 {
        unsigned esgs_ring_size = 128 * 1024;
-       unsigned gsvs_ring_size = 64 * 1024 * 1024;
+       unsigned gsvs_ring_size = 60 * 1024 * 1024;
 
        assert(!sctx->gs_rings);
        sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
@@ -1028,6 +1028,12 @@ static void si_init_gs_rings(struct si_context *sctx)
                                             PIPE_USAGE_DEFAULT, gsvs_ring_size);
 
        if (sctx->b.chip_class >= CIK) {
+               if (sctx->b.chip_class >= VI) {
+                       /* The maximum sizes are 63.999 MB on VI, because
+                        * the register fields only have 18 bits. */
+                       assert(esgs_ring_size / 256 < (1 << 18));
+                       assert(gsvs_ring_size / 256 < (1 << 18));
+               }
                si_pm4_set_reg(sctx->gs_rings, R_030900_VGT_ESGS_RING_SIZE,
                               esgs_ring_size / 256);
                si_pm4_set_reg(sctx->gs_rings, R_030904_VGT_GSVS_RING_SIZE,