radv: misc GFX9 changes.
[mesa.git] / src / amd / vulkan / radv_device.c
index 35489f0c9c69d251d4d2500453f5937f35c692c9..98339fb73efd0f8d0a6dd029b044c2d961a6ed7a 100644 (file)
@@ -42,6 +42,7 @@
 #include "ac_llvm_util.h"
 #include "vk_format.h"
 #include "sid.h"
+#include "gfx9d.h"
 #include "util/debug.h"
 
 static int
@@ -230,6 +231,8 @@ get_chip_name(enum radeon_family family)
        case CHIP_POLARIS11: return "AMD RADV POLARIS11";
        case CHIP_POLARIS12: return "AMD RADV POLARIS12";
        case CHIP_STONEY: return "AMD RADV STONEY";
+       case CHIP_VEGA10: return "AMD RADV VEGA";
+       case CHIP_RAVEN: return "AMD RADV RAVEN";
        default: return "AMD RADV unknown";
        }
 }
@@ -301,6 +304,12 @@ radv_physical_device_init(struct radv_physical_device *device,
 
        radv_get_device_uuid(drm_device, device->device_uuid);
 
+       if (device->rad_info.family == CHIP_STONEY ||
+           device->rad_info.chip_class >= GFX9) {
+               device->has_rbplus = true;
+               device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
+       }
+
        return VK_SUCCESS;
 
 fail:
@@ -975,6 +984,8 @@ radv_device_init_gs_info(struct radv_device *device)
        case CHIP_POLARIS10:
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
+       case CHIP_VEGA10:
+       case CHIP_RAVEN:
                device->gs_table_depth = 32;
                return;
        default:
@@ -1098,6 +1109,7 @@ VkResult radv_CreateDevice(
                case RADV_QUEUE_COMPUTE:
                        si_cs_emit_cache_flush(device->flush_cs[family],
                                               device->physical_device->rad_info.chip_class,
+                                              NULL, 0,
                                               family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
                                               RADV_CMD_FLAG_INV_ICACHE |
                                               RADV_CMD_FLAG_INV_SMEM_L1 |
@@ -1113,6 +1125,7 @@ VkResult radv_CreateDevice(
                case RADV_QUEUE_COMPUTE:
                        si_cs_emit_cache_flush(device->flush_shader_cs[family],
                                               device->physical_device->rad_info.chip_class,
+                                              NULL, 0,
                                               family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
                                               family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
                                               RADV_CMD_FLAG_INV_ICACHE |
@@ -1475,11 +1488,10 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
                max_offchip_buffers = MIN2(max_offchip_buffers, 126);
                break;
        case CIK:
-               max_offchip_buffers = MIN2(max_offchip_buffers, 508);
-               break;
        case VI:
+       case GFX9:
        default:
-               max_offchip_buffers = MIN2(max_offchip_buffers, 512);
+               max_offchip_buffers = MIN2(max_offchip_buffers, 508);
                break;
        }
 
@@ -1716,6 +1728,10 @@ radv_get_preamble_cs(struct radv_queue *queue,
                                                       S_030938_SIZE(tess_factor_ring_size / 4));
                                radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
                                                       tf_va >> 8);
+                               if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
+                                       radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
+                                                              tf_va >> 40);
+                               }
                                radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
                        } else {
                                radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
@@ -1759,6 +1775,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
                if (!i) {
                        si_cs_emit_cache_flush(cs,
                                               queue->device->physical_device->rad_info.chip_class,
+                                              NULL, 0,
                                               queue->queue_family_index == RING_COMPUTE &&
                                                 queue->device->physical_device->rad_info.chip_class >= CIK,
                                               RADV_CMD_FLAG_INV_ICACHE |
@@ -2687,24 +2704,68 @@ radv_initialise_color_surface(struct radv_device *device,
        const struct vk_format_description *desc;
        unsigned ntype, format, swap, endian;
        unsigned blend_clamp = 0, blend_bypass = 0;
-       unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
        uint64_t va;
        const struct radeon_surf *surf = &iview->image->surface;
-       const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
 
        desc = vk_format_description(iview->vk_format);
 
        memset(cb, 0, sizeof(*cb));
 
+       /* Intensity is implemented as Red, so treat it that way. */
+       cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
+
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
-       va += level_info->offset;
+
+       if (device->physical_device->rad_info.chip_class >= GFX9) {
+               struct gfx9_surf_meta_flags meta;
+               if (iview->image->dcc_offset)
+                       meta = iview->image->surface.u.gfx9.dcc;
+               else
+                       meta = iview->image->surface.u.gfx9.cmask;
+
+               cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
+                       S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
+                       S_028C74_RB_ALIGNED(meta.rb_aligned) |
+                       S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
+
+               va += iview->image->surface.u.gfx9.surf_offset >> 8;
+       } else {
+               const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
+               unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
+
+               va += level_info->offset;
+
+               pitch_tile_max = level_info->nblk_x / 8 - 1;
+               slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
+               tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
+
+               cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
+               cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
+               cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
+
+               cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
+               cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
+
+               if (iview->image->fmask.size) {
+                       if (device->physical_device->rad_info.chip_class >= CIK)
+                               cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
+                       cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
+                       cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
+               } else {
+                       /* This must be set for fast clear to work without FMASK. */
+                       if (device->physical_device->rad_info.chip_class >= CIK)
+                               cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
+                       cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
+                       cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
+               }
+       }
+
        cb->cb_color_base = va >> 8;
 
        /* CMASK variables */
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        va += iview->image->cmask.offset;
        cb->cb_color_cmask = va >> 8;
-       cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
 
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        va += iview->image->dcc_offset;
@@ -2714,18 +2775,6 @@ radv_initialise_color_surface(struct radv_device *device,
        cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
                S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
 
-       cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
-       pitch_tile_max = level_info->nblk_x / 8 - 1;
-       slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
-       tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
-
-       cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
-       cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
-
-       /* Intensity is implemented as Red, so treat it that way. */
-       cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
-               S_028C74_TILE_MODE_INDEX(tile_mode_index);
-
        if (iview->image->info.samples > 1) {
                unsigned log_samples = util_logbase2(iview->image->info.samples);
 
@@ -2735,18 +2784,9 @@ radv_initialise_color_surface(struct radv_device *device,
 
        if (iview->image->fmask.size) {
                va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
-               if (device->physical_device->rad_info.chip_class >= CIK)
-                       cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
-               cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
                cb->cb_color_fmask = va >> 8;
-               cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
        } else {
-               /* This must be set for fast clear to work without FMASK. */
-               if (device->physical_device->rad_info.chip_class >= CIK)
-                       cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
-               cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
                cb->cb_color_fmask = cb->cb_color_base;
-               cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
        }
 
        ntype = radv_translate_color_numformat(iview->vk_format,
@@ -2821,6 +2861,21 @@ radv_initialise_color_surface(struct radv_device *device,
                unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
                cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
        }
+
+       if (device->physical_device->rad_info.chip_class >= GFX9) {
+               uint32_t max_slice = radv_surface_layer_count(iview);
+               unsigned mip0_depth = iview->base_layer + max_slice - 1;
+
+               cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
+               cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
+                       S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
+               cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
+                       S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
+                       S_028C68_MAX_MIP(iview->image->info.levels);
+
+               cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
+
+       }
 }
 
 static void
@@ -2871,7 +2926,39 @@ radv_initialise_ds_surface(struct radv_device *device,
        va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
        s_offs = z_offs = va;
 
-       {
+       if (device->physical_device->rad_info.chip_class >= GFX9) {
+               assert(iview->image->surface.u.gfx9.surf_offset == 0);
+               s_offs += iview->image->surface.u.gfx9.stencil_offset;
+
+               ds->db_z_info = S_028038_FORMAT(format) |
+                       S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
+                       S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
+                       S_028038_MAXMIP(iview->image->info.levels - 1);
+               ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
+                       S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
+
+               ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
+               ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
+               ds->db_depth_view |= S_028008_MIPID(level);
+
+               ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
+                       S_02801C_Y_MAX(iview->image->info.height - 1);
+
+               /* Only use HTILE for the first level. */
+               if (iview->image->surface.htile_size && !level) {
+                       ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
+
+                       if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
+                               /* Use all of the htile_buffer for depth if there's no stencil. */
+                               ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
+                       va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
+                               iview->image->htile_offset;
+                       ds->db_htile_data_base = va >> 8;
+                       ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
+                               S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
+                               S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
+               }
+       } else {
                const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
 
                if (stencil_only)