radv: disable FMASK compression when drawing with GENERAL layout
[mesa.git] src/amd/vulkan/radv_cmd_buffer.c
index abc1cfbbd04f0055f44e9137f424c6b4d8c92ce4..695aa257c31690127e37d4347209f16c144b6452 100644
@@ -277,22 +277,15 @@ static VkResult radv_create_cmd_buffer(
        if (cmd_buffer == NULL)
                return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 
-       cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
+       vk_object_base_init(&device->vk, &cmd_buffer->base,
+                           VK_OBJECT_TYPE_COMMAND_BUFFER);
+
        cmd_buffer->device = device;
        cmd_buffer->pool = pool;
        cmd_buffer->level = level;
 
-       if (pool) {
-               list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
-               cmd_buffer->queue_family_index = pool->queue_family_index;
-
-       } else {
-               /* Init the pool_link so we can safely call list_del when we destroy
-                * the command buffer
-                */
-               list_inithead(&cmd_buffer->pool_link);
-               cmd_buffer->queue_family_index = RADV_QUEUE_GENERAL;
-       }
+       list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
+       cmd_buffer->queue_family_index = pool->queue_family_index;
 
        ring = radv_queue_family_to_ring(cmd_buffer->queue_family_index);
 
@@ -325,9 +318,11 @@ radv_cmd_buffer_destroy(struct radv_cmd_buffer *cmd_buffer)
                cmd_buffer->device->ws->buffer_destroy(cmd_buffer->upload.upload_bo);
        cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
 
-       for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++)
+       for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
                free(cmd_buffer->descriptors[i].push_set.set.mapped_ptr);
 
+       vk_object_base_finish(&cmd_buffer->base);
+
        vk_free(&cmd_buffer->pool->alloc, cmd_buffer);
 }
 
@@ -364,7 +359,7 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
 
        memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings));
 
-       for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) {
+       for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
                cmd_buffer->descriptors[i].dirty = 0;
                cmd_buffer->descriptors[i].valid = 0;
                cmd_buffer->descriptors[i].push_dirty = false;
@@ -700,8 +695,8 @@ radv_convert_user_sample_locs(struct radv_sample_locations_state *state,
                float shifted_pos_x = user_locs[i].x - 0.5;
                float shifted_pos_y = user_locs[i].y - 0.5;
 
-               int32_t scaled_pos_x = floor(shifted_pos_x * 16);
-               int32_t scaled_pos_y = floor(shifted_pos_y * 16);
+               int32_t scaled_pos_x = floorf(shifted_pos_x * 16);
+               int32_t scaled_pos_y = floorf(shifted_pos_y * 16);
 
                sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
                sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
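
For reference, the conversion above can be checked in isolation: user sample locations in [0,1] are re-expressed as signed offsets from the pixel center in 1/16-pixel units and clamped to the signed 4-bit range [-8, 7]. A minimal standalone sketch (illustrative only, not RADV code; the helper name is made up):

#include <math.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical helper mirroring the loop body above: one coordinate of a
 * user sample location becomes a signed 1/16-pixel offset from the pixel
 * center, clamped to [-8, 7]. */
static int32_t convert_sample_coord(float user_coord)
{
	float shifted = user_coord - 0.5f;              /* relative to pixel center */
	int32_t scaled = (int32_t)floorf(shifted * 16); /* 1/16-pixel units */

	if (scaled < -8)
		return -8;
	if (scaled > 7)
		return 7;
	return scaled;
}

int main(void)
{
	/* The standard 4x MSAA position (0.375, 0.125) maps to (-2, -6). */
	printf("%d %d\n", convert_sample_coord(0.375f), convert_sample_coord(0.125f));
	return 0;
}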
@@ -1271,7 +1266,7 @@ radv_emit_line_width(struct radv_cmd_buffer *cmd_buffer)
        unsigned width = cmd_buffer->state.dynamic.line_width * 8;
 
        radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
-                              S_028A08_WIDTH(CLAMP(width, 0, 0xFFF)));
+                              S_028A08_WIDTH(CLAMP(width, 0, 0xFFFF)));
 }
 
 static void
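
For scale: the value written to the WIDTH field is the line width in 1/8-pixel steps (hence the * 8 above), so the previous 0xFFF clamp capped lines at 0xFFF / 8 = 511.875 pixels, while the widened 0xFFFF clamp allows the full 16-bit range of the field, i.e. up to 0xFFFF / 8 = 8191.875 pixels.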
@@ -1365,6 +1360,13 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
                cb_color_info &= C_028C70_DCC_ENABLE;
        }
 
+       if (!radv_layout_can_fast_clear(image, layout, in_render_loop,
+                                       radv_image_queue_family_mask(image,
+                                                                    cmd_buffer->queue_family_index,
+                                                                    cmd_buffer->queue_family_index))) {
+               cb_color_info &= C_028C70_COMPRESSION;
+       }
+
        if (radv_image_is_tc_compat_cmask(image) &&
            (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
             radv_is_dcc_decompress_pipeline(cmd_buffer))) {
@@ -1374,6 +1376,19 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
                cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
        }
 
+       if (radv_image_has_fmask(image) &&
+           (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
+            radv_is_hw_resolve_pipeline(cmd_buffer))) {
+               /* Make sure FMASK is enabled if it has been cleared because:
+                *
+                * 1) it's required for FMASK_DECOMPRESS operations to avoid
+                * GPU hangs
+                * 2) it's necessary for CB_RESOLVE which can read compressed
+                * FMASK data anyway.
+                */
+               cb_color_info |= S_028C70_COMPRESSION(1);
+       }
+
        if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {
                        radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
                        radeon_emit(cmd_buffer->cs, cb->cb_color_base);
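
Taken together, the two new blocks above decide the COMPRESSION field of CB_COLORi_INFO per draw: it is masked off whenever the tracked layout does not allow fast clears (which is what disables FMASK/CMASK compression for VK_IMAGE_LAYOUT_GENERAL, per the commit subject), and forced back on for the FMASK decompress and hardware resolve meta pipelines, which must read compressed FMASK. A minimal sketch of the combined decision (standalone C, not RADV code; parameter names are made up):

#include <stdbool.h>

/* Illustrative only: collapses the two blocks above into one decision.
 * 'enabled_at_fb_setup' stands for the COMPRESSION value computed when the
 * framebuffer state was built. */
static bool cb_target_compression(bool enabled_at_fb_setup,
                                  bool layout_allows_fast_clear,
                                  bool has_fmask,
                                  bool fmask_decompress_or_hw_resolve)
{
	bool enabled = enabled_at_fb_setup;

	if (!layout_allows_fast_clear)
		enabled = false;   /* e.g. VK_IMAGE_LAYOUT_GENERAL */

	if (has_fmask && fmask_decompress_or_hw_resolve)
		enabled = true;    /* these pipelines must read compressed FMASK */

	return enabled;
}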
@@ -1808,7 +1823,7 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
        uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
 
        if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
-               radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, 0));
+               radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
                radeon_emit(cs, va);
                radeon_emit(cs, va >> 32);
                radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
@@ -1992,7 +2007,7 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
        uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
 
        if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
-               radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG, 3, cmd_buffer->state.predicating));
+               radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, cmd_buffer->state.predicating));
                radeon_emit(cs, va);
                radeon_emit(cs, va >> 32);
                radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
@@ -2958,8 +2973,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
                        flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
                        /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
                         * invalidate the scalar cache. */
-                       if (cmd_buffer->device->physical_device->use_aco &&
-                           cmd_buffer->device->physical_device->rad_info.chip_class >= GFX8)
+                       if (!cmd_buffer->device->physical_device->use_llvm)
                                flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
 
                        if (!image_is_coherent)
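
The backend check above is simplified from "ACO and GFX8+" to "not LLVM": as the comment notes, ACO loads SSBOs through SMEM, so reads through the scalar cache must be invalidated as well. A compact illustration of the flush-bit selection (standalone sketch with made-up flag names, not the RADV_CMD_FLAG_* enums used in the diff):

#include <stdbool.h>

/* Made-up flag names for illustration. */
enum {
	INV_VCACHE = 1 << 0, /* vector cache */
	INV_SCACHE = 1 << 1, /* scalar cache */
};

/* Shader reads always invalidate the vector cache; when the shaders were
 * compiled by ACO (i.e. not LLVM), the scalar cache is invalidated too,
 * since SSBO loads may have gone through SMEM. */
static unsigned shader_read_flush_bits(bool use_llvm)
{
	unsigned bits = INV_VCACHE;

	if (!use_llvm)
		bits |= INV_SCACHE;

	return bits;
}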
@@ -3318,7 +3332,6 @@ VkResult radv_AllocateCommandBuffers(
                        list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
 
                        result = radv_reset_cmd_buffer(cmd_buffer);
-                       cmd_buffer->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
                        cmd_buffer->level = pAllocateInfo->level;
 
                        pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer);
@@ -3453,19 +3466,22 @@ void radv_CmdBindVertexBuffers(
 
        assert(firstBinding + bindingCount <= MAX_VBS);
        for (uint32_t i = 0; i < bindingCount; i++) {
+               RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
                uint32_t idx = firstBinding + i;
 
                if (!changed &&
-                   (vb[idx].buffer != radv_buffer_from_handle(pBuffers[i]) ||
+                   (vb[idx].buffer != buffer ||
                     vb[idx].offset != pOffsets[i])) {
                        changed = true;
                }
 
-               vb[idx].buffer = radv_buffer_from_handle(pBuffers[i]);
+               vb[idx].buffer = buffer;
                vb[idx].offset = pOffsets[i];
 
-               radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
-                                  vb[idx].buffer->bo);
+               if (buffer) {
+                       radv_cs_add_buffer(cmd_buffer->device->ws,
+                                          cmd_buffer->cs, vb[idx].buffer->bo);
+               }
        }
 
        if (!changed) {
@@ -3790,8 +3806,9 @@ VkResult radv_EndCommandBuffer(
        vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);
        vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.subpass_sample_locs);
 
-       if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs))
-               return vk_error(cmd_buffer->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
+       VkResult result = cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs);
+       if (result != VK_SUCCESS)
+               return vk_error(cmd_buffer->device->instance, result);
 
        cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE;
 
@@ -3870,9 +3887,7 @@ void radv_CmdBindPipeline(
                /* Prefetch all pipeline shaders at first draw time. */
                cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS;
 
-               if ((cmd_buffer->device->physical_device->rad_info.family == CHIP_NAVI10 ||
-                    cmd_buffer->device->physical_device->rad_info.family == CHIP_NAVI12 ||
-                    cmd_buffer->device->physical_device->rad_info.family == CHIP_NAVI14) &&
+               if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX10 &&
                    cmd_buffer->state.emitted_pipeline &&
                    radv_pipeline_has_ngg(cmd_buffer->state.emitted_pipeline) &&
                    !radv_pipeline_has_ngg(cmd_buffer->state.pipeline)) {
@@ -4255,15 +4270,18 @@ VkResult radv_CreateCommandPool(
        RADV_FROM_HANDLE(radv_device, device, _device);
        struct radv_cmd_pool *pool;
 
-       pool = vk_alloc2(&device->alloc, pAllocator, sizeof(*pool), 8,
+       pool = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
        if (pool == NULL)
                return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
 
+       vk_object_base_init(&device->vk, &pool->base,
+                           VK_OBJECT_TYPE_COMMAND_POOL);
+
        if (pAllocator)
                pool->alloc = *pAllocator;
        else
-               pool->alloc = device->alloc;
+               pool->alloc = device->vk.alloc;
 
        list_inithead(&pool->cmd_buffers);
        list_inithead(&pool->free_cmd_buffers);
@@ -4297,7 +4315,8 @@ void radv_DestroyCommandPool(
                radv_cmd_buffer_destroy(cmd_buffer);
        }
 
-       vk_free2(&device->alloc, pAllocator, pool);
+       vk_object_base_finish(&pool->base);
+       vk_free2(&device->vk.alloc, pAllocator, pool);
 }
 
 VkResult radv_ResetCommandPool(
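
The vk_object_base_init()/vk_object_base_finish() pairing used here and in the command-buffer hunks above follows Mesa's common Vulkan object runtime: the base is initialized right after the allocation succeeds and finished right before the object is freed. A minimal sketch of the pattern with a hypothetical object (the object and function names are made up; the vk_* helpers are the ones the diff already uses):

/* Hypothetical RADV object following the same create/destroy pattern. */
struct radv_thing {
	struct vk_object_base base;
	/* ...driver state... */
};

static struct radv_thing *
radv_thing_create(struct radv_device *device)
{
	struct radv_thing *thing;

	thing = vk_zalloc(&device->vk.alloc, sizeof(*thing), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!thing)
		return NULL;

	/* Initialize the common base right after allocation... */
	vk_object_base_init(&device->vk, &thing->base, VK_OBJECT_TYPE_UNKNOWN);
	return thing;
}

static void
radv_thing_destroy(struct radv_device *device, struct radv_thing *thing)
{
	/* ...and finish it right before freeing the object. */
	vk_object_base_finish(&thing->base);
	vk_free(&device->vk.alloc, thing);
}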
@@ -5466,7 +5485,7 @@ void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
                if (size != image->planes[0].surface.dcc_size) {
                        state->flush_bits |=
                                radv_fill_buffer(cmd_buffer, image->bo,
-                                                image->offset + image->dcc_offset + size,
+                                                image->offset + image->planes[0].surface.dcc_offset + size,
                                                 image->planes[0].surface.dcc_size - size,
                                                 0xffffffff);
                }