From 1c03a690bfc3265c7fefa7f87e69782a6672a9b2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Marek=20Ol=C5=A1=C3=A1k?= Date: Wed, 6 Aug 2014 22:29:27 +0200 Subject: [PATCH] radeonsi: use gpu_address from r600_resource MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Reviewed-by: Christian König Reviewed-by: Alex Deucher --- src/gallium/drivers/radeonsi/si_compute.c | 10 +++--- src/gallium/drivers/radeonsi/si_descriptors.c | 33 ++++++++----------- src/gallium/drivers/radeonsi/si_dma.c | 12 +++---- src/gallium/drivers/radeonsi/si_hw_context.c | 2 +- src/gallium/drivers/radeonsi/si_state.c | 17 ++++------ src/gallium/drivers/radeonsi/si_state_draw.c | 23 +++++-------- 6 files changed, 41 insertions(+), 56 deletions(-) diff --git a/src/gallium/drivers/radeonsi/si_compute.c b/src/gallium/drivers/radeonsi/si_compute.c index 42e4fec8674..12e4f5611c5 100644 --- a/src/gallium/drivers/radeonsi/si_compute.c +++ b/src/gallium/drivers/radeonsi/si_compute.c @@ -114,7 +114,7 @@ static void si_set_global_binding( uint64_t va; uint32_t offset; program->global_buffers[i] = resources[i]; - va = r600_resource_va(ctx->screen, resources[i]); + va = r600_resource(resources[i])->gpu_address; offset = util_le32_to_cpu(*handles[i]); va += offset; va = util_cpu_to_le64(va); @@ -223,8 +223,7 @@ static void si_launch_grid( si_resource_create_custom(sctx->b.b.screen, PIPE_USAGE_DEFAULT, scratch_bytes); } - scratch_buffer_va = r600_resource_va(ctx->screen, - (struct pipe_resource*)shader->scratch_bo); + scratch_buffer_va = shader->scratch_bo->gpu_address; si_pm4_add_bo(pm4, shader->scratch_bo, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RESOURCE_RW); @@ -238,8 +237,7 @@ static void si_launch_grid( si_upload_const_buffer(sctx, &kernel_args_buffer, (uint8_t*)kernel_args, kernel_args_size, &kernel_args_offset); - kernel_args_va = r600_resource_va(ctx->screen, - (struct pipe_resource*)kernel_args_buffer); + kernel_args_va = kernel_args_buffer->gpu_address; kernel_args_va += kernel_args_offset; si_pm4_add_bo(pm4, kernel_args_buffer, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); @@ -285,7 +283,7 @@ static void si_launch_grid( 0x190 /* Default value */); } - shader_va = r600_resource_va(ctx->screen, (void *)shader->bo); + shader_va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff); si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40); diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c index 171de45845a..81ad14b0335 100644 --- a/src/gallium/drivers/radeonsi/si_descriptors.c +++ b/src/gallium/drivers/radeonsi/si_descriptors.c @@ -113,8 +113,6 @@ static void si_init_descriptors(struct si_context *sctx, unsigned num_elements, void (*emit_func)(struct si_context *ctx, struct r600_atom *state)) { - uint64_t va; - assert(num_elements <= sizeof(desc->enabled_mask)*8); assert(num_elements <= sizeof(desc->dirty_mask)*8); @@ -131,11 +129,11 @@ static void si_init_descriptors(struct si_context *sctx, r600_context_bo_reloc(&sctx->b, &sctx->b.rings.gfx, desc->buffer, RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_DATA); - va = r600_resource_va(sctx->b.b.screen, &desc->buffer->b.b); /* We don't check for CS space here, because this should be called * only once at context initialization. */ - si_emit_cp_dma_clear_buffer(sctx, va, desc->buffer->b.b.width0, 0, + si_emit_cp_dma_clear_buffer(sctx, desc->buffer->gpu_address, + desc->buffer->b.b.width0, 0, R600_CP_DMA_SYNC); } @@ -170,7 +168,7 @@ static void si_emit_shader_pointer(struct si_context *sctx, { struct si_descriptors *desc = (struct si_descriptors*)atom; struct radeon_winsys_cs *cs = sctx->b.rings.gfx.cs; - uint64_t va = r600_resource_va(sctx->b.b.screen, &desc->buffer->b.b) + + uint64_t va = desc->buffer->gpu_address + desc->current_context_id * desc->context_size + desc->buffer_offset; @@ -205,7 +203,7 @@ static void si_emit_descriptors(struct si_context *sctx, assert(dirty_mask); - va_base = r600_resource_va(sctx->b.b.screen, &desc->buffer->b.b); + va_base = desc->buffer->gpu_address; /* Copy the descriptors to a new context slot. */ /* XXX Consider using TC or L2 for this copy on CIK. */ @@ -567,7 +565,6 @@ static void si_vertex_buffers_begin_new_cs(struct si_context *sctx) void si_update_vertex_buffers(struct si_context *sctx) { - struct pipe_context *ctx = &sctx->b.b; struct si_descriptors *desc = &sctx->vertex_buffers; bool bound[SI_NUM_VERTEX_BUFFERS] = {}; unsigned i, count = sctx->vertex_elements->count; @@ -611,9 +608,7 @@ void si_update_vertex_buffers(struct si_context *sctx) } offset = vb->buffer_offset + ve->src_offset; - - va = r600_resource_va(ctx->screen, (void*)rbuffer); - va += offset; + va = rbuffer->gpu_address + offset; /* Fill in T# buffer resource description */ desc[0] = va & 0xFFFFFFFF; @@ -703,10 +698,10 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s si_upload_const_buffer(sctx, (struct r600_resource**)&buffer, input->user_buffer, input->buffer_size, &buffer_offset); - va = r600_resource_va(ctx->screen, buffer) + buffer_offset; + va = r600_resource(buffer)->gpu_address + buffer_offset; } else { pipe_resource_reference(&buffer, input->buffer); - va = r600_resource_va(ctx->screen, buffer) + input->buffer_offset; + va = r600_resource(buffer)->gpu_address + input->buffer_offset; } /* Set the descriptor. */ @@ -760,7 +755,7 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot, if (input && input->buffer) { uint64_t va; - va = r600_resource_va(ctx->screen, input->buffer); + va = r600_resource(input->buffer)->gpu_address; switch (element_size) { default: @@ -856,7 +851,7 @@ static void si_set_streamout_targets(struct pipe_context *ctx, if (targets[i]) { struct pipe_resource *buffer = targets[i]->buffer; - uint64_t va = r600_resource_va(ctx->screen, buffer); + uint64_t va = r600_resource(buffer)->gpu_address; /* Set the descriptor. */ uint32_t *desc = buffers->desc_data[bufidx]; @@ -909,7 +904,7 @@ static void si_desc_reset_buffer_offset(struct pipe_context *ctx, uint64_t offset_within_buffer = old_desc_va - old_buf_va; /* Update the descriptor. */ - uint64_t va = r600_resource_va(ctx->screen, new_buf) + offset_within_buffer; + uint64_t va = r600_resource(new_buf)->gpu_address + offset_within_buffer; desc[0] = va; desc[1] = (desc[1] & C_008F04_BASE_ADDRESS_HI) | @@ -930,7 +925,7 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource struct si_context *sctx = (struct si_context*)ctx; struct r600_resource *rbuffer = r600_resource(buf); unsigned i, shader, alignment = rbuffer->buf->alignment; - uint64_t old_va = r600_resource_va(ctx->screen, buf); + uint64_t old_va = rbuffer->gpu_address; unsigned num_elems = sctx->vertex_elements ? sctx->vertex_elements->count : 0; @@ -1076,7 +1071,7 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst, return; } - uint64_t va = r600_resource_va(&sctx->screen->b.b, dst) + offset; + uint64_t va = r600_resource(dst)->gpu_address + offset; /* Flush the caches where the resource is bound. */ /* XXX only flush the caches where the buffer is bound. */ @@ -1142,8 +1137,8 @@ void si_copy_buffer(struct si_context *sctx, util_range_add(&r600_resource(dst)->valid_buffer_range, dst_offset, dst_offset + size); - dst_offset += r600_resource_va(&sctx->screen->b.b, dst); - src_offset += r600_resource_va(&sctx->screen->b.b, src); + dst_offset += r600_resource(dst)->gpu_address; + src_offset += r600_resource(src)->gpu_address; /* Flush the caches where the resource is bound. */ sctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE | diff --git a/src/gallium/drivers/radeonsi/si_dma.c b/src/gallium/drivers/radeonsi/si_dma.c index 26f1e1b63ad..e90874624ed 100644 --- a/src/gallium/drivers/radeonsi/si_dma.c +++ b/src/gallium/drivers/radeonsi/si_dma.c @@ -75,8 +75,8 @@ static void si_dma_copy_buffer(struct si_context *ctx, util_range_add(&rdst->valid_buffer_range, dst_offset, dst_offset + size); - dst_offset += r600_resource_va(&ctx->screen->b.b, dst); - src_offset += r600_resource_va(&ctx->screen->b.b, src); + dst_offset += rdst->gpu_address; + src_offset += rsrc->gpu_address; /* see if we use dword or byte copy */ if (!(dst_offset % 4) && !(src_offset % 4) && !(size % 4)) { @@ -175,8 +175,8 @@ static void si_dma_copy_tile(struct si_context *ctx, util_format_has_stencil(util_format_description(src->format))); nbanks = si_num_banks(sscreen, rsrc->surface.bpe, rsrc->surface.tile_split, tile_mode_index); - base += r600_resource_va(&ctx->screen->b.b, src); - addr += r600_resource_va(&ctx->screen->b.b, dst); + base += rsrc->resource.gpu_address; + addr += rdst->resource.gpu_address; } else { /* L2T */ array_mode = si_array_mode(dst_mode); @@ -204,8 +204,8 @@ static void si_dma_copy_tile(struct si_context *ctx, util_format_has_stencil(util_format_description(dst->format))); nbanks = si_num_banks(sscreen, rdst->surface.bpe, rdst->surface.tile_split, tile_mode_index); - base += r600_resource_va(&ctx->screen->b.b, dst); - addr += r600_resource_va(&ctx->screen->b.b, src); + base += rdst->resource.gpu_address; + addr += rsrc->resource.gpu_address; } pipe_config = cik_db_pipe_config(sscreen, tile_mode_index); diff --git a/src/gallium/drivers/radeonsi/si_hw_context.c b/src/gallium/drivers/radeonsi/si_hw_context.c index 56fa6649882..bd8409bfa62 100644 --- a/src/gallium/drivers/radeonsi/si_hw_context.c +++ b/src/gallium/drivers/radeonsi/si_hw_context.c @@ -176,7 +176,7 @@ void si_trace_emit(struct si_context *sctx) struct radeon_winsys_cs *cs = sctx->cs; uint64_t va; - va = r600_resource_va(&sscreen->screen, (void*)sscreen->b.trace_bo); + va = sscreen->b.trace_bo->gpu_address; r600_context_bo_reloc(sctx, sscreen->b.trace_bo, RADEON_USAGE_READWRITE); cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0); cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) | diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 3dec53683ce..6e9a60a62c6 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -1680,7 +1680,7 @@ static void si_initialize_color_surface(struct si_context *sctx, } } - offset += r600_resource_va(sctx->b.b.screen, surf->base.texture); + offset += rtex->resource.gpu_address; surf->cb_color_base = offset >> 8; surf->cb_color_pitch = color_pitch; @@ -1758,7 +1758,7 @@ static void si_init_depth_surface(struct si_context *sctx, } assert(format != V_028040_Z_INVALID); - s_offs = z_offs = r600_resource_va(sctx->b.b.screen, surf->base.texture); + s_offs = z_offs = rtex->resource.gpu_address; z_offs += rtex->surface.level[level].offset; s_offs += rtex->surface.stencil_level[level].offset; @@ -1841,7 +1841,7 @@ static void si_init_depth_surface(struct si_context *sctx, s_info |= S_028044_TILE_STENCIL_DISABLE(1); } - uint64_t va = r600_resource_va(&sctx->screen->b.b, &rtex->htile_buffer->b.b); + uint64_t va = rtex->htile_buffer->gpu_address; db_htile_data_base = va >> 8; db_htile_surface = S_028ABC_FULL_CACHE(1); } else { @@ -2388,7 +2388,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx desc = util_format_description(state->format); first_non_void = util_format_get_first_non_void_channel(state->format); stride = desc->block.bits / 8; - va = r600_resource_va(ctx->screen, texture) + state->u.buf.first_element*stride; + va = tmp->resource.gpu_address + state->u.buf.first_element*stride; format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); @@ -2533,8 +2533,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx } else if (texture->target == PIPE_TEXTURE_CUBE_ARRAY) depth = texture->array_size / 6; - va = r600_resource_va(ctx->screen, texture); - va += surflevel[0].offset; + va = tmp->resource.gpu_address + surflevel[0].offset; va += tmp->mipmap_shift * surflevel[texture->last_level].slice_size * tmp->surface.array_size; view->state[0] = va >> 8; @@ -2563,7 +2562,7 @@ static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx /* Initialize the sampler view for FMASK. */ if (tmp->fmask.size) { - uint64_t va = r600_resource_va(ctx->screen, texture) + tmp->fmask.offset; + uint64_t va = tmp->resource.gpu_address + tmp->fmask.offset; uint32_t fmask_format; switch (texture->nr_samples) { @@ -2722,9 +2721,7 @@ static void si_set_border_colors(struct si_context *sctx, unsigned count, if (border_color_table) { struct si_pm4_state *pm4 = si_pm4_alloc_state(sctx); - uint64_t va_offset = - r600_resource_va(&sctx->screen->b.b, - (void*)sctx->border_color_table); + uint64_t va_offset = sctx->border_color_table->gpu_address; si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va_offset >> 8); if (sctx->b.chip_class >= CIK) diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index ae839ba1642..411ea04059f 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -54,7 +54,7 @@ static void si_pipe_shader_es(struct pipe_context *ctx, struct si_pipe_shader *s if (pm4 == NULL) return; - va = r600_resource_va(ctx->screen, (void *)shader->bo); + va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); vgpr_comp_cnt = shader->shader.uses_instanceid ? 3 : 0; @@ -129,7 +129,7 @@ static void si_pipe_shader_gs(struct pipe_context *ctx, struct si_pipe_shader *s si_pm4_set_reg(pm4, R_028B5C_VGT_GS_VERT_ITEMSIZE, gs_vert_itemsize); - va = r600_resource_va(ctx->screen, (void *)shader->bo); + va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); si_pm4_set_reg(pm4, R_00B220_SPI_SHADER_PGM_LO_GS, va >> 8); si_pm4_set_reg(pm4, R_00B224_SPI_SHADER_PGM_HI_GS, va >> 40); @@ -166,7 +166,7 @@ static void si_pipe_shader_vs(struct pipe_context *ctx, struct si_pipe_shader *s if (pm4 == NULL) return; - va = r600_resource_va(ctx->screen, (void *)shader->bo); + va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); vgpr_comp_cnt = shader->shader.uses_instanceid ? 3 : 0; @@ -298,7 +298,7 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s shader->spi_shader_col_format); si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader->cb_shader_mask); - va = r600_resource_va(ctx->screen, (void *)shader->bo); + va = shader->bo->gpu_address; si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ, RADEON_PRIO_SHADER_DATA); si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); si_pm4_set_reg(pm4, R_00B024_SPI_SHADER_PGM_HI_PS, va >> 40); @@ -715,9 +715,8 @@ static void si_state_draw(struct si_context *sctx, if (info->count_from_stream_output) { struct r600_so_target *t = (struct r600_so_target*)info->count_from_stream_output; - uint64_t va = r600_resource_va(&sctx->screen->b.b, - &t->buf_filled_size->b.b); - va += t->buf_filled_size_offset; + uint64_t va = t->buf_filled_size->gpu_address + + t->buf_filled_size_offset; si_pm4_set_reg(pm4, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, t->stride_in_dw); @@ -764,16 +763,13 @@ static void si_state_draw(struct si_context *sctx, if (info->indexed) { uint32_t max_size = (ib->buffer->width0 - ib->offset) / sctx->index_buffer.index_size; - uint64_t va; - va = r600_resource_va(&sctx->screen->b.b, ib->buffer); - va += ib->offset; + uint64_t va = r600_resource(ib->buffer)->gpu_address + ib->offset; si_pm4_add_bo(pm4, (struct r600_resource *)ib->buffer, RADEON_USAGE_READ, RADEON_PRIO_MIN); if (info->indirect) { - uint64_t indirect_va = r600_resource_va(&sctx->screen->b.b, - info->indirect); + uint64_t indirect_va = r600_resource(info->indirect)->gpu_address; si_cmd_draw_index_indirect(pm4, indirect_va, va, max_size, info->indirect_offset, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, @@ -787,8 +783,7 @@ static void si_state_draw(struct si_context *sctx, } } else { if (info->indirect) { - uint64_t indirect_va = r600_resource_va(&sctx->screen->b.b, - info->indirect); + uint64_t indirect_va = r600_resource(info->indirect)->gpu_address; si_cmd_draw_indirect(pm4, indirect_va, info->indirect_offset, sh_base_reg + SI_SGPR_BASE_VERTEX * 4, sh_base_reg + SI_SGPR_START_INSTANCE * 4, -- 2.30.2