From bb1f0cf3508630a9a93512c79badf8c493c46743 Mon Sep 17 00:00:00 2001 From: Jerome Glisse Date: Fri, 2 Dec 2011 10:20:29 -0500 Subject: [PATCH] r600g: add support for virtual address space on cayman v11 Virtual address space puts userspace in charge of its GPU address space. It's up to userspace to bind bo into the virtual address space. Command stream can then be executed using the IB_VM chunk. This patch adds support for this configuration. It doesn't remove the 64K ib size limit, though this limit can be extended up to 1M for IB_VM chunk. v2: fix rendering v3: fix rendering when using index buffer v4: make vm conditional on kernel support add basic va management v5: catch the case when we already have va for a bo v6: agd5f: update on top of ioctl changes v7: agd5f: further ioctl updates v8: indentation cleanup + fix non cayman v9: rebase against latest mesa + improvement from Marek & Michel v10: fix cut/paste bug v11: don't rely on updated radeon_drm.h Signed-off-by: Jerome Glisse Signed-off-by: Alex Deucher --- .../drivers/r600/evergreen_hw_context.c | 9 +- src/gallium/drivers/r600/evergreen_state.c | 49 ++-- src/gallium/drivers/r600/r600_hw_context.c | 47 ++-- src/gallium/drivers/r600/r600_pipe.h | 3 +- src/gallium/drivers/r600/r600_resource.c | 11 + src/gallium/drivers/r600/r600_resource.h | 2 + src/gallium/drivers/r600/r600_state_common.c | 14 +- src/gallium/winsys/radeon/drm/radeon_drm_bo.c | 209 ++++++++++++++++++ src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 2 + src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 34 ++- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 4 +- .../winsys/radeon/drm/radeon_drm_winsys.c | 23 ++ src/gallium/winsys/radeon/drm/radeon_winsys.h | 11 + 13 files changed, 370 insertions(+), 48 deletions(-) diff --git a/src/gallium/drivers/r600/evergreen_hw_context.c b/src/gallium/drivers/r600/evergreen_hw_context.c index bd1d969eca3..e75eaf2b79f 100644 --- a/src/gallium/drivers/r600/evergreen_hw_context.c +++ 
b/src/gallium/drivers/r600/evergreen_hw_context.c @@ -1135,6 +1135,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr struct r600_block *dirty_block = NULL; struct r600_block *next_block; uint32_t *pm4; + uint64_t va; if (draw->indices) { ndwords = 11; @@ -1174,9 +1175,11 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing); pm4[3] = draw->vgt_num_instances; if (draw->indices) { - pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); - pm4[5] = draw->indices_bo_offset; - pm4[6] = 0; + va = r600_resource_va(&ctx->screen->screen, (void*)draw->indices); + va += draw->indices_bo_offset; + pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing); + pm4[5] = va; + pm4[6] = (va >> 32UL) & 0xFF; pm4[7] = draw->vgt_num_indices; pm4[8] = draw->vgt_draw_initiator; pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing); diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c index f3aab69dec5..df6ad28681e 100644 --- a/src/gallium/drivers/r600/evergreen_state.c +++ b/src/gallium/drivers/r600/evergreen_state.c @@ -1105,8 +1105,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte rstate->val[1] = (S_030004_TEX_HEIGHT(height - 1) | S_030004_TEX_DEPTH(depth - 1) | S_030004_ARRAY_MODE(array_mode)); - rstate->val[2] = tmp->offset[0] >> 8; - rstate->val[3] = tmp->offset[1] >> 8; + rstate->val[2] = (tmp->offset[0] + r600_resource_va(ctx->screen, texture)) >> 8; + rstate->val[3] = (tmp->offset[1] + r600_resource_va(ctx->screen, texture)) >> 8; rstate->val[4] = (word4 | S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) | S_030010_ENDIAN_SWAP(endian) | @@ -1343,7 +1343,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state unsigned pitch, slice; unsigned color_info; unsigned format, swap, ntype, endian; - unsigned offset; + uint64_t offset; 
unsigned tile_type; const struct util_format_description *desc; int i; @@ -1443,10 +1443,13 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state } else /* workaround for linear buffers */ tile_type = 1; + offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture); + offset >>= 8; + /* FIXME handle enabling of CB beyond BASE8 which has different offset */ r600_pipe_state_add_reg(rstate, R_028C60_CB_COLOR0_BASE + cb * 0x3C, - offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); + offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028C78_CB_COLOR0_DIM + cb * 0x3C, 0x0, 0xFFFFFFFF, NULL, 0); @@ -1475,7 +1478,8 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state { struct r600_resource_texture *rtex; struct r600_surface *surf; - unsigned level, first_layer, pitch, slice, format, offset, array_mode; + unsigned level, first_layer, pitch, slice, format, array_mode; + uint64_t offset; if (state->zsbuf == NULL) return; @@ -1494,20 +1498,26 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1; format = r600_translate_dbformat(rtex->real_format); + offset += r600_resource_va(rctx->context.screen, surf->base.texture); + offset >>= 8; + r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE, - offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); + offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE, - offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); + offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0); if (rtex->stencil) { - uint32_t stencil_offset = + uint64_t stencil_offset = r600_texture_get_offset(rtex->stencil, level, first_layer); + 
stencil_offset += r600_resource_va(rctx->context.screen, (void*)rtex->stencil); + stencil_offset >>= 8; + r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE, - stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE); + stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE, - stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE); + stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE); r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO, 1, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE); } else { @@ -2383,7 +2393,8 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader r600_pipe_state_add_reg(rstate, R_028840_SQ_PGM_START_PS, - 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); + r600_resource_va(ctx->screen, (void *)shader->bo) >> 8, + 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_028844_SQ_PGM_RESOURCES_PS, S_028844_NUM_GPRS(rshader->bc.ngpr) | @@ -2457,7 +2468,8 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader 0x0, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_02885C_SQ_PGM_START_VS, - 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); + r600_resource_va(ctx->screen, (void *)shader->bo) >> 8, + 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ); r600_pipe_state_add_reg(rstate, R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF, @@ -2474,7 +2486,7 @@ void evergreen_fetch_shader(struct pipe_context *ctx, r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS, 0x00000000, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS, - 0, + r600_resource_va(ctx->screen, (void *)ve->fetch_shader) >> 8, 0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ); } @@ -2521,15 +2533,20 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, } -void 
evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, +void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx, + struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, unsigned stride, enum radeon_bo_usage usage) { + uint64_t va; + + va = r600_resource_va(ctx->screen, (void *)rbuffer); rstate->bo[0] = rbuffer; rstate->bo_usage[0] = usage; - rstate->val[0] = offset; + rstate->val[0] = (offset + va) & 0xFFFFFFFFUL; rstate->val[1] = rbuffer->buf->size - offset - 1; rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) | - S_030008_STRIDE(stride); + S_030008_STRIDE(stride) | + (((va + offset) >> 32UL) & 0xFF); } diff --git a/src/gallium/drivers/r600/r600_hw_context.c b/src/gallium/drivers/r600/r600_hw_context.c index 1dba96642aa..b0a28d98215 100644 --- a/src/gallium/drivers/r600/r600_hw_context.c +++ b/src/gallium/drivers/r600/r600_hw_context.c @@ -978,6 +978,8 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags) void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, unsigned flush_mask, struct r600_resource *bo) { + uint64_t va = 0; + /* if bo has already been flushed */ if (!(~bo->cs_buf->last_flush & flush_flags)) { bo->cs_buf->last_flush &= flush_mask; @@ -1007,10 +1009,11 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags, ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH; } } else { + va = r600_resource_va(&ctx->screen->screen, (void *)bo); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, 0); ctx->pm4[ctx->pm4_cdwords++] = flush_flags; ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->size + 255) >> 8; - ctx->pm4[ctx->pm4_cdwords++] = 0x00000000; + ctx->pm4[ctx->pm4_cdwords++] = va >> 8; ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE); @@ -1590,14 +1593,20 @@ void r600_context_flush(struct 
r600_context *ctx, unsigned flags) void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value) { + uint64_t va; + r600_need_cs_space(ctx, 10, FALSE); + va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo); + va = va + (offset << 2); + ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = offset << 2; /* ADDRESS_LO */ - ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); /* DATA_SEL | INT_EN | ADDRESS_HI */ + ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */ + /* DATA_SEL | INT_EN | ADDRESS_HI */ + ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF); ctx->pm4[ctx->pm4_cdwords++] = value; /* DATA_LO */ ctx->pm4[ctx->pm4_cdwords++] = 0; /* DATA_HI */ ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); @@ -1707,6 +1716,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) { unsigned new_results_end, i; u32 *results; + uint64_t va; r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE); @@ -1751,13 +1761,16 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) } /* emit begin query */ + va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); + va += query->results_end; + switch (query->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end; - ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = va; + ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFF; break; case 
PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: @@ -1771,8 +1784,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) case PIPE_QUERY_TIME_ELAPSED: ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end; - ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); + ctx->pm4[ctx->pm4_cdwords++] = va; + ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFF); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; break; @@ -1787,14 +1800,18 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query) void r600_query_end(struct r600_context *ctx, struct r600_query *query) { + uint64_t va; + + va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); /* emit end query */ switch (query->type) { case PIPE_QUERY_OCCLUSION_COUNTER: case PIPE_QUERY_OCCLUSION_PREDICATE: + va += query->results_end + 8; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8; - ctx->pm4[ctx->pm4_cdwords++] = 0; + ctx->pm4[ctx->pm4_cdwords++] = va; + ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFF; break; case PIPE_QUERY_PRIMITIVES_EMITTED: case PIPE_QUERY_PRIMITIVES_GENERATED: @@ -1806,10 +1823,11 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) ctx->pm4[ctx->pm4_cdwords++] = 0; break; case PIPE_QUERY_TIME_ELAPSED: + va += query->results_end + query->result_size/2; ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0); ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5); - ctx->pm4[ctx->pm4_cdwords++] = query->results_end + query->result_size/2; - ctx->pm4[ctx->pm4_cdwords++] = (3 << 29); + ctx->pm4[ctx->pm4_cdwords++] = va; + 
ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFF); ctx->pm4[ctx->pm4_cdwords++] = 0; ctx->pm4[ctx->pm4_cdwords++] = 0; break; @@ -1826,6 +1844,8 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query) void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation, int flag_wait) { + uint64_t va; + if (operation == PREDICATION_OP_CLEAR) { r600_need_cs_space(ctx, 3, FALSE); @@ -1845,12 +1865,13 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query, op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE | (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW); + va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer); /* emit predicate packets for all data blocks */ while (results_base != query->results_end) { ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0); - ctx->pm4[ctx->pm4_cdwords++] = results_base; - ctx->pm4[ctx->pm4_cdwords++] = op; + ctx->pm4[ctx->pm4_cdwords++] = (va + results_base) & 0xFFFFFFFFUL; + ctx->pm4[ctx->pm4_cdwords++] = op | (((va + results_base) >> 32UL) & 0xFF); ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0); ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_READ); diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h index 447b9dc13a4..bd782438354 100644 --- a/src/gallium/drivers/r600/r600_pipe.h +++ b/src/gallium/drivers/r600/r600_pipe.h @@ -243,7 +243,8 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx); void evergreen_polygon_offset_update(struct r600_pipe_context *rctx); void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx, struct r600_pipe_resource_state *rstate); -void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate, +void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx, + struct r600_pipe_resource_state *rstate, struct r600_resource *rbuffer, unsigned offset, 
unsigned stride, enum radeon_bo_usage usage); diff --git a/src/gallium/drivers/r600/r600_resource.c b/src/gallium/drivers/r600/r600_resource.c index f3ab3613c84..01db97ad42c 100644 --- a/src/gallium/drivers/r600/r600_resource.c +++ b/src/gallium/drivers/r600/r600_resource.c @@ -62,3 +62,14 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600) r600->context.transfer_destroy = u_transfer_destroy_vtbl; r600->context.transfer_inline_write = u_transfer_inline_write_vtbl; } + +uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource) +{ + struct r600_screen *rscreen = (struct r600_screen*)screen; + struct r600_resource *rresource = (struct r600_resource*)resource; + + if (rresource->buf) { + return rscreen->ws->buffer_get_virtual_address(rresource->buf); + } + return 0; +} diff --git a/src/gallium/drivers/r600/r600_resource.h b/src/gallium/drivers/r600/r600_resource.h index 1ca67298d05..f39ac55e877 100644 --- a/src/gallium/drivers/r600/r600_resource.h +++ b/src/gallium/drivers/r600/r600_resource.h @@ -102,4 +102,6 @@ struct r600_pipe_context; void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource **rbuffer, uint32_t *offset); +uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource); + #endif diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c index 054ab90595c..034a560a7ec 100644 --- a/src/gallium/drivers/r600/r600_state_common.c +++ b/src/gallium/drivers/r600/r600_state_common.c @@ -337,6 +337,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx; struct r600_resource *rbuffer = r600_resource(buffer); struct r600_pipe_resource_state *rstate; + uint64_t va_offset; uint32_t offset; /* Note that the state tracker can unbind constant buffers by @@ -347,6 +348,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, 
uint shader, uint index, } r600_upload_const_buffer(rctx, &rbuffer, &offset); + va_offset = r600_resource_va(ctx->screen, (void*)rbuffer); + va_offset += offset; + va_offset >>= 8; switch (shader) { case PIPE_SHADER_VERTEX: @@ -357,7 +361,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->vs_const_buffer, R_028980_ALU_CONST_CACHE_VS_0, - offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ); + va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ); r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer); rstate = &rctx->vs_const_buffer_resource[index]; @@ -370,7 +374,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ); + evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ); evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index); } else { r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ); @@ -385,7 +389,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, 0xFFFFFFFF, NULL, 0); r600_pipe_state_add_reg(&rctx->ps_const_buffer, R_028940_ALU_CONST_CACHE_PS_0, - offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ); + va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ); r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer); rstate = &rctx->ps_const_buffer_resource[index]; @@ -397,7 +401,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index, } } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ); + evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ); evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index); } else { r600_pipe_mod_buffer_resource(rstate, 
rbuffer, offset, 16, RADEON_USAGE_READ); @@ -522,7 +526,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx) } if (rctx->chip_class >= EVERGREEN) { - evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); + evergreen_pipe_mod_buffer_resource(&rctx->context, rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i); } else { r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index d4746ffc535..de2906faef9 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -30,6 +30,7 @@ #include "util/u_hash_table.h" #include "util/u_memory.h" #include "util/u_simple_list.h" +#include "util/u_double_list.h" #include "os/os_thread.h" #include "os/os_mman.h" @@ -39,6 +40,11 @@ #include #include +/* + * this are copy from radeon_drm, once an updated libdrm is released + * we should bump configure.ac requirement for it and remove the following + * field + */ #define RADEON_BO_FLAGS_MACRO_TILE 1 #define RADEON_BO_FLAGS_MICRO_TILE 2 #define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20 @@ -57,6 +63,33 @@ struct drm_radeon_gem_wait { #endif +#ifndef RADEON_VA_MAP + +#define RADEON_VA_MAP 1 +#define RADEON_VA_UNMAP 2 + +#define RADEON_VA_RESULT_OK 0 +#define RADEON_VA_RESULT_ERROR 1 +#define RADEON_VA_RESULT_VA_EXIST 2 + +#define RADEON_VM_PAGE_VALID (1 << 0) +#define RADEON_VM_PAGE_READABLE (1 << 1) +#define RADEON_VM_PAGE_WRITEABLE (1 << 2) +#define RADEON_VM_PAGE_SYSTEM (1 << 3) +#define RADEON_VM_PAGE_SNOOPED (1 << 4) + +struct drm_radeon_gem_va { + uint32_t handle; + uint32_t operation; + uint32_t vm_id; + uint32_t flags; + uint64_t offset; +}; + +#define DRM_RADEON_GEM_VA 0x2b +#endif + + extern const struct pb_vtbl radeon_bo_vtbl; 
@@ -67,6 +100,12 @@ static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo) return (struct radeon_bo *)bo; } +struct radeon_bo_va_hole { + struct list_head list; + uint64_t offset; + uint64_t size; +}; + struct radeon_bomgr { /* Base class. */ struct pb_manager base; @@ -77,6 +116,12 @@ struct radeon_bomgr { /* List of buffer handles and its mutex. */ struct util_hash_table *bo_handles; pipe_mutex bo_handles_mutex; + pipe_mutex bo_va_mutex; + + /* is virtual address supported */ + bool va; + unsigned va_offset; + struct list_head va_holes; }; static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr) @@ -151,9 +196,94 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf, } } +static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size) +{ + struct radeon_bo_va_hole *hole, *n; + uint64_t offset = 0; + + pipe_mutex_lock(mgr->bo_va_mutex); + /* first look for a hole */ + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) { + if (hole->size == size) { + offset = hole->offset; + list_del(&hole->list); + FREE(hole); + pipe_mutex_unlock(mgr->bo_va_mutex); + return offset; + } + if (hole->size > size) { + offset = hole->offset; + hole->size -= size; + hole->offset += size; + pipe_mutex_unlock(mgr->bo_va_mutex); + return offset; + } + } + + offset = mgr->va_offset; + mgr->va_offset += size; + pipe_mutex_unlock(mgr->bo_va_mutex); + return offset; +} + +static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size) +{ + pipe_mutex_lock(mgr->bo_va_mutex); + if (va >= mgr->va_offset) { + if (va > mgr->va_offset) { + struct radeon_bo_va_hole *hole; + hole = CALLOC_STRUCT(radeon_bo_va_hole); + if (hole) { + hole->size = va - mgr->va_offset; + hole->offset = mgr->va_offset; + list_add(&hole->list, &mgr->va_holes); + } + } + mgr->va_offset = va + size; + } else { + struct radeon_bo_va_hole *hole, *n; + uint64_t stmp, etmp; + + /* free all holes that fall into the range + * NOTE that we might lose virtual 
address space + */ + LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) { + stmp = hole->offset; + etmp = stmp + hole->size; + if (va >= stmp && va < etmp) { + list_del(&hole->list); + FREE(hole); + } + } + } + pipe_mutex_unlock(mgr->bo_va_mutex); +} + +static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size) +{ + pipe_mutex_lock(mgr->bo_va_mutex); + if ((va + size) == mgr->va_offset) { + mgr->va_offset = va; + } else { + struct radeon_bo_va_hole *hole; + + /* FIXME on allocation failure we just lose virtual address space + * maybe print a warning + */ + hole = CALLOC_STRUCT(radeon_bo_va_hole); + if (hole) { + hole->size = size; + hole->offset = va; + list_add(&hole->list, &mgr->va_holes); + } + } + pipe_mutex_unlock(mgr->bo_va_mutex); +} + static void radeon_bo_destroy(struct pb_buffer *_buf) { struct radeon_bo *bo = radeon_bo(_buf); + struct radeon_bomgr *mgr = bo->mgr; struct drm_gem_close args; memset(&args, 0, sizeof(args)); @@ -168,6 +298,10 @@ static void radeon_bo_destroy(struct pb_buffer *_buf) if (bo->ptr) os_munmap(bo->ptr, bo->base.size); + if (mgr->va) { + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); + } + /* Close object. 
*/ args.handle = bo->handle; drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args); @@ -343,6 +477,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr, struct radeon_bo *bo; struct drm_radeon_gem_create args; struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc; + int r; memset(&args, 0, sizeof(args)); @@ -375,8 +510,38 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr, bo->mgr = mgr; bo->rws = mgr->rws; bo->handle = args.handle; + bo->va = 0; pipe_mutex_init(bo->map_mutex); + if (mgr->va) { + struct drm_radeon_gem_va va; + + bo->va_size = align(size, 4096); + bo->va = radeon_bomgr_find_va(mgr, bo->va_size); + + va.handle = bo->handle; + va.vm_id = 0; + va.operation = RADEON_VA_MAP; + va.flags = RADEON_VM_PAGE_READABLE | + RADEON_VM_PAGE_WRITEABLE | + RADEON_VM_PAGE_SNOOPED; + va.offset = bo->va; + r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); + if (r && va.operation == RADEON_VA_RESULT_ERROR) { + fprintf(stderr, "radeon: Failed to allocate a buffer:\n"); + fprintf(stderr, "radeon: size : %d bytes\n", size); + fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment); + fprintf(stderr, "radeon: domains : %d\n", args.initial_domain); + radeon_bo_destroy(&bo->base); + return NULL; + } + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); + bo->va = va.offset; + radeon_bomgr_force_va(mgr, bo->va, bo->va_size); + } + } + return &bo->base; } @@ -407,6 +572,7 @@ static void radeon_bomgr_destroy(struct pb_manager *_mgr) struct radeon_bomgr *mgr = radeon_bomgr(_mgr); util_hash_table_destroy(mgr->bo_handles); pipe_mutex_destroy(mgr->bo_handles_mutex); + pipe_mutex_destroy(mgr->bo_va_mutex); FREE(mgr); } @@ -438,6 +604,12 @@ struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws) mgr->rws = rws; mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare); pipe_mutex_init(mgr->bo_handles_mutex); + 
pipe_mutex_init(mgr->bo_va_mutex); + + mgr->va = rws->info.r600_virtual_address; + mgr->va_offset = rws->info.r600_va_start; + list_inithead(&mgr->va_holes); + return &mgr->base; } @@ -560,6 +732,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, struct radeon_bo *bo; struct radeon_bomgr *mgr = radeon_bomgr(ws->kman); struct drm_gem_open open_arg = {}; + int r; memset(&open_arg, 0, sizeof(open_arg)); @@ -603,6 +776,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws, bo->base.vtbl = &radeon_bo_vtbl; bo->mgr = mgr; bo->rws = mgr->rws; + bo->va = 0; pipe_mutex_init(bo->map_mutex); util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo); @@ -613,6 +787,33 @@ done: if (stride) *stride = whandle->stride; + if (mgr->va) { + struct drm_radeon_gem_va va; + + bo->va_size = ((bo->base.size + 4095) & ~4095); + bo->va = radeon_bomgr_find_va(mgr, bo->va_size); + + va.handle = bo->handle; + va.operation = RADEON_VA_MAP; + va.vm_id = 0; + va.offset = bo->va; + va.flags = RADEON_VM_PAGE_READABLE | + RADEON_VM_PAGE_WRITEABLE | + RADEON_VM_PAGE_SNOOPED; + va.offset = bo->va; + r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va)); + if (r && va.operation == RADEON_VA_RESULT_ERROR) { + fprintf(stderr, "radeon: Failed to assign virtual address space\n"); + radeon_bo_destroy(&bo->base); + return NULL; + } + if (va.operation == RADEON_VA_RESULT_VA_EXIST) { + radeon_bomgr_free_va(mgr, bo->va, bo->va_size); + bo->va = va.offset; + radeon_bomgr_force_va(mgr, bo->va, bo->va_size); + } + } + return (struct pb_buffer*)bo; fail: @@ -649,6 +850,13 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer, return TRUE; } +static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer) +{ + struct radeon_bo *bo = get_radeon_bo(buffer); + + return bo->va; +} + void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) { ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle; @@ 
-661,4 +869,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws) ws->base.buffer_create = radeon_winsys_bo_create; ws->base.buffer_from_handle = radeon_winsys_bo_from_handle; ws->base.buffer_get_handle = radeon_winsys_bo_get_handle; + ws->base.buffer_get_virtual_address = radeon_winsys_bo_va; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index 35d25e87eb3..21cfe995510 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -59,6 +59,8 @@ struct radeon_bo { uint32_t handle; uint32_t name; + uint64_t va; + uint64_t va_size; /* how many command streams is this bo referenced in? */ int num_cs_references; diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index e6109afd7ea..cf8f25bc5b4 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -71,18 +71,32 @@ #include #include +/* + * this are copy from radeon_drm, once an updated libdrm is released + * we should bump configure.ac requirement for it and remove the following + * field + */ #ifndef RADEON_CHUNK_ID_FLAGS -#define RADEON_CHUNK_ID_FLAGS 0x03 +#define RADEON_CHUNK_ID_FLAGS 0x03 /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */ #define RADEON_CS_KEEP_TILING_FLAGS 0x01 #endif +#ifndef RADEON_CS_USE_VM +#define RADEON_CS_USE_VM 0x02 +/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */ +#define RADEON_CS_RING_GFX 0 +#define RADEON_CS_RING_COMPUTE 1 +#endif + + #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) -static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd) +static boolean radeon_init_cs_context(struct radeon_cs_context *csc, + struct radeon_drm_winsys *ws) { - csc->fd = fd; + csc->fd = ws->fd; csc->nrelocs = 512; csc->relocs_bo = (struct radeon_bo**) 
CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*)); @@ -157,11 +171,11 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws) cs->ws = ws; - if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) { + if (!radeon_init_cs_context(&cs->csc1, cs->ws)) { FREE(cs); return NULL; } - if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) { + if (!radeon_init_cs_context(&cs->csc2, cs->ws)) { radeon_destroy_cs_context(&cs->csc1); FREE(cs); return NULL; @@ -440,11 +454,15 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags) p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls); } + cs->cst->flags = 0; + cs->cst->cs.num_chunks = 2; if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) { + cs->cst->flags |= RADEON_CS_KEEP_TILING_FLAGS; cs->cst->cs.num_chunks = 3; - cs->cst->flags = RADEON_CS_KEEP_TILING_FLAGS; - } else { - cs->cst->cs.num_chunks = 2; + } + if (cs->ws->info.r600_virtual_address) { + cs->cst->cs.num_chunks = 3; + cs->cst->flags |= RADEON_CS_USE_VM; } if (cs->thread && diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 904000d6933..05b9a487645 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -51,8 +51,8 @@ struct radeon_cs_context { struct drm_radeon_cs_reloc *relocs_hashlist[256]; unsigned reloc_indices_hashlist[256]; - unsigned used_vram; - unsigned used_gart; + unsigned used_vram; + unsigned used_gart; }; struct radeon_drm_cs { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index f337411e223..051a390ed22 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -41,6 +41,11 @@ #include #include +/* + * this are copy from radeon_drm, once an updated libdrm is released + * we should bump configure.ac requirement for it and remove the following + * field + */ #ifndef 
RADEON_INFO_TILING_CONFIG #define RADEON_INFO_TILING_CONFIG 6 #endif @@ -69,6 +74,14 @@ #define RADEON_INFO_BACKEND_MAP 0xd #endif +#ifndef RADEON_INFO_VA_START +/* virtual address start, va < start are reserved by the kernel */ +#define RADEON_INFO_VA_START 0x0e +/* maximum size of ib using the virtual memory cs */ +#define RADEON_INFO_IB_VM_MAX_SIZE 0x0f +#endif + + /* Enable/disable feature access for one command stream. * If enable == TRUE, return TRUE on success. * Otherwise, return FALSE. @@ -265,6 +278,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws) &ws->info.r600_backend_map)) ws->info.r600_backend_map_valid = TRUE; } + ws->info.r600_virtual_address = FALSE; + if (ws->info.drm_minor >= 13) { + ws->info.r600_virtual_address = TRUE; + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL, + &ws->info.r600_va_start)) + ws->info.r600_virtual_address = FALSE; + if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL, + &ws->info.r600_ib_vm_max_size)) + ws->info.r600_virtual_address = FALSE; + } } return TRUE; diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index 59c1aad3308..d33eaa7059c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -103,6 +103,9 @@ struct radeon_info { uint32_t r600_num_tile_pipes; uint32_t r600_backend_map; boolean r600_backend_map_valid; + boolean r600_virtual_address; + uint32_t r600_va_start; + uint32_t r600_ib_vm_max_size; }; enum radeon_feature_id { @@ -250,6 +253,14 @@ struct radeon_winsys { unsigned stride, struct winsys_handle *whandle); + /** + * Return the virtual address of a buffer. + * + * \param buf A winsys buffer object + * \return virtual address + */ + uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf); + /************************************************************************** * Command submission. * -- 2.30.2