r600g: add support for virtual address space on cayman v11
author: Jerome Glisse <jglisse@redhat.com>
Fri, 2 Dec 2011 15:20:29 +0000 (10:20 -0500)
committer: Jerome Glisse <jglisse@redhat.com>
Fri, 13 Jan 2012 23:00:53 +0000 (18:00 -0500)
Virtual address space puts userspace in charge of its GPU
address space. It's up to userspace to bind bos into the virtual
address space. Command streams can then be executed using the
IB_VM chunk.

This patch adds support for this configuration. It doesn't remove
the 64K ib size limit, though this limit can be extended up to
1M for the IB_VM chunk.

v2: fix rendering
v3: fix rendering when using index buffer
v4: make vm conditional on kernel support add basic va management
v5: catch the case when we already have va for a bo
v6: agd5f: update on top of ioctl changes
v7: agd5f: further ioctl updates
v8: indentation cleanup + fix non cayman
v9: rebase against latest mesa + improvements from Marek & Michel
v10: fix cut/paste bug
v11: don't rely on updated radeon_drm.h

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
13 files changed:
src/gallium/drivers/r600/evergreen_hw_context.c
src/gallium/drivers/r600/evergreen_state.c
src/gallium/drivers/r600/r600_hw_context.c
src/gallium/drivers/r600/r600_pipe.h
src/gallium/drivers/r600/r600_resource.c
src/gallium/drivers/r600/r600_resource.h
src/gallium/drivers/r600/r600_state_common.c
src/gallium/winsys/radeon/drm/radeon_drm_bo.c
src/gallium/winsys/radeon/drm/radeon_drm_bo.h
src/gallium/winsys/radeon/drm/radeon_drm_cs.c
src/gallium/winsys/radeon/drm/radeon_drm_cs.h
src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
src/gallium/winsys/radeon/drm/radeon_winsys.h

index bd1d969eca39f67e078d248a8e9eb03e394ec843..e75eaf2b79f52036e2972f626956ec17c0bb626f 100644 (file)
@@ -1135,6 +1135,7 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
        struct r600_block *dirty_block = NULL;
        struct r600_block *next_block;
        uint32_t *pm4;
+       uint64_t va;
 
        if (draw->indices) {
                ndwords = 11;
@@ -1174,9 +1175,11 @@ void evergreen_context_draw(struct r600_context *ctx, const struct r600_draw *dr
        pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing);
        pm4[3] = draw->vgt_num_instances;
        if (draw->indices) {
-               pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
-               pm4[5] = draw->indices_bo_offset;
-               pm4[6] = 0;
+               va = r600_resource_va(&ctx->screen->screen, (void*)draw->indices);
+               va += draw->indices_bo_offset;
+               pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
+               pm4[5] = va;
+               pm4[6] = (va >> 32UL) & 0xFF;
                pm4[7] = draw->vgt_num_indices;
                pm4[8] = draw->vgt_draw_initiator;
                pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
index f3aab69dec50d66d61a93b5a4ce19ab37dcffbd9..df6ad28681e5dba98323ad503bb7c96bb5aee0ec 100644 (file)
@@ -1105,8 +1105,8 @@ static struct pipe_sampler_view *evergreen_create_sampler_view(struct pipe_conte
        rstate->val[1] = (S_030004_TEX_HEIGHT(height - 1) |
                          S_030004_TEX_DEPTH(depth - 1) |
                          S_030004_ARRAY_MODE(array_mode));
-       rstate->val[2] = tmp->offset[0] >> 8;
-       rstate->val[3] = tmp->offset[1] >> 8;
+       rstate->val[2] = (tmp->offset[0] + r600_resource_va(ctx->screen, texture)) >> 8;
+       rstate->val[3] = (tmp->offset[1] + r600_resource_va(ctx->screen, texture)) >> 8;
        rstate->val[4] = (word4 |
                          S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
                          S_030010_ENDIAN_SWAP(endian) |
@@ -1343,7 +1343,7 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
        unsigned pitch, slice;
        unsigned color_info;
        unsigned format, swap, ntype, endian;
-       unsigned offset;
+       uint64_t offset;
        unsigned tile_type;
        const struct util_format_description *desc;
        int i;
@@ -1443,10 +1443,13 @@ static void evergreen_cb(struct r600_pipe_context *rctx, struct r600_pipe_state
        } else /* workaround for linear buffers */
                tile_type = 1;
 
+       offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture);
+       offset >>= 8;
+
        /* FIXME handle enabling of CB beyond BASE8 which has different offset */
        r600_pipe_state_add_reg(rstate,
                                R_028C60_CB_COLOR0_BASE + cb * 0x3C,
-                               offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+                               offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
        r600_pipe_state_add_reg(rstate,
                                R_028C78_CB_COLOR0_DIM + cb * 0x3C,
                                0x0, 0xFFFFFFFF, NULL, 0);
@@ -1475,7 +1478,8 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
 {
        struct r600_resource_texture *rtex;
        struct r600_surface *surf;
-       unsigned level, first_layer, pitch, slice, format, offset, array_mode;
+       unsigned level, first_layer, pitch, slice, format, array_mode;
+       uint64_t offset;
 
        if (state->zsbuf == NULL)
                return;
@@ -1494,20 +1498,26 @@ static void evergreen_db(struct r600_pipe_context *rctx, struct r600_pipe_state
        slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
        format = r600_translate_dbformat(rtex->real_format);
 
+       offset += r600_resource_va(rctx->context.screen, surf->base.texture);
+       offset >>= 8;
+
        r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
-                               offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+                               offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
        r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
-                               offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+                               offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
        r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0);
 
        if (rtex->stencil) {
-               uint32_t stencil_offset =
+               uint64_t stencil_offset =
                        r600_texture_get_offset(rtex->stencil, level, first_layer);
 
+               stencil_offset += r600_resource_va(rctx->context.screen, (void*)rtex->stencil);
+               stencil_offset >>= 8;
+
                r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
-                                       stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
+                                       stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
                r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
-                                       stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
+                                       stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
                r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
                                        1, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
        } else {
@@ -2383,7 +2393,8 @@ void evergreen_pipe_shader_ps(struct pipe_context *ctx, struct r600_pipe_shader
 
        r600_pipe_state_add_reg(rstate,
                                R_028840_SQ_PGM_START_PS,
-                               0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
+                               r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
+                               0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
        r600_pipe_state_add_reg(rstate,
                                R_028844_SQ_PGM_RESOURCES_PS,
                                S_028844_NUM_GPRS(rshader->bc.ngpr) |
@@ -2457,7 +2468,8 @@ void evergreen_pipe_shader_vs(struct pipe_context *ctx, struct r600_pipe_shader
                                0x0, 0xFFFFFFFF, NULL, 0);
        r600_pipe_state_add_reg(rstate,
                        R_02885C_SQ_PGM_START_VS,
-                       0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
+                       r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
+                       0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
 
        r600_pipe_state_add_reg(rstate,
                                R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
@@ -2474,7 +2486,7 @@ void evergreen_fetch_shader(struct pipe_context *ctx,
        r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
                                0x00000000, 0xFFFFFFFF, NULL, 0);
        r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
-                               0,
+                               r600_resource_va(ctx->screen, (void *)ve->fetch_shader) >> 8,
                                0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ);
 }
 
@@ -2521,15 +2533,20 @@ void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
 }
 
 
-void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
+void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx,
+                                       struct r600_pipe_resource_state *rstate,
                                        struct r600_resource *rbuffer,
                                        unsigned offset, unsigned stride,
                                        enum radeon_bo_usage usage)
 {
+       uint64_t va;
+
+       va = r600_resource_va(ctx->screen, (void *)rbuffer);
        rstate->bo[0] = rbuffer;
        rstate->bo_usage[0] = usage;
-       rstate->val[0] = offset;
+       rstate->val[0] = (offset + va) & 0xFFFFFFFFUL;
        rstate->val[1] = rbuffer->buf->size - offset - 1;
        rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
-                        S_030008_STRIDE(stride);
+                        S_030008_STRIDE(stride) |
+                        (((va + offset) >> 32UL) & 0xFF);
 }
index 1dba96642aa468ece582ab5750def146f0efa90f..b0a28d98215947224422ee0fcef89cac54790191 100644 (file)
@@ -978,6 +978,8 @@ void r600_context_flush_all(struct r600_context *ctx, unsigned flush_flags)
 void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
                                unsigned flush_mask, struct r600_resource *bo)
 {
+       uint64_t va = 0;
+
        /* if bo has already been flushed */
        if (!(~bo->cs_buf->last_flush & flush_flags)) {
                bo->cs_buf->last_flush &= flush_mask;
@@ -1007,10 +1009,11 @@ void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
                        ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
                }
        } else {
+               va = r600_resource_va(&ctx->screen->screen, (void *)bo);
                ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
                ctx->pm4[ctx->pm4_cdwords++] = flush_flags;
                ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->size + 255) >> 8;
-               ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
+               ctx->pm4[ctx->pm4_cdwords++] = va >> 8;
                ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
                ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
                ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE);
@@ -1590,14 +1593,20 @@ void r600_context_flush(struct r600_context *ctx, unsigned flags)
 
 void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value)
 {
+       uint64_t va;
+
        r600_need_cs_space(ctx, 10, FALSE);
 
+       va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
+       va = va + (offset << 2);
+
        ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
        ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
        ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
        ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-       ctx->pm4[ctx->pm4_cdwords++] = offset << 2;             /* ADDRESS_LO */
-       ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24);   /* DATA_SEL | INT_EN | ADDRESS_HI */
+       ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL;       /* ADDRESS_LO */
+       /* DATA_SEL | INT_EN | ADDRESS_HI */
+       ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF);
        ctx->pm4[ctx->pm4_cdwords++] = value;                   /* DATA_LO */
        ctx->pm4[ctx->pm4_cdwords++] = 0;                       /* DATA_HI */
        ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
@@ -1707,6 +1716,7 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 {
        unsigned new_results_end, i;
        u32 *results;
+       uint64_t va;
 
        r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
 
@@ -1751,13 +1761,16 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
        }
 
        /* emit begin query */
+       va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+       va += query->results_end;
+
        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
                ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-               ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
-               ctx->pm4[ctx->pm4_cdwords++] = 0;
+               ctx->pm4[ctx->pm4_cdwords++] = va;
+               ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFF;
                break;
        case PIPE_QUERY_PRIMITIVES_EMITTED:
        case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -1771,8 +1784,8 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
        case PIPE_QUERY_TIME_ELAPSED:
                ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
                ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-               ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
-               ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
+               ctx->pm4[ctx->pm4_cdwords++] = va;
+               ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFF);
                ctx->pm4[ctx->pm4_cdwords++] = 0;
                ctx->pm4[ctx->pm4_cdwords++] = 0;
                break;
@@ -1787,14 +1800,18 @@ void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
 
 void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 {
+       uint64_t va;
+
+       va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
        /* emit end query */
        switch (query->type) {
        case PIPE_QUERY_OCCLUSION_COUNTER:
        case PIPE_QUERY_OCCLUSION_PREDICATE:
+               va += query->results_end + 8;
                ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
                ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
-               ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8;
-               ctx->pm4[ctx->pm4_cdwords++] = 0;
+               ctx->pm4[ctx->pm4_cdwords++] = va;
+               ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFF;
                break;
        case PIPE_QUERY_PRIMITIVES_EMITTED:
        case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -1806,10 +1823,11 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
                ctx->pm4[ctx->pm4_cdwords++] = 0;
                break;
        case PIPE_QUERY_TIME_ELAPSED:
+               va += query->results_end + query->result_size/2;
                ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
                ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
-               ctx->pm4[ctx->pm4_cdwords++] = query->results_end + query->result_size/2;
-               ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
+               ctx->pm4[ctx->pm4_cdwords++] = va;
+               ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFF);
                ctx->pm4[ctx->pm4_cdwords++] = 0;
                ctx->pm4[ctx->pm4_cdwords++] = 0;
                break;
@@ -1826,6 +1844,8 @@ void r600_query_end(struct r600_context *ctx, struct r600_query *query)
 void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
                            int flag_wait)
 {
+       uint64_t va;
+
        if (operation == PREDICATION_OP_CLEAR) {
                r600_need_cs_space(ctx, 3, FALSE);
 
@@ -1845,12 +1865,13 @@ void r600_query_predication(struct r600_context *ctx, struct r600_query *query,
 
                op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
                                (flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
+               va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
 
                /* emit predicate packets for all data blocks */
                while (results_base != query->results_end) {
                        ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
-                       ctx->pm4[ctx->pm4_cdwords++] = results_base;
-                       ctx->pm4[ctx->pm4_cdwords++] = op;
+                       ctx->pm4[ctx->pm4_cdwords++] = (va + results_base) & 0xFFFFFFFFUL;
+                       ctx->pm4[ctx->pm4_cdwords++] = op | (((va + results_base) >> 32UL) & 0xFF);
                        ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
                        ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer,
                                                                             RADEON_USAGE_READ);
index 447b9dc13a4eefdea5e67731a21e4f0ec350e47e..bd7824383544a6672c58e4af30fa088e7bd50e9d 100644 (file)
@@ -243,7 +243,8 @@ void *evergreen_create_db_flush_dsa(struct r600_pipe_context *rctx);
 void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
 void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
                                         struct r600_pipe_resource_state *rstate);
-void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
+void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx,
+                                       struct r600_pipe_resource_state *rstate,
                                        struct r600_resource *rbuffer,
                                        unsigned offset, unsigned stride,
                                        enum radeon_bo_usage usage);
index f3ab3613c84033d6a72e083f06a99976c4013b9e..01db97ad42c3dcd0bf7ac7ef28a768a34aca026e 100644 (file)
@@ -62,3 +62,14 @@ void r600_init_context_resource_functions(struct r600_pipe_context *r600)
        r600->context.transfer_destroy = u_transfer_destroy_vtbl;
        r600->context.transfer_inline_write = u_transfer_inline_write_vtbl;
 }
+
+/*
+ * Return the GPU virtual address of a resource.
+ *
+ * The resource is treated as a struct r600_resource. When it has a backing
+ * buffer, the address is obtained from the winsys through
+ * buffer_get_virtual_address(); a resource without a buffer yields 0.
+ */
+uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource)
+{
+       struct r600_resource *res = (struct r600_resource*)resource;
+       struct r600_screen *rs;
+
+       /* nothing is backing this resource, so there is no address to report */
+       if (!res->buf)
+               return 0;
+
+       rs = (struct r600_screen*)screen;
+       return rs->ws->buffer_get_virtual_address(res->buf);
+}
index 1ca67298d05f7b76e4095b32bfc99689abbf00cc..f39ac55e877ff29f90a97f7bc74df566ab17ba36 100644 (file)
@@ -102,4 +102,6 @@ struct r600_pipe_context;
 
 void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource **rbuffer, uint32_t *offset);
 
+uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource);
+
 #endif
index 054ab90595c3deb9a89b9d0ce21f71c47178a17b..034a560a7ecfd0fb5bbbb5aaa43f6c88aa96c287 100644 (file)
@@ -337,6 +337,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
        struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
        struct r600_resource *rbuffer = r600_resource(buffer);
        struct r600_pipe_resource_state *rstate;
+       uint64_t va_offset;
        uint32_t offset;
 
        /* Note that the state tracker can unbind constant buffers by
@@ -347,6 +348,9 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
        }
 
        r600_upload_const_buffer(rctx, &rbuffer, &offset);
+       va_offset = r600_resource_va(ctx->screen, (void*)rbuffer);
+       va_offset += offset;
+       va_offset >>= 8;
 
        switch (shader) {
        case PIPE_SHADER_VERTEX:
@@ -357,7 +361,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
                                        0xFFFFFFFF, NULL, 0);
                r600_pipe_state_add_reg(&rctx->vs_const_buffer,
                                        R_028980_ALU_CONST_CACHE_VS_0,
-                                       offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
+                                       va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
                r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
 
                rstate = &rctx->vs_const_buffer_resource[index];
@@ -370,7 +374,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
                }
 
                if (rctx->chip_class >= EVERGREEN) {
-                       evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
+                       evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
                        evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
                } else {
                        r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
@@ -385,7 +389,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
                                        0xFFFFFFFF, NULL, 0);
                r600_pipe_state_add_reg(&rctx->ps_const_buffer,
                                        R_028940_ALU_CONST_CACHE_PS_0,
-                                       offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
+                                       va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
                r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
 
                rstate = &rctx->ps_const_buffer_resource[index];
@@ -397,7 +401,7 @@ void r600_set_constant_buffer(struct pipe_context *ctx, uint shader, uint index,
                        }
                }
                if (rctx->chip_class >= EVERGREEN) {
-                       evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
+                       evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
                        evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
                } else {
                        r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
@@ -522,7 +526,7 @@ static void r600_vertex_buffer_update(struct r600_pipe_context *rctx)
                }
 
                if (rctx->chip_class >= EVERGREEN) {
-                       evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
+                       evergreen_pipe_mod_buffer_resource(&rctx->context, rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
                        evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
                } else {
                        r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
index d4746ffc5357f0255c663438563c90aeb15a56c4..de2906faef9eb2e621756923db68910658be3f76 100644 (file)
@@ -30,6 +30,7 @@
 #include "util/u_hash_table.h"
 #include "util/u_memory.h"
 #include "util/u_simple_list.h"
+#include "util/u_double_list.h"
 #include "os/os_thread.h"
 #include "os/os_mman.h"
 
 #include <xf86drm.h>
 #include <errno.h>
 
+/*
+ * These are copied from radeon_drm.h. Once an updated libdrm is released
+ * we should bump the configure.ac requirement for it and remove the
+ * following definitions.
+ */
 #define RADEON_BO_FLAGS_MACRO_TILE  1
 #define RADEON_BO_FLAGS_MICRO_TILE  2
 #define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20
@@ -57,6 +63,33 @@ struct drm_radeon_gem_wait {
 
 #endif
 
+/*
+ * Fallback definitions for the GEM_VA ioctl interface. They are only compiled
+ * when RADEON_VA_MAP has not already been defined by an included header.
+ */
+#ifndef RADEON_VA_MAP
+
+/* Request codes stored in drm_radeon_gem_va.operation before the ioctl
+ * (this file only issues RADEON_VA_MAP). */
+#define RADEON_VA_MAP               1
+#define RADEON_VA_UNMAP             2
+
+/* Result codes; after the ioctl returns, callers in this file compare
+ * drm_radeon_gem_va.operation against ERROR and VA_EXIST. */
+#define RADEON_VA_RESULT_OK         0
+#define RADEON_VA_RESULT_ERROR      1
+#define RADEON_VA_RESULT_VA_EXIST   2
+
+/* Bits ORed together into drm_radeon_gem_va.flags. */
+#define RADEON_VM_PAGE_VALID        (1 << 0)
+#define RADEON_VM_PAGE_READABLE     (1 << 1)
+#define RADEON_VM_PAGE_WRITEABLE    (1 << 2)
+#define RADEON_VM_PAGE_SYSTEM       (1 << 3)
+#define RADEON_VM_PAGE_SNOOPED      (1 << 4)
+
+/* Argument block passed to drmCommandWriteRead() for DRM_RADEON_GEM_VA. */
+struct drm_radeon_gem_va {
+    /* GEM handle of the buffer (callers pass bo->handle) */
+    uint32_t    handle;
+    /* in: RADEON_VA_* request; read back as a RADEON_VA_RESULT_* code */
+    uint32_t    operation;
+    /* callers in this file always pass 0 */
+    uint32_t    vm_id;
+    /* RADEON_VM_PAGE_* bits */
+    uint32_t    flags;
+    /* in: requested virtual address; read back by callers on VA_EXIST */
+    uint64_t    offset;
+};
+
+/* Command index handed to drmCommandWriteRead(). */
+#define DRM_RADEON_GEM_VA   0x2b
+#endif
+
+
 
 extern const struct pb_vtbl radeon_bo_vtbl;
 
@@ -67,6 +100,12 @@ static INLINE struct radeon_bo *radeon_bo(struct pb_buffer *bo)
     return (struct radeon_bo *)bo;
 }
 
+/*
+ * One free range of virtual address space, kept on radeon_bomgr::va_holes.
+ * Holes are created when a range is released or skipped over, and are
+ * consumed (fully or partially) by radeon_bomgr_find_va().
+ */
+struct radeon_bo_va_hole {
+    /* link in the manager's va_holes list */
+    struct list_head list;
+    /* first address of the free range */
+    uint64_t         offset;
+    /* length of the free range, in the same units as offset */
+    uint64_t         size;
+};
+
 struct radeon_bomgr {
     /* Base class. */
     struct pb_manager base;
@@ -77,6 +116,12 @@ struct radeon_bomgr {
     /* List of buffer handles and its mutex. */
     struct util_hash_table *bo_handles;
     pipe_mutex bo_handles_mutex;
+    pipe_mutex bo_va_mutex;
+
+    /* is virtual address supported */
+    bool va;
+    unsigned va_offset;
+    struct list_head va_holes;
 };
 
 static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
@@ -151,9 +196,94 @@ static boolean radeon_bo_is_busy(struct pb_buffer *_buf,
     }
 }
 
+/*
+ * Reserve @size units of virtual address space and return the start of the
+ * reserved range.
+ *
+ * The first hole on mgr->va_holes that is large enough is used: an exact fit
+ * removes the hole, a larger hole is shrunk from its low end. When no hole
+ * fits, the range is carved from mgr->va_offset, which is then advanced.
+ * The whole operation runs under mgr->bo_va_mutex.
+ */
+static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size)
+{
+    struct radeon_bo_va_hole *cur, *next;
+    uint64_t va;
+
+    pipe_mutex_lock(mgr->bo_va_mutex);
+
+    LIST_FOR_EACH_ENTRY_SAFE(cur, next, &mgr->va_holes, list) {
+        /* too small, keep looking */
+        if (cur->size < size)
+            continue;
+
+        va = cur->offset;
+        if (cur->size == size) {
+            /* exact fit: the hole disappears */
+            list_del(&cur->list);
+            FREE(cur);
+        } else {
+            /* take the low part, leave the remainder as a smaller hole */
+            cur->offset += size;
+            cur->size -= size;
+        }
+        goto out;
+    }
+
+    /* no usable hole: grow the used region */
+    va = mgr->va_offset;
+    mgr->va_offset += size;
+
+out:
+    pipe_mutex_unlock(mgr->bo_va_mutex);
+    return va;
+}
+
+/*
+ * Mark the range [va, va + size) as in use, for a range whose address was
+ * chosen outside of radeon_bomgr_find_va() (callers use it with the offset
+ * read back after RADEON_VA_RESULT_VA_EXIST).
+ *
+ * - If the range starts at or above mgr->va_offset, the gap below it (if any)
+ *   is recorded as a hole and va_offset is moved to the end of the range.
+ * - Otherwise every hole that overlaps the range is trimmed, split or removed
+ *   so that no part of the range can be handed out again, and va_offset is
+ *   advanced if the range extends past it.
+ *
+ * Holes are matched by range overlap, not only by whether @va itself lies
+ * inside the hole, and only the overlapping part of a hole is given up.
+ * On allocation failure some free address space is lost, never reused.
+ * Runs under mgr->bo_va_mutex.
+ */
+static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
+{
+    uint64_t end = va + size;
+
+    pipe_mutex_lock(mgr->bo_va_mutex);
+    if (va >= mgr->va_offset) {
+        if (va > mgr->va_offset) {
+            struct radeon_bo_va_hole *hole;
+            hole = CALLOC_STRUCT(radeon_bo_va_hole);
+            if (hole) {
+                hole->size = va - mgr->va_offset;
+                hole->offset = mgr->va_offset;
+                list_add(&hole->list, &mgr->va_holes);
+            }
+        }
+        mgr->va_offset = end;
+    } else {
+        struct radeon_bo_va_hole *hole, *n;
+
+        LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
+            uint64_t hstart = hole->offset;
+            uint64_t hend = hstart + hole->size;
+
+            /* hole entirely below or above the forced range */
+            if (end <= hstart || va >= hend)
+                continue;
+
+            if (va > hstart) {
+                /* keep the part of the hole below the forced range */
+                hole->size = va - hstart;
+                if (end < hend) {
+                    /* the range sits in the middle: keep the upper part
+                     * too; it is added at the list head, which the
+                     * iteration has already passed */
+                    struct radeon_bo_va_hole *tail;
+                    tail = CALLOC_STRUCT(radeon_bo_va_hole);
+                    if (tail) {
+                        tail->offset = end;
+                        tail->size = hend - end;
+                        list_add(&tail->list, &mgr->va_holes);
+                    }
+                }
+            } else if (end < hend) {
+                /* the range covers the start of the hole only */
+                hole->offset = end;
+                hole->size = hend - end;
+            } else {
+                /* the range covers the whole hole */
+                list_del(&hole->list);
+                FREE(hole);
+            }
+        }
+
+        /* a range that starts below va_offset may still end above it */
+        if (end > mgr->va_offset)
+            mgr->va_offset = end;
+    }
+    pipe_mutex_unlock(mgr->bo_va_mutex);
+}
+
+/*
+ * Give the range [va, va + size) back to the manager.
+ *
+ * A range that ends exactly at mgr->va_offset simply lowers va_offset to @va;
+ * any other range is recorded as a new hole on mgr->va_holes.
+ * Runs under mgr->bo_va_mutex.
+ */
+static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
+{
+    struct radeon_bo_va_hole *gap;
+
+    pipe_mutex_lock(mgr->bo_va_mutex);
+    if ((va + size) != mgr->va_offset) {
+        /* FIXME if this allocation fails the range is simply lost;
+         * a warning might be appropriate
+         */
+        gap = CALLOC_STRUCT(radeon_bo_va_hole);
+        if (gap) {
+            gap->offset = va;
+            gap->size = size;
+            list_add(&gap->list, &mgr->va_holes);
+        }
+    } else {
+        /* topmost range: shrink the used region instead of adding a hole */
+        mgr->va_offset = va;
+    }
+    pipe_mutex_unlock(mgr->bo_va_mutex);
+}
+
 static void radeon_bo_destroy(struct pb_buffer *_buf)
 {
     struct radeon_bo *bo = radeon_bo(_buf);
+    struct radeon_bomgr *mgr = bo->mgr;
     struct drm_gem_close args;
 
     memset(&args, 0, sizeof(args));
@@ -168,6 +298,10 @@ static void radeon_bo_destroy(struct pb_buffer *_buf)
     if (bo->ptr)
         os_munmap(bo->ptr, bo->base.size);
 
+    if (mgr->va) {
+        radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+    }
+
     /* Close object. */
     args.handle = bo->handle;
     drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
@@ -343,6 +477,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
     struct radeon_bo *bo;
     struct drm_radeon_gem_create args;
     struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc;
+    int r;
 
     memset(&args, 0, sizeof(args));
 
@@ -375,8 +510,38 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr,
     bo->mgr = mgr;
     bo->rws = mgr->rws;
     bo->handle = args.handle;
+    bo->va = 0;
     pipe_mutex_init(bo->map_mutex);
 
+    if (mgr->va) {
+        struct drm_radeon_gem_va va;
+
+        bo->va_size = align(size,  4096);
+        bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
+
+        va.handle = bo->handle;
+        va.vm_id = 0;
+        va.operation = RADEON_VA_MAP;
+        va.flags = RADEON_VM_PAGE_READABLE |
+                   RADEON_VM_PAGE_WRITEABLE |
+                   RADEON_VM_PAGE_SNOOPED;
+        va.offset = bo->va;
+        r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+            fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
+            fprintf(stderr, "radeon:    size      : %d bytes\n", size);
+            fprintf(stderr, "radeon:    alignment : %d bytes\n", desc->alignment);
+            fprintf(stderr, "radeon:    domains   : %d\n", args.initial_domain);
+            radeon_bo_destroy(&bo->base);
+            return NULL;
+        }
+        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+            radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+            bo->va = va.offset;
+            radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
+        }
+    }
+
     return &bo->base;
 }
 
@@ -407,6 +572,7 @@ static void radeon_bomgr_destroy(struct pb_manager *_mgr)
     struct radeon_bomgr *mgr = radeon_bomgr(_mgr);
     util_hash_table_destroy(mgr->bo_handles);
     pipe_mutex_destroy(mgr->bo_handles_mutex);
+    pipe_mutex_destroy(mgr->bo_va_mutex);
     FREE(mgr);
 }
 
@@ -438,6 +604,12 @@ struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws)
     mgr->rws = rws;
     mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare);
     pipe_mutex_init(mgr->bo_handles_mutex);
+    pipe_mutex_init(mgr->bo_va_mutex);
+
+    mgr->va = rws->info.r600_virtual_address;
+    mgr->va_offset = rws->info.r600_va_start;
+    list_inithead(&mgr->va_holes);
+
     return &mgr->base;
 }
 
@@ -560,6 +732,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
     struct radeon_bo *bo;
     struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
     struct drm_gem_open open_arg = {};
+    int r;
 
     memset(&open_arg, 0, sizeof(open_arg));
 
@@ -603,6 +776,7 @@ static struct pb_buffer *radeon_winsys_bo_from_handle(struct radeon_winsys *rws,
     bo->base.vtbl = &radeon_bo_vtbl;
     bo->mgr = mgr;
     bo->rws = mgr->rws;
+    bo->va = 0;
     pipe_mutex_init(bo->map_mutex);
 
     util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo);
@@ -613,6 +787,33 @@ done:
     if (stride)
         *stride = whandle->stride;
 
+    if (mgr->va) {
+        struct drm_radeon_gem_va va;
+
+        bo->va_size = ((bo->base.size + 4095) & ~4095);
+        bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
+
+        va.handle = bo->handle;
+        va.operation = RADEON_VA_MAP;
+        va.vm_id = 0;
+        va.offset = bo->va;
+        va.flags = RADEON_VM_PAGE_READABLE |
+                   RADEON_VM_PAGE_WRITEABLE |
+                   RADEON_VM_PAGE_SNOOPED;
+        va.offset = bo->va;
+        r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+        if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+            fprintf(stderr, "radeon: Failed to assign virtual address space\n");
+            radeon_bo_destroy(&bo->base);
+            return NULL;
+        }
+        if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+            radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+            bo->va = va.offset;
+            radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
+        }
+    }
+
     return (struct pb_buffer*)bo;
 
 fail:
@@ -649,6 +850,13 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
     return TRUE;
 }
 
+static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer) /* winsys hook: return the buffer's virtual address */
+{
+    struct radeon_bo *bo = get_radeon_bo(buffer);
+
+    return bo->va; /* 0 if no virtual address was mapped for this bo */
+}
+
 void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
 {
     ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
@@ -661,4 +869,5 @@ void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
     ws->base.buffer_create = radeon_winsys_bo_create;
     ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
     ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
+    ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
 }
index 35d25e87eb37914b67412f30321ac4673f91084d..21cfe995510dfb8904c10c54758093c9111a804e 100644 (file)
@@ -59,6 +59,8 @@ struct radeon_bo {
 
     uint32_t handle;
     uint32_t name;
+    uint64_t va;
+    uint64_t va_size;
 
     /* how many command streams is this bo referenced in? */
     int num_cs_references;
index e6109afd7ea00d1a3e42ac4b3124f9ccfef890b4..cf8f25bc5b44f3b499bac1a0027293b7e385ab45 100644 (file)
 #include <stdint.h>
 #include <xf86drm.h>
 
+/*
+ * These are copied from radeon_drm.h. Once an updated libdrm is released,
+ * we should bump the configure.ac requirement for it and remove the
+ * following definitions.
+ */
 #ifndef RADEON_CHUNK_ID_FLAGS
-#define RADEON_CHUNK_ID_FLAGS  0x03
+#define RADEON_CHUNK_ID_FLAGS       0x03
 
 /* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
 #define RADEON_CS_KEEP_TILING_FLAGS 0x01
 #endif
 
+#ifndef RADEON_CS_USE_VM
+#define RADEON_CS_USE_VM            0x02
+/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
+#define RADEON_CS_RING_GFX          0
+#define RADEON_CS_RING_COMPUTE      1
+#endif
+
+
 #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
 
-static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
+static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
+                                      struct radeon_drm_winsys *ws)
 {
-    csc->fd = fd;
+    csc->fd = ws->fd;
     csc->nrelocs = 512;
     csc->relocs_bo = (struct radeon_bo**)
                      CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
@@ -157,11 +171,11 @@ static struct radeon_winsys_cs *radeon_drm_cs_create(struct radeon_winsys *rws)
 
     cs->ws = ws;
 
-    if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) {
+    if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
         FREE(cs);
         return NULL;
     }
-    if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) {
+    if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
         radeon_destroy_cs_context(&cs->csc1);
         FREE(cs);
         return NULL;
@@ -440,11 +454,15 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, unsigned flags)
             p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
         }
 
+        cs->cst->flags = 0;
+        cs->cst->cs.num_chunks = 2;
         if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
+            cs->cst->flags |= RADEON_CS_KEEP_TILING_FLAGS;
             cs->cst->cs.num_chunks = 3;
-            cs->cst->flags = RADEON_CS_KEEP_TILING_FLAGS;
-        } else {
-            cs->cst->cs.num_chunks = 2;
+        }
+        if (cs->ws->info.r600_virtual_address) {
+            cs->cst->cs.num_chunks = 3;
+            cs->cst->flags |= RADEON_CS_USE_VM;
         }
 
         if (cs->thread &&
index 904000d693379ca2c7b1735cdead6790c42591c3..05b9a487645356a2d8f14248048e2a083986a8f1 100644 (file)
@@ -51,8 +51,8 @@ struct radeon_cs_context {
     struct drm_radeon_cs_reloc  *relocs_hashlist[256];
     unsigned                    reloc_indices_hashlist[256];
 
-    unsigned used_vram;
-    unsigned used_gart;
+    unsigned                    used_vram;
+    unsigned                    used_gart;
 };
 
 struct radeon_drm_cs {
index f337411e2237f90da3ae9ffb88b2e804fd709d29..051a390ed2255bed501b5c41e35bf7e2a007f93d 100644 (file)
 #include <xf86drm.h>
 #include <stdio.h>
 
+/*
+ * These are copied from radeon_drm.h. Once an updated libdrm is released,
+ * we should bump the configure.ac requirement for it and remove the
+ * following definitions.
+ */
 #ifndef RADEON_INFO_TILING_CONFIG
 #define RADEON_INFO_TILING_CONFIG 6
 #endif
 #define RADEON_INFO_BACKEND_MAP 0xd
 #endif
 
+#ifndef RADEON_INFO_VA_START
+/* virtual address start, va < start are reserved by the kernel */
+#define RADEON_INFO_VA_START        0x0e
+/* maximum size of ib using the virtual memory cs */
+#define RADEON_INFO_IB_VM_MAX_SIZE  0x0f
+#endif
+
+
 /* Enable/disable feature access for one command stream.
  * If enable == TRUE, return TRUE on success.
  * Otherwise, return FALSE.
@@ -265,6 +278,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
                                       &ws->info.r600_backend_map))
                 ws->info.r600_backend_map_valid = TRUE;
         }
+        ws->info.r600_virtual_address = FALSE;
+        if (ws->info.drm_minor >= 13) {
+            ws->info.r600_virtual_address = TRUE;
+            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
+                                      &ws->info.r600_va_start))
+                ws->info.r600_virtual_address = FALSE;
+            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
+                                      &ws->info.r600_ib_vm_max_size))
+                ws->info.r600_virtual_address = FALSE;
+        }
     }
 
     return TRUE;
index 59c1aad33083ada627fa02547d5472e1d34f6b3a..d33eaa7059c0ab7ff35af293cba91ebf3536e0cc 100644 (file)
@@ -103,6 +103,9 @@ struct radeon_info {
     uint32_t r600_num_tile_pipes;
     uint32_t r600_backend_map;
     boolean r600_backend_map_valid;
+    boolean r600_virtual_address;
+    uint32_t r600_va_start;
+    uint32_t r600_ib_vm_max_size;
 };
 
 enum radeon_feature_id {
@@ -250,6 +253,14 @@ struct radeon_winsys {
                                  unsigned stride,
                                  struct winsys_handle *whandle);
 
+    /**
+     * Return the virtual address of a buffer.
+     *
+     * \param buf       A winsys buffer object
+     * \return          virtual address
+     */
+    uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf);
+
     /**************************************************************************
      * Command submission.
      *