struct r600_block *dirty_block = NULL;
struct r600_block *next_block;
uint32_t *pm4;
+ uint64_t va;
if (draw->indices) {
ndwords = 11;
pm4[2] = PKT3(PKT3_NUM_INSTANCES, 0, ctx->predicate_drawing);
pm4[3] = draw->vgt_num_instances;
if (draw->indices) {
- pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
- pm4[5] = draw->indices_bo_offset;
- pm4[6] = 0;
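+ /* with virtual memory the index buffer is addressed by GPU VA; the
+ * packet carries the low 32 bits plus the 8 high address bits
+ */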
+ va = r600_resource_va(&ctx->screen->screen, (void*)draw->indices);
+ va += draw->indices_bo_offset;
+ pm4[4] = PKT3(PKT3_DRAW_INDEX, 3, ctx->predicate_drawing);
+ pm4[5] = va;
+ pm4[6] = (va >> 32UL) & 0xFF;
pm4[7] = draw->vgt_num_indices;
pm4[8] = draw->vgt_draw_initiator;
pm4[9] = PKT3(PKT3_NOP, 0, ctx->predicate_drawing);
rstate->val[1] = (S_030004_TEX_HEIGHT(height - 1) |
S_030004_TEX_DEPTH(depth - 1) |
S_030004_ARRAY_MODE(array_mode));
- rstate->val[2] = tmp->offset[0] >> 8;
- rstate->val[3] = tmp->offset[1] >> 8;
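+ /* texture base registers take a 256-byte aligned GPU address, hence the >> 8 */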
+ rstate->val[2] = (tmp->offset[0] + r600_resource_va(ctx->screen, texture)) >> 8;
+ rstate->val[3] = (tmp->offset[1] + r600_resource_va(ctx->screen, texture)) >> 8;
rstate->val[4] = (word4 |
S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
S_030010_ENDIAN_SWAP(endian) |
unsigned pitch, slice;
unsigned color_info;
unsigned format, swap, ntype, endian;
- unsigned offset;
+ uint64_t offset;
unsigned tile_type;
const struct util_format_description *desc;
int i;
} else /* workaround for linear buffers */
tile_type = 1;
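+ /* CB_COLOR*_BASE wants (VA + surface offset) in 256-byte units */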
+ offset += r600_resource_va(rctx->context.screen, state->cbufs[cb]->texture);
+ offset >>= 8;
+
/* FIXME handle enabling of CB beyond BASE8 which has different offset */
r600_pipe_state_add_reg(rstate,
R_028C60_CB_COLOR0_BASE + cb * 0x3C,
- offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+ offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
r600_pipe_state_add_reg(rstate,
R_028C78_CB_COLOR0_DIM + cb * 0x3C,
0x0, 0xFFFFFFFF, NULL, 0);
{
struct r600_resource_texture *rtex;
struct r600_surface *surf;
- unsigned level, first_layer, pitch, slice, format, offset, array_mode;
+ unsigned level, first_layer, pitch, slice, format, array_mode;
+ uint64_t offset;
if (state->zsbuf == NULL)
return;
slice = rtex->pitch_in_blocks[level] * surf->aligned_height / 64 - 1;
format = r600_translate_dbformat(rtex->real_format);
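+ /* DB Z/stencil base registers likewise take (VA + offset) >> 8 */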
+ offset += r600_resource_va(rctx->context.screen, surf->base.texture);
+ offset >>= 8;
+
r600_pipe_state_add_reg(rstate, R_028048_DB_Z_READ_BASE,
- offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+ offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
r600_pipe_state_add_reg(rstate, R_028050_DB_Z_WRITE_BASE,
- offset >> 8, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
+ offset, 0xFFFFFFFF, &rtex->resource, RADEON_USAGE_READWRITE);
r600_pipe_state_add_reg(rstate, R_028008_DB_DEPTH_VIEW, 0x00000000, 0xFFFFFFFF, NULL, 0);
if (rtex->stencil) {
- uint32_t stencil_offset =
+ uint64_t stencil_offset =
r600_texture_get_offset(rtex->stencil, level, first_layer);
+ stencil_offset += r600_resource_va(rctx->context.screen, (void*)rtex->stencil);
+ stencil_offset >>= 8;
+
r600_pipe_state_add_reg(rstate, R_02804C_DB_STENCIL_READ_BASE,
- stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
+ stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
r600_pipe_state_add_reg(rstate, R_028054_DB_STENCIL_WRITE_BASE,
- stencil_offset >> 8, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
+ stencil_offset, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
r600_pipe_state_add_reg(rstate, R_028044_DB_STENCIL_INFO,
1, 0xFFFFFFFF, &rtex->stencil->resource, RADEON_USAGE_READWRITE);
} else {
r600_pipe_state_add_reg(rstate,
R_028840_SQ_PGM_START_PS,
- 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
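+ /* with VM the shader GPU address is programmed directly (>> 8);
+ * previously the kernel filled the register in via the relocation
+ */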
+ r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
+ 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
r600_pipe_state_add_reg(rstate,
R_028844_SQ_PGM_RESOURCES_PS,
S_028844_NUM_GPRS(rshader->bc.ngpr) |
0x0, 0xFFFFFFFF, NULL, 0);
r600_pipe_state_add_reg(rstate,
R_02885C_SQ_PGM_START_VS,
- 0, 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
+ r600_resource_va(ctx->screen, (void *)shader->bo) >> 8,
+ 0xFFFFFFFF, shader->bo, RADEON_USAGE_READ);
r600_pipe_state_add_reg(rstate,
R_03A200_SQ_LOOP_CONST_0 + (32 * 4), 0x01000FFF,
r600_pipe_state_add_reg(rstate, R_0288A8_SQ_PGM_RESOURCES_FS,
0x00000000, 0xFFFFFFFF, NULL, 0);
r600_pipe_state_add_reg(rstate, R_0288A4_SQ_PGM_START_FS,
- 0,
+ r600_resource_va(ctx->screen, (void *)ve->fetch_shader) >> 8,
0xFFFFFFFF, ve->fetch_shader, RADEON_USAGE_READ);
}
}
-void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
+void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx,
+ struct r600_pipe_resource_state *rstate,
struct r600_resource *rbuffer,
unsigned offset, unsigned stride,
enum radeon_bo_usage usage)
{
+ uint64_t va;
+
+ va = r600_resource_va(ctx->screen, (void *)rbuffer);
rstate->bo[0] = rbuffer;
rstate->bo_usage[0] = usage;
- rstate->val[0] = offset;
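+ /* low 32 bits of the buffer GPU address go in val[0]; the 8 high
+ * bits are folded into the stride dword below
+ */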
+ rstate->val[0] = (offset + va) & 0xFFFFFFFFUL;
rstate->val[1] = rbuffer->buf->size - offset - 1;
rstate->val[2] = S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
- S_030008_STRIDE(stride);
+ S_030008_STRIDE(stride) |
+ (((va + offset) >> 32UL) & 0xFF);
}
void r600_context_bo_flush(struct r600_context *ctx, unsigned flush_flags,
unsigned flush_mask, struct r600_resource *bo)
{
+ uint64_t va = 0;
+
/* if bo has already been flushed */
if (!(~bo->cs_buf->last_flush & flush_flags)) {
bo->cs_buf->last_flush &= flush_mask;
ctx->flags &= ~R600_CONTEXT_CHECK_EVENT_FLUSH;
}
} else {
+ va = r600_resource_va(&ctx->screen->screen, (void *)bo);
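+ /* SURFACE_SYNC takes both the size and the base address in 256-byte units */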
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
ctx->pm4[ctx->pm4_cdwords++] = flush_flags;
ctx->pm4[ctx->pm4_cdwords++] = (bo->buf->size + 255) >> 8;
- ctx->pm4[ctx->pm4_cdwords++] = 0x00000000;
+ ctx->pm4[ctx->pm4_cdwords++] = va >> 8;
ctx->pm4[ctx->pm4_cdwords++] = 0x0000000A;
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, bo, RADEON_USAGE_WRITE);
void r600_context_emit_fence(struct r600_context *ctx, struct r600_resource *fence_bo, unsigned offset, unsigned value)
{
+ uint64_t va;
+
r600_need_cs_space(ctx, 10, FALSE);
+ va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
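+ /* offset counts dwords; EVENT_WRITE_EOP wants a byte address */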
+ va = va + (offset << 2);
+
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
- ctx->pm4[ctx->pm4_cdwords++] = offset << 2; /* ADDRESS_LO */
- ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24); /* DATA_SEL | INT_EN | ADDRESS_HI */
+ ctx->pm4[ctx->pm4_cdwords++] = va & 0xFFFFFFFFUL; /* ADDRESS_LO */
+ /* DATA_SEL | INT_EN | ADDRESS_HI */
+ ctx->pm4[ctx->pm4_cdwords++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF);
ctx->pm4[ctx->pm4_cdwords++] = value; /* DATA_LO */
ctx->pm4[ctx->pm4_cdwords++] = 0; /* DATA_HI */
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
{
unsigned new_results_end, i;
u32 *results;
+ uint64_t va;
r600_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);
}
/* emit begin query */
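+ /* query results are written at the buffer GPU VA plus the running results offset */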
+ va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
+ va += query->results_end;
+
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
- ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
- ctx->pm4[ctx->pm4_cdwords++] = 0;
+ ctx->pm4[ctx->pm4_cdwords++] = va;
+ ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFF;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
case PIPE_QUERY_TIME_ELAPSED:
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
- ctx->pm4[ctx->pm4_cdwords++] = query->results_end;
- ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
+ ctx->pm4[ctx->pm4_cdwords++] = va;
+ ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFF);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = 0;
break;
void r600_query_end(struct r600_context *ctx, struct r600_query *query)
{
+ uint64_t va;
+
+ va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
/* emit end query */
switch (query->type) {
case PIPE_QUERY_OCCLUSION_COUNTER:
case PIPE_QUERY_OCCLUSION_PREDICATE:
+ va += query->results_end + 8;
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
- ctx->pm4[ctx->pm4_cdwords++] = query->results_end + 8;
- ctx->pm4[ctx->pm4_cdwords++] = 0;
+ ctx->pm4[ctx->pm4_cdwords++] = va;
+ ctx->pm4[ctx->pm4_cdwords++] = (va >> 32UL) & 0xFF;
break;
case PIPE_QUERY_PRIMITIVES_EMITTED:
case PIPE_QUERY_PRIMITIVES_GENERATED:
ctx->pm4[ctx->pm4_cdwords++] = 0;
break;
case PIPE_QUERY_TIME_ELAPSED:
+ va += query->results_end + query->result_size/2;
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
ctx->pm4[ctx->pm4_cdwords++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
- ctx->pm4[ctx->pm4_cdwords++] = query->results_end + query->result_size/2;
- ctx->pm4[ctx->pm4_cdwords++] = (3 << 29);
+ ctx->pm4[ctx->pm4_cdwords++] = va;
+ ctx->pm4[ctx->pm4_cdwords++] = (3 << 29) | ((va >> 32UL) & 0xFF);
ctx->pm4[ctx->pm4_cdwords++] = 0;
ctx->pm4[ctx->pm4_cdwords++] = 0;
break;
void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
int flag_wait)
{
+ uint64_t va;
+
if (operation == PREDICATION_OP_CLEAR) {
r600_need_cs_space(ctx, 3, FALSE);
op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
+ va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
/* emit predicate packets for all data blocks */
while (results_base != query->results_end) {
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
- ctx->pm4[ctx->pm4_cdwords++] = results_base;
- ctx->pm4[ctx->pm4_cdwords++] = op;
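+ /* dword 1: low 32 bits of the results address; dword 2: op | 8 high address bits */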
+ ctx->pm4[ctx->pm4_cdwords++] = (va + results_base) & 0xFFFFFFFFUL;
+ ctx->pm4[ctx->pm4_cdwords++] = op | (((va + results_base) >> 32UL) & 0xFF);
ctx->pm4[ctx->pm4_cdwords++] = PKT3(PKT3_NOP, 0, 0);
ctx->pm4[ctx->pm4_cdwords++] = r600_context_bo_reloc(ctx, query->buffer,
RADEON_USAGE_READ);
void evergreen_polygon_offset_update(struct r600_pipe_context *rctx);
void evergreen_pipe_init_buffer_resource(struct r600_pipe_context *rctx,
struct r600_pipe_resource_state *rstate);
-void evergreen_pipe_mod_buffer_resource(struct r600_pipe_resource_state *rstate,
+void evergreen_pipe_mod_buffer_resource(struct pipe_context *ctx,
+ struct r600_pipe_resource_state *rstate,
struct r600_resource *rbuffer,
unsigned offset, unsigned stride,
enum radeon_bo_usage usage);
r600->context.transfer_destroy = u_transfer_destroy_vtbl;
r600->context.transfer_inline_write = u_transfer_inline_write_vtbl;
}
+
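+/*
+ * Return the GPU virtual address of a resource, or 0 when the kernel
+ * has no virtual address support; adding 0 keeps the pre-VM,
+ * relocation-based offset programming working unchanged.
+ *
+ * Illustrative use (names vary by caller):
+ *   uint64_t va = r600_resource_va(screen, texture);
+ *   reg_value = (va + surface_offset) >> 8;
+ */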
+uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource)
+{
+ struct r600_screen *rscreen = (struct r600_screen*)screen;
+ struct r600_resource *rresource = (struct r600_resource*)resource;
+
+ if (rresource->buf) {
+ return rscreen->ws->buffer_get_virtual_address(rresource->buf);
+ }
+ return 0;
+}
void r600_upload_const_buffer(struct r600_pipe_context *rctx, struct r600_resource **rbuffer, uint32_t *offset);
+uint64_t r600_resource_va(struct pipe_screen *screen, struct pipe_resource *resource);
+
#endif
struct r600_pipe_context *rctx = (struct r600_pipe_context *)ctx;
struct r600_resource *rbuffer = r600_resource(buffer);
struct r600_pipe_resource_state *rstate;
+ uint64_t va_offset;
uint32_t offset;
/* Note that the state tracker can unbind constant buffers by
}
r600_upload_const_buffer(rctx, &rbuffer, &offset);
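+ /* ALU_CONST_CACHE_* base registers take (buffer VA + upload offset) >> 8 */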
+ va_offset = r600_resource_va(ctx->screen, (void*)rbuffer);
+ va_offset += offset;
+ va_offset >>= 8;
switch (shader) {
case PIPE_SHADER_VERTEX:
0xFFFFFFFF, NULL, 0);
r600_pipe_state_add_reg(&rctx->vs_const_buffer,
R_028980_ALU_CONST_CACHE_VS_0,
- offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
+ va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
r600_context_pipe_state_set(&rctx->ctx, &rctx->vs_const_buffer);
rstate = &rctx->vs_const_buffer_resource[index];
}
if (rctx->chip_class >= EVERGREEN) {
- evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
+ evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
evergreen_context_pipe_state_set_vs_resource(&rctx->ctx, rstate, index);
} else {
r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
0xFFFFFFFF, NULL, 0);
r600_pipe_state_add_reg(&rctx->ps_const_buffer,
R_028940_ALU_CONST_CACHE_PS_0,
- offset >> 8, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
+ va_offset, 0xFFFFFFFF, rbuffer, RADEON_USAGE_READ);
r600_context_pipe_state_set(&rctx->ctx, &rctx->ps_const_buffer);
rstate = &rctx->ps_const_buffer_resource[index];
}
}
if (rctx->chip_class >= EVERGREEN) {
- evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
+ evergreen_pipe_mod_buffer_resource(ctx, rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
evergreen_context_pipe_state_set_ps_resource(&rctx->ctx, rstate, index);
} else {
r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, 16, RADEON_USAGE_READ);
}
if (rctx->chip_class >= EVERGREEN) {
- evergreen_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
+ evergreen_pipe_mod_buffer_resource(&rctx->context, rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
evergreen_context_pipe_state_set_fs_resource(&rctx->ctx, rstate, i);
} else {
r600_pipe_mod_buffer_resource(rstate, rbuffer, offset, vertex_buffer->stride, RADEON_USAGE_READ);
#include "util/u_hash_table.h"
#include "util/u_memory.h"
#include "util/u_simple_list.h"
+#include "util/u_double_list.h"
#include "os/os_thread.h"
#include "os/os_mman.h"
#include <xf86drm.h>
#include <errno.h>
+/*
+ * These defines are copied from radeon_drm.h; once an updated libdrm
+ * is released we should bump the configure.ac requirement for it and
+ * remove them.

+ */
#define RADEON_BO_FLAGS_MACRO_TILE 1
#define RADEON_BO_FLAGS_MICRO_TILE 2
#define RADEON_BO_FLAGS_MICRO_TILE_SQUARE 0x20
#endif
+#ifndef RADEON_VA_MAP
+
+#define RADEON_VA_MAP 1
+#define RADEON_VA_UNMAP 2
+
+#define RADEON_VA_RESULT_OK 0
+#define RADEON_VA_RESULT_ERROR 1
+#define RADEON_VA_RESULT_VA_EXIST 2
+
+#define RADEON_VM_PAGE_VALID (1 << 0)
+#define RADEON_VM_PAGE_READABLE (1 << 1)
+#define RADEON_VM_PAGE_WRITEABLE (1 << 2)
+#define RADEON_VM_PAGE_SYSTEM (1 << 3)
+#define RADEON_VM_PAGE_SNOOPED (1 << 4)
+
+struct drm_radeon_gem_va {
+ uint32_t handle;
+ uint32_t operation;
+ uint32_t vm_id;
+ uint32_t flags;
+ uint64_t offset;
+};
+
+#define DRM_RADEON_GEM_VA 0x2b
+#endif
+
extern const struct pb_vtbl radeon_bo_vtbl;
return (struct radeon_bo *)bo;
}
+struct radeon_bo_va_hole {
+ struct list_head list;
+ uint64_t offset;
+ uint64_t size;
+};
+
struct radeon_bomgr {
/* Base class. */
struct pb_manager base;
/* List of buffer handles and its mutex. */
struct util_hash_table *bo_handles;
pipe_mutex bo_handles_mutex;
+ pipe_mutex bo_va_mutex;
+
+ /* is virtual address supported */
+ bool va;
+ unsigned va_offset;
+ struct list_head va_holes;
};
static INLINE struct radeon_bomgr *radeon_bomgr(struct pb_manager *mgr)
}
}
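+/* first-fit allocation from the hole list; if no hole is big enough,
+ * carve the range from the top of the linear va_offset allocator
+ */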
+static uint64_t radeon_bomgr_find_va(struct radeon_bomgr *mgr, uint64_t size)
+{
+ struct radeon_bo_va_hole *hole, *n;
+ uint64_t offset = 0;
+
+ pipe_mutex_lock(mgr->bo_va_mutex);
+ /* first look for a hole */
+ LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
+ if (hole->size == size) {
+ offset = hole->offset;
+ list_del(&hole->list);
+ FREE(hole);
+ pipe_mutex_unlock(mgr->bo_va_mutex);
+ return offset;
+ }
+ if (hole->size > size) {
+ offset = hole->offset;
+ hole->size -= size;
+ hole->offset += size;
+ pipe_mutex_unlock(mgr->bo_va_mutex);
+ return offset;
+ }
+ }
+
+ offset = mgr->va_offset;
+ mgr->va_offset += size;
+ pipe_mutex_unlock(mgr->bo_va_mutex);
+ return offset;
+}
+
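+/* reserve a specific range, used when the kernel reports that a VA
+ * mapping for the buffer already exists (RADEON_VA_RESULT_VA_EXIST)
+ */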
+static void radeon_bomgr_force_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
+{
+ pipe_mutex_lock(mgr->bo_va_mutex);
+ if (va >= mgr->va_offset) {
+ if (va > mgr->va_offset) {
+ struct radeon_bo_va_hole *hole;
+ hole = CALLOC_STRUCT(radeon_bo_va_hole);
+ if (hole) {
+ hole->size = va - mgr->va_offset;
+ hole->offset = mgr->va_offset;
+ list_add(&hole->list, &mgr->va_holes);
+ }
+ }
+ mgr->va_offset = va + size;
+ } else {
+ struct radeon_bo_va_hole *hole, *n;
+ uint64_t stmp, etmp;
+
+ /* free all holes that fall into the range
+ * NOTE that we might lose virtual address space
+ */
+ LIST_FOR_EACH_ENTRY_SAFE(hole, n, &mgr->va_holes, list) {
+ stmp = hole->offset;
+ etmp = stmp + hole->size;
+ if (va >= stmp && va < etmp) {
+ list_del(&hole->list);
+ FREE(hole);
+ }
+ }
+ }
+ pipe_mutex_unlock(mgr->bo_va_mutex);
+}
+
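+/* return a range to the allocator: shrink the top offset when
+ * possible, otherwise record it as a hole
+ */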
+static void radeon_bomgr_free_va(struct radeon_bomgr *mgr, uint64_t va, uint64_t size)
+{
+ pipe_mutex_lock(mgr->bo_va_mutex);
+ if ((va + size) == mgr->va_offset) {
+ mgr->va_offset = va;
+ } else {
+ struct radeon_bo_va_hole *hole;
+
+ /* FIXME on allocation failure we just lose virtual address space
+ * maybe print a warning
+ */
+ hole = CALLOC_STRUCT(radeon_bo_va_hole);
+ if (hole) {
+ hole->size = size;
+ hole->offset = va;
+ list_add(&hole->list, &mgr->va_holes);
+ }
+ }
+ pipe_mutex_unlock(mgr->bo_va_mutex);
+}
+
static void radeon_bo_destroy(struct pb_buffer *_buf)
{
struct radeon_bo *bo = radeon_bo(_buf);
+ struct radeon_bomgr *mgr = bo->mgr;
struct drm_gem_close args;
memset(&args, 0, sizeof(args));
if (bo->ptr)
os_munmap(bo->ptr, bo->base.size);
+ if (mgr->va) {
+ radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+ }
+
/* Close object. */
args.handle = bo->handle;
drmIoctl(bo->rws->fd, DRM_IOCTL_GEM_CLOSE, &args);
struct radeon_bo *bo;
struct drm_radeon_gem_create args;
struct radeon_bo_desc *rdesc = (struct radeon_bo_desc*)desc;
+ int r;
memset(&args, 0, sizeof(args));
bo->mgr = mgr;
bo->rws = mgr->rws;
bo->handle = args.handle;
+ bo->va = 0;
pipe_mutex_init(bo->map_mutex);
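+ /* with VM, pick a VA and ask the kernel to map the BO there; honor
+ * any address the kernel has already assigned to this BO
+ */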
+ if (mgr->va) {
+ struct drm_radeon_gem_va va;
+
+ bo->va_size = align(size, 4096);
+ bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
+
+ va.handle = bo->handle;
+ va.vm_id = 0;
+ va.operation = RADEON_VA_MAP;
+ va.flags = RADEON_VM_PAGE_READABLE |
+ RADEON_VM_PAGE_WRITEABLE |
+ RADEON_VM_PAGE_SNOOPED;
+ va.offset = bo->va;
+ r = drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+ if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+ fprintf(stderr, "radeon: Failed to allocate a buffer:\n");
+ fprintf(stderr, "radeon: size : %d bytes\n", size);
+ fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment);
+ fprintf(stderr, "radeon: domains : %d\n", args.initial_domain);
+ radeon_bo_destroy(&bo->base);
+ return NULL;
+ }
+ if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+ radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+ bo->va = va.offset;
+ radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
+ }
+ }
+
return &bo->base;
}
struct radeon_bomgr *mgr = radeon_bomgr(_mgr);
util_hash_table_destroy(mgr->bo_handles);
pipe_mutex_destroy(mgr->bo_handles_mutex);
+ pipe_mutex_destroy(mgr->bo_va_mutex);
FREE(mgr);
}
mgr->rws = rws;
mgr->bo_handles = util_hash_table_create(handle_hash, handle_compare);
pipe_mutex_init(mgr->bo_handles_mutex);
+ pipe_mutex_init(mgr->bo_va_mutex);
+
+ mgr->va = rws->info.r600_virtual_address;
+ mgr->va_offset = rws->info.r600_va_start;
+ list_inithead(&mgr->va_holes);
+
return &mgr->base;
}
struct radeon_bo *bo;
struct radeon_bomgr *mgr = radeon_bomgr(ws->kman);
struct drm_gem_open open_arg = {};
+ int r;
memset(&open_arg, 0, sizeof(open_arg));
bo->base.vtbl = &radeon_bo_vtbl;
bo->mgr = mgr;
bo->rws = mgr->rws;
+ bo->va = 0;
pipe_mutex_init(bo->map_mutex);
util_hash_table_set(mgr->bo_handles, (void*)(uintptr_t)whandle->handle, bo);
if (stride)
*stride = whandle->stride;
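+ /* shared BOs need a VA mapping too; reuse the kernel-assigned
+ * address if the BO is already mapped in this VM
+ */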
+ if (mgr->va) {
+ struct drm_radeon_gem_va va;
+
+ bo->va_size = ((bo->base.size + 4095) & ~4095);
+ bo->va = radeon_bomgr_find_va(mgr, bo->va_size);
+
+ va.handle = bo->handle;
+ va.operation = RADEON_VA_MAP;
+ va.vm_id = 0;
+ va.flags = RADEON_VM_PAGE_READABLE |
+ RADEON_VM_PAGE_WRITEABLE |
+ RADEON_VM_PAGE_SNOOPED;
+ va.offset = bo->va;
+ r = drmCommandWriteRead(ws->fd, DRM_RADEON_GEM_VA, &va, sizeof(va));
+ if (r && va.operation == RADEON_VA_RESULT_ERROR) {
+ fprintf(stderr, "radeon: Failed to assign virtual address space\n");
+ radeon_bo_destroy(&bo->base);
+ return NULL;
+ }
+ if (va.operation == RADEON_VA_RESULT_VA_EXIST) {
+ radeon_bomgr_free_va(mgr, bo->va, bo->va_size);
+ bo->va = va.offset;
+ radeon_bomgr_force_va(mgr, bo->va, bo->va_size);
+ }
+ }
+
return (struct pb_buffer*)bo;
fail:
return TRUE;
}
+static uint64_t radeon_winsys_bo_va(struct pb_buffer *buffer)
+{
+ struct radeon_bo *bo = get_radeon_bo(buffer);
+
+ return bo->va;
+}
+
void radeon_bomgr_init_functions(struct radeon_drm_winsys *ws)
{
ws->base.buffer_get_cs_handle = radeon_drm_get_cs_handle;
ws->base.buffer_create = radeon_winsys_bo_create;
ws->base.buffer_from_handle = radeon_winsys_bo_from_handle;
ws->base.buffer_get_handle = radeon_winsys_bo_get_handle;
+ ws->base.buffer_get_virtual_address = radeon_winsys_bo_va;
}
uint32_t handle;
uint32_t name;
+ uint64_t va;
+ uint64_t va_size;
/* how many command streams is this bo referenced in? */
int num_cs_references;
#include <stdint.h>
#include <xf86drm.h>
+/*
+ * These defines are copied from radeon_drm.h; once an updated libdrm
+ * is released we should bump the configure.ac requirement for it and
+ * remove them.
+ */
#ifndef RADEON_CHUNK_ID_FLAGS
-#define RADEON_CHUNK_ID_FLAGS 0x03
+#define RADEON_CHUNK_ID_FLAGS 0x03
/* The first dword of RADEON_CHUNK_ID_FLAGS is a uint32 of these flags: */
#define RADEON_CS_KEEP_TILING_FLAGS 0x01
#endif
+#ifndef RADEON_CS_USE_VM
+#define RADEON_CS_USE_VM 0x02
+/* The second dword of RADEON_CHUNK_ID_FLAGS is a uint32 that sets the ring type */
+#define RADEON_CS_RING_GFX 0
+#define RADEON_CS_RING_COMPUTE 1
+#endif
+
#define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t))
-static boolean radeon_init_cs_context(struct radeon_cs_context *csc, int fd)
+static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
+ struct radeon_drm_winsys *ws)
{
- csc->fd = fd;
+ csc->fd = ws->fd;
csc->nrelocs = 512;
csc->relocs_bo = (struct radeon_bo**)
CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
cs->ws = ws;
- if (!radeon_init_cs_context(&cs->csc1, cs->ws->fd)) {
+ if (!radeon_init_cs_context(&cs->csc1, cs->ws)) {
FREE(cs);
return NULL;
}
- if (!radeon_init_cs_context(&cs->csc2, cs->ws->fd)) {
+ if (!radeon_init_cs_context(&cs->csc2, cs->ws)) {
radeon_destroy_cs_context(&cs->csc1);
FREE(cs);
return NULL;
p_atomic_inc(&cs->cst->relocs_bo[i]->num_active_ioctls);
}
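+ /* the flags chunk (RADEON_CHUNK_ID_FLAGS) is only submitted when at
+ * least one flag is needed
+ */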
+ cs->cst->flags = 0;
+ cs->cst->cs.num_chunks = 2;
if (flags & RADEON_FLUSH_KEEP_TILING_FLAGS) {
+ cs->cst->flags |= RADEON_CS_KEEP_TILING_FLAGS;
cs->cst->cs.num_chunks = 3;
- cs->cst->flags = RADEON_CS_KEEP_TILING_FLAGS;
- } else {
- cs->cst->cs.num_chunks = 2;
+ }
+ if (cs->ws->info.r600_virtual_address) {
+ cs->cst->cs.num_chunks = 3;
+ cs->cst->flags |= RADEON_CS_USE_VM;
}
if (cs->thread &&
struct drm_radeon_cs_reloc *relocs_hashlist[256];
unsigned reloc_indices_hashlist[256];
- unsigned used_vram;
- unsigned used_gart;
+ unsigned used_vram;
+ unsigned used_gart;
};
struct radeon_drm_cs {
#include <xf86drm.h>
#include <stdio.h>
+/*
+ * These defines are copied from radeon_drm.h; once an updated libdrm
+ * is released we should bump the configure.ac requirement for it and
+ * remove them.
+ */
#ifndef RADEON_INFO_TILING_CONFIG
#define RADEON_INFO_TILING_CONFIG 6
#endif
#define RADEON_INFO_BACKEND_MAP 0xd
#endif
+#ifndef RADEON_INFO_VA_START
+/* virtual address start, va < start are reserved by the kernel */
+#define RADEON_INFO_VA_START 0x0e
+/* maximum size of ib using the virtual memory cs */
+#define RADEON_INFO_IB_VM_MAX_SIZE 0x0f
+#endif
+
/* Enable/disable feature access for one command stream.
* If enable == TRUE, return TRUE on success.
* Otherwise, return FALSE.
&ws->info.r600_backend_map))
ws->info.r600_backend_map_valid = TRUE;
}
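+ /* virtual address space requires a kernel with DRM minor >= 13; also
+ * fetch the kernel-reserved VA start and the maximum VM IB size
+ */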
+ ws->info.r600_virtual_address = FALSE;
+ if (ws->info.drm_minor >= 13) {
+ ws->info.r600_virtual_address = TRUE;
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
+ &ws->info.r600_va_start))
+ ws->info.r600_virtual_address = FALSE;
+ if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
+ &ws->info.r600_ib_vm_max_size))
+ ws->info.r600_virtual_address = FALSE;
+ }
}
return TRUE;
uint32_t r600_num_tile_pipes;
uint32_t r600_backend_map;
boolean r600_backend_map_valid;
+ boolean r600_virtual_address;
+ uint32_t r600_va_start;
+ uint32_t r600_ib_vm_max_size;
};
enum radeon_feature_id {
unsigned stride,
struct winsys_handle *whandle);
+ /**
+ * Return the virtual address of a buffer.
+ *
+ * \param buf A winsys buffer object
+ * \return virtual address
+ */
+ uint64_t (*buffer_get_virtual_address)(struct pb_buffer *buf);
+
/**************************************************************************
* Command submission.
*