From: Michel Dänzer Date: Thu, 19 Jun 2014 01:40:38 +0000 (+0900) Subject: r600g/radeonsi: Use write-combined CPU mappings of some BOs in GTT X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=07c65b85eada8dd34019763b6e82ed4257a9b4a6;p=mesa.git r600g/radeonsi: Use write-combined CPU mappings of some BOs in GTT Reviewed-by: Marek Olšák --- diff --git a/src/gallium/drivers/r300/r300_query.c b/src/gallium/drivers/r300/r300_query.c index 5305ebdcb7f..1679433425b 100644 --- a/src/gallium/drivers/r300/r300_query.c +++ b/src/gallium/drivers/r300/r300_query.c @@ -59,7 +59,7 @@ static struct pipe_query *r300_create_query(struct pipe_context *pipe, q->num_pipes = r300screen->info.r300_num_gb_pipes; q->buf = r300->rws->buffer_create(r300->rws, 4096, 4096, TRUE, - RADEON_DOMAIN_GTT); + RADEON_DOMAIN_GTT, 0); if (!q->buf) { FREE(q); return NULL; diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c index 175b83a1ace..6e5b3811dcc 100644 --- a/src/gallium/drivers/r300/r300_render.c +++ b/src/gallium/drivers/r300/r300_render.c @@ -907,7 +907,7 @@ static boolean r300_render_allocate_vertices(struct vbuf_render* render, r300->vbo = rws->buffer_create(rws, MAX2(R300_MAX_DRAW_VBO_SIZE, size), R300_BUFFER_ALIGNMENT, TRUE, - RADEON_DOMAIN_GTT); + RADEON_DOMAIN_GTT, 0); if (!r300->vbo) { return FALSE; } diff --git a/src/gallium/drivers/r300/r300_screen_buffer.c b/src/gallium/drivers/r300/r300_screen_buffer.c index 86e4478d637..de557b57776 100644 --- a/src/gallium/drivers/r300/r300_screen_buffer.c +++ b/src/gallium/drivers/r300/r300_screen_buffer.c @@ -103,7 +103,7 @@ r300_buffer_transfer_map( struct pipe_context *context, /* Create a new one in the same pipe_resource. */ new_buf = r300->rws->buffer_create(r300->rws, rbuf->b.b.width0, R300_BUFFER_ALIGNMENT, TRUE, - rbuf->domain); + rbuf->domain, 0); if (new_buf) { /* Discard the old buffer. */ pb_reference(&rbuf->buf, NULL); @@ -185,7 +185,7 @@ struct pipe_resource *r300_buffer_create(struct pipe_screen *screen, rbuf->buf = r300screen->rws->buffer_create(r300screen->rws, rbuf->b.b.width0, R300_BUFFER_ALIGNMENT, TRUE, - rbuf->domain); + rbuf->domain, 0); if (!rbuf->buf) { FREE(rbuf); return NULL; diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c index 4ea69dcb4ba..ffe8c004525 100644 --- a/src/gallium/drivers/r300/r300_texture.c +++ b/src/gallium/drivers/r300/r300_texture.c @@ -1042,7 +1042,7 @@ r300_texture_create_object(struct r300_screen *rscreen, /* Create the backing buffer if needed. */ if (!tex->buf) { tex->buf = rws->buffer_create(rws, tex->tex.size_in_bytes, 2048, TRUE, - tex->domain); + tex->domain, 0); if (!tex->buf) { goto fail; diff --git a/src/gallium/drivers/radeon/r600_buffer_common.c b/src/gallium/drivers/radeon/r600_buffer_common.c index 0eaa817aee8..4e6b8978efb 100644 --- a/src/gallium/drivers/radeon/r600_buffer_common.c +++ b/src/gallium/drivers/radeon/r600_buffer_common.c @@ -107,11 +107,14 @@ bool r600_init_resource(struct r600_common_screen *rscreen, { struct r600_texture *rtex = (struct r600_texture*)res; struct pb_buffer *old_buf, *new_buf; + enum radeon_bo_flag flags = 0; switch (res->b.b.usage) { - case PIPE_USAGE_STAGING: case PIPE_USAGE_DYNAMIC: case PIPE_USAGE_STREAM: + flags = RADEON_FLAG_GTT_WC; + /* fall through */ + case PIPE_USAGE_STAGING: /* Transfers are likely to occur more often with these resources. */ res->domains = RADEON_DOMAIN_GTT; break; @@ -120,6 +123,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen, default: /* Not listing GTT here improves performance in some apps. */ res->domains = RADEON_DOMAIN_VRAM; + flags = RADEON_FLAG_GTT_WC; break; } @@ -129,6 +133,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen, res->b.b.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT | PIPE_RESOURCE_FLAG_MAP_COHERENT)) { res->domains = RADEON_DOMAIN_GTT; + flags = 0; } /* Tiled textures are unmappable. Always put them in VRAM. */ @@ -140,7 +145,7 @@ bool r600_init_resource(struct r600_common_screen *rscreen, /* Allocate a new resource. */ new_buf = rscreen->ws->buffer_create(rscreen->ws, size, alignment, use_reusable_pool, - res->domains); + res->domains, flags); if (!new_buf) { return false; } diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c index bfda69e6094..6dd84a4f255 100644 --- a/src/gallium/drivers/radeon/r600_texture.c +++ b/src/gallium/drivers/radeon/r600_texture.c @@ -1027,6 +1027,8 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx, r600_init_temp_resource_from_box(&resource, texture, box, level, R600_RESOURCE_FLAG_TRANSFER); + resource.usage = (usage & PIPE_TRANSFER_READ) ? + PIPE_USAGE_STAGING : PIPE_USAGE_STREAM; /* Create the temporary texture. */ staging = (struct r600_texture*)ctx->screen->resource_create(ctx->screen, &resource); diff --git a/src/gallium/drivers/radeon/radeon_uvd.c b/src/gallium/drivers/radeon/radeon_uvd.c index 137c69cd157..d77217ca856 100644 --- a/src/gallium/drivers/radeon/radeon_uvd.c +++ b/src/gallium/drivers/radeon/radeon_uvd.c @@ -816,12 +816,14 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, for (i = 0; i < NUM_BUFFERS; ++i) { unsigned msg_fb_size = FB_BUFFER_OFFSET + FB_BUFFER_SIZE; STATIC_ASSERT(sizeof(struct ruvd_msg) <= FB_BUFFER_OFFSET); - if (!rvid_create_buffer(dec->ws, &dec->msg_fb_buffers[i], msg_fb_size, RADEON_DOMAIN_VRAM)) { + if (!rvid_create_buffer(dec->ws, &dec->msg_fb_buffers[i], msg_fb_size, + RADEON_DOMAIN_VRAM, 0)) { RVID_ERR("Can't allocated message buffers.\n"); goto error; } - if (!rvid_create_buffer(dec->ws, &dec->bs_buffers[i], bs_buf_size, RADEON_DOMAIN_GTT)) { + if (!rvid_create_buffer(dec->ws, &dec->bs_buffers[i], bs_buf_size, + RADEON_DOMAIN_GTT, 0)) { RVID_ERR("Can't allocated bitstream buffers.\n"); goto error; } @@ -830,7 +832,7 @@ struct pipe_video_codec *ruvd_create_decoder(struct pipe_context *context, rvid_clear_buffer(dec->ws, dec->cs, &dec->bs_buffers[i]); } - if (!rvid_create_buffer(dec->ws, &dec->dpb, dpb_size, RADEON_DOMAIN_VRAM)) { + if (!rvid_create_buffer(dec->ws, &dec->dpb, dpb_size, RADEON_DOMAIN_VRAM, 0)) { RVID_ERR("Can't allocated dpb.\n"); goto error; } diff --git a/src/gallium/drivers/radeon/radeon_vce.c b/src/gallium/drivers/radeon/radeon_vce.c index f5395b3fb39..9174c973ffc 100644 --- a/src/gallium/drivers/radeon/radeon_vce.c +++ b/src/gallium/drivers/radeon/radeon_vce.c @@ -191,7 +191,7 @@ static void rvce_destroy(struct pipe_video_codec *encoder) struct rvce_encoder *enc = (struct rvce_encoder*)encoder; if (enc->stream_handle) { struct rvid_buffer fb; - rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT); + rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT, 0); enc->fb = &fb; enc->session(enc); enc->feedback(enc); @@ -233,7 +233,7 @@ static void rvce_begin_frame(struct pipe_video_codec *encoder, if (!enc->stream_handle) { struct rvid_buffer fb; enc->stream_handle = rvid_alloc_stream_handle(); - rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT); + rvid_create_buffer(enc->ws, &fb, 512, RADEON_DOMAIN_GTT, 0); enc->fb = &fb; enc->session(enc); enc->create(enc); @@ -265,7 +265,7 @@ static void rvce_encode_bitstream(struct pipe_video_codec *encoder, enc->bs_size = destination->width0; *fb = enc->fb = CALLOC_STRUCT(rvid_buffer); - if (!rvid_create_buffer(enc->ws, enc->fb, 512, RADEON_DOMAIN_GTT)) { + if (!rvid_create_buffer(enc->ws, enc->fb, 512, RADEON_DOMAIN_GTT, 0)) { RVID_ERR("Can't create feedback buffer.\n"); return; } @@ -390,7 +390,7 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context, cpb_size = cpb_size * 3 / 2; cpb_size = cpb_size * enc->cpb_num; tmp_buf->destroy(tmp_buf); - if (!rvid_create_buffer(enc->ws, &enc->cpb, cpb_size, RADEON_DOMAIN_VRAM)) { + if (!rvid_create_buffer(enc->ws, &enc->cpb, cpb_size, RADEON_DOMAIN_VRAM, 0)) { RVID_ERR("Can't create CPB buffer.\n"); goto error; } diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c index eae533e6f59..17e9a59d81d 100644 --- a/src/gallium/drivers/radeon/radeon_video.c +++ b/src/gallium/drivers/radeon/radeon_video.c @@ -61,11 +61,13 @@ unsigned rvid_alloc_stream_handle() /* create a buffer in the winsys */ bool rvid_create_buffer(struct radeon_winsys *ws, struct rvid_buffer *buffer, - unsigned size, enum radeon_bo_domain domain) + unsigned size, enum radeon_bo_domain domain, + enum radeon_bo_flag flags) { buffer->domain = domain; + buffer->flags = flags; - buffer->buf = ws->buffer_create(ws, size, 4096, false, domain); + buffer->buf = ws->buffer_create(ws, size, 4096, false, domain, flags); if (!buffer->buf) return false; @@ -91,7 +93,8 @@ bool rvid_resize_buffer(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, struct rvid_buffer old_buf = *new_buf; void *src = NULL, *dst = NULL; - if (!rvid_create_buffer(ws, new_buf, new_size, new_buf->domain)) + if (!rvid_create_buffer(ws, new_buf, new_size, new_buf->domain, + new_buf->flags)) goto error; src = ws->buffer_map(old_buf.cs_handle, cs, PIPE_TRANSFER_READ); @@ -191,7 +194,7 @@ void rvid_join_surfaces(struct radeon_winsys* ws, unsigned bind, /* TODO: 2D tiling workaround */ alignment *= 2; - pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM); + pb = ws->buffer_create(ws, size, alignment, bind, RADEON_DOMAIN_VRAM, 0); if (!pb) return; diff --git a/src/gallium/drivers/radeon/radeon_video.h b/src/gallium/drivers/radeon/radeon_video.h index 55d2ca466de..42de5a9d51b 100644 --- a/src/gallium/drivers/radeon/radeon_video.h +++ b/src/gallium/drivers/radeon/radeon_video.h @@ -44,6 +44,7 @@ struct rvid_buffer { enum radeon_bo_domain domain; + enum radeon_bo_flag flags; struct pb_buffer* buf; struct radeon_winsys_cs_handle* cs_handle; }; @@ -53,7 +54,8 @@ unsigned rvid_alloc_stream_handle(void); /* create a buffer in the winsys */ bool rvid_create_buffer(struct radeon_winsys *ws, struct rvid_buffer *buffer, - unsigned size, enum radeon_bo_domain domain); + unsigned size, enum radeon_bo_domain domain, + enum radeon_bo_flag flags); /* destroy a buffer */ void rvid_destroy_buffer(struct rvid_buffer *buffer); diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index fde06fcd6a2..3dec53683ce 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -2697,7 +2697,7 @@ static void si_set_border_colors(struct si_context *sctx, unsigned count, sctx->border_color_table = si_resource_create_custom(&sctx->screen->b.b, - PIPE_USAGE_STAGING, + PIPE_USAGE_DYNAMIC, 4096 * 4 * 4); } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c index d06bb344c73..73f8d385d7b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c @@ -477,6 +477,10 @@ const struct pb_vtbl radeon_bo_vtbl = { radeon_bo_get_base_buffer, }; +#ifndef RADEON_GEM_GTT_WC +#define RADEON_GEM_GTT_WC (1 << 2) +#endif + static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr, pb_size size, const struct pb_desc *desc) @@ -497,6 +501,10 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr, args.size = size; args.alignment = desc->alignment; args.initial_domain = rdesc->initial_domains; + args.flags = 0; + + if (rdesc->flags & RADEON_FLAG_GTT_WC) + args.flags |= RADEON_GEM_GTT_WC; if (drmCommandWriteRead(rws->fd, DRM_RADEON_GEM_CREATE, &args, sizeof(args))) { @@ -504,6 +512,7 @@ static struct pb_buffer *radeon_bomgr_create_bo(struct pb_manager *_mgr, fprintf(stderr, "radeon: size : %d bytes\n", size); fprintf(stderr, "radeon: alignment : %d bytes\n", desc->alignment); fprintf(stderr, "radeon: domains : %d\n", args.initial_domain); + fprintf(stderr, "radeon: flags : %d\n", args.flags); return NULL; } @@ -784,7 +793,8 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, unsigned size, unsigned alignment, boolean use_reusable_pool, - enum radeon_bo_domain domain) + enum radeon_bo_domain domain, + enum radeon_bo_flag flags) { struct radeon_drm_winsys *ws = radeon_drm_winsys(rws); struct radeon_bomgr *mgr = radeon_bomgr(ws->kman); @@ -798,13 +808,20 @@ radeon_winsys_bo_create(struct radeon_winsys *rws, /* Additional criteria for the cache manager. */ desc.base.usage = domain; desc.initial_domains = domain; + desc.flags = flags; /* Assign a buffer manager. */ if (use_reusable_pool) { - if (domain == RADEON_DOMAIN_VRAM) - provider = ws->cman_vram; - else + if (domain == RADEON_DOMAIN_VRAM) { + if (flags & RADEON_FLAG_GTT_WC) + provider = ws->cman_vram_gtt_wc; + else + provider = ws->cman_vram; + } else if (flags & RADEON_FLAG_GTT_WC) { + provider = ws->cman_gtt_wc; + } else { provider = ws->cman_gtt; + } } else { provider = ws->kman; } diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index f5b122f17c7..1c00a13c1e0 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -42,6 +42,7 @@ struct radeon_bo_desc { struct pb_desc base; unsigned initial_domains; + unsigned flags; }; struct radeon_bo { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 67375dccc1d..3596f8d1cd0 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -606,7 +606,7 @@ radeon_cs_create_fence(struct radeon_winsys_cs *rcs) /* Create a fence, which is a dummy BO. */ fence = cs->ws->base.buffer_create(&cs->ws->base, 1, 1, TRUE, - RADEON_DOMAIN_GTT); + RADEON_DOMAIN_GTT, 0); /* Add the fence as a dummy relocation. */ cs->ws->base.cs_add_reloc(rcs, cs->ws->base.buffer_get_cs_handle(fence), RADEON_USAGE_READWRITE, RADEON_DOMAIN_GTT, diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c index 9e60de3ab77..910d06b5dba 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c @@ -424,7 +424,9 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws) pipe_mutex_destroy(ws->cs_stack_lock); ws->cman_vram->destroy(ws->cman_vram); + ws->cman_vram_gtt_wc->destroy(ws->cman_vram_gtt_wc); ws->cman_gtt->destroy(ws->cman_gtt); + ws->cman_gtt_wc->destroy(ws->cman_gtt_wc); ws->kman->destroy(ws->kman); if (ws->gen >= DRV_R600) { radeon_surface_manager_free(ws->surf_man); @@ -642,9 +644,15 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create) ws->cman_vram = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0); if (!ws->cman_vram) goto fail; + ws->cman_vram_gtt_wc = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0); + if (!ws->cman_vram_gtt_wc) + goto fail; ws->cman_gtt = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0); if (!ws->cman_gtt) goto fail; + ws->cman_gtt_wc = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0); + if (!ws->cman_gtt_wc) + goto fail; if (ws->gen >= DRV_R600) { ws->surf_man = radeon_surface_manager_new(fd); @@ -701,8 +709,12 @@ fail: pipe_mutex_unlock(fd_tab_mutex); if (ws->cman_gtt) ws->cman_gtt->destroy(ws->cman_gtt); + if (ws->cman_gtt_wc) + ws->cman_gtt_wc->destroy(ws->cman_gtt_wc); if (ws->cman_vram) ws->cman_vram->destroy(ws->cman_vram); + if (ws->cman_vram_gtt_wc) + ws->cman_vram_gtt_wc->destroy(ws->cman_vram_gtt_wc); if (ws->kman) ws->kman->destroy(ws->kman); if (ws->surf_man) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h index fc6f53b7231..ea6f7f01f14 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h @@ -58,7 +58,9 @@ struct radeon_drm_winsys { struct pb_manager *kman; struct pb_manager *cman_vram; + struct pb_manager *cman_vram_gtt_wc; struct pb_manager *cman_gtt; + struct pb_manager *cman_gtt_wc; struct radeon_surface_manager *surf_man; uint32_t num_cpus; /* Number of CPUs. */ diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h b/src/gallium/winsys/radeon/drm/radeon_winsys.h index a63a50b01e8..9aea1e89f06 100644 --- a/src/gallium/winsys/radeon/drm/radeon_winsys.h +++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h @@ -65,6 +65,10 @@ enum radeon_bo_domain { /* bitfield */ RADEON_DOMAIN_VRAM_GTT = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GTT }; +enum radeon_bo_flag { /* bitfield */ + RADEON_FLAG_GTT_WC = (1 << 0) +}; + enum radeon_bo_usage { /* bitfield */ RADEON_USAGE_READ = 2, RADEON_USAGE_WRITE = 4, @@ -287,7 +291,8 @@ struct radeon_winsys { unsigned size, unsigned alignment, boolean use_reusable_pool, - enum radeon_bo_domain domain); + enum radeon_bo_domain domain, + enum radeon_bo_flag flags); struct radeon_winsys_cs_handle *(*buffer_get_cs_handle)( struct pb_buffer *buf);