From 2120cfe1affa24f173b135f149d0e385f1f60b73 Mon Sep 17 00:00:00 2001 From: Matt Turner Date: Thu, 18 May 2017 11:26:08 -0700 Subject: [PATCH] i965: Add and use brw_bo_map() We can encapsulate the logic for choosing the mapping type. This will also help when we add WC mappings. A few functional changes are made in this patch. On non-LLC, what were previously WB mappings are now GTT mappings (in the prefilling debug code in brw_performance_query.c; the shader_time code in brw_program.c; and in the case of an RW mapping in intel_buffer_objects.c). Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/brw_bufmgr.c | 30 +++++++++++++++++-- src/mesa/drivers/dri/i965/brw_bufmgr.h | 5 ++-- .../drivers/dri/i965/brw_performance_query.c | 6 ++-- src/mesa/drivers/dri/i965/brw_program.c | 2 +- src/mesa/drivers/dri/i965/brw_program_cache.c | 6 ++-- src/mesa/drivers/dri/i965/brw_queryobj.c | 2 +- src/mesa/drivers/dri/i965/gen6_queryobj.c | 2 +- src/mesa/drivers/dri/i965/gen6_sol.c | 2 +- src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 +-- .../drivers/dri/i965/intel_buffer_objects.c | 13 ++------ src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 ++---------- src/mesa/drivers/dri/i965/intel_pixel_read.c | 2 +- src/mesa/drivers/dri/i965/intel_screen.c | 4 +-- src/mesa/drivers/dri/i965/intel_tex_image.c | 2 +- .../drivers/dri/i965/intel_tex_subimage.c | 2 +- src/mesa/drivers/dri/i965/intel_upload.c | 5 +--- 16 files changed, 52 insertions(+), 54 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c b/src/mesa/drivers/dri/i965/brw_bufmgr.c index b79f5666906..ec9611feb92 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c @@ -658,7 +658,7 @@ set_domain(struct brw_context *brw, const char *action, } } -void * +static void * brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags) { struct brw_bufmgr *bufmgr = bo->bufmgr; @@ -740,7 +740,7 @@ map_gtt(struct brw_bo *bo) return bo->map_gtt; } -void * +static void * brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags) { struct brw_bufmgr *bufmgr = bo->bufmgr; @@ -814,6 +814,32 @@ brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo) return map; } +static bool +can_map_cpu(struct brw_bo *bo, unsigned flags) +{ + if (bo->cache_coherent) + return true; + + if (flags & MAP_PERSISTENT) + return false; + + if (flags & MAP_COHERENT) + return false; + + return !(flags & MAP_WRITE); +} + +void * +brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags) +{ + if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW)) + return brw_bo_map_gtt(brw, bo, flags); + else if (can_map_cpu(bo, flags)) + return brw_bo_map_cpu(brw, bo, flags); + else + return brw_bo_map_gtt(brw, bo, flags); +} + int brw_bo_unmap(struct brw_bo *bo) { diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h b/src/mesa/drivers/dri/i965/brw_bufmgr.h index 831da6960d3..099afcf4c57 100644 --- a/src/mesa/drivers/dri/i965/brw_bufmgr.h +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.h @@ -137,7 +137,7 @@ struct brw_bo { * * Buffer objects are not necessarily initially mapped into CPU virtual * address space or graphics device aperture. They must be mapped - * using brw_bo_map_cpu() or brw_bo_map_gtt() to be used by the CPU. + * using brw_bo_map() to be used by the CPU. */ struct brw_bo *brw_bo_alloc(struct brw_bufmgr *bufmgr, const char *name, uint64_t size, uint64_t alignment); @@ -189,7 +189,7 @@ void brw_bo_unreference(struct brw_bo *bo); * This function will block waiting for any existing execution on the * buffer to complete, first. The resulting mapping is returned. */ -MUST_CHECK void *brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags); +MUST_CHECK void *brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags); /** * Reduces the refcount on the userspace mapping of the buffer @@ -263,7 +263,6 @@ struct brw_bo *brw_bo_gem_create_from_name(struct brw_bufmgr *bufmgr, unsigned int handle); void brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr); MUST_CHECK void *brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo); -MUST_CHECK void *brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo, unsigned flags); int brw_bo_wait(struct brw_bo *bo, int64_t timeout_ns); diff --git a/src/mesa/drivers/dri/i965/brw_performance_query.c b/src/mesa/drivers/dri/i965/brw_performance_query.c index 2ec070bd6f5..1c9ddf52ea3 100644 --- a/src/mesa/drivers/dri/i965/brw_performance_query.c +++ b/src/mesa/drivers/dri/i965/brw_performance_query.c @@ -713,7 +713,7 @@ accumulate_oa_reports(struct brw_context *brw, if (!read_oa_samples(brw)) goto error; - query_buffer = brw_bo_map_cpu(brw, obj->oa.bo, MAP_READ); + query_buffer = brw_bo_map(brw, obj->oa.bo, MAP_READ); start = last = query_buffer; end = query_buffer + (MI_RPC_BO_END_OFFSET_BYTES / sizeof(uint32_t)); @@ -992,7 +992,7 @@ brw_begin_perf_query(struct gl_context *ctx, MI_RPC_BO_SIZE, 64); #ifdef DEBUG /* Pre-filling the BO helps debug whether writes landed. */ - void *map = brw_bo_map_cpu(brw, obj->oa.bo, MAP_WRITE); + void *map = brw_bo_map(brw, obj->oa.bo, MAP_WRITE); memset(map, 0x80, MI_RPC_BO_SIZE); brw_bo_unmap(obj->oa.bo); #endif @@ -1214,7 +1214,7 @@ get_pipeline_stats_data(struct brw_context *brw, int n_counters = obj->query->n_counters; uint8_t *p = data; - uint64_t *start = brw_bo_map_cpu(brw, obj->pipeline_stats.bo, MAP_READ); + uint64_t *start = brw_bo_map(brw, obj->pipeline_stats.bo, MAP_READ); uint64_t *end = start + (STATS_BO_END_OFFSET_BYTES / sizeof(uint64_t)); for (int i = 0; i < n_counters; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 7f87e7302ff..bff3475b3d9 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -578,7 +578,7 @@ brw_collect_shader_time(struct brw_context *brw) * delaying reading the reports, but it doesn't look like it's a big * overhead compared to the cost of tracking the time in the first place. */ - void *bo_map = brw_bo_map_cpu(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE); + void *bo_map = brw_bo_map(brw, brw->shader_time.bo, MAP_READ | MAP_WRITE); for (int i = 0; i < brw->shader_time.num_entries; i++) { uint32_t *times = bo_map + i * 3 * BRW_SHADER_TIME_STRIDE; diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c index 079e2ae53b5..ab03969db91 100644 --- a/src/mesa/drivers/dri/i965/brw_program_cache.c +++ b/src/mesa/drivers/dri/i965/brw_program_cache.c @@ -227,7 +227,7 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size) if (brw->has_llc) { memcpy(llc_map, cache->map, cache->next_offset); } else { - void *map = brw_bo_map_cpu(brw, cache->bo, MAP_READ); + void *map = brw_bo_map(brw, cache->bo, MAP_READ); brw_bo_subdata(new_bo, 0, cache->next_offset, map); brw_bo_unmap(cache->bo); } @@ -268,7 +268,7 @@ brw_lookup_prog(const struct brw_cache *cache, void *map; if (!brw->has_llc) - map = brw_bo_map_cpu(brw, cache->bo, MAP_READ); + map = brw_bo_map(brw, cache->bo, MAP_READ); else map = cache->map; @@ -550,7 +550,7 @@ brw_print_program_cache(struct brw_context *brw) void *map; if (!brw->has_llc) - map = brw_bo_map_cpu(brw, cache->bo, MAP_READ); + map = brw_bo_map(brw, cache->bo, MAP_READ); else map = cache->map; diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c index 05e23cda5c9..a7b896243d6 100644 --- a/src/mesa/drivers/dri/i965/brw_queryobj.c +++ b/src/mesa/drivers/dri/i965/brw_queryobj.c @@ -146,7 +146,7 @@ brw_queryobj_get_results(struct gl_context *ctx, } } - results = brw_bo_map_cpu(brw, query->bo, MAP_READ); + results = brw_bo_map(brw, query->bo, MAP_READ); switch (query->Base.Target) { case GL_TIME_ELAPSED_EXT: /* The query BO contains the starting and ending timestamps. diff --git a/src/mesa/drivers/dri/i965/gen6_queryobj.c b/src/mesa/drivers/dri/i965/gen6_queryobj.c index ab15112d81e..4fd30ec0649 100644 --- a/src/mesa/drivers/dri/i965/gen6_queryobj.c +++ b/src/mesa/drivers/dri/i965/gen6_queryobj.c @@ -221,7 +221,7 @@ gen6_queryobj_get_results(struct gl_context *ctx, if (query->bo == NULL) return; - uint64_t *results = brw_bo_map_cpu(brw, query->bo, MAP_READ); + uint64_t *results = brw_bo_map(brw, query->bo, MAP_READ); switch (query->Base.Target) { case GL_TIME_ELAPSED: /* The query BO contains the starting and ending timestamps. diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index 5873afdbbea..b4824b67e5b 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -247,7 +247,7 @@ tally_prims_generated(struct brw_context *brw, if (unlikely(brw->perf_debug && brw_bo_busy(obj->prim_count_bo))) perf_debug("Stalling for # of transform feedback primitives written.\n"); - uint64_t *prim_counts = brw_bo_map_cpu(brw, obj->prim_count_bo, MAP_READ); + uint64_t *prim_counts = brw_bo_map(brw, obj->prim_count_bo, MAP_READ); assert(obj->prim_count_buffer_index % (2 * streams) == 0); int pairs = obj->prim_count_buffer_index / (2 * streams); diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c index de93aeb0b29..62d2fe8ef35 100644 --- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c +++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c @@ -100,7 +100,7 @@ intel_batchbuffer_reset(struct intel_batchbuffer *batch, batch->bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096); if (has_llc) { - batch->map = brw_bo_map_cpu(NULL, batch->bo, MAP_READ | MAP_WRITE); + batch->map = brw_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE); } batch->map_next = batch->map; @@ -239,7 +239,7 @@ do_batch_dump(struct brw_context *brw) if (batch->ring != RENDER_RING) return; - void *map = brw_bo_map_cpu(brw, batch->bo, MAP_READ); + void *map = brw_bo_map(brw, batch->bo, MAP_READ); if (map == NULL) { fprintf(stderr, "WARNING: failed to map batchbuffer, " diff --git a/src/mesa/drivers/dri/i965/intel_buffer_objects.c b/src/mesa/drivers/dri/i965/intel_buffer_objects.c index cf6382d145e..5813989ce48 100644 --- a/src/mesa/drivers/dri/i965/intel_buffer_objects.c +++ b/src/mesa/drivers/dri/i965/intel_buffer_objects.c @@ -395,12 +395,7 @@ brw_map_buffer_range(struct gl_context *ctx, length + intel_obj->map_extra[index], alignment); - void *map; - if (brw->has_llc) { - map = brw_bo_map_cpu(brw, intel_obj->range_map_bo[index], access); - } else { - map = brw_bo_map_gtt(brw, intel_obj->range_map_bo[index], access); - } + void *map = brw_bo_map(brw, intel_obj->range_map_bo[index], access); obj->Mappings[index].Pointer = map + intel_obj->map_extra[index]; return obj->Mappings[index].Pointer; } @@ -412,12 +407,8 @@ brw_map_buffer_range(struct gl_context *ctx, perf_debug("MapBufferRange with GL_MAP_UNSYNCHRONIZED_BIT stalling (it's actually synchronized on non-LLC platforms)\n"); } map = brw_bo_map_unsynchronized(brw, intel_obj->buffer); - } else if (!brw->has_llc && (!(access & GL_MAP_READ_BIT) || - (access & GL_MAP_PERSISTENT_BIT))) { - map = brw_bo_map_gtt(brw, intel_obj->buffer, access); - mark_buffer_inactive(intel_obj); } else { - map = brw_bo_map_cpu(brw, intel_obj->buffer, access); + map = brw_bo_map(brw, intel_obj->buffer, access); mark_buffer_inactive(intel_obj); } diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index f81e087ec71..07e9ecf4f4f 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -1386,7 +1386,7 @@ intel_miptree_init_mcs(struct brw_context *brw, * * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff. */ - void *map = brw_bo_map_gtt(brw, mt->mcs_buf->bo, MAP_WRITE); + void *map = brw_bo_map(brw, mt->mcs_buf->bo, MAP_WRITE); if (unlikely(map == NULL)) { fprintf(stderr, "Failed to map mcs buffer into GTT\n"); brw_bo_unreference(mt->mcs_buf->bo); @@ -2426,22 +2426,7 @@ intel_miptree_map_raw(struct brw_context *brw, if (brw_batch_references(&brw->batch, bo)) intel_batchbuffer_flush(brw); - /* brw_bo_map_cpu() uses a WB mmaping of the buffer's backing storage. It - * will utilize the CPU cache even if the buffer is incoherent with the - * GPU (i.e. any writes will be stored in the cache and not flushed to - * memory and so will be invisible to the GPU or display engine). This - * is the majority of buffers on a !llc machine, but even on a llc - * almost all scanouts are incoherent with the CPU. A WB write into the - * backing storage of the current scanout will not be immediately - * visible on the screen. The transfer from cache to screen is slow and - * indeterministic causing visible glitching on the screen. Never use - * this WB mapping for writes to an active scanout (reads are fine, so - * long as cache consistency is maintained). - */ - if (mt->tiling != I915_TILING_NONE || mt->is_scanout) - return brw_bo_map_gtt(brw, bo, mode); - else - return brw_bo_map_cpu(brw, bo, mode); + return brw_bo_map(brw, bo, mode); } static void diff --git a/src/mesa/drivers/dri/i965/intel_pixel_read.c b/src/mesa/drivers/dri/i965/intel_pixel_read.c index 9d8c6f47934..4614279c643 100644 --- a/src/mesa/drivers/dri/i965/intel_pixel_read.c +++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c @@ -145,7 +145,7 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx, intel_batchbuffer_flush(brw); } - void *map = brw_bo_map_cpu(brw, bo, MAP_READ); + void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW); if (map == NULL) { DBG("%s: failed to map bo\n", __func__); return false; diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index 90832ae6a7b..9354cd53ba9 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -1420,7 +1420,7 @@ intel_detect_pipelined_register(struct intel_screen *screen, if (bo == NULL) goto err_results; - map = brw_bo_map_cpu(NULL, bo, MAP_WRITE); + map = brw_bo_map(NULL, bo, MAP_WRITE); if (!map) goto err_batch; @@ -1477,7 +1477,7 @@ intel_detect_pipelined_register(struct intel_screen *screen, drmIoctl(dri_screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf); /* Check whether the value got written. */ - void *results_map = brw_bo_map_cpu(NULL, results, MAP_READ); + void *results_map = brw_bo_map(NULL, results, MAP_READ); if (results_map) { success = *((uint32_t *)results_map + offset) == expected_value; brw_bo_unmap(results); diff --git a/src/mesa/drivers/dri/i965/intel_tex_image.c b/src/mesa/drivers/dri/i965/intel_tex_image.c index c34f73b5364..828f20b7b67 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_image.c +++ b/src/mesa/drivers/dri/i965/intel_tex_image.c @@ -534,7 +534,7 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx, intel_batchbuffer_flush(brw); } - void *map = brw_bo_map_cpu(brw, bo, MAP_READ); + void *map = brw_bo_map(brw, bo, MAP_READ | MAP_RAW); if (map == NULL) { DBG("%s: failed to map bo\n", __func__); return false; diff --git a/src/mesa/drivers/dri/i965/intel_tex_subimage.c b/src/mesa/drivers/dri/i965/intel_tex_subimage.c index b0b1a0ff68e..40569e17a33 100644 --- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c +++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c @@ -146,7 +146,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx, intel_batchbuffer_flush(brw); } - void *map = brw_bo_map_cpu(brw, bo, MAP_WRITE); + void *map = brw_bo_map(brw, bo, MAP_WRITE | MAP_RAW); if (map == NULL) { DBG("%s: failed to map bo\n", __func__); return false; diff --git a/src/mesa/drivers/dri/i965/intel_upload.c b/src/mesa/drivers/dri/i965/intel_upload.c index fc5b946fc08..4b5d880a5fe 100644 --- a/src/mesa/drivers/dri/i965/intel_upload.c +++ b/src/mesa/drivers/dri/i965/intel_upload.c @@ -100,10 +100,7 @@ intel_upload_space(struct brw_context *brw, if (!brw->upload.bo) { brw->upload.bo = brw_bo_alloc(brw->bufmgr, "streamed data", MAX2(INTEL_UPLOAD_SIZE, size), 4096); - if (brw->has_llc) - brw->upload.map = brw_bo_map_cpu(brw, brw->upload.bo, MAP_READ | MAP_WRITE); - else - brw->upload.map = brw_bo_map_gtt(brw, brw->upload.bo, MAP_READ | MAP_WRITE); + brw->upload.map = brw_bo_map(brw, brw->upload.bo, MAP_READ | MAP_WRITE); } brw->upload.next_offset = offset + size; -- 2.30.2