#include "util/hash_table.h"
#include "util/list.h"
#include "brw_bufmgr.h"
+#include "brw_context.h"
#include "string.h"
#include "i915_drm.h"
}
+/**
+ * Asks the kernel to move \p bo into the given GEM domains by issuing
+ * DRM_IOCTL_I915_GEM_SET_DOMAIN on the buffer manager's fd.
+ *
+ * \param brw           context used for perf_debug reporting.  May be NULL,
+ *                      in which case the call is neither timed nor reported.
+ * \param action        text that begins the perf_debug message, e.g.
+ *                      "CPU mapping".
+ * \param bo            buffer object whose gem_handle is passed to the ioctl.
+ * \param read_domains  I915_GEM_DOMAIN_* bits requested for reading.
+ * \param write_domain  I915_GEM_DOMAIN_* bit requested for writing, or 0.
+ *
+ * An ioctl failure is only logged through DBG; nothing is returned to the
+ * caller.
+ */
static void
-set_domain(struct brw_bo *bo, uint32_t read_domains, uint32_t write_domain)
+set_domain(struct brw_context *brw, const char *action,
+ struct brw_bo *bo, uint32_t read_domains, uint32_t write_domain)
{
struct drm_i915_gem_set_domain sd = {
.handle = bo->gem_handle,
+ /* Forward the caller's read domains; otherwise the request carries
+  * read_domains == 0 while the DBG message below reports the caller's
+  * value.  NOTE(review): the i915 uAPI is understood to reject a write
+  * domain that is not also the read domain -- confirm against the
+  * kernel documentation.
+  */
+ .read_domains = read_domains,
.write_domain = write_domain,
};
+ /* Start at -t0 so that adding the end time below yields the duration.
+  * Timing is skipped entirely unless perf debugging is enabled.
+  */
+ double elapsed = unlikely(brw && brw->perf_debug) ? -get_time() : 0.0;
+
if (drmIoctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) {
DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s.\n",
__FILE__, __LINE__, bo->gem_handle, read_domains, write_domain,
strerror(errno));
}
+
+ if (unlikely(brw && brw->perf_debug)) {
+ elapsed += get_time();
+ /* Only report calls that took long enough to count as a stall. */
+ if (elapsed > 1e-5) /* 0.01ms */
+ perf_debug("%s a busy \"%s\" BO stalled and took %.03f ms.\n",
+ action, bo->name, elapsed * 1000);
+ }
}
int
-brw_bo_map(struct brw_bo *bo, int write_enable)
+brw_bo_map(struct brw_context *brw, struct brw_bo *bo, int write_enable)
{
struct brw_bufmgr *bufmgr = bo->bufmgr;
int ret;
DBG("bo_map: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->mem_virtual);
bo->virtual = bo->mem_virtual;
- set_domain(bo, I915_GEM_DOMAIN_CPU,
+ set_domain(brw, "CPU mapping", bo, I915_GEM_DOMAIN_CPU,
write_enable ? I915_GEM_DOMAIN_CPU : 0);
bo_mark_mmaps_incoherent(bo);
}
int
-brw_bo_map_gtt(struct brw_bo *bo)
+brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo)
{
struct brw_bufmgr *bufmgr = bo->bufmgr;
int ret;
* tell it when we're about to use things if we had done
* rendering and it still happens to be bound to the GTT.
*/
- set_domain(bo, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ set_domain(brw, "GTT mapping", bo,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
bo_mark_mmaps_incoherent(bo);
VG(VALGRIND_MAKE_MEM_DEFINED(bo->gtt_virtual, bo->size));
*/
int
-brw_bo_map_unsynchronized(struct brw_bo *bo)
+brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo)
{
struct brw_bufmgr *bufmgr = bo->bufmgr;
int ret;
* does reasonable things.
*/
if (!bufmgr->has_llc)
- return brw_bo_map_gtt(bo);
+ return brw_bo_map_gtt(brw, bo);
pthread_mutex_lock(&bufmgr->lock);
/** Waits for all GPU rendering with the object to have completed. */
void
-brw_bo_wait_rendering(struct brw_bo *bo)
+brw_bo_wait_rendering(struct brw_context *brw, struct brw_bo *bo)
{
- set_domain(bo, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ set_domain(brw, "waiting for", /* label that begins set_domain's perf_debug message */
+ bo, I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT); /* a NULL brw makes set_domain skip timing and reporting */
}
/**
#endif
struct gen_device_info;
+struct brw_context;
struct brw_bo {
/**
* buffer to complete, first. The resulting mapping is available at
* buf->virtual.
*/
-int brw_bo_map(struct brw_bo *bo, int write_enable);
+int brw_bo_map(struct brw_context *brw, struct brw_bo *bo, int write_enable);
/**
* Reduces the refcount on the userspace mapping of the buffer
* bo_subdata, etc. It is merely a way for the driver to implement
* glFinish.
*/
-void brw_bo_wait_rendering(struct brw_bo *bo);
+void brw_bo_wait_rendering(struct brw_context *brw, struct brw_bo *bo);
/**
* Tears down the buffer manager instance.
const char *name,
unsigned int handle);
void brw_bufmgr_enable_reuse(struct brw_bufmgr *bufmgr);
-int brw_bo_map_unsynchronized(struct brw_bo *bo);
-int brw_bo_map_gtt(struct brw_bo *bo);
+int brw_bo_map_unsynchronized(struct brw_context *brw, struct brw_bo *bo);
+int brw_bo_map_gtt(struct brw_context *brw, struct brw_bo *bo);
void *brw_bo_map__cpu(struct brw_bo *bo);
void *brw_bo_map__gtt(struct brw_bo *bo);
intel_glFlush(ctx);
if (brw->batch.last_bo)
- brw_bo_wait_rendering(brw->batch.last_bo);
+ brw_bo_wait_rendering(brw, brw->batch.last_bo);
}
static void
if (!read_oa_samples(brw))
goto error;
- brw_bo_map(obj->oa.bo, false);
+ brw_bo_map(brw, obj->oa.bo, false);
query_buffer = obj->oa.bo->virtual;
start = last = query_buffer;
MI_RPC_BO_SIZE, 64);
#ifdef DEBUG
/* Pre-filling the BO helps debug whether writes landed. */
- brw_bo_map(obj->oa.bo, true);
+ brw_bo_map(brw, obj->oa.bo, true);
memset((char *) obj->oa.bo->virtual, 0x80, MI_RPC_BO_SIZE);
brw_bo_unmap(obj->oa.bo);
#endif
if (brw_batch_references(&brw->batch, bo))
intel_batchbuffer_flush(brw);
- if (unlikely(brw->perf_debug)) {
- if (brw_bo_busy(bo))
- perf_debug("Stalling GPU waiting for a performance query object.\n");
- }
-
- brw_bo_wait_rendering(bo);
+ brw_bo_wait_rendering(brw, bo);
}
static bool
int n_counters = obj->query->n_counters;
uint8_t *p = data;
- brw_bo_map(obj->pipeline_stats.bo, false);
+ brw_bo_map(brw, obj->pipeline_stats.bo, false);
uint64_t *start = obj->pipeline_stats.bo->virtual;
uint64_t *end = start + (STATS_BO_END_OFFSET_BYTES / sizeof(uint64_t));
* delaying reading the reports, but it doesn't look like it's a big
* overhead compared to the cost of tracking the time in the first place.
*/
- brw_bo_map(brw->shader_time.bo, true);
+ brw_bo_map(brw, brw->shader_time.bo, true);
void *bo_map = brw->shader_time.bo->virtual;
for (int i = 0; i < brw->shader_time.num_entries; i++) {
new_bo = brw_bo_alloc(brw->bufmgr, "program cache", new_size, 64);
if (brw->has_llc)
- brw_bo_map_unsynchronized(new_bo);
+ brw_bo_map_unsynchronized(brw, new_bo);
/* Copy any existing data that needs to be saved. */
if (cache->next_offset != 0) {
if (brw->has_llc) {
memcpy(new_bo->virtual, cache->bo->virtual, cache->next_offset);
} else {
- brw_bo_map(cache->bo, false);
+ brw_bo_map(brw, cache->bo, false);
brw_bo_subdata(new_bo, 0, cache->next_offset,
cache->bo->virtual);
brw_bo_unmap(cache->bo);
enum brw_cache_id cache_id,
const void *data, unsigned data_size)
{
- const struct brw_context *brw = cache->brw;
+ struct brw_context *brw = cache->brw;
unsigned i;
const struct brw_cache_item *item;
continue;
if (!brw->has_llc)
- brw_bo_map(cache->bo, false);
+ brw_bo_map(brw, cache->bo, false);
ret = memcmp(cache->bo->virtual + item->offset, data, item->size);
if (!brw->has_llc)
brw_bo_unmap(cache->bo);
cache->bo = brw_bo_alloc(brw->bufmgr, "program cache", 4096, 64);
if (brw->has_llc)
- brw_bo_map_unsynchronized(cache->bo);
+ brw_bo_map_unsynchronized(brw, cache->bo);
}
static void
struct brw_cache_item *item;
if (!brw->has_llc)
- brw_bo_map(cache->bo, false);
+ brw_bo_map(brw, cache->bo, false);
for (unsigned i = 0; i < cache->size; i++) {
for (item = cache->items[i]; item; item = item->next) {
}
}
- brw_bo_map(query->bo, false);
+ brw_bo_map(brw, query->bo, false);
results = query->bo->virtual;
switch (query->Base.Target) {
case GL_TIME_ELAPSED_EXT:
if (query->bo == NULL)
return;
- brw_bo_map(query->bo, false);
+ brw_bo_map(brw, query->bo, false);
uint64_t *results = query->bo->virtual;
switch (query->Base.Target) {
case GL_TIME_ELAPSED:
if (unlikely(brw->perf_debug && brw_bo_busy(obj->prim_count_bo)))
perf_debug("Stalling for # of transform feedback primitives written.\n");
- brw_bo_map(obj->prim_count_bo, false);
+ brw_bo_map(brw, obj->prim_count_bo, false);
uint64_t *prim_counts = obj->prim_count_bo->virtual;
assert(obj->prim_count_buffer_index % (2 * streams) == 0);
batch->bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096);
if (has_llc) {
- brw_bo_map(batch->bo, true);
+ brw_bo_map(NULL, batch->bo, true);
batch->map = batch->bo->virtual;
}
batch->map_next = batch->map;
if (batch->ring != RENDER_RING)
return;
- int ret = brw_bo_map(batch->bo, false);
+ int ret = brw_bo_map(brw, batch->bo, false);
if (ret != 0) {
fprintf(stderr,
"WARNING: failed to map batchbuffer (%s), "
*/
if (brw->need_swap_throttle && brw->throttle_batch[0]) {
if (brw->throttle_batch[1]) {
- if (!brw->disable_throttling)
- brw_bo_wait_rendering(brw->throttle_batch[1]);
+ if (!brw->disable_throttling) {
+ /* Pass NULL rather than brw so we avoid perf_debug warnings;
+ * stalling is common and expected here...
+ */
+ brw_bo_wait_rendering(NULL, brw->throttle_batch[1]);
+ }
brw_bo_unreference(brw->throttle_batch[1]);
}
brw->throttle_batch[1] = brw->throttle_batch[0];
if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
fprintf(stderr, "waiting for idle\n");
- brw_bo_wait_rendering(brw->batch.bo);
+ brw_bo_wait_rendering(brw, brw->batch.bo);
}
/* Start a new batch buffer. */
if (offset + size <= intel_obj->gpu_active_start ||
intel_obj->gpu_active_end <= offset) {
if (brw->has_llc) {
- brw_bo_map_unsynchronized(intel_obj->buffer);
+ brw_bo_map_unsynchronized(brw, intel_obj->buffer);
memcpy(intel_obj->buffer->virtual + offset, data, size);
brw_bo_unmap(intel_obj->buffer);
intel_obj->map_extra[index],
alignment);
if (brw->has_llc) {
- brw_bo_map(intel_obj->range_map_bo[index],
- (access & GL_MAP_WRITE_BIT) != 0);
+ brw_bo_map(brw, intel_obj->range_map_bo[index],
+ (access & GL_MAP_WRITE_BIT) != 0);
} else {
- brw_bo_map_gtt(intel_obj->range_map_bo[index]);
+ brw_bo_map_gtt(brw, intel_obj->range_map_bo[index]);
}
obj->Mappings[index].Pointer =
intel_obj->range_map_bo[index]->virtual + intel_obj->map_extra[index];
brw_bo_busy(intel_obj->buffer)) {
perf_debug("MapBufferRange with GL_MAP_UNSYNCHRONIZED_BIT stalling (it's actually synchronized on non-LLC platforms)\n");
}
- brw_bo_map_unsynchronized(intel_obj->buffer);
+ brw_bo_map_unsynchronized(brw, intel_obj->buffer);
} else if (!brw->has_llc && (!(access & GL_MAP_READ_BIT) ||
(access & GL_MAP_PERSISTENT_BIT))) {
- brw_bo_map_gtt(intel_obj->buffer);
+ brw_bo_map_gtt(brw, intel_obj->buffer);
mark_buffer_inactive(intel_obj);
} else {
- brw_bo_map(intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
+ brw_bo_map(brw, intel_obj->buffer, (access & GL_MAP_WRITE_BIT) != 0);
mark_buffer_inactive(intel_obj);
}
*
* Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
*/
- const int ret = brw_bo_map_gtt(mt->mcs_buf->bo);
+ const int ret = brw_bo_map_gtt(brw, mt->mcs_buf->bo);
if (unlikely(ret)) {
fprintf(stderr, "Failed to map mcs buffer into GTT\n");
brw_bo_unreference(mt->mcs_buf->bo);
* long as cache consistency is maintained).
*/
if (mt->tiling != I915_TILING_NONE || mt->is_scanout)
- brw_bo_map_gtt(bo);
+ brw_bo_map_gtt(brw, bo);
else
- brw_bo_map(bo, true);
+ brw_bo_map(brw, bo, true);
return bo->virtual;
}
intel_batchbuffer_flush(brw);
}
- error = brw_bo_map(bo, false /* write enable */);
+ error = brw_bo_map(brw, bo, false /* write enable */);
if (error) {
DBG("%s: failed to map bo\n", __func__);
return false;
if (bo == NULL)
goto err_results;
- if (brw_bo_map(bo, 1))
+ if (brw_bo_map(NULL, bo, 1))
goto err_batch;
batch = bo->virtual;
drmIoctl(dri_screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
/* Check whether the value got written. */
- if (brw_bo_map(results, false) == 0) {
+ if (brw_bo_map(NULL, results, false) == 0) {
success = *((uint32_t *)results->virtual + offset) == expected_value;
brw_bo_unmap(results);
}
intel_batchbuffer_flush(brw);
}
- error = brw_bo_map(bo, false /* write enable */);
+ error = brw_bo_map(brw, bo, false /* write enable */);
if (error) {
DBG("%s: failed to map bo\n", __func__);
return false;
intel_batchbuffer_flush(brw);
}
- error = brw_bo_map(bo, true /* write enable */);
+ error = brw_bo_map(brw, bo, true /* write enable */);
if (error || bo->virtual == NULL) {
DBG("%s: failed to map bo\n", __func__);
return false;
brw->upload.bo = brw_bo_alloc(brw->bufmgr, "streamed data",
MAX2(INTEL_UPLOAD_SIZE, size), 4096);
if (brw->has_llc)
- brw_bo_map(brw->upload.bo, true);
+ brw_bo_map(brw, brw->upload.bo, true);
else
- brw_bo_map_gtt(brw->upload.bo);
+ brw_bo_map_gtt(brw, brw->upload.bo);
}
brw->upload.next_offset = offset + size;