#include <string.h>
#include <unistd.h>
#include <assert.h>
-#include <pthread.h>
+#include "c11/threads.h" /* for mtx_t / mtx_* (assumes Mesa's bundled C11 threads shim) */
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
struct brw_bufmgr {
int fd;
- pthread_mutex_t lock;
+ mtx_t lock;
/** Array of lists of cached gem objects of power-of-two sizes */
struct bo_cache_bucket cache_bucket[14 * 4];
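+   /* A sketch of the bucketing, inferred from the 14 * 4 bound rather than
+    * stated here: one bucket per power-of-two size plus three intermediate
+    * sizes between each power of two, i.e. four buckets per power of two.
+    */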
return NULL;
}
-inline void
-brw_bo_reference(struct brw_bo *bo)
-{
- p_atomic_inc(&bo->refcount);
-}
-
int
brw_bo_busy(struct brw_bo *bo)
{
struct bo_cache_bucket *bucket;
bool alloc_from_cache;
uint64_t bo_size;
- bool for_render = false;
+ bool busy = false;
bool zeroed = false;
- if (flags & BO_ALLOC_FOR_RENDER)
- for_render = true;
+ if (flags & BO_ALLOC_BUSY)
+ busy = true;
if (flags & BO_ALLOC_ZEROED)
zeroed = true;
- /* FOR_RENDER really means "I'm ok with a busy BO". This doesn't really
- * jive with ZEROED as we have to wait for it to be idle before we can
- * memset. Just disallow that combination.
+ /* BUSY doesn't really jive with ZEROED as we have to wait for it to
+ * be idle before we can memset. Just disallow that combination.
*/
- assert(!(for_render && zeroed));
+ assert(!(busy && zeroed));
/* Round the allocated size up to a power of two number of pages. */
bucket = bucket_for_size(bufmgr, size);
bo_size = bucket->size;
}
- pthread_mutex_lock(&bufmgr->lock);
+ mtx_lock(&bufmgr->lock);
/* Get a buffer out of the cache if available */
retry:
alloc_from_cache = false;
if (bucket != NULL && !list_empty(&bucket->head)) {
- if (for_render && !zeroed) {
+ if (busy && !zeroed) {
/* Allocate new render-target BOs from the tail (MRU)
* of the list, as it will likely be hot in the GPU
* cache and in the aperture for us. If the caller
}
bo->gem_handle = create.handle;
- _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
bo->bufmgr = bufmgr;
bo->align = alignment;
p_atomic_set(&bo->refcount, 1);
bo->reusable = true;
bo->cache_coherent = bufmgr->has_llc;
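+   /* Presumably the BO's slot in the execbuf validation list; -1 marks it
+    * as not currently on any list.
+    */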
+ bo->index = -1;
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
- DBG("bo_create: buf %d (%s) %ldb\n", bo->gem_handle, bo->name, size);
+ DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle, bo->name,
+ (unsigned long long) size);
return bo;
err_free:
bo_free(bo);
err:
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return NULL;
}
* alternating names for the front/back buffer a linear search
* provides a sufficiently fast match.
*/
- pthread_mutex_lock(&bufmgr->lock);
+ mtx_lock(&bufmgr->lock);
bo = hash_find_bo(bufmgr->name_table, handle);
if (bo) {
brw_bo_reference(bo);
p_atomic_set(&bo->refcount, 1);
bo->size = open_arg.size;
- bo->offset64 = 0;
+ bo->gtt_offset = 0;
bo->bufmgr = bufmgr;
bo->gem_handle = open_arg.handle;
bo->name = name;
DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);
out:
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return bo;
err_unref:
bo_free(bo);
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return NULL;
}
{
struct brw_bufmgr *bufmgr = bo->bufmgr;
struct drm_gem_close close;
- struct hash_entry *entry;
int ret;
if (bo->map_cpu) {
drm_munmap(bo->map_cpu, bo->size);
}
- if (bo->global_name) {
- entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
- _mesa_hash_table_remove(bufmgr->name_table, entry);
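+   /* Only exported (external) BOs are entered into the handle/name tables
+    * now, so ordinary allocations can skip the hash lookups on free.
+    */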
+ if (bo->external) {
+ struct hash_entry *entry;
+
+ if (bo->global_name) {
+ entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
+ _mesa_hash_table_remove(bufmgr->name_table, entry);
+ }
+
+ entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
+ _mesa_hash_table_remove(bufmgr->handle_table, entry);
}
- entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
- _mesa_hash_table_remove(bufmgr->handle_table, entry);
/* Close this object */
memclear(close);
clock_gettime(CLOCK_MONOTONIC, &time);
- pthread_mutex_lock(&bufmgr->lock);
+ mtx_lock(&bufmgr->lock);
if (p_atomic_dec_zero(&bo->refcount)) {
bo_unreference_final(bo, time.tv_sec);
cleanup_bo_cache(bufmgr, time.tv_sec);
}
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
}
}
struct brw_bo *bo,
const char *action)
{
- double elapsed = unlikely(brw && brw->perf_debug) ? -get_time() : 0.0;
+ bool busy = brw && brw->perf_debug && !bo->idle;
+ double elapsed = unlikely(busy) ? -get_time() : 0.0;
brw_bo_wait_rendering(bo);
- if (unlikely(brw && brw->perf_debug)) {
+ if (unlikely(busy)) {
elapsed += get_time();
if (elapsed > 1e-5) /* 0.01ms */
perf_debug("%s a busy \"%s\" BO stalled and took %.03f ms.\n",
bo_wait_with_stall_warning(brw, bo, "CPU mapping");
}
- if (!bo->cache_coherent) {
+ if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
/* If we're reusing an existing CPU mapping, the CPU caches may
* contain stale data from the last time we read from that mapping.
* (With the BO cache, it might even be data from a previous buffer!)
* We need to invalidate those cachelines so that we see the latest
* contents, and so long as we only read from the CPU mmap we do not
* need to write those cachelines back afterwards.
+ *
+ * On LLC, the empirical evidence suggests that writes from the GPU
+ * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
+ * cachelines. (Other reads, such as those by the display engine, bypass
+ * the LLC entirely, requiring us to keep dirty pixels for the scanout
+ * out of any cache.)
*/
gen_invalidate_range(bo->map_cpu, bo->size);
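+      /* gen_invalidate_range() above is assumed to clflush each cacheline
+       * in [map_cpu, map_cpu + size) and fence afterwards (cf. Mesa's
+       * gen_clflush helpers); its definition lives elsewhere.
+       */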
}
if (bo->cache_coherent)
return true;
+ /* Even if the buffer itself is not cache-coherent (such as a scanout), on
+ * an LLC platform reads are always coherent, as they are performed via the
+ * central system agent. It is only writes that need special care, to ensure
+ * they land in main memory rather than sticking in the CPU cache.
+ */
+ if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
+ return true;
+
/* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
* across batch flushes where the kernel will change cache domains of the
* bo, invalidating continued access to the CPU mmap on non-LLC devices.
* We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
*/
if (!map && !(flags & MAP_RAW)) {
- perf_debug("Fallback GTT mapping for %s with access flags %x\n",
- bo->name, flags);
+ if (brw) {
+ perf_debug("Fallback GTT mapping for %s with access flags %x\n",
+ bo->name, flags);
+ }
map = brw_bo_map_gtt(brw, bo, flags);
}
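+   /* For instance, a read-only mapping on an LLC platform can take the CPU
+    * path with no flushing at all (hypothetical caller):
+    *
+    *    void *p = brw_bo_map(brw, bo, MAP_READ);
+    */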
if (ret == -1)
return -errno;
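+   /* A successful wait means the GPU is done with this BO; record that so
+    * later map paths can skip the busy-stall accounting.
+    */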
+ bo->idle = true;
+
return ret;
}
void
brw_bufmgr_destroy(struct brw_bufmgr *bufmgr)
{
- pthread_mutex_destroy(&bufmgr->lock);
+ mtx_destroy(&bufmgr->lock);
/* Free any cached buffer objects we were going to reuse */
for (int i = 0; i < bufmgr->num_buckets; i++) {
struct brw_bo *bo;
struct drm_i915_gem_get_tiling get_tiling;
- pthread_mutex_lock(&bufmgr->lock);
+ mtx_lock(&bufmgr->lock);
ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
if (ret) {
DBG("create_from_prime: failed to obtain handle from fd: %s\n",
strerror(errno));
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return NULL;
}
/* XXX stride is unknown */
out:
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return bo;
err:
bo_free(bo);
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
return NULL;
}
{
struct brw_bufmgr *bufmgr = bo->bufmgr;
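+   /* bo->external only ever transitions false -> true, so the unlocked
+    * check below is a fast path for already-exported BOs; the flag is
+    * re-tested under the lock before the handle is published.
+    */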
+ if (!bo->external) {
+ mtx_lock(&bufmgr->lock);
+ if (!bo->external) {
+ _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
+ bo->external = true;
+ }
+ mtx_unlock(&bufmgr->lock);
+ }
+
if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
DRM_CLOEXEC, prime_fd) != 0)
return -errno;
bo->reusable = false;
- bo->external = true;
return 0;
}
if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
return -errno;
- pthread_mutex_lock(&bufmgr->lock);
+ mtx_lock(&bufmgr->lock);
+ if (!bo->external) {
+ _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
+ bo->external = true;
+ }
if (!bo->global_name) {
bo->global_name = flink.name;
- bo->reusable = false;
- bo->external = true;
-
_mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
}
- pthread_mutex_unlock(&bufmgr->lock);
+ mtx_unlock(&bufmgr->lock);
+
+ bo->reusable = false;
}
*name = bo->global_name;
return create.ctx_id;
}
+int
+brw_hw_context_set_priority(struct brw_bufmgr *bufmgr,
+ uint32_t ctx_id,
+ int priority)
+{
+ struct drm_i915_gem_context_param p = {
+ .ctx_id = ctx_id,
+ .param = I915_CONTEXT_PARAM_PRIORITY,
+ .value = priority,
+ };
+ int err;
+
+ err = 0;
+ if (drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
+ err = -errno;
+
+ return err;
+}
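+/* Hypothetical usage sketch (limits are assumptions about the i915
+ * scheduler, not defined in this file): priorities range roughly from
+ * -1023 to 1023 around a default of 0, and raising a context above the
+ * default typically requires CAP_SYS_NICE.
+ *
+ *    if (brw_hw_context_set_priority(bufmgr, ctx_id, -512))
+ *       fprintf(stderr, "could not lower context priority\n");
+ */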
+
void
brw_destroy_hw_context(struct brw_bufmgr *bufmgr, uint32_t ctx_id)
{
* \param fd File descriptor of the opened DRM device.
*/
struct brw_bufmgr *
-brw_bufmgr_init(struct gen_device_info *devinfo, int fd, int batch_size)
+brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
{
struct brw_bufmgr *bufmgr;
*/
bufmgr->fd = fd;
- if (pthread_mutex_init(&bufmgr->lock, NULL) != 0) {
+ if (mtx_init(&bufmgr->lock, mtx_plain) != 0) {
free(bufmgr);
return NULL;
}
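+   /* Note on the check above: C11 leaves the value of thrd_success
+    * unspecified; comparing mtx_init() against 0 assumes a shim (such as
+    * Mesa's bundled c11/threads.h) where thrd_success is 0. Comparing
+    * against thrd_success would be strictly portable.
+    */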