needs_indices = false;
}
+ uint64_t ht_key = 0;
+
if (!info->has_user_indices) {
/* Only resources can be directly mapped */
panfrost_batch_add_bo(batch, rsrc->bo,
PAN_BO_ACCESS_READ |
PAN_BO_ACCESS_VERTEX_TILER);
out = rsrc->bo->gpu + offset;
+
+ /* Check the cache */
+ if (rsrc->index_cache) {
+ ht_key = (((uint64_t) info->count) << 32) | info->start;
+
+ struct panfrost_minmax_cache *cache = rsrc->index_cache;
+
+ for (unsigned i = 0; i < cache->size; ++i) {
+ if (cache->keys[i] == ht_key) {
+ uint64_t hit = cache->values[i];
+
+ *min_index = hit & 0xffffffff;
+ *max_index = hit >> 32;
+ needs_indices = false;
+ break;
+ }
+ }
+ }
} else {
/* Otherwise, we need to upload to transient memory */
const uint8_t *ibuf8 = (const uint8_t *) info->index.user;
if (needs_indices) {
/* Fallback */
u_vbuf_get_minmax_index(&ctx->base, info, min_index, max_index);
+
+ if (!info->has_user_indices && rsrc->index_cache) {
+ struct panfrost_minmax_cache *cache = rsrc->index_cache;
+ uint64_t value = (*min_index) | (((uint64_t) *max_index) << 32);
+ unsigned index = 0;
+
+ if (cache->size == PANFROST_MINMAX_SIZE) {
+ index = cache->index++;
+ cache->index = cache->index % PANFROST_MINMAX_SIZE;
+ } else {
+ index = cache->size++;
+ }
+
+ cache->keys[index] = ht_key;
+ cache->values[index] = value;
+ }
}
+
return out;
}
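
For reference, a minimal standalone sketch (not part of the patch; the pack_key/pack_value helpers are hypothetical) of the 32/32-bit packing used above: the draw range goes into the cache key with count in the high word and start in the low word, while the result goes into the value with max in the high word and min in the low word.

#include <stdint.h>
#include <stdio.h>

/* Illustrative only, not driver API: mirrors ht_key above,
 * count in the upper 32 bits, start in the lower 32 bits. */
static uint64_t
pack_key(uint32_t start, uint32_t count)
{
        return (((uint64_t) count) << 32) | start;
}

/* Illustrative only: mirrors the cached value, max in the upper
 * 32 bits, min in the lower 32 bits. */
static uint64_t
pack_value(uint32_t min_index, uint32_t max_index)
{
        return (((uint64_t) max_index) << 32) | min_index;
}

int
main(void)
{
        uint64_t key = pack_key(16, 300);   /* draw of 300 indices at offset 16 */
        uint64_t value = pack_value(2, 257);

        printf("start=%u count=%u min=%u max=%u\n",
               (unsigned) (key & 0xffffffff), (unsigned) (key >> 32),
               (unsigned) (value & 0xffffffff), (unsigned) (value >> 32));
        return 0;
}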
panfrost_resource_create_bo(pscreen, so);
panfrost_resource_reset_damage(so);
+ if (template->bind & PIPE_BIND_INDEX_BUFFER)
+ so->index_cache = rzalloc(so, struct panfrost_minmax_cache);
+
return (struct pipe_resource *)so;
}
ralloc_free(rsrc);
}
+/* If we've been caching min/max indices and we update the index
+ * buffer, that may invalidate the min/max. Check what's been cached vs
+ * what we've written, and throw out invalid entries. */
+
+static void
+panfrost_invalidate_index_cache(struct panfrost_resource *rsrc, struct pipe_transfer *transfer)
+{
+ struct panfrost_minmax_cache *cache = rsrc->index_cache;
+
+ /* Ensure there is a cache to invalidate and a write */
+ if (!rsrc->index_cache) return;
+ if (!(transfer->usage & PIPE_TRANSFER_WRITE)) return;
+
+ unsigned valid_count = 0;
+
+ for (unsigned i = 0; i < cache->size; ++i) {
+ uint64_t key = cache->keys[i];
+
+ uint32_t start = key & 0xffffffff;
+ uint32_t count = key >> 32;
+
+ /* 1D range intersection */
+ bool invalid = MAX2(transfer->box.x, start) < MIN2(transfer->box.x + transfer->box.width, start + count);
+ if (!invalid) {
+ cache->keys[valid_count] = key;
+ cache->values[valid_count] = cache->values[i];
+ valid_count++;
+ }
+ }
+
+ cache->size = valid_count;
+ cache->index = 0;
+}
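
As an aside, a minimal standalone sketch (not part of the patch; the ranges_intersect helper is hypothetical) of the 1D overlap test used for invalidation above: two half-open ranges intersect exactly when the larger start lies below the smaller end.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Half-open ranges [a_start, a_end) and [b_start, b_end) overlap
 * iff max(a_start, b_start) < min(a_end, b_end). */
static bool
ranges_intersect(uint32_t a_start, uint32_t a_end,
                 uint32_t b_start, uint32_t b_end)
{
        uint32_t lo = a_start > b_start ? a_start : b_start;
        uint32_t hi = a_end < b_end ? a_end : b_end;
        return lo < hi;
}

int
main(void)
{
        /* A write covering [10, 20) invalidates a cached entry with
         * (start=15, count=10), but not one with (start=20, count=5). */
        assert(ranges_intersect(10, 20, 15, 25));
        assert(!ranges_intersect(10, 20, 20, 25));
        return 0;
}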
+
static void *
panfrost_transfer_map(struct pipe_context *pctx,
struct pipe_resource *resource,
return transfer->map;
} else {
+ /* Direct, persistent mappings let the CPU write to the
+ * index buffer at arbitrary times without us noticing, so
+ * cached min/max values could silently go stale. It's not
+ * clear this combination can actually occur, but refuse the
+ * mapping to stay correct if it does. */
+
+ unsigned dpw = PIPE_TRANSFER_MAP_DIRECTLY | PIPE_TRANSFER_WRITE | PIPE_TRANSFER_PERSISTENT;
+
+ if ((usage & dpw) == dpw && rsrc->index_cache)
+ return NULL;
+
transfer->base.stride = rsrc->slices[level].stride;
transfer->base.layer_stride = panfrost_get_layer_stride(
rsrc->slices, rsrc->base.target == PIPE_TEXTURE_3D,
/* By mapping direct-write, we're implicitly already
* initialized (maybe), so be conservative */
- if ((usage & PIPE_TRANSFER_WRITE) && (usage & PIPE_TRANSFER_MAP_DIRECTLY))
+ if ((usage & PIPE_TRANSFER_WRITE) && (usage & PIPE_TRANSFER_MAP_DIRECTLY)) {
rsrc->slices[level].initialized = true;
+ panfrost_invalidate_index_cache(rsrc, &transfer->base);
+ }
return bo->cpu
+ rsrc->slices[level].offset
transfer->box.x,
transfer->box.x + transfer->box.width);
+ panfrost_invalidate_index_cache(prsrc, transfer);
+
/* Dereference the resource */
pipe_resource_reference(&transfer->resource, NULL);
#include "drm-uapi/drm.h"
#include "util/u_range.h"
+/* Index buffer min/max cache. We need to calculate the min/max for arbitrary
+ * slices (start, start + count) of the index buffer at draw time. As this can
+ * be quite expensive, we cache the results. Conceptually, we just use a hash
+ * table mapping the key (start, count) to the value (min, max). In practice,
+ * Mesa's hash table implementation has higher overhead than we would like and
+ * makes handling memory usage a little complicated. So we use this data
+ * structure instead. Searching is O(n) in the size, but the size is capped at
+ * the PANFROST_MINMAX_SIZE constant (so this is a tradeoff between cache
+ * hit/miss ratio and cache search speed). Note that keys are adjacent, so we
+ * get cache line alignment benefits. Insertion is O(1) and in-order until the
+ * cache fills up; after that, the oldest cached value is evicted in a ring
+ * tracked by index.
+ */
+
+#define PANFROST_MINMAX_SIZE 64
+
+struct panfrost_minmax_cache {
+ uint64_t keys[PANFROST_MINMAX_SIZE];
+ uint64_t values[PANFROST_MINMAX_SIZE];
+ unsigned size;
+ unsigned index;
+};
+
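
To make the comment above concrete, a minimal standalone sketch (not part of the patch; the sketch_* names and the tiny capacity are hypothetical) of the fill-then-ring-evict insertion and the linear lookup it describes:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_MINMAX_SIZE 4 /* tiny capacity, for illustration only */

struct sketch_minmax_cache {
        uint64_t keys[SKETCH_MINMAX_SIZE];
        uint64_t values[SKETCH_MINMAX_SIZE];
        unsigned size;
        unsigned index;
};

/* Append while there is room; once full, overwrite the oldest slot,
 * advancing a wrapping write cursor (the "ring"). */
static void
sketch_insert(struct sketch_minmax_cache *cache, uint64_t key, uint64_t value)
{
        unsigned slot;

        if (cache->size == SKETCH_MINMAX_SIZE) {
                slot = cache->index++;
                cache->index %= SKETCH_MINMAX_SIZE;
        } else {
                slot = cache->size++;
        }

        cache->keys[slot] = key;
        cache->values[slot] = value;
}

/* Linear scan over at most SKETCH_MINMAX_SIZE keys. */
static bool
sketch_lookup(const struct sketch_minmax_cache *cache, uint64_t key,
              uint64_t *value)
{
        for (unsigned i = 0; i < cache->size; ++i) {
                if (cache->keys[i] == key) {
                        *value = cache->values[i];
                        return true;
                }
        }

        return false;
}

int
main(void)
{
        struct sketch_minmax_cache cache = { 0 };
        uint64_t value;

        /* Fill past capacity; key 0 is the oldest and gets evicted first. */
        for (uint64_t key = 0; key < SKETCH_MINMAX_SIZE + 1; ++key)
                sketch_insert(&cache, key, key * 100);

        printf("key 0 cached: %d\n", sketch_lookup(&cache, 0, &value)); /* 0 */
        printf("key 4 cached: %d\n", sketch_lookup(&cache, 4, &value)); /* 1 */
        return 0;
}

Keeping the keys in one plain array means a miss touches at most PANFROST_MINMAX_SIZE contiguous 64-bit words, which is the cache-line friendliness the comment alludes to.
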
struct panfrost_resource {
struct pipe_resource base;
struct {
bool checksummed;
enum pipe_format internal_format;
+
+ /* Cached min/max values for index buffers */
+ struct panfrost_minmax_cache *index_cache;
};
static inline struct panfrost_resource *