+ struct amdgpu_cs_context *cs = acs->csc;
+ struct amdgpu_cs_buffer *buffer;
+ unsigned hash;
+ int idx = amdgpu_lookup_buffer(cs, bo);
+ int real_idx;
+
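+ /* Nothing to do if the buffer is already on the buffer list. */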
+ if (idx >= 0)
+ return idx;
+
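+ /* This is a slab sub-allocation; the backing real buffer has to be on
+ * the real buffer list as well.
+ */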
+ real_idx = amdgpu_lookup_or_add_real_buffer(acs, bo->u.slab.real);
+ if (real_idx < 0)
+ return -1;
+
+ /* New buffer, check if the backing array is large enough. */
+ if (cs->num_slab_buffers >= cs->max_slab_buffers) {
+ unsigned new_max =
+ MAX2(cs->max_slab_buffers + 16, (unsigned)(cs->max_slab_buffers * 1.3));
+ struct amdgpu_cs_buffer *new_buffers;
+
+ new_buffers = REALLOC(cs->slab_buffers,
+ cs->max_slab_buffers * sizeof(*new_buffers),
+ new_max * sizeof(*new_buffers));
+ if (!new_buffers) {
+ fprintf(stderr, "amdgpu_lookup_or_add_slab_buffer: allocation failed\n");
+ return -1;
+ }
+
+ cs->max_slab_buffers = new_max;
+ cs->slab_buffers = new_buffers;
+ }
+
+ idx = cs->num_slab_buffers;
+ buffer = &cs->slab_buffers[idx];
+
+ memset(buffer, 0, sizeof(*buffer));
+ amdgpu_winsys_bo_reference(&buffer->bo, bo);
+ buffer->u.slab.real_idx = real_idx;
+ p_atomic_inc(&bo->num_cs_references);
+ cs->num_slab_buffers++;
+
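+ /* Cache the index in the hash list to speed up the next
+ * amdgpu_lookup_buffer call for this buffer.
+ */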
+ hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist) - 1);
+ cs->buffer_indices_hashlist[hash] = idx;
+
+ return idx;
+}
+
+static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
+ struct pb_buffer *buf,
+ enum radeon_bo_usage usage,
+ enum radeon_bo_domain domains,
+ enum radeon_bo_priority priority)
+{
+ /* Don't use the "domains" parameter. Amdgpu doesn't support changing
+ * the buffer placement during command submission.
+ */
+ struct amdgpu_cs *acs = amdgpu_cs(rcs);
+ struct amdgpu_cs_context *cs = acs->csc;
+ struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
+ struct amdgpu_cs_buffer *buffer;
+ int index;
+
+ /* Fast exit for no-op calls.
+ * This is very effective with suballocators and linear uploaders that
+ * are outside of the winsys.
+ */
+ if (bo == cs->last_added_bo &&
+ (usage & cs->last_added_bo_usage) == usage &&
+ (1ull << priority) & cs->last_added_bo_priority_usage)
+ return cs->last_added_bo_index;
+
+ if (!bo->bo) {
+ index = amdgpu_lookup_or_add_slab_buffer(acs, bo);
+ if (index < 0)
+ return 0;
+
+ buffer = &cs->slab_buffers[index];
+ buffer->usage |= usage;
+
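+ /* Synchronization is tracked per slab entry. Drop the SYNCHRONIZED
+ * flag for the backing real buffer, so that unrelated entries in the
+ * same slab don't cause unnecessary waits.
+ */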
+ usage &= ~RADEON_USAGE_SYNCHRONIZED;
+ index = buffer->u.slab.real_idx;
+ } else {
+ index = amdgpu_lookup_or_add_real_buffer(acs, bo);
+ if (index < 0)
+ return 0;
+ }
+
+ buffer = &cs->real_buffers[index];
+ buffer->u.real.priority_usage |= 1ull << priority;
+ buffer->usage |= usage;
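+ /* Keep the highest priority seen for this buffer. The division maps
+ * the winsys priority range onto the smaller range used by the kernel
+ * buffer list (assumed rationale for priority / 4).
+ */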
+ cs->flags[index] = MAX2(cs->flags[index], priority / 4);
+
+ cs->last_added_bo = bo;
+ cs->last_added_bo_index = index;
+ cs->last_added_bo_usage = buffer->usage;
+ cs->last_added_bo_priority_usage = buffer->u.real.priority_usage;
+ return index;
+}
+
+static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
+{
+ struct pb_buffer *pb;
+ uint8_t *mapped;
+ unsigned buffer_size;
+
+ /* Always create a buffer that is at least as large as the maximum seen IB
+ * size, aligned to a power of two (and multiplied by 4 to reduce internal
+ * fragmentation if chaining is not available). Limit to 512k dwords, which
+ * is the largest power of two that fits into the size field of the
+ * INDIRECT_BUFFER packet.
+ */
+ if (amdgpu_cs_has_chaining(amdgpu_cs_from_ib(ib)))
+ buffer_size = 4 * util_next_power_of_two(ib->max_ib_size);
+ else
+ buffer_size = 4 * util_next_power_of_two(4 * ib->max_ib_size);
+
+ buffer_size = MIN2(buffer_size, 4 * 512 * 1024);
+
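+ /* Enforce a minimum size per IB type, presumably so that the buffer
+ * isn't reallocated over and over while the IBs are still small.
+ */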
+ switch (ib->ib_type) {
+ case IB_CONST_PREAMBLE:
+ buffer_size = MAX2(buffer_size, 4 * 1024);
+ break;
+ case IB_CONST:
+ buffer_size = MAX2(buffer_size, 16 * 1024 * 4);
+ break;
+ case IB_MAIN:
+ buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
+ break;
+ default:
+ unreachable("unhandled IB type");
+ }
+
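+ /* The IB is written by the CPU and read by the GPU, so use GTT with
+ * CPU access.
+ */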
+ pb = ws->base.buffer_create(&ws->base, buffer_size,
+ ws->info.gart_page_size,
+ RADEON_DOMAIN_GTT,
+ RADEON_FLAG_CPU_ACCESS);
+ if (!pb)
+ return false;
+
+ mapped = ws->base.buffer_map(pb, NULL, PIPE_TRANSFER_WRITE);
+ if (!mapped) {
+ pb_reference(&pb, NULL);
+ return false;
+ }
+
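+ /* Hand the reference from buffer_create over to big_ib_buffer: first
+ * take a new reference, then drop the local one.
+ */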
+ pb_reference(&ib->big_ib_buffer, pb);
+ pb_reference(&pb, NULL);
+
+ ib->ib_mapped = mapped;
+ ib->used_ib_space = 0;
+
+ return true;
+}
+
+static unsigned amdgpu_ib_max_submit_dwords(enum ib_type ib_type)
+{
+ switch (ib_type) {
+ case IB_MAIN:
+ /* Smaller submits mean the GPU gets busy sooner and there is less
+ * waiting for buffers and fences. Proof:
+ * http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
+ */
+ return 20 * 1024;
+ case IB_CONST_PREAMBLE:
+ case IB_CONST:
+ /* There isn't really any reason to limit CE IB size beyond the natural
+ * limit implied by the main IB, except perhaps GTT size. Just return
+ * an extremely large value that we never get anywhere close to.
+ */
+ return 16 * 1024 * 1024;
+ default:
+ unreachable("bad ib_type");
+ }
+}
+
+static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
+ enum ib_type ib_type)
+{
+ struct amdgpu_winsys *aws = (struct amdgpu_winsys*)ws;