brw_dispatch_compute_common(struct gl_context *ctx)
{
struct brw_context *brw = brw_context(ctx);
- int estimated_buffer_space_needed;
bool fail_next = false;
if (!_mesa_check_conditional_render(ctx))
   return;
brw_predraw_resolve_inputs(brw);
- const int sampler_state_size = 16; /* 16 bytes */
- estimated_buffer_space_needed = 512; /* batchbuffer commands */
- estimated_buffer_space_needed += (BRW_MAX_TEX_UNIT *
- (sampler_state_size +
- sizeof(struct gen5_sampler_default_color)));
- estimated_buffer_space_needed += 1024; /* push constants */
- estimated_buffer_space_needed += 512; /* misc. pad */
-
- /* Flush the batch if it's approaching full, so that we don't wrap while
- * we've got validated state that needs to be in the same batch as the
- * primitives.
+ /* Flush the batch if the batch/state buffers are nearly full. We can
+ * grow them if needed, but this is not free, so we'd like to avoid it.
*/
- intel_batchbuffer_require_space(brw, estimated_buffer_space_needed,
- RENDER_RING);
+ intel_batchbuffer_require_space(brw, 600, RENDER_RING);
+ brw_require_statebuffer_space(brw, 2500);
intel_batchbuffer_save_state(brw);
retry:
brw->ctx.NewDriverState |= BRW_NEW_VERTICES;
for (i = 0; i < nr_prims; i++) {
- int estimated_max_prim_size;
- const int sampler_state_size = 16;
-
- estimated_max_prim_size = 512; /* batchbuffer commands */
- estimated_max_prim_size += BRW_MAX_TEX_UNIT *
- (sampler_state_size + sizeof(struct gen5_sampler_default_color));
- estimated_max_prim_size += 1024; /* gen6 VS push constants */
- estimated_max_prim_size += 1024; /* gen6 WM push constants */
- estimated_max_prim_size += 512; /* misc. pad */
-
/* Flag BRW_NEW_DRAW_CALL on every draw. This allows us to have
* atoms that happen on every draw call.
*/
brw->ctx.NewDriverState |= BRW_NEW_DRAW_CALL;
- /* Flush the batch if it's approaching full, so that we don't wrap while
- * we've got validated state that needs to be in the same batch as the
- * primitives.
+ /* Flush the batch if the batch/state buffers are nearly full. We can
+ * grow them if needed, but this is not free, so we'd like to avoid it.
*/
- intel_batchbuffer_require_space(brw, estimated_max_prim_size, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 1500, RENDER_RING);
+ brw_require_statebuffer_space(brw, 2400);
intel_batchbuffer_save_state(brw);
if (brw->num_instances != prims[i].num_instances ||
void brw_print_program_cache(struct brw_context *brw);
/* intel_batchbuffer.c */
+void brw_require_statebuffer_space(struct brw_context *brw, int size);
void *brw_state_batch(struct brw_context *brw,
int size, int alignment, uint32_t *out_offset);
uint32_t brw_state_batch_size(struct brw_context *brw, uint32_t offset);
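The new prototype is the state-side counterpart of intel_batchbuffer_require_space(): paths that emit validated state under brw->no_batch_wrap are expected to budget both buffers up front so neither wraps mid-draw. A minimal sketch of the intended call pattern, using placeholder byte budgets rather than values from this patch:

   intel_batchbuffer_require_space(brw, 1024 /* command estimate */, RENDER_RING);
   brw_require_statebuffer_space(brw, 2048 /* indirect state estimate */);
   intel_batchbuffer_save_state(brw);
   /* ... emit state and primitives with brw->no_batch_wrap set ... */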
assert(batch->blorp->driver_ctx == batch->driver_batch);
struct brw_context *brw = batch->driver_batch;
struct gl_context *ctx = &brw->ctx;
- const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1920 : 1700;
bool check_aperture_failed_once = false;
/* Flush the sampler and render caches. We definitely need to flush the
brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
retry:
- intel_batchbuffer_require_space(brw, estimated_max_batch_usage, RENDER_RING);
+ intel_batchbuffer_require_space(brw, 1400, RENDER_RING);
+ brw_require_statebuffer_space(brw, 600);
intel_batchbuffer_save_state(brw);
brw->no_batch_wrap = true;
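For reference, the old single estimate here was 1920 bytes on gen8+ and 1700 bytes otherwise, charged against the shared budget; it becomes 1400 bytes of batch commands plus 600 bytes of indirect state, each checked against its own buffer. The save/retry bracket around the emission is unchanged; a condensed sketch of it, with the aperture check reduced to a placeholder predicate:

retry:
   intel_batchbuffer_require_space(brw, 1400, RENDER_RING);
   brw_require_statebuffer_space(brw, 600);
   intel_batchbuffer_save_state(brw);
   brw->no_batch_wrap = true;

   /* ... emit the blorp operation ... */

   brw->no_batch_wrap = false;

   if (batch_would_overflow_aperture /* placeholder condition */ &&
       !check_aperture_failed_once) {
      check_aperture_failed_once = true;
      intel_batchbuffer_reset_to_saved(brw);   /* discard the partial emission */
      intel_batchbuffer_flush(brw);            /* start a fresh batch */
      goto retry;
   }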
* should flush. Each time we flush the batch, we recreate both buffers
* at the original target size, so it doesn't grow without bound.
*/
-#define BATCH_SZ (8192*sizeof(uint32_t))
-#define STATE_SZ (8192*sizeof(uint32_t))
+#define BATCH_SZ (20 * 1024)
+#define STATE_SZ (16 * 1024)
/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)
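For scale: the old definitions both worked out to 8192 * sizeof(uint32_t) = 32768 bytes, and the flushing checks still treated the two buffers as sharing a single 32 kB budget (see the BATCH_SZ - batch->state_used and STATE_SZ - batch_space tests removed below). The new targets are 20 kB of commands plus 16 kB of indirect state, and since either buffer can grow, they are flush thresholds rather than hard limits.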
intel_batchbuffer_flush(brw);
}
- /* For now, flush as if the batch and state buffers still shared a BO */
const unsigned batch_used = USED_BATCH(*batch) * 4;
- if (batch_used + sz >= BATCH_SZ - batch->state_used) {
+ if (batch_used + sz >= BATCH_SZ) {
if (!brw->no_batch_wrap) {
intel_batchbuffer_flush(brw);
} else {
grow_buffer(brw, &batch->bo, &batch->map, &batch->batch_cpu_map,
batch_used, new_size);
batch->map_next = (void *) batch->map + batch_used;
- assert(batch_used + sz < batch->bo->size - batch->state_used);
+ assert(batch_used + sz < batch->bo->size);
}
}
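With the split in place, each buffer is measured on its own: the command stream compares USED_BATCH(*batch) * 4 against BATCH_SZ, and the indirect state side compares batch->state_used against STATE_SZ. A batch that is heavy on indirect state no longer forces the command buffer to wrap early, and vice versa.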
return entry ? (uintptr_t) entry->data : 0;
}
+/**
+ * Reserve some space in the statebuffer, or flush.
+ *
+ * This is used to estimate when we're near the end of the batch,
+ * so we can flush early.
+ */
+void
+brw_require_statebuffer_space(struct brw_context *brw, int size)
+{
+ if (brw->batch.state_used + size >= STATE_SZ)
+ intel_batchbuffer_flush(brw);
+}
+
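A note on the asymmetry: brw_require_statebuffer_space() only ever flushes, because it is meant to be called at safe points before no_batch_wrap is set. Once a draw is in flight and no_batch_wrap is set, running out of state space is handled in brw_state_batch() below, which grows the state BO instead of wrapping.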
/**
* Allocates a block of space in the batchbuffer for indirect state.
*/
uint32_t offset = ALIGN(batch->state_used, alignment);
- /* For now, follow the old flushing behavior. */
- int batch_space = USED_BATCH(*batch) * 4;
-
- if (offset + size >= STATE_SZ - batch_space) {
+ if (offset + size >= STATE_SZ) {
if (!brw->no_batch_wrap) {
intel_batchbuffer_flush(brw);
offset = ALIGN(batch->state_used, alignment);
MAX_STATE_SIZE);
grow_buffer(brw, &batch->state_bo, &batch->state_map,
&batch->state_cpu_map, batch->state_used, new_size);
- assert(offset + size < batch->state_bo->size - batch_space);
+ assert(offset + size < batch->state_bo->size);
}
}
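The overall flow with the split buffers, condensed: at a safe point (no_batch_wrap clear), the require_* helpers flush early if either buffer is close to its target size; mid-draw (no_batch_wrap set), running out of room in either buffer grows that BO via grow_buffer() instead, and the next flush recreates both buffers at BATCH_SZ / STATE_SZ, so growth does not accumulate across batches.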