X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fgen6_sol.c;h=436775a4c0f0c54693e6c577c0e586d98d48a80e;hb=32f0dc3a2919f8d8ba01fab811b2d63e4c6823e8;hp=3cbf28e7dc4c50a293647de31f0a0c0c603cc26d;hpb=b95cbe5e800e95d888d148e20e6a4e34c8857a9e;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index 3cbf28e7dc4..436775a4c0f 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -26,6 +26,7 @@ * Code to initialize the binding table entries used by transform feedback. */ +#include "main/bufferobj.h" #include "main/macros.h" #include "brw_context.h" #include "intel_batchbuffer.h" @@ -36,44 +37,54 @@ static void gen6_update_sol_surfaces(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; - /* BRW_NEW_TRANSFORM_FEEDBACK */ - struct gl_transform_feedback_object *xfb_obj = - ctx->TransformFeedback.CurrentObject; - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_shader_program *shaderprog = - ctx->Shader.CurrentVertexProgram; - const struct gl_transform_feedback_info *linked_xfb_info = - &shaderprog->LinkedTransformFeedback; - int i; - - for (i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) { - const int surf_index = SURF_INDEX_SOL_BINDING(i); - if (_mesa_is_xfb_active_and_unpaused(ctx) && - i < linked_xfb_info->NumOutputs) { + struct gl_context *ctx = &brw->ctx; + bool xfb_active = _mesa_is_xfb_active_and_unpaused(ctx); + struct gl_transform_feedback_object *xfb_obj; + const struct gl_transform_feedback_info *linked_xfb_info = NULL; + + if (xfb_active) { + /* BRW_NEW_TRANSFORM_FEEDBACK */ + xfb_obj = ctx->TransformFeedback.CurrentObject; + linked_xfb_info = xfb_obj->program->sh.LinkedTransformFeedback; + } + + for (int i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) { + const int surf_index = BRW_GEN6_SOL_BINDING_START + i; + if (xfb_active && i < linked_xfb_info->NumOutputs) { unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer; unsigned 
buffer_offset = xfb_obj->Offset[buffer] / 4 + linked_xfb_info->Outputs[i].DstOffset; - brw_update_sol_surface( - brw, xfb_obj->Buffers[buffer], &brw->gs.surf_offset[surf_index], - linked_xfb_info->Outputs[i].NumComponents, - linked_xfb_info->BufferStride[buffer], buffer_offset); + if (brw->geometry_program) { + brw_update_sol_surface( + brw, xfb_obj->Buffers[buffer], + &brw->gs.base.surf_offset[surf_index], + linked_xfb_info->Outputs[i].NumComponents, + linked_xfb_info->Buffers[buffer].Stride, buffer_offset); + } else { + brw_update_sol_surface( + brw, xfb_obj->Buffers[buffer], + &brw->ff_gs.surf_offset[surf_index], + linked_xfb_info->Outputs[i].NumComponents, + linked_xfb_info->Buffers[buffer].Stride, buffer_offset); + } } else { - brw->gs.surf_offset[surf_index] = 0; + if (!brw->geometry_program) + brw->ff_gs.surf_offset[surf_index] = 0; + else + brw->gs.base.surf_offset[surf_index] = 0; } } - brw->state.dirty.brw |= BRW_NEW_SURFACES; + brw->ctx.NewDriverState |= BRW_NEW_SURFACES; } const struct brw_tracked_state gen6_sol_surface = { .dirty = { .mesa = 0, - .brw = (BRW_NEW_BATCH | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_TRANSFORM_FEEDBACK), - .cache = 0 + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_TRANSFORM_FEEDBACK, }, .emit = gen6_update_sol_surfaces, }; @@ -85,124 +96,426 @@ const struct brw_tracked_state gen6_sol_surface = { static void brw_gs_upload_binding_table(struct brw_context *brw) { - struct gl_context *ctx = &brw->intel.ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_shader_program *shaderprog = - ctx->Shader.CurrentVertexProgram; - bool has_surfaces = false; uint32_t *bind; + struct gl_context *ctx = &brw->ctx; + const struct gl_program *prog; + bool need_binding_table = false; - if (shaderprog) { - const struct gl_transform_feedback_info *linked_xfb_info = - &shaderprog->LinkedTransformFeedback; - /* Currently we only ever upload surfaces for SOL. 
*/ - has_surfaces = linked_xfb_info->NumOutputs != 0; - } + /* We have two scenarios here: + * 1) We are using a geometry shader only to implement transform feedback + * for a vertex shader (brw->geometry_program == NULL). In this case, we + * only need surfaces for transform feedback in the GS stage. + * 2) We have a user-provided geometry shader. In this case we may need + * surfaces for transform feedback and/or other stuff, like textures, + * in the GS stage. + */ - /* Skip making a binding table if we don't have anything to put in it. */ - if (!has_surfaces) { - if (brw->gs.bind_bo_offset != 0) { - brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE; - brw->gs.bind_bo_offset = 0; + if (!brw->geometry_program) { + /* BRW_NEW_VERTEX_PROGRAM */ + prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + if (prog) { + /* Skip making a binding table if we don't have anything to put in it */ + const struct gl_transform_feedback_info *linked_xfb_info = + prog->sh.LinkedTransformFeedback; + need_binding_table = linked_xfb_info->NumOutputs > 0; + } + if (!need_binding_table) { + if (brw->ff_gs.bind_bo_offset != 0) { + brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; + brw->ff_gs.bind_bo_offset = 0; + } + return; } - return; - } - /* Might want to calculate nr_surfaces first, to avoid taking up so much - * space for the binding table. - */ - bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE, - sizeof(uint32_t) * BRW_MAX_GS_SURFACES, - 32, &brw->gs.bind_bo_offset); + /* Might want to calculate nr_surfaces first, to avoid taking up so much + * space for the binding table. Anyway, in this case we know that we only + * use BRW_MAX_SOL_BINDINGS surfaces at most. 
+ */ + bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SOL_BINDINGS, + 32, &brw->ff_gs.bind_bo_offset); + + /* BRW_NEW_SURFACES */ + memcpy(bind, brw->ff_gs.surf_offset, + BRW_MAX_SOL_BINDINGS * sizeof(uint32_t)); + } else { + /* BRW_NEW_GEOMETRY_PROGRAM */ + prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; + if (prog) { + /* Skip making a binding table if we don't have anything to put in it */ + struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data; + const struct gl_transform_feedback_info *linked_xfb_info = + prog->sh.LinkedTransformFeedback; + need_binding_table = linked_xfb_info->NumOutputs > 0 || + prog_data->binding_table.size_bytes > 0; + } + if (!need_binding_table) { + if (brw->gs.base.bind_bo_offset != 0) { + brw->gs.base.bind_bo_offset = 0; + brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; + } + return; + } + + /* Might want to calculate nr_surfaces first, to avoid taking up so much + * space for the binding table. + */ + bind = brw_state_batch(brw, sizeof(uint32_t) * BRW_MAX_SURFACES, + 32, &brw->gs.base.bind_bo_offset); - /* BRW_NEW_SURFACES */ - memcpy(bind, brw->gs.surf_offset, BRW_MAX_GS_SURFACES * sizeof(uint32_t)); + /* BRW_NEW_SURFACES */ + memcpy(bind, brw->gs.base.surf_offset, + BRW_MAX_SURFACES * sizeof(uint32_t)); + } - brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE; + brw->ctx.NewDriverState |= BRW_NEW_BINDING_TABLE_POINTERS; } const struct brw_tracked_state gen6_gs_binding_table = { .dirty = { .mesa = 0, - .brw = (BRW_NEW_BATCH | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_SURFACES), - .cache = 0 + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_GEOMETRY_PROGRAM | + BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_SURFACES, }, .emit = brw_gs_upload_binding_table, }; +struct gl_transform_feedback_object * +brw_new_transform_feedback(struct gl_context *ctx, GLuint name) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + CALLOC_STRUCT(brw_transform_feedback_object); + if 
(!brw_obj) + return NULL; + + _mesa_init_transform_feedback_object(&brw_obj->base, name); + + brw_obj->offset_bo = + brw_bo_alloc(brw->bufmgr, "transform feedback offsets", 16, 64); + brw_obj->prim_count_bo = + brw_bo_alloc(brw->bufmgr, "xfb primitive counts", 4096, 64); + + return &brw_obj->base; +} + +void +brw_delete_transform_feedback(struct gl_context *ctx, + struct gl_transform_feedback_object *obj) +{ + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; + + for (unsigned i = 0; i < ARRAY_SIZE(obj->Buffers); i++) { + _mesa_reference_buffer_object(ctx, &obj->Buffers[i], NULL); + } + + brw_bo_unreference(brw_obj->offset_bo); + brw_bo_unreference(brw_obj->prim_count_bo); + + free(brw_obj); +} + +/** + * Tally the number of primitives generated so far. + * + * The buffer contains a series of pairs: + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ; + * (<start0, start1, start2, start3>, <end0, end1, end2, end3>) ; + * + * For each stream, we subtract the pair of values (end - start) to get the + * number of primitives generated during one section. We accumulate these + * values, adding them up to get the total number of primitives generated. + * + * Note that we expose one stream pre-Gen7, so the above is just (start, end). + */ static void -gen6_update_sol_indices(struct brw_context *brw) +tally_prims_generated(struct brw_context *brw, + struct brw_transform_feedback_object *obj) { - struct intel_context *intel = &brw->intel; + const struct gl_context *ctx = &brw->ctx; + const int streams = ctx->Const.MaxVertexStreams; - BEGIN_BATCH(4); - OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); - OUT_BATCH(0); - OUT_BATCH(brw->sol.svbi_0_starting_index); /* BRW_NEW_SOL_INDICES */ - OUT_BATCH(brw->sol.svbi_0_max_index); /* BRW_NEW_SOL_INDICES */ - ADVANCE_BATCH(); + /* If the current batch is still contributing to the number of primitives + * generated, flush it now so the results will be present when mapped.
+ */ + if (brw_batch_references(&brw->batch, obj->prim_count_bo)) + intel_batchbuffer_flush(brw); + + if (unlikely(brw->perf_debug && brw_bo_busy(obj->prim_count_bo))) + perf_debug("Stalling for # of transform feedback primitives written.\n"); + + brw_bo_map(brw, obj->prim_count_bo, false); + uint64_t *prim_counts = obj->prim_count_bo->virtual; + + assert(obj->prim_count_buffer_index % (2 * streams) == 0); + int pairs = obj->prim_count_buffer_index / (2 * streams); + + for (int i = 0; i < pairs; i++) { + for (int s = 0; s < streams; s++) { + obj->prims_generated[s] += prim_counts[streams + s] - prim_counts[s]; + } + prim_counts += 2 * streams; /* move to the next pair */ + } + + brw_bo_unmap(obj->prim_count_bo); + + /* We've already gathered up the old data; we can safely overwrite it now. */ + obj->prim_count_buffer_index = 0; } -const struct brw_tracked_state gen6_sol_indices = { - .dirty = { - .mesa = 0, - .brw = (BRW_NEW_CONTEXT | - BRW_NEW_SOL_INDICES), - .cache = 0 - }, - .emit = gen6_update_sol_indices, -}; +/** + * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values) + * to prim_count_bo. + * + * If prim_count_bo is out of space, gather up the results so far into + * prims_generated[] and allocate a new buffer with enough space. + * + * The number of primitives written is used to compute the number of vertices + * written to a transform feedback stream, which is required to implement + * DrawTransformFeedback(). + */ +void +brw_save_primitives_written_counters(struct brw_context *brw, + struct brw_transform_feedback_object *obj) +{ + const struct gl_context *ctx = &brw->ctx; + const int streams = ctx->Const.MaxVertexStreams; + + assert(obj->prim_count_bo != NULL); + + /* Check if there's enough space for a new pair of four values. */ + if (obj->prim_count_buffer_index + 2 * streams >= 4096 / sizeof(uint64_t)) { + /* Gather up the results so far and release the BO. 
*/ + tally_prims_generated(brw, obj); + } + + /* Flush any drawing so that the counters have the right values. */ + brw_emit_mi_flush(brw); + + /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ + if (brw->gen >= 7) { + for (int i = 0; i < streams; i++) { + int offset = (obj->prim_count_buffer_index + i) * sizeof(uint64_t); + brw_store_register_mem64(brw, obj->prim_count_bo, + GEN7_SO_NUM_PRIMS_WRITTEN(i), + offset); + } + } else { + brw_store_register_mem64(brw, obj->prim_count_bo, + GEN6_SO_NUM_PRIMS_WRITTEN, + obj->prim_count_buffer_index * sizeof(uint64_t)); + } + + /* Update where to write data to. */ + obj->prim_count_buffer_index += streams; +} + +static void +compute_vertices_written_so_far(struct brw_context *brw, + struct brw_transform_feedback_object *obj, + uint64_t *vertices_written) +{ + const struct gl_context *ctx = &brw->ctx; + unsigned vertices_per_prim = 0; + + switch (obj->primitive_mode) { + case GL_POINTS: + vertices_per_prim = 1; + break; + case GL_LINES: + vertices_per_prim = 2; + break; + case GL_TRIANGLES: + vertices_per_prim = 3; + break; + default: + unreachable("Invalid transform feedback primitive mode."); + } + + /* Get the number of primitives generated. */ + tally_prims_generated(brw, obj); + + for (int i = 0; i < ctx->Const.MaxVertexStreams; i++) { + vertices_written[i] = vertices_per_prim * obj->prims_generated[i]; + } +} + +/** + * Compute the number of vertices written by this transform feedback operation. + */ +void +brw_compute_xfb_vertices_written(struct brw_context *brw, + struct brw_transform_feedback_object *obj) +{ + if (obj->vertices_written_valid || !obj->base.EndedAnytime) + return; + + compute_vertices_written_so_far(brw, obj, obj->vertices_written); + + obj->vertices_written_valid = true; +} + +/** + * GetTransformFeedbackVertexCount() driver hook. + * + * Returns the number of vertices written to a particular stream by the last + * Begin/EndTransformFeedback block. 
Used to implement DrawTransformFeedback(). + */ +GLsizei +brw_get_transform_feedback_vertex_count(struct gl_context *ctx, + struct gl_transform_feedback_object *obj, + GLuint stream) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; + + assert(obj->EndedAnytime); + assert(stream < ctx->Const.MaxVertexStreams); + + brw_compute_xfb_vertices_written(brw, brw_obj); + return brw_obj->vertices_written[stream]; +} void brw_begin_transform_feedback(struct gl_context *ctx, GLenum mode, struct gl_transform_feedback_object *obj) { struct brw_context *brw = brw_context(ctx); - struct intel_context *intel = &brw->intel; - const struct gl_shader_program *vs_prog = - ctx->Shader.CurrentVertexProgram; - const struct gl_transform_feedback_info *linked_xfb_info = - &vs_prog->LinkedTransformFeedback; + const struct gl_program *prog; + const struct gl_transform_feedback_info *linked_xfb_info; struct gl_transform_feedback_object *xfb_obj = ctx->TransformFeedback.CurrentObject; + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) xfb_obj; + + assert(brw->gen == 6); + + if (ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]) { + /* BRW_NEW_GEOMETRY_PROGRAM */ + prog = ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY]; + } else { + /* BRW_NEW_VERTEX_PROGRAM */ + prog = ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX]; + } + linked_xfb_info = prog->sh.LinkedTransformFeedback; /* Compute the maximum number of vertices that we can write without * overflowing any of the buffers currently being used for feedback. */ - unsigned max_index - = _mesa_compute_max_transform_feedback_vertices(xfb_obj, + brw_obj->max_index + = _mesa_compute_max_transform_feedback_vertices(ctx, xfb_obj, linked_xfb_info); - /* Initialize the SVBI 0 register to zero and set the maximum index. - * These values will be sent to the hardware on the next draw. 
+ /* Initialize the SVBI 0 register to zero and set the maximum index. */ + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(0); /* SVBI 0 */ + OUT_BATCH(0); /* starting index */ + OUT_BATCH(brw_obj->max_index); + ADVANCE_BATCH(); + + /* Initialize the rest of the unused streams to sane values. Otherwise, + * they may indicate that there is no room to write data and prevent + * anything from happening at all. */ - brw->state.dirty.brw |= BRW_NEW_SOL_INDICES; - brw->sol.svbi_0_starting_index = 0; - brw->sol.svbi_0_max_index = max_index; - brw->sol.offset_0_batch_start = 0; - - if (intel->gen >= 7) { - /* Ask the kernel to reset the SO offsets for any previous transform - * feedback, so we start at the start of the user's buffer. (note: these - * are not the query counters) - */ - intel->batch.needs_sol_reset = true; + for (int i = 1; i < 4; i++) { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(i << SVB_INDEX_SHIFT); + OUT_BATCH(0); /* starting index */ + OUT_BATCH(0xffffffff); + ADVANCE_BATCH(); } + + /* We're about to lose the information needed to compute the number of + * vertices written during the last Begin/EndTransformFeedback section, + * so we can't delay it any further. + */ + brw_compute_xfb_vertices_written(brw, brw_obj); + + /* No primitives have been generated yet. */ + for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) { + brw_obj->prims_generated[i] = 0; + } + + /* Store the starting value of the SO_NUM_PRIMS_WRITTEN counters. */ + brw_save_primitives_written_counters(brw, brw_obj); + + brw_obj->primitive_mode = mode; } void brw_end_transform_feedback(struct gl_context *ctx, struct gl_transform_feedback_object *obj) { - /* After EndTransformFeedback, it's likely that the client program will try - * to draw using the contents of the transform feedback buffer as vertex - * input. 
In order for this to work, we need to flush the data through at - * least the GS stage of the pipeline, and flush out the render cache. For - * simplicity, just do a full flush. + struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; + + /* Store the ending value of the SO_NUM_PRIMS_WRITTEN counters. */ + if (!obj->Paused) + brw_save_primitives_written_counters(brw, brw_obj); + + /* EndTransformFeedback() means that we need to update the number of + * vertices written. Since it's only necessary if DrawTransformFeedback() + * is called and it means mapping a buffer object, we delay computing it + * until it's absolutely necessary to try and avoid stalls. */ + brw_obj->vertices_written_valid = false; +} + +void +brw_pause_transform_feedback(struct gl_context *ctx, + struct gl_transform_feedback_object *obj) +{ struct brw_context *brw = brw_context(ctx); - struct intel_context *intel = &brw->intel; - intel_batchbuffer_emit_mi_flush(intel); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; + + /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters. + * While this operation is paused, other transform feedback actions may + * occur, which will contribute to the counters. We need to exclude that + * from our counts. + */ + brw_save_primitives_written_counters(brw, brw_obj); +} + +void +brw_resume_transform_feedback(struct gl_context *ctx, + struct gl_transform_feedback_object *obj) +{ + struct brw_context *brw = brw_context(ctx); + struct brw_transform_feedback_object *brw_obj = + (struct brw_transform_feedback_object *) obj; + + /* Reload SVBI 0 with the count of vertices written so far. 
*/ + uint64_t svbi; + compute_vertices_written_so_far(brw, brw_obj, &svbi); + + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(0); /* SVBI 0 */ + OUT_BATCH((uint32_t) svbi); /* starting index */ + OUT_BATCH(brw_obj->max_index); + ADVANCE_BATCH(); + + /* Initialize the rest of the unused streams to sane values. Otherwise, + * they may indicate that there is no room to write data and prevent + * anything from happening at all. + */ + for (int i = 1; i < 4; i++) { + BEGIN_BATCH(4); + OUT_BATCH(_3DSTATE_GS_SVB_INDEX << 16 | (4 - 2)); + OUT_BATCH(i << SVB_INDEX_SHIFT); + OUT_BATCH(0); /* starting index */ + OUT_BATCH(0xffffffff); + ADVANCE_BATCH(); + } + + /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */ + brw_save_primitives_written_counters(brw, brw_obj); }