X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fgen7_sol_state.c;h=41573a80a52dfe3ec6719e68fe16002afb2a37df;hb=79154d99d6e760b1daf327b4594dded18f1d4191;hp=8ef899306a0982fd0fcab7400ebf86f1a12f2682;hpb=066fb237e642ec5ebe2c86f89753cba1ad389410;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/gen7_sol_state.c b/src/mesa/drivers/dri/i965/gen7_sol_state.c index 8ef899306a0..41573a80a52 100644 --- a/src/mesa/drivers/dri/i965/gen7_sol_state.c +++ b/src/mesa/drivers/dri/i965/gen7_sol_state.c @@ -39,14 +39,11 @@ static void upload_3dstate_so_buffers(struct brw_context *brw) { struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_shader_program *vs_prog = - ctx->Shader.CurrentVertexProgram; - const struct gl_transform_feedback_info *linked_xfb_info = - &vs_prog->LinkedTransformFeedback; /* BRW_NEW_TRANSFORM_FEEDBACK */ struct gl_transform_feedback_object *xfb_obj = ctx->TransformFeedback.CurrentObject; + const struct gl_transform_feedback_info *linked_xfb_info = + &xfb_obj->shader_program->LinkedTransformFeedback; int i; /* Set up the up to 4 output buffers. These are the ranges defined in the @@ -102,18 +99,19 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw, const struct brw_vue_map *vue_map) { struct gl_context *ctx = &brw->ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_shader_program *vs_prog = - ctx->Shader.CurrentVertexProgram; /* BRW_NEW_TRANSFORM_FEEDBACK */ + struct gl_transform_feedback_object *xfb_obj = + ctx->TransformFeedback.CurrentObject; const struct gl_transform_feedback_info *linked_xfb_info = - &vs_prog->LinkedTransformFeedback; - uint16_t so_decl[128]; - int buffer_mask = 0; - int next_offset[4] = {0, 0, 0, 0}; - int decls = 0; + &xfb_obj->shader_program->LinkedTransformFeedback; + uint16_t so_decl[MAX_VERTEX_STREAMS][128]; + int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; + int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; + int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0}; + int max_decls = 0; + STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS); - STATIC_ASSERT(ARRAY_SIZE(so_decl) >= MAX_PROGRAM_OUTPUTS); + memset(so_decl, 0, sizeof(so_decl)); /* Construct the list of SO_DECLs to be emitted. The formatting of the * command is feels strange -- each dword pair contains a SO_DECL per stream. @@ -124,20 +122,38 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw, int varying = linked_xfb_info->Outputs[i].OutputRegister; const unsigned components = linked_xfb_info->Outputs[i].NumComponents; unsigned component_mask = (1 << components) - 1; + unsigned stream_id = linked_xfb_info->Outputs[i].StreamId; + + assert(stream_id < MAX_VERTEX_STREAMS); - /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */ + /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w + * gl_Layer is stored in VARYING_SLOT_PSIZ.y + * gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z + */ if (varying == VARYING_SLOT_PSIZ) { assert(components == 1); component_mask <<= 3; + } else if (varying == VARYING_SLOT_LAYER) { + assert(components == 1); + component_mask <<= 1; + } else if (varying == VARYING_SLOT_VIEWPORT) { + assert(components == 1); + component_mask <<= 2; } else { component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset; } - buffer_mask |= 1 << buffer; + buffer_mask[stream_id] |= 1 << buffer; decl |= buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT; - decl |= vue_map->varying_to_slot[varying] << - SO_DECL_REGISTER_INDEX_SHIFT; + if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT) { + decl |= vue_map->varying_to_slot[VARYING_SLOT_PSIZ] << + SO_DECL_REGISTER_INDEX_SHIFT; + } else { + assert(vue_map->varying_to_slot[varying] >= 0); + decl |= vue_map->varying_to_slot[varying] << + SO_DECL_REGISTER_INDEX_SHIFT; + } decl |= component_mask << SO_DECL_COMPONENT_MASK_SHIFT; /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[] @@ -148,7 +164,7 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw, * for fake "hole" components, rather than simply taking the offset * for each real varying. Each hole can have size 1, 2, 3, or 4; we * program as many size = 4 holes as we can, then a final hole to - * accomodate the final 1, 2, or 3 remaining. + * accommodate the final 1, 2, or 3 remaining. */ int skip_components = linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer]; @@ -156,35 +172,41 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw, next_offset[buffer] += skip_components; while (skip_components >= 4) { - so_decl[decls++] = SO_DECL_HOLE_FLAG | 0xf; + so_decl[stream_id][decls[stream_id]++] = SO_DECL_HOLE_FLAG | 0xf; skip_components -= 4; } if (skip_components > 0) - so_decl[decls++] = SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1); + so_decl[stream_id][decls[stream_id]++] = + SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1); assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]); next_offset[buffer] += components; - so_decl[decls++] = decl; + so_decl[stream_id][decls[stream_id]++] = decl; + + if (decls[stream_id] > max_decls) + max_decls = decls[stream_id]; } - BEGIN_BATCH(decls * 2 + 3); - OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 | (decls * 2 + 1)); + BEGIN_BATCH(max_decls * 2 + 3); + OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 | (max_decls * 2 + 1)); - OUT_BATCH((buffer_mask << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) | - (0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) | - (0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) | - (0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT)); + OUT_BATCH((buffer_mask[0] << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) | + (buffer_mask[1] << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) | + (buffer_mask[2] << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) | + (buffer_mask[3] << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT)); - OUT_BATCH((decls << SO_NUM_ENTRIES_0_SHIFT) | - (0 << SO_NUM_ENTRIES_1_SHIFT) | - (0 << SO_NUM_ENTRIES_2_SHIFT) | - (0 << SO_NUM_ENTRIES_3_SHIFT)); + OUT_BATCH((decls[0] << SO_NUM_ENTRIES_0_SHIFT) | + (decls[1] << SO_NUM_ENTRIES_1_SHIFT) | + (decls[2] << SO_NUM_ENTRIES_2_SHIFT) | + (decls[3] << SO_NUM_ENTRIES_3_SHIFT)); - for (int i = 0; i < decls; i++) { - OUT_BATCH(so_decl[i]); - OUT_BATCH(0); + for (int i = 0; i < max_decls; i++) { + /* Stream 1 | Stream 0 */ + OUT_BATCH(((uint32_t) so_decl[1][i]) << 16 | so_decl[0][i]); + /* Stream 3 | Stream 2 */ + OUT_BATCH(((uint32_t) so_decl[3][i]) << 16 | so_decl[2][i]); } ADVANCE_BATCH(); @@ -223,9 +245,17 @@ upload_3dstate_streamout(struct brw_context *brw, bool active, * point by reading less and offsetting the register index in the * SO_DECLs. */ - dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT; - dw2 |= (urb_entry_read_length - 1) << - SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT; + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_0_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_0_VERTEX_READ_LENGTH); + + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_1_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_1_VERTEX_READ_LENGTH); + + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_2_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_2_VERTEX_READ_LENGTH); + + dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_3_VERTEX_READ_OFFSET); + dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_3_VERTEX_READ_LENGTH); } BEGIN_BATCH(3); @@ -258,11 +288,10 @@ upload_sol_state(struct brw_context *brw) const struct brw_tracked_state gen7_sol_state = { .dirty = { - .mesa = (_NEW_LIGHT), - .brw = (BRW_NEW_BATCH | - BRW_NEW_VERTEX_PROGRAM | - BRW_NEW_VUE_MAP_GEOM_OUT | - BRW_NEW_TRANSFORM_FEEDBACK) + .mesa = _NEW_LIGHT, + .brw = BRW_NEW_BATCH | + BRW_NEW_VUE_MAP_GEOM_OUT | + BRW_NEW_TRANSFORM_FEEDBACK, }, .emit = upload_sol_state, }; @@ -336,7 +365,7 @@ gen7_save_primitives_written_counters(struct brw_context *brw, } /* Flush any drawing so that the counters have the right values. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); /* Emit MI_STORE_REGISTER_MEM commands to write the values. */ for (int i = 0; i < streams; i++) { @@ -372,7 +401,7 @@ brw_compute_xfb_vertices_written(struct brw_context *brw, vertices_per_prim = 3; break; default: - assert(!"Invalid transform feedback primitive mode."); + unreachable("Invalid transform feedback primitive mode."); } /* Get the number of primitives generated. */ @@ -414,8 +443,13 @@ gen7_begin_transform_feedback(struct gl_context *ctx, GLenum mode, struct brw_transform_feedback_object *brw_obj = (struct brw_transform_feedback_object *) obj; - intel_batchbuffer_flush(brw); - brw->batch.needs_sol_reset = true; + /* Reset the SO buffer offsets to 0. */ + if (brw->gen >= 8) { + brw_obj->zero_offsets = true; + } else { + intel_batchbuffer_flush(brw); + brw->batch.needs_sol_reset = true; + } /* We're about to lose the information needed to compute the number of * vertices written during the last Begin/EndTransformFeedback section, @@ -468,17 +502,19 @@ gen7_pause_transform_feedback(struct gl_context *ctx, (struct brw_transform_feedback_object *) obj; /* Flush any drawing so that the counters have the right values. */ - intel_batchbuffer_emit_mi_flush(brw); + brw_emit_mi_flush(brw); /* Save the SOL buffer offset register values. */ - for (int i = 0; i < 4; i++) { - BEGIN_BATCH(3); - OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); - OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - i * sizeof(uint32_t)); - ADVANCE_BATCH(); + if (brw->gen < 8) { + for (int i = 0; i < 4; i++) { + BEGIN_BATCH(3); + OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2)); + OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); + OUT_RELOC(brw_obj->offset_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + i * sizeof(uint32_t)); + ADVANCE_BATCH(); + } } /* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters. @@ -498,14 +534,16 @@ gen7_resume_transform_feedback(struct gl_context *ctx, (struct brw_transform_feedback_object *) obj; /* Reload the SOL buffer offset registers. */ - for (int i = 0; i < 4; i++) { - BEGIN_BATCH(3); - OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); - OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); - OUT_RELOC(brw_obj->offset_bo, - I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, - i * sizeof(uint32_t)); - ADVANCE_BATCH(); + if (brw->gen < 8) { + for (int i = 0; i < 4; i++) { + BEGIN_BATCH(3); + OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2)); + OUT_BATCH(GEN7_SO_WRITE_OFFSET(i)); + OUT_RELOC(brw_obj->offset_bo, + I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, + i * sizeof(uint32_t)); + ADVANCE_BATCH(); + } } /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */