upload_3dstate_so_buffers(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_VERTEX_PROGRAM */
- const struct gl_shader_program *vs_prog =
- ctx->Shader.CurrentVertexProgram;
- const struct gl_transform_feedback_info *linked_xfb_info =
- &vs_prog->LinkedTransformFeedback;
/* BRW_NEW_TRANSFORM_FEEDBACK */
struct gl_transform_feedback_object *xfb_obj =
ctx->TransformFeedback.CurrentObject;
+ const struct gl_transform_feedback_info *linked_xfb_info =
+ &xfb_obj->shader_program->LinkedTransformFeedback;
int i;
/* Set up the up to 4 output buffers. These are the ranges defined in the
const struct brw_vue_map *vue_map)
{
struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_VERTEX_PROGRAM */
- const struct gl_shader_program *vs_prog =
- ctx->Shader.CurrentVertexProgram;
/* BRW_NEW_TRANSFORM_FEEDBACK */
+ struct gl_transform_feedback_object *xfb_obj =
+ ctx->TransformFeedback.CurrentObject;
const struct gl_transform_feedback_info *linked_xfb_info =
- &vs_prog->LinkedTransformFeedback;
- int i;
+ &xfb_obj->shader_program->LinkedTransformFeedback;
uint16_t so_decl[128];
int buffer_mask = 0;
int next_offset[4] = {0, 0, 0, 0};
+ int decls = 0;
STATIC_ASSERT(ARRAY_SIZE(so_decl) >= MAX_PROGRAM_OUTPUTS);
/* Construct the list of SO_DECLs to be emitted. The formatting of the
* command is feels strange -- each dword pair contains a SO_DECL per stream.
*/
- for (i = 0; i < linked_xfb_info->NumOutputs; i++) {
+ for (int i = 0; i < linked_xfb_info->NumOutputs; i++) {
int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
uint16_t decl = 0;
int varying = linked_xfb_info->Outputs[i].OutputRegister;
- unsigned component_mask =
- (1 << linked_xfb_info->Outputs[i].NumComponents) - 1;
+ const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
+ unsigned component_mask = (1 << components) - 1;
/* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
if (varying == VARYING_SLOT_PSIZ) {
- assert(linked_xfb_info->Outputs[i].NumComponents == 1);
+ assert(components == 1);
component_mask <<= 3;
} else {
component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
SO_DECL_REGISTER_INDEX_SHIFT;
decl |= component_mask << SO_DECL_COMPONENT_MASK_SHIFT;
- /* This assert should be true until GL_ARB_transform_feedback_instanced
- * is added and we start using the hole flag.
+ /* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
+ * array. Instead, it simply increments DstOffset for the following
+ * input by the number of components that should be skipped.
+ *
+ * Our hardware is unusual in that it requires us to program SO_DECLs
+ * for fake "hole" components, rather than simply taking the offset
+ * for each real varying. Each hole can have size 1, 2, 3, or 4; we
+ * program as many size = 4 holes as we can, then a final hole to
+ * accommodate the final 1, 2, or 3 remaining.
*/
+ int skip_components =
+ linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer];
+
+ next_offset[buffer] += skip_components;
+
+ while (skip_components >= 4) {
+ so_decl[decls++] = SO_DECL_HOLE_FLAG | 0xf;
+ skip_components -= 4;
+ }
+ if (skip_components > 0)
+ so_decl[decls++] = SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1);
+
assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
- next_offset[buffer] += linked_xfb_info->Outputs[i].NumComponents;
+ next_offset[buffer] += components;
- so_decl[i] = decl;
+ so_decl[decls++] = decl;
}
- BEGIN_BATCH(linked_xfb_info->NumOutputs * 2 + 3);
- OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 |
- (linked_xfb_info->NumOutputs * 2 + 1));
+ BEGIN_BATCH(decls * 2 + 3);
+ OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 | (decls * 2 + 1));
OUT_BATCH((buffer_mask << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) |
(0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) |
(0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) |
(0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT));
- OUT_BATCH((linked_xfb_info->NumOutputs << SO_NUM_ENTRIES_0_SHIFT) |
+ OUT_BATCH((decls << SO_NUM_ENTRIES_0_SHIFT) |
(0 << SO_NUM_ENTRIES_1_SHIFT) |
(0 << SO_NUM_ENTRIES_2_SHIFT) |
(0 << SO_NUM_ENTRIES_3_SHIFT));
- for (i = 0; i < linked_xfb_info->NumOutputs; i++) {
+ for (int i = 0; i < decls; i++) {
OUT_BATCH(so_decl[i]);
OUT_BATCH(0);
}
.dirty = {
.mesa = (_NEW_LIGHT),
.brw = (BRW_NEW_BATCH |
- BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VUE_MAP_GEOM_OUT |
BRW_NEW_TRANSFORM_FEEDBACK)
},
struct brw_transform_feedback_object *brw_obj =
(struct brw_transform_feedback_object *) obj;
- intel_batchbuffer_flush(brw);
- brw->batch.needs_sol_reset = true;
+ /* Reset the SO buffer offsets to 0. */
+ if (brw->gen >= 8) {
+ brw_obj->zero_offsets = true;
+ } else {
+ intel_batchbuffer_flush(brw);
+ brw->batch.needs_sol_reset = true;
+ }
/* We're about to lose the information needed to compute the number of
* vertices written during the last Begin/EndTransformFeedback section,
intel_batchbuffer_emit_mi_flush(brw);
/* Save the SOL buffer offset register values. */
- for (int i = 0; i < 4; i++) {
- BEGIN_BATCH(3);
- OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
- OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
- OUT_RELOC(brw_obj->offset_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- i * sizeof(uint32_t));
- ADVANCE_BATCH();
+ if (brw->gen < 8) {
+ for (int i = 0; i < 4; i++) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_RELOC(brw_obj->offset_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ i * sizeof(uint32_t));
+ ADVANCE_BATCH();
+ }
}
/* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
(struct brw_transform_feedback_object *) obj;
/* Reload the SOL buffer offset registers. */
- for (int i = 0; i < 4; i++) {
- BEGIN_BATCH(3);
- OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
- OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
- OUT_RELOC(brw_obj->offset_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- i * sizeof(uint32_t));
- ADVANCE_BATCH();
+ if (brw->gen < 8) {
+ for (int i = 0; i < 4; i++) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_RELOC(brw_obj->offset_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ i * sizeof(uint32_t));
+ ADVANCE_BATCH();
+ }
}
/* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */