upload_3dstate_so_buffers(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_VERTEX_PROGRAM */
- const struct gl_shader_program *vs_prog =
- ctx->Shader.CurrentProgram[MESA_SHADER_VERTEX];
- const struct gl_transform_feedback_info *linked_xfb_info =
- &vs_prog->LinkedTransformFeedback;
/* BRW_NEW_TRANSFORM_FEEDBACK */
struct gl_transform_feedback_object *xfb_obj =
ctx->TransformFeedback.CurrentObject;
+ const struct gl_transform_feedback_info *linked_xfb_info =
+ &xfb_obj->shader_program->LinkedTransformFeedback;
int i;
/* Set up the up to 4 output buffers. These are the ranges defined in the
const struct brw_vue_map *vue_map)
{
struct gl_context *ctx = &brw->ctx;
- /* BRW_NEW_VERTEX_PROGRAM */
- const struct gl_shader_program *vs_prog =
- ctx->Shader.CurrentProgram[MESA_SHADER_VERTEX];
/* BRW_NEW_TRANSFORM_FEEDBACK */
+ struct gl_transform_feedback_object *xfb_obj =
+ ctx->TransformFeedback.CurrentObject;
const struct gl_transform_feedback_info *linked_xfb_info =
- &vs_prog->LinkedTransformFeedback;
- uint16_t so_decl[128];
- int buffer_mask = 0;
- int next_offset[4] = {0, 0, 0, 0};
- int decls = 0;
+ &xfb_obj->shader_program->LinkedTransformFeedback;
+ uint16_t so_decl[MAX_VERTEX_STREAMS][128];
+ int buffer_mask[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
+ int next_offset[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
+ int decls[MAX_VERTEX_STREAMS] = {0, 0, 0, 0};
+ int max_decls = 0;
+ STATIC_ASSERT(ARRAY_SIZE(so_decl[0]) >= MAX_PROGRAM_OUTPUTS);
- STATIC_ASSERT(ARRAY_SIZE(so_decl) >= MAX_PROGRAM_OUTPUTS);
+ memset(so_decl, 0, sizeof(so_decl));
/* Construct the list of SO_DECLs to be emitted. The formatting of the
* command feels strange -- each dword pair contains a SO_DECL per stream.
*/
- for (int i = 0; i < linked_xfb_info->NumOutputs; i++) {
+ for (unsigned i = 0; i < linked_xfb_info->NumOutputs; i++) {
int buffer = linked_xfb_info->Outputs[i].OutputBuffer;
uint16_t decl = 0;
int varying = linked_xfb_info->Outputs[i].OutputRegister;
const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
unsigned component_mask = (1 << components) - 1;
+ unsigned stream_id = linked_xfb_info->Outputs[i].StreamId;
+
+ assert(stream_id < MAX_VERTEX_STREAMS);
- /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */
+ /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w
+ * gl_Layer is stored in VARYING_SLOT_PSIZ.y
+ * gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
+ */
if (varying == VARYING_SLOT_PSIZ) {
assert(components == 1);
component_mask <<= 3;
+ } else if (varying == VARYING_SLOT_LAYER) {
+ assert(components == 1);
+ component_mask <<= 1;
+ } else if (varying == VARYING_SLOT_VIEWPORT) {
+ assert(components == 1);
+ component_mask <<= 2;
} else {
component_mask <<= linked_xfb_info->Outputs[i].ComponentOffset;
}
- buffer_mask |= 1 << buffer;
+ buffer_mask[stream_id] |= 1 << buffer;
decl |= buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
- decl |= vue_map->varying_to_slot[varying] <<
- SO_DECL_REGISTER_INDEX_SHIFT;
+ if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT) {
+ decl |= vue_map->varying_to_slot[VARYING_SLOT_PSIZ] <<
+ SO_DECL_REGISTER_INDEX_SHIFT;
+ } else {
+ assert(vue_map->varying_to_slot[varying] >= 0);
+ decl |= vue_map->varying_to_slot[varying] <<
+ SO_DECL_REGISTER_INDEX_SHIFT;
+ }
decl |= component_mask << SO_DECL_COMPONENT_MASK_SHIFT;
/* Mesa doesn't store entries for gl_SkipComponents in the Outputs[]
* for fake "hole" components, rather than simply taking the offset
* for each real varying. Each hole can have size 1, 2, 3, or 4; we
* program as many size = 4 holes as we can, then a final hole to
- * accomodate the final 1, 2, or 3 remaining.
+ * accommodate the final 1, 2, or 3 remaining.
*/
int skip_components =
linked_xfb_info->Outputs[i].DstOffset - next_offset[buffer];
next_offset[buffer] += skip_components;
while (skip_components >= 4) {
- so_decl[decls++] = SO_DECL_HOLE_FLAG | 0xf;
+ so_decl[stream_id][decls[stream_id]++] = SO_DECL_HOLE_FLAG | 0xf;
skip_components -= 4;
}
if (skip_components > 0)
- so_decl[decls++] = SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1);
+ so_decl[stream_id][decls[stream_id]++] =
+ SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1);
assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
next_offset[buffer] += components;
- so_decl[decls++] = decl;
+ so_decl[stream_id][decls[stream_id]++] = decl;
+
+ if (decls[stream_id] > max_decls)
+ max_decls = decls[stream_id];
}
- BEGIN_BATCH(decls * 2 + 3);
- OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 | (decls * 2 + 1));
+ BEGIN_BATCH(max_decls * 2 + 3);
+ OUT_BATCH(_3DSTATE_SO_DECL_LIST << 16 | (max_decls * 2 + 1));
- OUT_BATCH((buffer_mask << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) |
- (0 << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) |
- (0 << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) |
- (0 << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT));
+ OUT_BATCH((buffer_mask[0] << SO_STREAM_TO_BUFFER_SELECTS_0_SHIFT) |
+ (buffer_mask[1] << SO_STREAM_TO_BUFFER_SELECTS_1_SHIFT) |
+ (buffer_mask[2] << SO_STREAM_TO_BUFFER_SELECTS_2_SHIFT) |
+ (buffer_mask[3] << SO_STREAM_TO_BUFFER_SELECTS_3_SHIFT));
- OUT_BATCH((decls << SO_NUM_ENTRIES_0_SHIFT) |
- (0 << SO_NUM_ENTRIES_1_SHIFT) |
- (0 << SO_NUM_ENTRIES_2_SHIFT) |
- (0 << SO_NUM_ENTRIES_3_SHIFT));
+ OUT_BATCH((decls[0] << SO_NUM_ENTRIES_0_SHIFT) |
+ (decls[1] << SO_NUM_ENTRIES_1_SHIFT) |
+ (decls[2] << SO_NUM_ENTRIES_2_SHIFT) |
+ (decls[3] << SO_NUM_ENTRIES_3_SHIFT));
- for (int i = 0; i < decls; i++) {
- OUT_BATCH(so_decl[i]);
- OUT_BATCH(0);
+ for (int i = 0; i < max_decls; i++) {
+ /* Stream 1 | Stream 0 */
+ OUT_BATCH(((uint32_t) so_decl[1][i]) << 16 | so_decl[0][i]);
+ /* Stream 3 | Stream 2 */
+ OUT_BATCH(((uint32_t) so_decl[3][i]) << 16 | so_decl[2][i]);
}
ADVANCE_BATCH();
* point by reading less and offsetting the register index in the
* SO_DECLs.
*/
- dw2 |= urb_entry_read_offset << SO_STREAM_0_VERTEX_READ_OFFSET_SHIFT;
- dw2 |= (urb_entry_read_length - 1) <<
- SO_STREAM_0_VERTEX_READ_LENGTH_SHIFT;
+ dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_0_VERTEX_READ_OFFSET);
+ dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_0_VERTEX_READ_LENGTH);
+
+ dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_1_VERTEX_READ_OFFSET);
+ dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_1_VERTEX_READ_LENGTH);
+
+ dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_2_VERTEX_READ_OFFSET);
+ dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_2_VERTEX_READ_LENGTH);
+
+ dw2 |= SET_FIELD(urb_entry_read_offset, SO_STREAM_3_VERTEX_READ_OFFSET);
+ dw2 |= SET_FIELD(urb_entry_read_length - 1, SO_STREAM_3_VERTEX_READ_LENGTH);
}
BEGIN_BATCH(3);
/* Tracked-state entry for gen7 stream output: names upload_sol_state as the
 * emit callback together with the Mesa/brw dirty flags it is keyed on.
 * The new flag set omits BRW_NEW_VERTEX_PROGRAM, matching the hunks above
 * that stop reading ctx->Shader.CurrentProgram[MESA_SHADER_VERTEX] and take
 * LinkedTransformFeedback from the current transform feedback object.
 * NOTE(review): presumably the emit callback runs when any listed flag is
 * dirty -- confirm against the brw_tracked_state definition.
 */
const struct brw_tracked_state gen7_sol_state = {
.dirty = {
- .mesa = (_NEW_LIGHT),
- .brw = (BRW_NEW_BATCH |
- BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_VUE_MAP_GEOM_OUT |
- BRW_NEW_TRANSFORM_FEEDBACK)
+ .mesa = _NEW_LIGHT,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_VUE_MAP_GEOM_OUT |
+ BRW_NEW_TRANSFORM_FEEDBACK,
},
.emit = upload_sol_state,
};
}
/* Flush any drawing so that the counters have the right values. */
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Emit MI_STORE_REGISTER_MEM commands to write the values. */
for (int i = 0; i < streams; i++) {
vertices_per_prim = 3;
break;
default:
- assert(!"Invalid transform feedback primitive mode.");
+ unreachable("Invalid transform feedback primitive mode.");
}
/* Get the number of primitives generated. */
struct brw_transform_feedback_object *brw_obj =
(struct brw_transform_feedback_object *) obj;
- intel_batchbuffer_flush(brw);
- brw->batch.needs_sol_reset = true;
+ /* Reset the SO buffer offsets to 0. */
+ if (brw->gen >= 8) {
+ brw_obj->zero_offsets = true;
+ } else {
+ intel_batchbuffer_flush(brw);
+ brw->batch.needs_sol_reset = true;
+ }
/* We're about to lose the information needed to compute the number of
* vertices written during the last Begin/EndTransformFeedback section,
(struct brw_transform_feedback_object *) obj;
/* Flush any drawing so that the counters have the right values. */
- intel_batchbuffer_emit_mi_flush(brw);
+ brw_emit_mi_flush(brw);
/* Save the SOL buffer offset register values. */
- for (int i = 0; i < 4; i++) {
- BEGIN_BATCH(3);
- OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
- OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
- OUT_RELOC(brw_obj->offset_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- i * sizeof(uint32_t));
- ADVANCE_BATCH();
+ if (brw->gen < 8) {
+ for (int i = 0; i < 4; i++) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_RELOC(brw_obj->offset_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ i * sizeof(uint32_t));
+ ADVANCE_BATCH();
+ }
}
/* Store the temporary ending value of the SO_NUM_PRIMS_WRITTEN counters.
(struct brw_transform_feedback_object *) obj;
/* Reload the SOL buffer offset registers. */
- for (int i = 0; i < 4; i++) {
- BEGIN_BATCH(3);
- OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
- OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
- OUT_RELOC(brw_obj->offset_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- i * sizeof(uint32_t));
- ADVANCE_BATCH();
+ if (brw->gen < 8) {
+ for (int i = 0; i < 4; i++) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_RELOC(brw_obj->offset_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ i * sizeof(uint32_t));
+ ADVANCE_BATCH();
+ }
}
/* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */