unsigned output_vertex_size_hwords;
unsigned output_topology;
+
+ /**
+ * Size of the control data (cut bits or StreamID bits), in hwords (32
+ * bytes). 0 if there is no control data.
+ */
+ unsigned control_data_header_size_hwords;
+
+ /**
+ * Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID
+ * if the control data is StreamID bits, or
+ * GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits).
+ * Ignored if control_data_header_size_hwords is 0.
+ */
+ unsigned control_data_format;
};
/** Number of texture sampler units */
/* DW5 */
# define GEN6_GS_MAX_THREADS_SHIFT 25
# define HSW_GS_MAX_THREADS_SHIFT 24
+# define IVB_GS_CONTROL_DATA_FORMAT_SHIFT 24
+# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT 0
+# define GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID 1
+# define GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT 20
# define GEN7_GS_DISPATCH_MODE_SINGLE (0 << 11)
# define GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE (1 << 11)
# define GEN7_GS_DISPATCH_MODE_DUAL_OBJECT (2 << 11)
# define GEN7_GS_INCLUDE_PRIMITIVE_ID (1 << 4)
# define GEN7_GS_ENABLE (1 << 0)
/* DW6 */
+# define HSW_GS_CONTROL_DATA_FORMAT_SHIFT 31
# define GEN6_GS_REORDER (1 << 30)
# define GEN6_GS_DISCARD_ADJACENCY (1 << 29)
# define GEN6_GS_SVBI_PAYLOAD_ENABLE (1 << 28)
c.prog_data.base.param = rzalloc_array(NULL, const float *, param_count);
c.prog_data.base.pull_param = rzalloc_array(NULL, const float *, param_count);
+ if (gp->program.OutputType == GL_POINTS) {
+ /* When the output type is points, the geometry shader may output data
+ * to multiple streams, and EndPrimitive() has no effect. So we
+ * configure the hardware to interpret the control data as stream ID.
+ */
+ c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID;
+
+ /* However, StreamID is not yet supported, so we output zero bits of
+ * control data per vertex.
+ */
+ c.control_data_bits_per_vertex = 0;
+ } else {
+ /* When the output type is triangle_strip or line_strip, EndPrimitive()
+ * may be used to terminate the current strip and start a new one
+ * (similar to primitive restart), and outputting data to multiple
+ * streams is not supported. So we configure the hardware to interpret
+ * the control data as EndPrimitive information (a.k.a. "cut bits").
+ */
+ c.prog_data.control_data_format = GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT;
+
+ /* We only need to output control data if the shader actually calls
+ * EndPrimitive().
+ */
+ c.control_data_bits_per_vertex = gp->program.UsesEndPrimitive ? 1 : 0;
+ }
+ c.control_data_header_size_bits =
+ gp->program.VerticesOut * c.control_data_bits_per_vertex;
+
+ /* 1 HWORD = 32 bytes = 256 bits */
+ c.prog_data.control_data_header_size_hwords =
+ ALIGN(c.control_data_header_size_bits, 256) / 256;
+
brw_compute_vue_map(brw, &c.prog_data.base.vue_map,
gp->program.Base.OutputsWritten,
c.key.base.userclip_active);
*/
unsigned output_size_bytes =
c.prog_data.output_vertex_size_hwords * 32 * gp->program.VerticesOut;
+ output_size_bytes += 32 * c.prog_data.control_data_header_size_hwords;
assert(output_size_bytes >= 1);
if (output_size_bytes > GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES)
(void) complete;
vec4_instruction *inst = emit(GS_OPCODE_URB_WRITE);
+ inst->offset = c->prog_data.control_data_header_size_hwords;
inst->urb_write_flags = BRW_URB_WRITE_PER_SLOT_OFFSET;
return inst;
}
struct brw_gs_prog_data prog_data;
struct brw_geometry_program *gp;
+
+ unsigned control_data_bits_per_vertex;
+ unsigned control_data_header_size_bits;
};
#ifdef __cplusplus
* URB row increments, and each of our MRFs is half of one of
* those, since we're doing interleaved writes.
*/
- inst->offset = (max_usable_mrf - base_mrf) / 2;
+ inst->offset += (max_usable_mrf - base_mrf) / 2;
}
}
OUT_BATCH(0);
}
- OUT_BATCH(((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) <<
- GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
- (brw->gs.prog_data->output_topology <<
- GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
- (prog_data->urb_read_length <<
- GEN6_GS_URB_READ_LENGTH_SHIFT) |
- (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
- (prog_data->dispatch_grf_start_reg <<
- GEN6_GS_DISPATCH_START_GRF_SHIFT));
-
- OUT_BATCH(((brw->max_gs_threads - 1) << max_threads_shift) |
- GEN7_GS_DISPATCH_MODE_DUAL_OBJECT |
- GEN6_GS_STATISTICS_ENABLE |
- GEN7_GS_ENABLE);
+ /* DW4: output vertex size and topology, URB read length/offset, and the
+  * GRF register at which thread dispatch data starts.
+  */
+ uint32_t dw4 =
+    ((brw->gs.prog_data->output_vertex_size_hwords * 2 - 1) <<
+     GEN7_GS_OUTPUT_VERTEX_SIZE_SHIFT) |
+    (brw->gs.prog_data->output_topology <<
+     GEN7_GS_OUTPUT_TOPOLOGY_SHIFT) |
+    (prog_data->urb_read_length <<
+     GEN6_GS_URB_READ_LENGTH_SHIFT) |
+    (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT) |
+    (prog_data->dispatch_grf_start_reg <<
+     GEN6_GS_DISPATCH_START_GRF_SHIFT);
+
+ /* DW5: maximum thread count, control data header size (in hwords),
+  * dispatch mode and the statistics/GS enable bits.
+  */
+ uint32_t dw5 =
+    ((brw->max_gs_threads - 1) << max_threads_shift) |
+    (brw->gs.prog_data->control_data_header_size_hwords <<
+     GEN7_GS_CONTROL_DATA_HEADER_SIZE_SHIFT) |
+    GEN7_GS_DISPATCH_MODE_DUAL_OBJECT |
+    GEN6_GS_STATISTICS_ENABLE |
+    GEN7_GS_ENABLE;
+
+ /* DW6: only populated on Haswell (see below). */
+ uint32_t dw6 = 0;
+
+ /* The control data format field is a DW5 field on non-Haswell parts
+  * (IVB_GS_CONTROL_DATA_FORMAT_SHIFT) and a DW6 field on Haswell
+  * (HSW_GS_CONTROL_DATA_FORMAT_SHIFT), so it must be OR'ed into the
+  * matching dword rather than into its neighbour.
+  */
+ if (brw->is_haswell) {
+    dw6 |= brw->gs.prog_data->control_data_format <<
+       HSW_GS_CONTROL_DATA_FORMAT_SHIFT;
+ } else {
+    dw5 |= brw->gs.prog_data->control_data_format <<
+       IVB_GS_CONTROL_DATA_FORMAT_SHIFT;
+ }
+ OUT_BATCH(dw4);
+ OUT_BATCH(dw5);
- OUT_BATCH(0);
+ OUT_BATCH(dw6);
ADVANCE_BATCH();
} else {