# define GEN6_GS_SVBI_POSTINCREMENT_VALUE_MASK INTEL_MASK(25, 16)
# define GEN6_GS_ENABLE (1 << 15)
+# define BRW_GS_EDGE_INDICATOR_0 (1 << 8) /* tested against R0 DWORD 2 by gen6_sol_program: first triangle of a polygon */
+# define BRW_GS_EDGE_INDICATOR_1 (1 << 9) /* tested against R0 DWORD 2 by gen6_sol_program: last triangle of a polygon */
+
#define _3DSTATE_HS 0x781B /* GEN7+ */
#define _3DSTATE_TE 0x781C /* GEN7+ */
#define _3DSTATE_DS 0x781D /* GEN7+ */
return vec1(suboffset(retype(reg, BRW_REGISTER_TYPE_UD), elt));
}
+/* Single-element (vec1) view of element \c elt of \c reg, retyped to
+ * BRW_REGISTER_TYPE_D.  Counterpart of get_element_ud().
+ */
+static INLINE struct brw_reg get_element_d( struct brw_reg reg, GLuint elt )
+{
+   struct brw_reg as_d = retype(reg, BRW_REGISTER_TYPE_D);
+
+   return vec1(suboffset(as_d, elt));
+}
+
static INLINE struct brw_reg brw_swizzle( struct brw_reg reg,
GLuint x,
void *mem_ctx;
GLuint program_size;
- /* Gen6: VF has already converted into polygon, and LINELOOP is
- * converted to LINESTRIP at the beginning of the 3D pipeline.
- */
- if (intel->gen >= 6)
- return;
-
memset(&c, 0, sizeof(c));
c.key = *key;
*/
brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
-
- /* Note that primitives which don't require a GS program have
- * already been weeded out by this stage:
- */
-
- switch (key->primitive) {
- case _3DPRIM_QUADLIST:
- brw_gs_quads( &c, key );
- break;
- case _3DPRIM_QUADSTRIP:
- brw_gs_quad_strip( &c, key );
- break;
- case _3DPRIM_LINELOOP:
- brw_gs_lines( &c );
- break;
- default:
- ralloc_free(mem_ctx);
- return;
+ if (intel->gen >= 6) {
+ unsigned num_verts;
+ bool check_edge_flag;
+ /* On Sandybridge, we use the GS for implementing transform feedback
+ * (called "Stream Out" in the PRM).
+ */
+ switch (key->primitive) {
+ case _3DPRIM_POINTLIST:
+ num_verts = 1;
+ check_edge_flag = false;
+ break;
+ case _3DPRIM_LINELIST:
+ case _3DPRIM_LINESTRIP:
+ case _3DPRIM_LINELOOP:
+ num_verts = 2;
+ check_edge_flag = false;
+ break;
+ case _3DPRIM_TRILIST:
+ case _3DPRIM_TRIFAN:
+ case _3DPRIM_TRISTRIP:
+ case _3DPRIM_RECTLIST:
+ num_verts = 3;
+ check_edge_flag = false;
+ break;
+ case _3DPRIM_QUADLIST:
+ case _3DPRIM_QUADSTRIP:
+ case _3DPRIM_POLYGON:
+ num_verts = 3;
+ check_edge_flag = true;
+ break;
+ default:
+ assert(!"Unexpected primitive type in Gen6 SOL program.");
+ return;
+ }
+ gen6_sol_program(&c, key, num_verts, check_edge_flag);
+ } else {
+ /* On Gen4-5, we use the GS to decompose certain types of primitives.
+ * Note that primitives which don't require a GS program have already
+ * been weeded out by now.
+ */
+ switch (key->primitive) {
+ case _3DPRIM_QUADLIST:
+ brw_gs_quads( &c, key );
+ break;
+ case _3DPRIM_QUADSTRIP:
+ brw_gs_quad_strip( &c, key );
+ break;
+ case _3DPRIM_LINELOOP:
+ brw_gs_lines( &c );
+ break;
+ default:
+ ralloc_free(mem_ctx);
+ return;
+ }
}
/* get the program
/* _NEW_TRANSFORM */
key->userclip_active = (ctx->Transform.ClipPlanesEnabled != 0);
- key->need_gs_prog = (intel->gen >= 6)
- ? 0
- : (brw->primitive == _3DPRIM_QUADLIST ||
- brw->primitive == _3DPRIM_QUADSTRIP ||
- brw->primitive == _3DPRIM_LINELOOP);
+ if (intel->gen >= 7) {
+ /* On Gen7 and later, we don't use GS (yet). */
+ key->need_gs_prog = false;
+ } else if (intel->gen == 6) {
+ /* On Gen6, GS is used for transform feedback. */
+ /* _NEW_TRANSFORM_FEEDBACK */
+ key->need_gs_prog = ctx->TransformFeedback.CurrentObject->Active;
+ } else {
+ /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
+ * into simpler primitives.
+ */
+ key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
+ brw->primitive == _3DPRIM_QUADSTRIP ||
+ brw->primitive == _3DPRIM_LINELOOP);
+ }
+ /* For testing, the environment variable INTEL_FORCE_GS can be used to
+ * force a GS program to be used, even if it's not necessary.
+ */
+ if (getenv("INTEL_FORCE_GS"))
+ key->need_gs_prog = true;
}
/* Calculate interpolants for triangle and line rasterization.
const struct brw_tracked_state brw_gs_prog = {
.dirty = {
.mesa = (_NEW_LIGHT |
- _NEW_TRANSFORM),
+ _NEW_TRANSFORM |
+ _NEW_TRANSFORM_FEEDBACK),
.brw = BRW_NEW_PRIMITIVE,
.cache = CACHE_NEW_VS_PROG
},
void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key );
void brw_gs_lines( struct brw_gs_compile *c );
+void gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
+ unsigned num_verts, bool check_edge_flag);
#endif
brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2));
}
+/**
+ * Replace DWORD 2 of c->reg.header with the primitive type read from
+ * c->reg.R0.
+ *
+ * At thread dispatch the primitive type sits in bits 4:0 of DWORD 2 of GRF 0,
+ * whereas URB_WRITE messages expect it in bits 6:2 of DWORD 2.  The emitted
+ * code therefore masks the field out of R0 and then shifts it left by two
+ * bits, leaving the result in c->reg.header.
+ */
+static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg header_dw2 = get_element_ud(c->reg.header, 2);
+   struct brw_reg r0_dw2 = get_element_ud(c->reg.R0, 2);
+
+   /* header.dw2 = r0.dw2 & 0x1f  (isolate the primitive type field) */
+   brw_AND(p, header_dw2, r0_dw2, brw_imm_ud(0x1f));
+
+   /* header.dw2 <<= 2  (move the field from bits 4:0 to bits 6:2) */
+   brw_SHL(p, header_dw2, header_dw2, brw_imm_ud(2));
+}
+
+/**
+ * Add \c offset to DWORD 2 of c->reg.header.
+ *
+ * Callers use this to switch the "PrimStart" and "PrimEnd" flags on or off
+ * for each vertex; passing a negated flag value removes a flag that an
+ * earlier call added.
+ */
+static void brw_gs_offset_header_dw2(struct brw_gs_compile *c, int offset)
+{
+   struct brw_compile *p = &c->func;
+   struct brw_reg header_dw2 = get_element_d(c->reg.header, 2);
+
+   /* header.dw2 += offset */
+   brw_ADD(p, header_dw2, header_dw2, brw_imm_d(offset));
+}
+
+
/**
* Emit a vertex using the URB_WRITE message. Use the contents of
* c->reg.header for the message header, and the registers starting at \c vert
| URB_WRITE_PRIM_END));
brw_gs_emit_vue(c, c->reg.vertex[1], 1);
}
+
+/**
+ * Generate the geometry shader program used on Gen6 to perform stream output
+ * (transform feedback).
+ *
+ * The program re-emits the vertices of each incoming primitive through
+ * URB_WRITE messages, setting the PrimStart/PrimEnd flags in DWORD 2 of the
+ * message header as appropriate for each vertex.
+ *
+ * \param c                GS compile state; instructions are appended to
+ *                         c->func.
+ * \param key              program key (not read by this function).
+ * \param num_verts        vertices per incoming primitive; must be 1, 2 or 3.
+ * \param check_edge_flag  only consulted when \c num_verts is 3.  If true,
+ *                         the edge indicator bits in DWORD 2 of R0 decide
+ *                         whether vertices 0 and 1 are emitted and whether
+ *                         vertex 2 carries PrimEnd.
+ */
+void
+gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key,
+                 unsigned num_verts, bool check_edge_flag)
+{
+   struct brw_compile *p = &c->func;
+
+   brw_gs_alloc_regs(c, num_verts);
+   brw_gs_initialize_header(c);
+
+   brw_gs_ff_sync(c, 1);
+
+   brw_gs_overwrite_header_dw2_from_r0(c);
+   switch (num_verts) {
+   case 1:
+      /* A single vertex both starts and ends the primitive. */
+      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END);
+      brw_gs_emit_vue(c, c->reg.vertex[0], true);
+      break;
+   case 2:
+      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
+      brw_gs_emit_vue(c, c->reg.vertex[0], false);
+      /* One ADD both removes PrimStart and adds PrimEnd. */
+      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START);
+      brw_gs_emit_vue(c, c->reg.vertex[1], true);
+      break;
+   case 3:
+      if (check_edge_flag) {
+         /* Only emit vertices 0 and 1 if this is the first triangle of the
+          * polygon.  Otherwise they are redundant.
+          */
+         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+                 get_element_ud(c->reg.R0, 2),
+                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_0));
+         brw_IF(p, BRW_EXECUTE_1);
+      }
+      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START);
+      brw_gs_emit_vue(c, c->reg.vertex[0], false);
+      brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START);
+      brw_gs_emit_vue(c, c->reg.vertex[1], false);
+      if (check_edge_flag) {
+         brw_ENDIF(p);
+         /* Only emit vertex 2 in PRIM_END mode if this is the last triangle
+          * of the polygon.  Otherwise leave the primitive incomplete because
+          * there are more polygon vertices coming.
+          */
+         brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ);
+         brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD),
+                 get_element_ud(c->reg.R0, 2),
+                 brw_imm_ud(BRW_GS_EDGE_INDICATOR_1));
+         brw_set_predicate_control(p, BRW_PREDICATE_NORMAL);
+      }
+      /* When check_edge_flag is set, this ADD is emitted with predication
+       * enabled, so PrimEnd is only added if the edge indicator test above
+       * passed.  Predication is switched off again before the final emit so
+       * that vertex 2 itself is always written.
+       */
+      brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END);
+      brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+      brw_gs_emit_vue(c, c->reg.vertex[2], true);
+      break;
+   default:
+      /* Callers only pass 1, 2 or 3; any other count would generate a
+       * program that emits no vertex at all.
+       */
+      assert(!"Unexpected vertex count in Gen6 SOL program.");
+      break;
+   }
+}
OUT_BATCH(0);
ADVANCE_BATCH();
- // GS should never be used on Gen6. Disable it.
- assert(!brw->gs.prog_active);
- BEGIN_BATCH(7);
- OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
- OUT_BATCH(0); /* prog_bo */
- OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
- (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
- OUT_BATCH(0); /* scratch space base offset */
- OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
- (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
- (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
- OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
- GEN6_GS_STATISTICS_ENABLE |
- GEN6_GS_RENDERING_ENABLE);
- OUT_BATCH(0);
- ADVANCE_BATCH();
+ if (brw->gs.prog_active) {
+ BEGIN_BATCH(7);
+ OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+ OUT_BATCH(brw->gs.prog_offset);
+ OUT_BATCH(GEN6_GS_SPF_MODE | GEN6_GS_VECTOR_MASK_ENABLE);
+ OUT_BATCH(0); /* no scratch space */
+ OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+ (brw->gs.prog_data->urb_read_length << GEN6_GS_URB_READ_LENGTH_SHIFT));
+ OUT_BATCH(((brw->max_gs_threads - 1) << GEN6_GS_MAX_THREADS_SHIFT) |
+ GEN6_GS_STATISTICS_ENABLE |
+ GEN6_GS_SO_STATISTICS_ENABLE |
+ GEN6_GS_RENDERING_ENABLE);
+ OUT_BATCH(GEN6_GS_ENABLE);
+ ADVANCE_BATCH();
+ } else {
+ BEGIN_BATCH(7);
+ OUT_BATCH(_3DSTATE_GS << 16 | (7 - 2));
+ OUT_BATCH(0); /* prog_bo */
+ OUT_BATCH((0 << GEN6_GS_SAMPLER_COUNT_SHIFT) |
+ (0 << GEN6_GS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+ OUT_BATCH(0); /* scratch space base offset */
+ OUT_BATCH((1 << GEN6_GS_DISPATCH_START_GRF_SHIFT) |
+ (0 << GEN6_GS_URB_READ_LENGTH_SHIFT) |
+ (0 << GEN6_GS_URB_ENTRY_READ_OFFSET_SHIFT));
+ OUT_BATCH((0 << GEN6_GS_MAX_THREADS_SHIFT) |
+ GEN6_GS_STATISTICS_ENABLE |
+ GEN6_GS_RENDERING_ENABLE);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ }
}
const struct brw_tracked_state gen6_gs_state = {