#include "main/glheader.h"
#include "main/macros.h"
#include "main/enums.h"
+#include "main/transformfeedback.h"
#include "intel_batchbuffer.h"
#include "brw_state.h"
#include "brw_gs.h"
-
+#include "glsl/ralloc.h"
static void compile_gs_prog( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
- struct intel_context *intel = &brw->intel;
struct brw_gs_compile c;
const GLuint *program;
+ void *mem_ctx;
GLuint program_size;
memset(&c, 0, sizeof(c));
c.key = *key;
- /* Need to locate the two positions present in vertex + header.
- * These are currently hardcoded:
- */
- c.nr_attrs = brw_count_bits(c.key.attrs);
-
- if (intel->gen >= 5)
- c.nr_regs = (c.nr_attrs + 1) / 2 + 3; /* are vertices packed, or reg-aligned? */
- else
- c.nr_regs = (c.nr_attrs + 1) / 2 + 1; /* are vertices packed, or reg-aligned? */
-
- c.nr_bytes = c.nr_regs * REG_SIZE;
+ c.vue_map = brw->vs.prog_data->base.vue_map;
+ c.nr_regs = (c.vue_map.num_slots + 1)/2;
+ mem_ctx = ralloc_context(NULL);
/* Begin the compilation:
*/
- brw_init_compile(brw, &c.func);
+ brw_init_compile(brw, &c.func, mem_ctx);
c.func.single_program_flow = 1;
*/
brw_set_mask_control(&c.func, BRW_MASK_DISABLE);
-
- /* Note that primitives which don't require a GS program have
- * already been weeded out by this stage:
- */
- switch (key->primitive) {
- case GL_QUADS:
- /* Gen6: VF has already converted into polygon. */
- if (intel->gen == 6)
- return;
- brw_gs_quads( &c, key );
- break;
- case GL_QUAD_STRIP:
- if (intel->gen == 6)
- return;
- brw_gs_quad_strip( &c, key );
- break;
- case GL_LINE_LOOP:
- brw_gs_lines( &c );
- break;
- case GL_LINES:
- if (key->hint_gs_always)
- brw_gs_lines( &c );
- else {
- return;
- }
- break;
- case GL_TRIANGLES:
- if (key->hint_gs_always)
- brw_gs_tris( &c );
- else {
+ if (brw->gen >= 6) {
+ unsigned num_verts;
+ bool check_edge_flag;
+ /* On Sandybridge, we use the GS for implementing transform feedback
+ * (called "Stream Out" in the PRM).
+ */
+ switch (key->primitive) {
+ case _3DPRIM_POINTLIST:
+ num_verts = 1;
+ check_edge_flag = false;
+ break;
+ case _3DPRIM_LINELIST:
+ case _3DPRIM_LINESTRIP:
+ case _3DPRIM_LINELOOP:
+ num_verts = 2;
+ check_edge_flag = false;
+ break;
+ case _3DPRIM_TRILIST:
+ case _3DPRIM_TRIFAN:
+ case _3DPRIM_TRISTRIP:
+ case _3DPRIM_RECTLIST:
+ num_verts = 3;
+ check_edge_flag = false;
+ break;
+ case _3DPRIM_QUADLIST:
+ case _3DPRIM_QUADSTRIP:
+ case _3DPRIM_POLYGON:
+ num_verts = 3;
+ check_edge_flag = true;
+ break;
+ default:
+ assert(!"Unexpected primitive type in Gen6 SOL program.");
return;
}
- break;
- case GL_POINTS:
- if (key->hint_gs_always)
- brw_gs_points( &c );
- else {
+ gen6_sol_program(&c, key, num_verts, check_edge_flag);
+ } else {
+ /* On Gen4-5, we use the GS to decompose certain types of primitives.
+ * Note that primitives which don't require a GS program have already
+ * been weeded out by now.
+ */
+ switch (key->primitive) {
+ case _3DPRIM_QUADLIST:
+ brw_gs_quads( &c, key );
+ break;
+ case _3DPRIM_QUADSTRIP:
+ brw_gs_quad_strip( &c, key );
+ break;
+ case _3DPRIM_LINELOOP:
+ brw_gs_lines( &c );
+ break;
+ default:
+ ralloc_free(mem_ctx);
return;
}
- break;
- default:
- return;
}
/* get the program
printf("gs:\n");
for (i = 0; i < program_size / sizeof(struct brw_instruction); i++)
brw_disasm(stdout, &((struct brw_instruction *)program)[i],
- intel->gen);
+ brw->gen);
printf("\n");
}
- /* Upload
- */
- drm_intel_bo_unreference(brw->gs.prog_bo);
- brw->gs.prog_bo = brw_upload_cache_with_auxdata(&brw->cache, BRW_GS_PROG,
- &c.key, sizeof(c.key),
- NULL, 0,
- program, program_size,
- &c.prog_data,
- sizeof(c.prog_data),
- &brw->gs.prog_data);
+ brw_upload_cache(&brw->cache, BRW_GS_PROG,
+ &c.key, sizeof(c.key),
+ program, program_size,
+ &c.prog_data, sizeof(c.prog_data),
+ &brw->gs.prog_offset, &brw->gs.prog_data);
+ ralloc_free(mem_ctx);
}
-static const GLenum gs_prim[GL_POLYGON+1] = {
- GL_POINTS,
- GL_LINES,
- GL_LINE_LOOP,
- GL_LINES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_TRIANGLES,
- GL_QUADS,
- GL_QUAD_STRIP,
- GL_TRIANGLES
-};
-
static void populate_key( struct brw_context *brw,
struct brw_gs_prog_key *key )
{
- struct gl_context *ctx = &brw->intel.ctx;
- struct intel_context *intel = &brw->intel;
- int prim_gs_always;
+ static const unsigned swizzle_for_offset[4] = {
+ BRW_SWIZZLE4(0, 1, 2, 3),
+ BRW_SWIZZLE4(1, 2, 3, 3),
+ BRW_SWIZZLE4(2, 3, 3, 3),
+ BRW_SWIZZLE4(3, 3, 3, 3)
+ };
+
+ struct gl_context *ctx = &brw->ctx;
memset(key, 0, sizeof(*key));
- /* CACHE_NEW_VS_PROG */
- key->attrs = brw->vs.prog_data->outputs_written;
+ /* CACHE_NEW_VS_PROG (part of VUE map) */
+ key->attrs = brw->vs.prog_data->base.vue_map.slots_valid;
/* BRW_NEW_PRIMITIVE */
- key->primitive = gs_prim[brw->primitive];
+ key->primitive = brw->primitive;
- key->hint_gs_always = 0; /* debug code? */
-
/* _NEW_LIGHT */
key->pv_first = (ctx->Light.ProvokingVertex == GL_FIRST_VERTEX_CONVENTION);
- if (key->primitive == GL_QUADS && ctx->Light.ShadeModel != GL_FLAT) {
+ if (key->primitive == _3DPRIM_QUADLIST && ctx->Light.ShadeModel != GL_FLAT) {
/* Provide consistent primitive order with brw_set_prim's
* optimization of single quads to trifans.
*/
- key->pv_first = GL_TRUE;
+ key->pv_first = true;
}
- if (intel->gen == 6)
- prim_gs_always = brw->primitive == GL_LINE_LOOP;
- else
- prim_gs_always = brw->primitive == GL_QUADS ||
- brw->primitive == GL_QUAD_STRIP ||
- brw->primitive == GL_LINE_LOOP;
+ if (brw->gen >= 7) {
+ /* On Gen7 and later, we don't use GS (yet). */
+ key->need_gs_prog = false;
+ } else if (brw->gen == 6) {
+ /* On Gen6, GS is used for transform feedback. */
+ /* BRW_NEW_TRANSFORM_FEEDBACK */
+ if (_mesa_is_xfb_active_and_unpaused(ctx)) {
+ const struct gl_shader_program *shaderprog =
+ ctx->Shader.CurrentVertexProgram;
+ const struct gl_transform_feedback_info *linked_xfb_info =
+ &shaderprog->LinkedTransformFeedback;
+ int i;
+
+ /* Make sure that the VUE slots won't overflow the unsigned chars in
+ * key->transform_feedback_bindings[].
+ */
+ STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
- key->need_gs_prog = (key->hint_gs_always || prim_gs_always);
+ /* Make sure that we don't need more binding table entries than we've
+ * set aside for use in transform feedback. (We shouldn't, since we
+ * set aside enough binding table entries to have one per component).
+ */
+ assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
+
+ key->need_gs_prog = true;
+ key->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
+ for (i = 0; i < key->num_transform_feedback_bindings; ++i) {
+ key->transform_feedback_bindings[i] =
+ linked_xfb_info->Outputs[i].OutputRegister;
+ key->transform_feedback_swizzles[i] =
+ swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
+ }
+ }
+ } else {
+ /* Pre-gen6, GS is used to transform QUADLIST, QUADSTRIP, and LINELOOP
+ * into simpler primitives.
+ */
+ key->need_gs_prog = (brw->primitive == _3DPRIM_QUADLIST ||
+ brw->primitive == _3DPRIM_QUADSTRIP ||
+ brw->primitive == _3DPRIM_LINELOOP);
+ }
}
/* Calculate interpolants for triangle and line rasterization.
*/
-static void prepare_gs_prog(struct brw_context *brw)
+static void
+brw_upload_gs_prog(struct brw_context *brw)
{
struct brw_gs_prog_key key;
/* Populate the key:
brw->gs.prog_active = key.need_gs_prog;
}
- drm_intel_bo_unreference(brw->gs.prog_bo);
- brw->gs.prog_bo = NULL;
-
if (brw->gs.prog_active) {
- brw->gs.prog_bo = brw_search_cache(&brw->cache, BRW_GS_PROG,
- &key, sizeof(key),
- NULL, 0,
- &brw->gs.prog_data);
- if (brw->gs.prog_bo == NULL)
+ if (!brw_search_cache(&brw->cache, BRW_GS_PROG,
+ &key, sizeof(key),
+ &brw->gs.prog_offset, &brw->gs.prog_data)) {
compile_gs_prog( brw, &key );
+ }
}
}
const struct brw_tracked_state brw_gs_prog = {
.dirty = {
- .mesa = _NEW_LIGHT,
- .brw = BRW_NEW_PRIMITIVE,
+ .mesa = (_NEW_LIGHT),
+ .brw = (BRW_NEW_PRIMITIVE |
+ BRW_NEW_TRANSFORM_FEEDBACK),
.cache = CACHE_NEW_VS_PROG
},
- .prepare = prepare_gs_prog
+ .emit = brw_upload_gs_prog
};