#include "gen6_gs_visitor.h"
+const unsigned MAX_GS_INPUT_VERTICES = 6;
+
namespace brw {
+void
+gen6_gs_visitor::assign_binding_table_offsets()
+{
+ /* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
+ * feedback surfaces, which is why BRW_MAX_SOL_BINDINGS is the value handed
+ * to assign_common_binding_table_offsets() here. Presumably that argument
+ * is the first binding table index the common surfaces may use -- TODO
+ * confirm against the helper's definition, which is not part of this
+ * file.
+ */
+ assign_common_binding_table_offsets(BRW_MAX_SOL_BINDINGS);
+}
+
void
gen6_gs_visitor::emit_prolog()
{
*/
this->prim_count = src_reg(this, glsl_type::uint_type);
emit(MOV(dst_reg(this->prim_count), 0u));
+
+ if (c->prog_data.gen6_xfb_enabled) {
+ /* Create a virtual register to hold destination indices in SOL */
+ this->destination_indices = src_reg(this, glsl_type::uvec4_type);
+ /* Create a virtual register to hold number of written primitives */
+ this->sol_prim_written = src_reg(this, glsl_type::uint_type);
+ /* Create a virtual register to hold Streamed Vertex Buffer Indices */
+ this->svbi = src_reg(this, glsl_type::uvec4_type);
+ /* Create a virtual register to hold max values of SVBI */
+ this->max_svbi = src_reg(this, glsl_type::uvec4_type);
+ emit(MOV(dst_reg(this->max_svbi),
+ src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD))));
+
+ xfb_setup();
+ }
+
+ /* PrimitiveID is delivered in r0.1 of the thread payload. If the program
+ * needs it we have to move it to a separate register where we can map
+ * the attribute.
+ *
+ * Notice that we cannot use a virtual register for this, because we need to
+ * map all input attributes to hardware registers in setup_payload(),
+ * which happens before virtual registers are mapped to hardware registers.
+ * We could work around that issue if we were able to compute the first
+ * non-payload register here and move the PrimitiveID information to that
+ * register, but we can't because at this point we don't know the final
+ * number of uniforms that will be included in the payload.
+ *
+ * So, what we do is to place PrimitiveID information in r1, which is always
+ * delivered as part of the payload, but it is only populated with data
+ * relevant for transform feedback when we set GEN6_GS_SVBI_PAYLOAD_ENABLE
+ * in the 3DSTATE_GS state packet. That information can be obtained by other
+ * means though, so we can safely use r1 for this purpose.
+ */
+ if (c->prog_data.include_primitive_id) {
+ this->primitive_id =
+ src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
+ emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
+ }
}
void
{
/* Buffer all output slots for this vertex in vertex_output */
for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
- /* We will handle PSIZ for each vertex at thread end time since it
- * is not computed by the GS algorithm and requires specific handling.
- */
int varying = prog_data->vue_map.slot_to_varying[slot];
if (varying != VARYING_SLOT_PSIZ) {
dst_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
emit_urb_slot(dst, varying);
+ } else {
+ /* The PSIZ slot can pack multiple varyings in different channels
+ * and emit_urb_slot() will produce a MOV instruction for each of
+ * them. Since we are writing to an array, that will translate to
+ * possibly multiple MOV instructions with an array destination and
+ * each will generate a scratch write with the same offset into
+ * scratch space (thus, each one overwriting the previous). This is
+ * not what we want. What we will do instead is emit PSIZ to a
+ * regular temporary register, then move that register into the
+ * array. This way we only have one instruction with an array
+ * destination and we only produce a single scratch write.
+ */
+ dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
+ emit_urb_slot(tmp, varying);
+ dst_reg dst(this->vertex_output);
+ dst.reladdr = ralloc(mem_ctx, src_reg);
+ memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+ vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
+ inst->force_writemask_all = true;
}
+
emit(ADD(dst_reg(this->vertex_output_offset),
this->vertex_output_offset, 1u));
}
/* Otherwise we know that the last vertex we have processed was the last
* vertex in the primitive and we need to set its PrimEnd flag, so do this
- * unless we haven't emitted that vertex at all.
+ * unless we haven't emitted that vertex at all (vertex_count != 0).
*
* Notice that we have already incremented vertex_count when we processed
* the last emit_vertex, so we need to take that into account in the
unsigned num_output_vertices = c->gp->program.VerticesOut;
emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
BRW_CONDITIONAL_L));
+ vec4_instruction *inst = emit(CMP(dst_null_d(),
+ this->vertex_count, 0u,
+ BRW_CONDITIONAL_NEQ));
+ inst->predicate = BRW_PREDICATE_NORMAL;
emit(IF(BRW_PREDICATE_NORMAL));
{
/* vertex_output_offset is already pointing at the first entry of the
* vertex.
*/
src_reg offset(this, glsl_type::uint_type);
- emit(ADD(dst_reg(offset), this->vertex_output_offset, brw_imm_d(-1)));
+ emit(ADD(dst_reg(offset), this->vertex_output_offset, src_reg(-1)));
src_reg dst(this->vertex_output);
dst.reladdr = ralloc(mem_ctx, src_reg);
}
void
-gen6_gs_visitor::emit_urb_write_opcode(bool complete, src_reg vertex,
- int base_mrf, int mlen, int urb_offset)
+gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
+ int last_mrf, int urb_offset)
{
vec4_instruction *inst = NULL;
- /* If the vertex is not complete we don't have to do anything special */
if (!complete) {
+ /* If the vertex is not complete we don't have to do anything special */
inst = emit(GS_OPCODE_URB_WRITE);
inst->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
- inst->offset = urb_offset;
- return;
- }
-
- /* Otherwise, if this is not the last vertex we are going to write,
- * we have to request a new VUE handle for the next vertex.
- *
- * Notice that the vertex parameter has been pre-incremented in
- * emit_thread_end() to make this comparison easier.
- */
- emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_L));
- emit(IF(BRW_PREDICATE_NORMAL));
- {
+ } else {
+ /* Otherwise we always request to allocate a new VUE handle. If this is
+ * the last write before the EOT message and the new handle never gets
+ * used it will be dereferenced when we send the EOT message. This is
+ * necessary to avoid different setups for the EOT message (one for the
+ * case when there is no output and another for the case when there is)
+ * which would require to end the program with an IF/ELSE/ENDIF block,
+ * something we do not want.
+ */
inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE);
inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
- inst->offset = urb_offset;
inst->dst = dst_reg(MRF, base_mrf);
inst->src[0] = this->temp;
}
- emit(BRW_OPCODE_ELSE);
- {
- inst = emit(GS_OPCODE_URB_WRITE);
- inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
- inst->base_mrf = base_mrf;
- inst->mlen = mlen;
- inst->offset = urb_offset;
- }
- emit(BRW_OPCODE_ENDIF);
+
+ inst->base_mrf = base_mrf;
+ /* URB data written (does not include the message header reg) must
+ * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
+ * section 5.4.3.2.2: URB_INTERLEAVED.
+ */
+ int mlen = last_mrf - base_mrf;
+ if ((mlen % 2) != 1)
+ mlen++;
+ inst->mlen = mlen;
+ inst->offset = urb_offset;
}
void
gen6_gs_visitor::emit_thread_end()
{
+ /* Make sure the current primitive is ended: we know it is not ended when
+ * first_vertex is not zero. This is only relevant for outputs other than
+ * points because in the point case we set PrimEnd on all vertices.
+ */
+ if (c->gp->program.OutputType != GL_POINTS) {
+ emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
+ emit(IF(BRW_PREDICATE_NORMAL));
+ {
+ visit((ir_end_primitive *) NULL);
+ }
+ emit(BRW_OPCODE_ENDIF);
+ }
+
/* Here we have to:
* 1) Emit an FF_SYNC messsage to obtain an initial VUE handle.
* 2) Loop over all buffered vertex data and write it to corresponding
int max_usable_mrf = 13;
/* Issue the FF_SYNC message and obtain the initial VUE handle. */
- this->current_annotation = "gen6 thread end: ff_sync";
- vec4_instruction *inst =
- emit(GS_OPCODE_FF_SYNC, dst_reg(this->temp), this->prim_count);
- inst->base_mrf = base_mrf;
-
- /* Loop over all buffered vertices and emit URB write messages */
- this->current_annotation = "gen6 thread end: urb writes init";
- src_reg vertex(this, glsl_type::uint_type);
- emit(MOV(dst_reg(vertex), 0u));
- emit(MOV(dst_reg(this->vertex_output_offset), 0u));
-
- this->current_annotation = "gen6 thread end: urb writes";
- emit(BRW_OPCODE_DO);
+ emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G));
+ emit(IF(BRW_PREDICATE_NORMAL));
{
- emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
- inst = emit(BRW_OPCODE_BREAK);
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- /* First we prepare the message header */
- emit_urb_write_header(base_mrf);
+ this->current_annotation = "gen6 thread end: ff_sync";
+
+ vec4_instruction *inst;
+ if (c->prog_data.gen6_xfb_enabled) {
+ src_reg sol_temp(this, glsl_type::uvec4_type);
+ emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
+ dst_reg(this->svbi),
+ this->vertex_count,
+ this->prim_count,
+ sol_temp);
+ inst = emit(GS_OPCODE_FF_SYNC,
+ dst_reg(this->temp), this->prim_count, this->svbi);
+ } else {
+ inst = emit(GS_OPCODE_FF_SYNC,
+ dst_reg(this->temp), this->prim_count, src_reg(0u));
+ }
+ inst->base_mrf = base_mrf;
- /* Then add vertex data to the message in interleaved fashion */
- int slot = 0;
- bool complete = false;
- do {
- int mrf = base_mrf + 1;
+ /* Loop over all buffered vertices and emit URB write messages */
+ this->current_annotation = "gen6 thread end: urb writes init";
+ src_reg vertex(this, glsl_type::uint_type);
+ emit(MOV(dst_reg(vertex), 0u));
+ emit(MOV(dst_reg(this->vertex_output_offset), 0u));
+
+ this->current_annotation = "gen6 thread end: urb writes";
+ emit(BRW_OPCODE_DO);
+ {
+ emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
+ inst = emit(BRW_OPCODE_BREAK);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ /* First we prepare the message header */
+ emit_urb_write_header(base_mrf);
+
+ /* Then add vertex data to the message in interleaved fashion */
+ int slot = 0;
+ bool complete = false;
+ do {
+ int mrf = base_mrf + 1;
+
+ /* URB offset is in URB row increments, and each of our MRFs is half
+ * of one of those, since we're doing interleaved writes.
+ */
+ int urb_offset = slot / 2;
- /* URB offset is in URB row increments, and each of our MRFs is half
- * of one of those, since we're doing interleaved writes.
- */
- int urb_offset = slot / 2;
+ for (; slot < prog_data->vue_map.num_slots; ++slot) {
+ int varying = prog_data->vue_map.slot_to_varying[slot];
+ current_annotation = output_reg_annotation[varying];
- for (; slot < prog_data->vue_map.num_slots; ++slot) {
- int varying = prog_data->vue_map.slot_to_varying[slot];
- current_annotation = output_reg_annotation[varying];
+ /* Compute offset of this slot for the current vertex
+ * in vertex_output
+ */
+ src_reg data(this->vertex_output);
+ data.reladdr = ralloc(mem_ctx, src_reg);
+ memcpy(data.reladdr, &this->vertex_output_offset,
+ sizeof(src_reg));
- /* Compute offset of this slot for the current vertex
- * in vertex_output
- */
- src_reg data(this->vertex_output);
- data.reladdr = ralloc(mem_ctx, src_reg);
- memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
-
- if (varying == VARYING_SLOT_PSIZ) {
- /* We did not buffer PSIZ, emit it directly here */
- emit_urb_slot(dst_reg(MRF, mrf), varying);
- } else {
/* Copy this slot to the appropriate message register */
dst_reg reg = dst_reg(MRF, mrf);
reg.type = output_reg[varying].type;
data.type = reg.type;
vec4_instruction *inst = emit(MOV(reg, data));
inst->force_writemask_all = true;
- }
- mrf++;
- emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
-
- /* If this was max_usable_mrf, we can't fit anything more into this
- * URB WRITE.
- */
- if (mrf > max_usable_mrf) {
- slot++;
- break;
+ mrf++;
+ emit(ADD(dst_reg(this->vertex_output_offset),
+ this->vertex_output_offset, 1u));
+
+ /* If this was max_usable_mrf, we can't fit anything more into
+ * this URB WRITE.
+ */
+ if (mrf > max_usable_mrf) {
+ slot++;
+ break;
+ }
}
- }
- complete = slot >= prog_data->vue_map.num_slots;
+ complete = slot >= prog_data->vue_map.num_slots;
+ emit_urb_write_opcode(complete, base_mrf, mrf, urb_offset);
+ } while (!complete);
- /* When we emit the URB_WRITE below we need to do different things
- * depending on whether this is the last vertex we are going to
- * write. That means that we will need to check if
- * vertex >= vertex_count - 1. However, by increasing vertex early
- * we transform that comparison into vertex >= vertex_count, which
- * is more convenient.
+ /* Skip over the flags data item so that vertex_output_offset points
+ * to the first data item of the next vertex, so that we can start
+ * writing the next vertex.
*/
- if (complete)
- emit(ADD(dst_reg(vertex), vertex, 1u));
+ emit(ADD(dst_reg(this->vertex_output_offset),
+ this->vertex_output_offset, 1u));
- /* URB data written (does not include the message header reg) must
- * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
- * section 5.4.3.2.2: URB_INTERLEAVED.
- */
- int mlen = mrf - base_mrf;
- if ((mlen % 2) != 1)
- mlen++;
- emit_urb_write_opcode(complete, vertex, base_mrf, mlen, urb_offset);
- } while (!complete);
-
- /* Skip over the flags data item so that vertex_output_offset points to
- * the first data item of the next vertex, so that we can start writing
- * the next vertex.
- */
- emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
+ emit(ADD(dst_reg(vertex), vertex, 1u));
+ }
+ emit(BRW_OPCODE_WHILE);
+
+ if (c->prog_data.gen6_xfb_enabled)
+ xfb_write();
}
- emit(BRW_OPCODE_WHILE);
+ emit(BRW_OPCODE_ENDIF);
/* Finally, emit EOT message.
*
- * In gen6 it looks like we have to set the complete flag too, otherwise
- * the GPU hangs.
+ * In gen6 we need to end the thread differently depending on whether we have
+ * emitted at least one vertex or not. In case we did, the EOT message must
+ * always include the COMPLETE flag or else the GPU hangs. If we have not
+ * produced any output we can't use the COMPLETE flag.
+ *
+ * However, this would lead us to end the program with an ENDIF opcode,
+ * which we want to avoid, so what we do is that we always request a new
+ * VUE handle every time we do a URB WRITE, even for the last vertex we emit.
+ * With this we make sure that whether we have emitted at least one vertex
+ * or none at all, we have to finish the thread without writing to the URB,
+ * which works for both cases by setting the COMPLETE and UNUSED flags in
+ * the EOT message.
*/
this->current_annotation = "gen6 thread end: EOT";
- inst = emit(GS_OPCODE_THREAD_END);
- inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
+
+ if (c->prog_data.gen6_xfb_enabled) {
+ /* When emitting EOT, set SONumPrimsWritten Increment Value. */
+ src_reg data(this, glsl_type::uint_type);
+ emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu)));
+ emit(SHL(dst_reg(data), data, src_reg(16u)));
+ emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
+ }
+
+ vec4_instruction *inst = emit(GS_OPCODE_THREAD_END);
+ inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
inst->base_mrf = base_mrf;
inst->mlen = 1;
}
+void
+gen6_gs_visitor::setup_payload()
+{
+ int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES];
+
+ /* Attributes are going to be interleaved, so one register contains two
+ * attribute slots. Entries of attribute_map are therefore expressed in
+ * attribute-slot units (attributes_per_reg * register number), as is done
+ * for PrimitiveID further down.
+ */
+ int attributes_per_reg = 2;
+
+ /* If a geometry shader tries to read from an input that wasn't written by
+ * the vertex shader, that produces undefined results, but it shouldn't
+ * crash anything. So initialize attribute_map to zeros--that ensures that
+ * these undefined results are read from r0.
+ */
+ memset(attribute_map, 0, sizeof(attribute_map));
+
+ int reg = 0;
+
+ /* The payload always contains important data in r0, so the register
+ * counter starts by stepping over it.
+ */
+ reg++;
+
+ /* r1 is always part of the payload and it holds information relevant
+ * for transform feedback when we set the GEN6_GS_SVBI_PAYLOAD_ENABLE bit in
+ * the 3DSTATE_GS packet. We will overwrite it with the PrimitiveID
+ * information (and move the original value to a virtual register if
+ * necessary).
+ *
+ * The register counter is advanced past r1 unconditionally, whether or
+ * not PrimitiveID is mapped. After r1 come the uniforms and then the
+ * varying inputs; each setup helper returns the updated register index,
+ * and the index left over at the end is recorded as
+ * first_non_payload_grf.
+ */
+ if (c->prog_data.include_primitive_id)
+ attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg;
+ reg++;
+
+ reg = setup_uniforms(reg);
+
+ reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg);
+
+ lower_attributes_to_hw_regs(attribute_map, true);
+
+ this->first_non_payload_grf = reg;
+}
+
+void
+gen6_gs_visitor::xfb_setup()
+{
+ static const unsigned swizzle_for_offset[4] = {
+ BRW_SWIZZLE4(0, 1, 2, 3),
+ BRW_SWIZZLE4(1, 2, 3, 3),
+ BRW_SWIZZLE4(2, 3, 3, 3),
+ BRW_SWIZZLE4(3, 3, 3, 3)
+ };
+
+ struct brw_gs_prog_data *prog_data =
+ (struct brw_gs_prog_data *) &c->prog_data;
+
+ const struct gl_transform_feedback_info *linked_xfb_info =
+ &this->shader_prog->LinkedTransformFeedback;
+ int i;
+
+ /* Make sure that the VUE slots won't overflow the unsigned chars in
+ * prog_data->transform_feedback_bindings[].
+ */
+ STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
+
+ /* Make sure that we don't need more binding table entries than we've
+ * set aside for use in transform feedback. (We shouldn't, since we
+ * set aside enough binding table entries to have one per component).
+ *
+ * Once that holds, the loop below copies the linked transform feedback
+ * outputs into prog_data: for each output it stores OutputRegister as the
+ * binding and selects a swizzle from swizzle_for_offset[] using
+ * ComponentOffset.
+ *
+ * NOTE(review): swizzle_for_offset[] has four entries, so ComponentOffset
+ * is assumed to be in the range [0, 3] -- confirm the linker guarantees
+ * this, since the lookup is not bounds-checked here.
+ */
+ assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
+
+ prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
+ for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) {
+ prog_data->transform_feedback_bindings[i] =
+ linked_xfb_info->Outputs[i].OutputRegister;
+ prog_data->transform_feedback_swizzles[i] =
+ swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
+ }
+}
+
+void
+gen6_gs_visitor::xfb_write()
+{
+ unsigned num_verts;
+ struct brw_gs_prog_data *prog_data =
+ (struct brw_gs_prog_data *) &c->prog_data;
+
+ if (!prog_data->num_transform_feedback_bindings)
+ return;
+
+ switch (c->prog_data.output_topology) {
+ case _3DPRIM_POINTLIST:
+ num_verts = 1;
+ break;
+ case _3DPRIM_LINELIST:
+ case _3DPRIM_LINESTRIP:
+ case _3DPRIM_LINELOOP:
+ num_verts = 2;
+ break;
+ case _3DPRIM_TRILIST:
+ case _3DPRIM_TRIFAN:
+ case _3DPRIM_TRISTRIP:
+ case _3DPRIM_RECTLIST:
+ num_verts = 3;
+ break;
+ case _3DPRIM_QUADLIST:
+ case _3DPRIM_QUADSTRIP:
+ case _3DPRIM_POLYGON:
+ num_verts = 3;
+ break;
+ default:
+ unreachable("Unexpected primitive type in Gen6 SOL program.");
+ }
+
+ this->current_annotation = "gen6 thread end: svb writes init";
+
+ emit(MOV(dst_reg(this->vertex_output_offset), 0u));
+ emit(MOV(dst_reg(this->sol_prim_written), 0u));
+
+ /* Check that at least one primitive can be written
+ *
+ * Note: since we use the binding table to keep track of buffer offsets
+ * and stride, the GS doesn't need to keep track of a separate pointer
+ * into each buffer; it uses a single pointer which increments by 1 for
+ * each vertex. So we use SVBI0 for this pointer, regardless of whether
+ * transform feedback is in interleaved or separate attribs mode.
+ *
+ * The value computed here is svbi + num_verts, where num_verts is the
+ * number of vertices per written primitive selected from the output
+ * topology by the switch above (1 for points, 2 for lines, 3 for
+ * everything else that is accepted).
+ */
+ src_reg sol_temp(this, glsl_type::uvec4_type);
+ emit(ADD(dst_reg(sol_temp), this->svbi, src_reg(num_verts)));
+
+ /* Compare SVBI calculated number with the maximum value, which is
+ * in R1.4 (previously saved in this->max_svbi) for gen6. Only when the
+ * first primitive fits are the destination indices initialized, to the
+ * per-vertex constants loaded below plus svbi.
+ */
+ emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
+ emit(IF(BRW_PREDICATE_NORMAL));
+ {
+ src_reg destination_indices_uw =
+ retype(destination_indices, BRW_REGISTER_TYPE_UW);
+
+ vec4_instruction *inst = emit(MOV(dst_reg(destination_indices_uw),
+ brw_imm_v(0x00020100))); /* (0, 1, 2) */
+ inst->force_writemask_all = true;
+
+ emit(ADD(dst_reg(this->destination_indices),
+ this->destination_indices,
+ this->svbi));
+ }
+ emit(BRW_OPCODE_ENDIF);
+
+ /* Write transform feedback data for all processed vertices.
+ *
+ * This C++ loop runs while the shader is being generated, once per
+ * possible output vertex (VerticesOut). For each index i it emits a
+ * runtime "i < vertex_count" test that guards the code produced by
+ * xfb_program(i, num_verts), so vertices that were never emitted are
+ * skipped when the shader executes.
+ */
+ for (int i = 0; i < c->gp->program.VerticesOut; i++) {
+ emit(MOV(dst_reg(sol_temp), i));
+ emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
+ BRW_CONDITIONAL_L));
+ emit(IF(BRW_PREDICATE_NORMAL));
+ {
+ xfb_program(i, num_verts);
+ }
+ emit(BRW_OPCODE_ENDIF);
+ }
+}
+
+void
+gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
+{
+ struct brw_gs_prog_data *prog_data =
+ (struct brw_gs_prog_data *) &c->prog_data;
+ unsigned binding;
+ unsigned num_bindings = prog_data->num_transform_feedback_bindings;
+ src_reg sol_temp(this, glsl_type::uvec4_type);
+
+ /* Check for buffer overflow: we need room to write the complete primitive
+ * (all vertices). Otherwise, avoid writing any vertices for it.
+ *
+ * The test emitted below is
+ * (sol_prim_written + 1) * num_verts + svbi <= max_svbi
+ * and everything else in this function is generated inside the IF block
+ * it controls.
+ */
+ emit(ADD(dst_reg(sol_temp), this->sol_prim_written, 1u));
+ emit(MUL(dst_reg(sol_temp), sol_temp, src_reg(num_verts)));
+ emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi));
+ emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
+ emit(IF(BRW_PREDICATE_NORMAL));
+ {
+ /* Avoid overwriting MRF 1 as it is used as URB write message header */
+ dst_reg mrf_reg(MRF, 2);
+
+ this->current_annotation = "gen6: emit SOL vertex data";
+ /* For each vertex, generate code to output each varying using the
+ * appropriate binding table entry.
+ */
+ for (binding = 0; binding < num_bindings; ++binding) {
+ unsigned char varying =
+ prog_data->transform_feedback_bindings[binding];
+
+ /* Set up the correct destination index for this vertex. sol_vertex is
+ * the position of the vertex within its primitive, that is
+ * vertex % num_verts.
+ */
+ vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
+ mrf_reg,
+ this->destination_indices);
+ inst->sol_vertex = vertex % num_verts;
+
+ /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
+ *
+ * "Prior to End of Thread with a URB_WRITE, the kernel must
+ * ensure that all writes are complete by sending the final
+ * write as a committed write."
+ *
+ * Here the final write is the last binding of the last vertex of a
+ * primitive (sol_vertex == num_verts - 1).
+ */
+ bool final_write = binding == (unsigned) num_bindings - 1 &&
+ inst->sol_vertex == num_verts - 1;
+
+ /* Compute offset of this varying for the current vertex
+ * in vertex_output. The offset is known at code generation time, so it
+ * is loaded into vertex_output_offset as an immediate and then used
+ * as the relative address of the read.
+ */
+ this->current_annotation = output_reg_annotation[varying];
+ src_reg data(this->vertex_output);
+ data.reladdr = ralloc(mem_ctx, src_reg);
+ int offset = get_vertex_output_offset_for_varying(vertex, varying);
+ emit(MOV(dst_reg(this->vertex_output_offset), offset));
+ memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+ data.type = output_reg[varying].type;
+
+ /* PSIZ, LAYER and VIEWPORT are packed in different channels of the
+ * same slot, so make sure we write the appropriate channel. Any other
+ * varying uses the swizzle recorded for its binding in
+ * prog_data->transform_feedback_swizzles[].
+ */
+ if (varying == VARYING_SLOT_PSIZ)
+ data.swizzle = BRW_SWIZZLE_WWWW;
+ else if (varying == VARYING_SLOT_LAYER)
+ data.swizzle = BRW_SWIZZLE_YYYY;
+ else if (varying == VARYING_SLOT_VIEWPORT)
+ data.swizzle = BRW_SWIZZLE_ZZZZ;
+ else
+ data.swizzle = prog_data->transform_feedback_swizzles[binding];
+
+ /* Write data */
+ inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
+ inst->sol_binding = binding;
+ inst->sol_final_write = final_write;
+
+ if (final_write) {
+ /* This is the last vertex of the primitive, then increment
+ * SO num primitive counter and destination indices.
+ */
+ emit(ADD(dst_reg(this->destination_indices),
+ this->destination_indices,
+ src_reg(num_verts)));
+ emit(ADD(dst_reg(this->sol_prim_written),
+ this->sol_prim_written, 1u));
+ }
+
+ }
+ this->current_annotation = NULL;
+ }
+ emit(BRW_OPCODE_ENDIF);
+}
+
+int
+gen6_gs_visitor::get_vertex_output_offset_for_varying(int vertex, int varying)
+{
+ /* Find the output slot assigned to this varying.
+ *
+ * VARYING_SLOT_LAYER and VARYING_SLOT_VIEWPORT are packed in the same slot
+ * as VARYING_SLOT_PSIZ.
+ *
+ * The value returned at the end is the index into vertex_output of that
+ * slot for the given vertex: vertex * (num_slots + 1) + slot. Each
+ * buffered vertex takes num_slots entries plus one extra entry
+ * (presumably the per-vertex flags item that emit_thread_end() skips over
+ * -- confirm against the code that fills vertex_output).
+ */
+ if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT)
+ varying = VARYING_SLOT_PSIZ;
+ int slot = prog_data->vue_map.varying_to_slot[varying];
+
+ if (slot < 0) {
+ /* This varying does not exist in the VUE so we are not writing to it
+ * and its value is undefined. We still want to return a valid offset
+ * into vertex_output though, to prevent any out-of-bound accesses into
+ * the vertex_output array. Since the value for this varying is undefined
+ * we don't really care for the value we assign to it, so any offset
+ * within the limits of vertex_output will do.
+ */
+ slot = 0;
+ }
+
+ return vertex * (prog_data->vue_map.num_slots + 1) + slot;
+}
+
} /* namespace brw */