if (stream_id > 0 && shader_prog->TransformFeedback.NumVarying == 0)
return;
- {
- /* If we're outputting 32 control data bits or less, then we can wait
- * until the shader is over to output them all. Otherwise we need to
- * output them as we go. Now is the time to do it, since we're about to
- * output the vertex_count'th vertex, so it's guaranteed that the
- * control data bits associated with the (vertex_count - 1)th vertex are
- * correct.
+ /* If we're outputting 32 control data bits or less, then we can wait
+ * until the shader is over to output them all. Otherwise we need to
+ * output them as we go. Now is the time to do it, since we're about to
+ * output the vertex_count'th vertex, so it's guaranteed that the
+ * control data bits associated with the (vertex_count - 1)th vertex are
+ * correct.
+ */
+ if (c->control_data_header_size_bits > 32) {
+ this->current_annotation = "emit vertex: emit control data bits";
+ /* Only emit control data bits if we've finished accumulating a batch
+ * of 32 bits. This is the case when:
+ *
+ * (vertex_count * bits_per_vertex) % 32 == 0
+ *
+ * (in other words, when the last 5 bits of vertex_count *
+ * bits_per_vertex are 0). Assuming bits_per_vertex == 2^n for some
+ * integer n (which is always the case, since bits_per_vertex is
+ * always 1 or 2), this is equivalent to requiring that the last 5-n
+ * bits of vertex_count are 0:
+ *
+ * vertex_count & (2^(5-n) - 1) == 0
+ *
+ * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
+ * equivalent to:
+ *
+ * vertex_count & (32 / bits_per_vertex - 1) == 0
*/
- if (c->control_data_header_size_bits > 32) {
- this->current_annotation = "emit vertex: emit control data bits";
- /* Only emit control data bits if we've finished accumulating a batch
- * of 32 bits. This is the case when:
- *
- * (vertex_count * bits_per_vertex) % 32 == 0
- *
- * (in other words, when the last 5 bits of vertex_count *
- * bits_per_vertex are 0). Assuming bits_per_vertex == 2^n for some
- * integer n (which is always the case, since bits_per_vertex is
- * always 1 or 2), this is equivalent to requiring that the last 5-n
- * bits of vertex_count are 0:
- *
- * vertex_count & (2^(5-n) - 1) == 0
- *
- * 2^(5-n) == 2^5 / 2^n == 32 / bits_per_vertex, so this is
- * equivalent to:
- *
- * vertex_count & (32 / bits_per_vertex - 1) == 0
+ vec4_instruction *inst =
+ emit(AND(dst_null_d(), this->vertex_count,
+ (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+ emit(IF(BRW_PREDICATE_NORMAL));
+ {
+ /* If vertex_count is 0, then no control data bits have been
+ * accumulated yet, so we skip emitting them.
*/
- vec4_instruction *inst =
- emit(AND(dst_null_d(), this->vertex_count,
- (uint32_t) (32 / c->control_data_bits_per_vertex - 1)));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
-
+ emit(CMP(dst_null_d(), this->vertex_count, 0u,
+ BRW_CONDITIONAL_NEQ));
emit(IF(BRW_PREDICATE_NORMAL));
- {
- /* If vertex_count is 0, then no control data bits have been
- * accumulated yet, so we skip emitting them.
- */
- emit(CMP(dst_null_d(), this->vertex_count, 0u,
- BRW_CONDITIONAL_NEQ));
- emit(IF(BRW_PREDICATE_NORMAL));
- emit_control_data_bits();
- emit(BRW_OPCODE_ENDIF);
-
- /* Reset control_data_bits to 0 so we can start accumulating a new
- * batch.
- *
- * Note: in the case where vertex_count == 0, this neutralizes the
- * effect of any call to EndPrimitive() that the shader may have
- * made before outputting its first vertex.
- */
- inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
- inst->force_writemask_all = true;
- }
+ emit_control_data_bits();
emit(BRW_OPCODE_ENDIF);
+
+ /* Reset control_data_bits to 0 so we can start accumulating a new
+ * batch.
+ *
+ * Note: in the case where vertex_count == 0, this neutralizes the
+ * effect of any call to EndPrimitive() that the shader may have
+ * made before outputting its first vertex.
+ */
+ inst = emit(MOV(dst_reg(this->control_data_bits), 0u));
+ inst->force_writemask_all = true;
}
+ emit(BRW_OPCODE_ENDIF);
+ }
- this->current_annotation = "emit vertex: vertex data";
- emit_vertex();
+ this->current_annotation = "emit vertex: vertex data";
+ emit_vertex();
- /* In stream mode we have to set control data bits for all vertices
- * unless we have disabled control data bits completely (which we do
- * do for GL_POINTS outputs that don't use streams).
- */
- if (c->control_data_header_size_bits > 0 &&
- c->prog_data.control_data_format ==
- GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
- this->current_annotation = "emit vertex: Stream control data bits";
- set_stream_control_data_bits(stream_id);
- }
+ /* In stream mode we have to set control data bits for all vertices
+ * unless we have disabled control data bits completely (which we do
+ * for GL_POINTS outputs that don't use streams).
+ */
+ if (c->control_data_header_size_bits > 0 &&
+ c->prog_data.control_data_format ==
+ GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID) {
+ this->current_annotation = "emit vertex: Stream control data bits";
+ set_stream_control_data_bits(stream_id);
}
this->current_annotation = NULL;
{
this->current_annotation = "gen6 emit vertex";
- {
- /* Buffer all output slots for this vertex in vertex_output */
- for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
- int varying = prog_data->vue_map.slot_to_varying[slot];
- if (varying != VARYING_SLOT_PSIZ) {
- dst_reg dst(this->vertex_output);
- dst.reladdr = ralloc(mem_ctx, src_reg);
- memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
- emit_urb_slot(dst, varying);
- } else {
- /* The PSIZ slot can pack multiple varyings in different channels
- * and emit_urb_slot() will produce a MOV instruction for each of
- * them. Since we are writing to an array, that will translate to
- * possibly multiple MOV instructions with an array destination and
- * each will generate a scratch write with the same offset into
- * scratch space (thus, each one overwriting the previous). This is
- * not what we want. What we will do instead is emit PSIZ to a
- * a regular temporary register, then move that resgister into the
- * array. This way we only have one instruction with an array
- * destination and we only produce a single scratch write.
- */
- dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
- emit_urb_slot(tmp, varying);
- dst_reg dst(this->vertex_output);
- dst.reladdr = ralloc(mem_ctx, src_reg);
- memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
- vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
- inst->force_writemask_all = true;
- }
-
- emit(ADD(dst_reg(this->vertex_output_offset),
- this->vertex_output_offset, 1u));
- }
-
- /* Now buffer flags for this vertex */
- dst_reg dst(this->vertex_output);
- dst.reladdr = ralloc(mem_ctx, src_reg);
- memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
- if (c->gp->program.OutputType == GL_POINTS) {
- /* If we are outputting points, then every vertex has PrimStart and
- * PrimEnd set.
- */
- emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
- URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
- emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+ /* Buffer all output slots for this vertex in vertex_output */
+ for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
+ int varying = prog_data->vue_map.slot_to_varying[slot];
+ if (varying != VARYING_SLOT_PSIZ) {
+ dst_reg dst(this->vertex_output);
+ dst.reladdr = ralloc(mem_ctx, src_reg);
+ memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+ emit_urb_slot(dst, varying);
} else {
- /* Otherwise, we can only set the PrimStart flag, which we have stored
- * in the first_vertex register. We will have to wait until we execute
- * EndPrimitive() or we end the thread to set the PrimEnd flag on a
- * vertex.
+ /* The PSIZ slot can pack multiple varyings in different channels
+ * and emit_urb_slot() will produce a MOV instruction for each of
+ * them. Since we are writing to an array, that will translate to
+ * possibly multiple MOV instructions with an array destination and
+ * each will generate a scratch write with the same offset into
+ * scratch space (thus, each one overwriting the previous). This is
+ * not what we want. What we will do instead is emit PSIZ to a
+ * regular temporary register, then move that register into the
+ * array. This way we only have one instruction with an array
+ * destination and we only produce a single scratch write.
*/
- emit(OR(dst, this->first_vertex,
- (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
- emit(MOV(dst_reg(this->first_vertex), 0u));
+ dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
+ emit_urb_slot(tmp, varying);
+ dst_reg dst(this->vertex_output);
+ dst.reladdr = ralloc(mem_ctx, src_reg);
+ memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+ vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
+ inst->force_writemask_all = true;
}
+
emit(ADD(dst_reg(this->vertex_output_offset),
this->vertex_output_offset, 1u));
}
+
+ /* Now buffer flags for this vertex */
+ dst_reg dst(this->vertex_output);
+ dst.reladdr = ralloc(mem_ctx, src_reg);
+ memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
+ if (c->gp->program.OutputType == GL_POINTS) {
+ /* If we are outputting points, then every vertex has PrimStart and
+ * PrimEnd set.
+ */
+ emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
+ URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
+ emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
+ } else {
+ /* Otherwise, we can only set the PrimStart flag, which we have stored
+ * in the first_vertex register. We will have to wait until we execute
+ * EndPrimitive() or we end the thread to set the PrimEnd flag on a
+ * vertex.
+ */
+ emit(OR(dst, this->first_vertex,
+ (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
+ emit(MOV(dst_reg(this->first_vertex), 0u));
+ }
+ emit(ADD(dst_reg(this->vertex_output_offset),
+ this->vertex_output_offset, 1u));
}
void