static void
brw_emit_prim(struct brw_context *brw,
const struct _mesa_prim *prim,
- uint32_t hw_prim)
+ uint32_t hw_prim,
+ struct brw_transform_feedback_object *xfb_obj,
+ unsigned stream)
{
int verts_per_instance;
int vertex_access_type;
verts_per_instance = prim->count;
/* If nothing to emit, just return. */
- if (verts_per_instance == 0 && !prim->is_indirect)
+ if (verts_per_instance == 0 && !prim->is_indirect && !xfb_obj)
return;
/* If we're set to always flush, do it before and after the primitive emit.
brw_emit_mi_flush(brw);
/* If indirect, emit a bunch of loads from the indirect BO. */
- if (prim->is_indirect) {
+ if (xfb_obj) {
+ indirect_flag = GEN7_3DPRIM_INDIRECT_PARAMETER_ENABLE;
+
+ brw_load_register_mem(brw, GEN7_3DPRIM_VERTEX_COUNT,
+ xfb_obj->prim_count_bo,
+ I915_GEM_DOMAIN_VERTEX, 0,
+ stream * sizeof(uint32_t));
+ BEGIN_BATCH(9);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (9 - 2));
+ OUT_BATCH(GEN7_3DPRIM_INSTANCE_COUNT);
+ OUT_BATCH(prim->num_instances);
+ OUT_BATCH(GEN7_3DPRIM_START_VERTEX);
+ OUT_BATCH(0);
+ OUT_BATCH(GEN7_3DPRIM_BASE_VERTEX);
+ OUT_BATCH(0);
+ OUT_BATCH(GEN7_3DPRIM_START_INSTANCE);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+ } else if (prim->is_indirect) {
struct gl_buffer_object *indirect_buffer = brw->ctx.DrawIndirectBuffer;
drm_intel_bo *bo = intel_bufferobj_buffer(brw,
intel_buffer_object(indirect_buffer),
const struct _mesa_index_buffer *ib,
GLuint min_index,
GLuint max_index,
+ struct brw_transform_feedback_object *xfb_obj,
+ unsigned stream,
struct gl_buffer_object *indirect)
{
struct brw_context *brw = brw_context(ctx);
brw_upload_render_state(brw);
}
- brw_emit_prim(brw, &prims[i], brw->primitive);
+ brw_emit_prim(brw, &prims[i], brw->primitive, xfb_obj, stream);
brw->no_batch_wrap = false;
GLboolean index_bounds_valid,
GLuint min_index,
GLuint max_index,
- struct gl_transform_feedback_object *unused_tfb_object,
+ struct gl_transform_feedback_object *gl_xfb_obj,
unsigned stream,
struct gl_buffer_object *indirect)
{
struct brw_context *brw = brw_context(ctx);
const struct gl_client_array **arrays = ctx->Array._DrawArrays;
-
- assert(unused_tfb_object == NULL);
+ struct brw_transform_feedback_object *xfb_obj =
+ (struct brw_transform_feedback_object *) gl_xfb_obj;
if (!brw_check_conditional_render(brw))
return;
* to it.
*/
brw_try_draw_prims(ctx, arrays, prims, nr_prims, ib, min_index, max_index,
- indirect);
+ xfb_obj, stream, indirect);
}
void
--- /dev/null
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * An implementation of the transform feedback driver hooks for Haswell
+ * and later hardware. This uses MI_MATH to compute the number of vertices
+ * written (for use by DrawTransformFeedback()) without any CPU<->GPU
+ * synchronization which could stall.
+ */
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "intel_batchbuffer.h"
+#include "intel_buffer_objects.h"
+#include "main/transformfeedback.h"
+
+/**
+ * We store several values in obj->prim_count_bo:
+ *
+ * [4x 32-bit values]: Final Number of Vertices Written
+ * [4x 32-bit values]: Tally of Primitives Written So Far
+ * [4x 64-bit values]: Starting SO_NUM_PRIMS_WRITTEN Counter Snapshots
+ *
+ * The first set of values is used by DrawTransformFeedback(), which
+ * copies one of them into the 3DPRIM_VERTEX_COUNT register and performs
+ * an indirect draw. The other values are just temporary storage.
+ */
+
+#define TALLY_OFFSET (BRW_MAX_XFB_STREAMS * sizeof(uint32_t))
+#define START_OFFSET (TALLY_OFFSET * 2)
+
+/**
+ * Store the SO_NUM_PRIMS_WRITTEN counters for each stream (4 uint64_t values)
+ * to prim_count_bo.
+ */
+static void
+save_prim_start_values(struct brw_context *brw,
+ struct brw_transform_feedback_object *obj)
+{
+ /* Flush any drawing so that the counters have the right values. */
+ brw_emit_mi_flush(brw);
+
+ /* Emit MI_STORE_REGISTER_MEM commands to write the values. */
+ for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
+ brw_store_register_mem64(brw, obj->prim_count_bo,
+ GEN7_SO_NUM_PRIMS_WRITTEN(i),
+ START_OFFSET + i * sizeof(uint64_t));
+ }
+}
+
+/**
+ * Compute the number of primitives written during our most recent
+ * transform feedback activity (the current SO_NUM_PRIMS_WRITTEN value
+ * minus the stashed "start" value), and add it to our running tally.
+ *
+ * If \p finalize is true, also compute the number of vertices written
+ * (by multiplying by the number of vertices per primitive), and store
+ * that to the "final" location.
+ *
+ * Otherwise, just overwrite the old tally with the new one.
+ */
+static void
+tally_prims_written(struct brw_context *brw,
+ struct brw_transform_feedback_object *obj,
+ bool finalize)
+{
+ /* Flush any drawing so that the counters have the right values. */
+ brw_emit_mi_flush(brw);
+
+ for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
+ /* GPR0 = Tally */
+ brw_load_register_imm32(brw, HSW_CS_GPR(0) + 4, 0);
+ brw_load_register_mem(brw, HSW_CS_GPR(0), obj->prim_count_bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ TALLY_OFFSET + i * sizeof(uint32_t));
+ /* GPR1 = Start Snapshot */
+ brw_load_register_mem64(brw, HSW_CS_GPR(1), obj->prim_count_bo,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION,
+ START_OFFSET + i * sizeof(uint64_t));
+ /* GPR2 = Ending Snapshot */
+ brw_load_register_reg64(brw, GEN7_SO_NUM_PRIMS_WRITTEN(i), HSW_CS_GPR(2));
+
+ BEGIN_BATCH(9);
+ OUT_BATCH(HSW_MI_MATH | (9 - 2));
+ /* GPR1 = GPR2 (End) - GPR1 (Start) */
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R2));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
+ OUT_BATCH(MI_MATH_ALU0(SUB));
+ OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
+ /* GPR0 = GPR0 (Tally) + GPR1 (Diff) */
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
+ OUT_BATCH(MI_MATH_ALU0(ADD));
+ OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
+ ADVANCE_BATCH();
+
+ if (!finalize) {
+ /* Write back the new tally */
+ brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
+ TALLY_OFFSET + i * sizeof(uint32_t));
+ } else {
+ /* Convert the number of primitives to the number of vertices. */
+ if (obj->primitive_mode == GL_LINES) {
+ /* Double R0 (R0 = R0 + R0) */
+ BEGIN_BATCH(5);
+ OUT_BATCH(HSW_MI_MATH | (5 - 2));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
+ OUT_BATCH(MI_MATH_ALU0(ADD));
+ OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
+ ADVANCE_BATCH();
+ } else if (obj->primitive_mode == GL_TRIANGLES) {
+ /* Triple R0 (R1 = R0 + R0, R0 = R0 + R1) */
+ BEGIN_BATCH(9);
+ OUT_BATCH(HSW_MI_MATH | (9 - 2));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R0));
+ OUT_BATCH(MI_MATH_ALU0(ADD));
+ OUT_BATCH(MI_MATH_ALU2(STORE, R1, ACCU));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCA, R0));
+ OUT_BATCH(MI_MATH_ALU2(LOAD, SRCB, R1));
+ OUT_BATCH(MI_MATH_ALU0(ADD));
+ OUT_BATCH(MI_MATH_ALU2(STORE, R0, ACCU));
+ ADVANCE_BATCH();
+ }
+ /* Store it to the final result */
+ brw_store_register_mem32(brw, obj->prim_count_bo, HSW_CS_GPR(0),
+ i * sizeof(uint32_t));
+ }
+ }
+}
+
+/**
+ * BeginTransformFeedback() driver hook.
+ */
+void
+hsw_begin_transform_feedback(struct gl_context *ctx, GLenum mode,
+ struct gl_transform_feedback_object *obj)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_transform_feedback_object *brw_obj =
+ (struct brw_transform_feedback_object *) obj;
+
+ brw_obj->primitive_mode = mode;
+
+ /* Reset the SO buffer offsets to 0. */
+ if (brw->gen >= 8) {
+ brw_obj->zero_offsets = true;
+ } else {
+ BEGIN_BATCH(1 + 2 * BRW_MAX_XFB_STREAMS);
+ OUT_BATCH(MI_LOAD_REGISTER_IMM | (1 + 2 * BRW_MAX_XFB_STREAMS - 2));
+ for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_BATCH(0);
+ }
+ ADVANCE_BATCH();
+ }
+
+ /* Zero out the initial tallies */
+ brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET, 0ull);
+ brw_store_data_imm64(brw, brw_obj->prim_count_bo, TALLY_OFFSET + 8, 0ull);
+
+ /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
+ save_prim_start_values(brw, brw_obj);
+}
+
+/**
+ * PauseTransformFeedback() driver hook.
+ */
+void
+hsw_pause_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_transform_feedback_object *brw_obj =
+ (struct brw_transform_feedback_object *) obj;
+
+ if (brw->is_haswell) {
+ /* Save the SOL buffer offset register values. */
+ for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(MI_STORE_REGISTER_MEM | (3 - 2));
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_RELOC(brw_obj->offset_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ i * sizeof(uint32_t));
+ ADVANCE_BATCH();
+ }
+ }
+
+ /* Add any primitives written to our tally */
+ tally_prims_written(brw, brw_obj, false);
+}
+
+/**
+ * ResumeTransformFeedback() driver hook.
+ */
+void
+hsw_resume_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_transform_feedback_object *brw_obj =
+ (struct brw_transform_feedback_object *) obj;
+
+ if (brw->is_haswell) {
+ /* Reload the SOL buffer offset registers. */
+ for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
+ BEGIN_BATCH(3);
+ OUT_BATCH(GEN7_MI_LOAD_REGISTER_MEM | (3 - 2));
+ OUT_BATCH(GEN7_SO_WRITE_OFFSET(i));
+ OUT_RELOC(brw_obj->offset_bo,
+ I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
+ i * sizeof(uint32_t));
+ ADVANCE_BATCH();
+ }
+ }
+
+ /* Store the new starting value of the SO_NUM_PRIMS_WRITTEN counters. */
+ save_prim_start_values(brw, brw_obj);
+}
+
+/**
+ * EndTransformFeedback() driver hook.
+ */
+void
+hsw_end_transform_feedback(struct gl_context *ctx,
+ struct gl_transform_feedback_object *obj)
+{
+ struct brw_context *brw = brw_context(ctx);
+ struct brw_transform_feedback_object *brw_obj =
+ (struct brw_transform_feedback_object *) obj;
+
+ /* Add any primitives written to our tally, convert it from the number
+ * of primitives written to the number of vertices written, and store
+ * it in the "final" location in the buffer which DrawTransformFeedback()
+ * will use as the vertex count.
+ */
+ tally_prims_written(brw, brw_obj, true);
+}