i965: Add support for xfb overflow on query buffer objects.
author Rafael Antognolli <rafael.antognolli@intel.com>
Fri, 20 Jan 2017 17:53:25 +0000 (09:53 -0800)
committer Kenneth Graunke <kenneth@whitecape.org>
Wed, 22 Feb 2017 00:28:32 +0000 (16:28 -0800)
Enable getting the results of a transform feedback overflow query with a
buffer object.

v4:
    - make hsw_overflow_result_to_gpr0 a public function, so it can be used
      by conditional render. (Kenneth)
    - fix typo grp0/gpr0 (Kenneth)
    - rename load_gen_written_data_to_regs to
      load_overflow_data_to_cs_gprs (Kenneth)

Signed-off-by: Rafael Antognolli <rafael.antognolli@intel.com>
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
src/mesa/drivers/dri/i965/brw_context.h
src/mesa/drivers/dri/i965/hsw_queryobj.c

index 4cb96ce6cfff9cfa5b01e194627ce542de89e956..83c91db5b2e4a13fb10eeabff6aaf8f88061431c 100644 (file)
@@ -1264,6 +1264,9 @@ void brw_write_timestamp(struct brw_context *brw, drm_intel_bo *bo, int idx);
 void brw_write_depth_count(struct brw_context *brw, drm_intel_bo *bo, int idx);
 
 /** hsw_queryobj.c */
+void hsw_overflow_result_to_gpr0(struct brw_context *brw,
+                                 struct brw_query_object *query,
+                                 int count);
 void hsw_init_queryobj_functions(struct dd_function_table *functions);
 
 /** brw_conditional_render.c */
index 0da2c3d3888fdc4f18ccffc26bf9225b9950650d..938fe06c8743f6eb735a714ec624b5df7b6046a2 100644 (file)
@@ -186,6 +186,107 @@ gpr0_to_bool(struct brw_context *brw)
    ADVANCE_BATCH();
 }
 
+static void
+load_overflow_data_to_cs_gprs(struct brw_context *brw,
+                              struct brw_query_object *query,
+                              int idx)
+{
+   int offset = idx * sizeof(uint64_t) * 4; /* 4 qwords per idx in query->bo */
+
+   brw_load_register_mem64(brw,
+                           HSW_CS_GPR(1), /* qword 0 of the slot */
+                           query->bo,
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           offset);
+
+   offset += sizeof(uint64_t);
+   brw_load_register_mem64(brw,
+                           HSW_CS_GPR(2), /* qword 1 of the slot */
+                           query->bo,
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           offset);
+
+   offset += sizeof(uint64_t);
+   brw_load_register_mem64(brw,
+                           HSW_CS_GPR(3), /* qword 2 of the slot */
+                           query->bo,
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           offset);
+
+   offset += sizeof(uint64_t);
+   brw_load_register_mem64(brw,
+                           HSW_CS_GPR(4), /* qword 3 of the slot */
+                           query->bo,
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           I915_GEM_DOMAIN_INSTRUCTION,
+                           offset);
+}
+
+/* Emit one MI_MATH packet that folds GPR1..GPR4 into the accumulator GPR0:
+ * R3 = R4 - R3;
+ * R1 = R2 - R1;
+ * R1 = R3 - R1;   non-zero iff the two differences above disagree
+ * R0 = R0 | R1;   R0 stays non-zero once any call has seen a mismatch
+ */
+static void
+calc_overflow_for_stream(struct brw_context *brw)
+{
+   static const uint32_t maths[] = {
+      MI_MATH_ALU2(LOAD, SRCA, R4),
+      MI_MATH_ALU2(LOAD, SRCB, R3),
+      MI_MATH_ALU0(SUB),
+      MI_MATH_ALU2(STORE, R3, ACCU), /* R3 = R4 - R3 */
+      MI_MATH_ALU2(LOAD, SRCA, R2),
+      MI_MATH_ALU2(LOAD, SRCB, R1),
+      MI_MATH_ALU0(SUB),
+      MI_MATH_ALU2(STORE, R1, ACCU), /* R1 = R2 - R1 */
+      MI_MATH_ALU2(LOAD, SRCA, R3),
+      MI_MATH_ALU2(LOAD, SRCB, R1),
+      MI_MATH_ALU0(SUB),
+      MI_MATH_ALU2(STORE, R1, ACCU), /* R1 = R3 - R1 */
+      MI_MATH_ALU2(LOAD, SRCA, R1),
+      MI_MATH_ALU2(LOAD, SRCB, R0),
+      MI_MATH_ALU0(OR),
+      MI_MATH_ALU2(STORE, R0, ACCU), /* R0 = R0 | R1 */
+   };
+
+   BEGIN_BATCH(1 + ARRAY_SIZE(maths)); /* header dword + one dword per ALU op */
+   OUT_BATCH(HSW_MI_MATH | (1 + ARRAY_SIZE(maths) - 2)); /* total dwords - 2 */
+
+   for (int m = 0; m < ARRAY_SIZE(maths); m++)
+      OUT_BATCH(maths[m]);
+
+   ADVANCE_BATCH();
+}
+
+static void
+calc_overflow_to_gpr0(struct brw_context *brw, struct brw_query_object *query,
+                       int count)
+{
+   brw_load_register_imm64(brw, HSW_CS_GPR(0), 0ull); /* clear accumulator */
+
+   for (int i = 0; i < count; i++) { /* one 4-qword slot of query->bo per pass */
+      load_overflow_data_to_cs_gprs(brw, query, i);
+      calc_overflow_for_stream(brw); /* ORs this slot's result into GPR0 */
+   }
+}
+
+/* Take a query and calculate whether there was overflow during transform
+ * feedback, combining the first `count` 4-qword slots of query->bo (callers
+ * here pass 1 or MAX_VERTEX_STREAMS). GPR0 holds the gpr0_to_bool() result.
+ */
+void
+hsw_overflow_result_to_gpr0(struct brw_context *brw,
+                            struct brw_query_object *query,
+                            int count)
+{
+   calc_overflow_to_gpr0(brw, query, count);
+   gpr0_to_bool(brw);
+}
+
 static void
 hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query,
                    struct gl_buffer_object *buf, intptr_t offset,
@@ -223,6 +324,11 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query,
                               I915_GEM_DOMAIN_INSTRUCTION,
                               I915_GEM_DOMAIN_INSTRUCTION,
                               0 * sizeof(uint64_t));
+   } else if (query->Base.Target == GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB
+              || query->Base.Target == GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB) {
+      /* Don't do anything in advance here, since the math for this is a little
+       * more complex.
+       */
    } else {
       brw_load_register_mem64(brw,
                               HSW_CS_GPR(1),
@@ -274,6 +380,12 @@ hsw_result_to_gpr0(struct gl_context *ctx, struct brw_query_object *query,
    case GL_ANY_SAMPLES_PASSED_CONSERVATIVE:
       gpr0_to_bool(brw);
       break;
+   case GL_TRANSFORM_FEEDBACK_STREAM_OVERFLOW_ARB:
+      hsw_overflow_result_to_gpr0(brw, query, 1);
+      break;
+   case GL_TRANSFORM_FEEDBACK_OVERFLOW_ARB:
+      hsw_overflow_result_to_gpr0(brw, query, MAX_VERTEX_STREAMS);
+      break;
    }
 }