replace malloc macros in imports.h with u_memory.h versions
[mesa.git] / src / mesa / main / compute.c
index 5c84516688490fa416790e4bf49f9c3430e3ce75..8e446afa9ee2ebc4b8bc8dac05e3dd88ed2e8b39 100644 (file)
@@ -54,13 +54,10 @@ check_valid_to_compute(struct gl_context *ctx, const char *function)
 static bool
 validate_DispatchCompute(struct gl_context *ctx, const GLuint *num_groups)
 {
-   int i;
-   FLUSH_CURRENT(ctx, 0);
-
    if (!check_valid_to_compute(ctx, "glDispatchCompute"))
       return GL_FALSE;
 
-   for (i = 0; i < 3; i++) {
+   for (int i = 0; i < 3; i++) {
       /* From the OpenGL 4.3 Core Specification, Chapter 19, Compute Shaders:
        *
        * "An INVALID_VALUE error is generated if any of num_groups_x,
@@ -106,11 +103,6 @@ validate_DispatchComputeGroupSizeARB(struct gl_context *ctx,
                                      const GLuint *num_groups,
                                      const GLuint *group_size)
 {
-   GLuint total_invocations = 1;
-   int i;
-
-   FLUSH_CURRENT(ctx, 0);
-
    if (!check_valid_to_compute(ctx, "glDispatchComputeGroupSizeARB"))
       return GL_FALSE;
 
@@ -128,7 +120,7 @@ validate_DispatchComputeGroupSizeARB(struct gl_context *ctx,
       return GL_FALSE;
    }
 
-   for (i = 0; i < 3; i++) {
+   for (int i = 0; i < 3; i++) {
       /* The ARB_compute_variable_group_size spec says:
        *
        * "An INVALID_VALUE error is generated if any of num_groups_x,
@@ -159,8 +151,6 @@ validate_DispatchComputeGroupSizeARB(struct gl_context *ctx,
                      "glDispatchComputeGroupSizeARB(group_size_%c)", 'x' + i);
          return GL_FALSE;
       }
-
-      total_invocations *= group_size[i];
    }
 
    /* The ARB_compute_variable_group_size spec says:
@@ -171,23 +161,60 @@ validate_DispatchComputeGroupSizeARB(struct gl_context *ctx,
     *  for compute shaders with variable group size
     *  (MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB)."
     */
+   uint64_t total_invocations = (uint64_t) group_size[0] * group_size[1];
+   if (total_invocations <= UINT32_MAX) {
+      /* Only multiply in the third value while the 64-bit running product
+       * still fits in 32 bits; MaxComputeVariableGroupInvocations is 32-bit.
+       */
+      total_invocations *= group_size[2];
+   }
    if (total_invocations > ctx->Const.MaxComputeVariableGroupInvocations) {
       _mesa_error(ctx, GL_INVALID_VALUE,
                   "glDispatchComputeGroupSizeARB(product of local_sizes "
                   "exceeds MAX_COMPUTE_VARIABLE_GROUP_INVOCATIONS_ARB "
-                  "(%d > %d))", total_invocations,
+                  "(%u * %u * %u > %u))",
+                  group_size[0], group_size[1], group_size[2],
                   ctx->Const.MaxComputeVariableGroupInvocations);
       return GL_FALSE;
    }
 
+   /* The NV_compute_shader_derivatives spec says:
+    *
+    * "An INVALID_VALUE error is generated by DispatchComputeGroupSizeARB if
+    *  the active program for the compute shader stage has a compute shader
+    *  using the "derivative_group_quadsNV" layout qualifier and
+    *  <group_size_x> or <group_size_y> is not a multiple of two.
+    *
+    *  An INVALID_VALUE error is generated by DispatchComputeGroupSizeARB if
+    *  the active program for the compute shader stage has a compute shader
+    *  using the "derivative_group_linearNV" layout qualifier and the product
+    *  of <group_size_x>, <group_size_y>, and <group_size_z> is not a multiple
+    *  of four."
+    */
+   if (prog->info.cs.derivative_group == DERIVATIVE_GROUP_QUADS &&
+       ((group_size[0] & 1) || (group_size[1] & 1))) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glDispatchComputeGroupSizeARB(derivative_group_quadsNV "
+                  "requires group_size_x (%u) and group_size_y (%u) to be "
+                  "divisible by 2)", group_size[0], group_size[1]);
+      return GL_FALSE;
+   }
+
+   if (prog->info.cs.derivative_group == DERIVATIVE_GROUP_LINEAR &&
+       (total_invocations & 3)) {
+      _mesa_error(ctx, GL_INVALID_VALUE,
+                  "glDispatchComputeGroupSizeARB(derivative_group_linearNV "
+                  "requires product of group sizes (%"PRIu64") to be divisible "
+                  "by 4)", total_invocations);
+      return GL_FALSE;
+   }
+
    return GL_TRUE;
 }
 
 static bool
 valid_dispatch_indirect(struct gl_context *ctx,  GLintptr indirect)
 {
-   FLUSH_CURRENT(ctx, 0);
-
    GLsizei size = 3 * sizeof(GLuint);
    const uint64_t end = (uint64_t) indirect + size;
    const char *name = "glDispatchComputeIndirect";
@@ -218,7 +245,7 @@ valid_dispatch_indirect(struct gl_context *ctx,  GLintptr indirect)
     *  DRAW_INDIRECT_BUFFER binding, or if the command would source data
     *  beyond the end of the buffer object."
     */
-   if (!_mesa_is_bufferobj(ctx->DispatchIndirectBuffer)) {
+   if (!ctx->DispatchIndirectBuffer) {
       _mesa_error(ctx, GL_INVALID_OPERATION,
                   "%s: no buffer bound to DISPATCH_INDIRECT_BUFFER", name);
       return GL_FALSE;
@@ -251,19 +278,20 @@ valid_dispatch_indirect(struct gl_context *ctx,  GLintptr indirect)
    return GL_TRUE;
 }
 
-void GLAPIENTRY
-_mesa_DispatchCompute(GLuint num_groups_x,
-                      GLuint num_groups_y,
-                      GLuint num_groups_z)
+static ALWAYS_INLINE void
+dispatch_compute(GLuint num_groups_x, GLuint num_groups_y,
+                 GLuint num_groups_z, bool no_error)
 {
    GET_CURRENT_CONTEXT(ctx);
    const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
 
+   FLUSH_VERTICES(ctx, 0);
+
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glDispatchCompute(%d, %d, %d)\n",
                   num_groups_x, num_groups_y, num_groups_z);
 
-   if (!validate_DispatchCompute(ctx, num_groups))
+   if (!no_error && !validate_DispatchCompute(ctx, num_groups))
       return;
 
    if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u)
@@ -272,36 +300,69 @@ _mesa_DispatchCompute(GLuint num_groups_x,
    ctx->Driver.DispatchCompute(ctx, num_groups);
 }
 
-extern void GLAPIENTRY
-_mesa_DispatchComputeIndirect(GLintptr indirect)
+void GLAPIENTRY
+_mesa_DispatchCompute_no_error(GLuint num_groups_x, GLuint num_groups_y,
+                               GLuint num_groups_z)
+{
+   dispatch_compute(num_groups_x, num_groups_y, num_groups_z, true);
+}
+
+void GLAPIENTRY
+_mesa_DispatchCompute(GLuint num_groups_x,
+                      GLuint num_groups_y,
+                      GLuint num_groups_z)
+{
+   dispatch_compute(num_groups_x, num_groups_y, num_groups_z, false);
+}
+
+static ALWAYS_INLINE void
+dispatch_compute_indirect(GLintptr indirect, bool no_error)
 {
    GET_CURRENT_CONTEXT(ctx);
 
+   FLUSH_VERTICES(ctx, 0);
+
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx, "glDispatchComputeIndirect(%ld)\n", (long) indirect);
 
-   if (!valid_dispatch_indirect(ctx, indirect))
+   if (!no_error && !valid_dispatch_indirect(ctx, indirect))
       return;
 
    ctx->Driver.DispatchComputeIndirect(ctx, indirect);
 }
 
-void GLAPIENTRY
-_mesa_DispatchComputeGroupSizeARB(GLuint num_groups_x, GLuint num_groups_y,
-                                  GLuint num_groups_z, GLuint group_size_x,
-                                  GLuint group_size_y, GLuint group_size_z)
+extern void GLAPIENTRY
+_mesa_DispatchComputeIndirect_no_error(GLintptr indirect)
+{
+   dispatch_compute_indirect(indirect, true);
+}
+
+extern void GLAPIENTRY
+_mesa_DispatchComputeIndirect(GLintptr indirect)
+{
+   dispatch_compute_indirect(indirect, false);
+}
+
+static ALWAYS_INLINE void
+dispatch_compute_group_size(GLuint num_groups_x, GLuint num_groups_y,
+                            GLuint num_groups_z, GLuint group_size_x,
+                            GLuint group_size_y, GLuint group_size_z,
+                            bool no_error)
 {
    GET_CURRENT_CONTEXT(ctx);
    const GLuint num_groups[3] = { num_groups_x, num_groups_y, num_groups_z };
    const GLuint group_size[3] = { group_size_x, group_size_y, group_size_z };
 
+   FLUSH_VERTICES(ctx, 0);
+
    if (MESA_VERBOSE & VERBOSE_API)
       _mesa_debug(ctx,
                   "glDispatchComputeGroupSizeARB(%d, %d, %d, %d, %d, %d)\n",
                   num_groups_x, num_groups_y, num_groups_z,
                   group_size_x, group_size_y, group_size_z);
 
-   if (!validate_DispatchComputeGroupSizeARB(ctx, num_groups, group_size))
+   if (!no_error &&
+       !validate_DispatchComputeGroupSizeARB(ctx, num_groups, group_size))
       return;
 
    if (num_groups_x == 0u || num_groups_y == 0u || num_groups_z == 0u)
@@ -309,3 +370,26 @@ _mesa_DispatchComputeGroupSizeARB(GLuint num_groups_x, GLuint num_groups_y,
 
    ctx->Driver.DispatchComputeGroupSize(ctx, num_groups, group_size);
 }
+
+void GLAPIENTRY
+_mesa_DispatchComputeGroupSizeARB_no_error(GLuint num_groups_x,
+                                           GLuint num_groups_y,
+                                           GLuint num_groups_z,
+                                           GLuint group_size_x,
+                                           GLuint group_size_y,
+                                           GLuint group_size_z)
+{
+   dispatch_compute_group_size(num_groups_x, num_groups_y, num_groups_z,
+                               group_size_x, group_size_y, group_size_z,
+                               true);
+}
+
+void GLAPIENTRY
+_mesa_DispatchComputeGroupSizeARB(GLuint num_groups_x, GLuint num_groups_y,
+                                  GLuint num_groups_z, GLuint group_size_x,
+                                  GLuint group_size_y, GLuint group_size_z)
+{
+   dispatch_compute_group_size(num_groups_x, num_groups_y, num_groups_z,
+                               group_size_x, group_size_y, group_size_z,
+                               false);
+}