st/mesa: add GL_ARB_shader_atomic_counter_ops support
authorIlia Mirkin <imirkin@alum.mit.edu>
Sat, 20 Feb 2016 04:18:33 +0000 (23:18 -0500)
committerIlia Mirkin <imirkin@alum.mit.edu>
Fri, 11 Mar 2016 03:36:17 +0000 (22:36 -0500)
Signed-off-by: Ilia Mirkin <imirkin@alum.mit.edu>
Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com>
docs/relnotes/11.3.0.html
src/mesa/state_tracker/st_extensions.c
src/mesa/state_tracker/st_glsl_to_tgsi.cpp

index 99445c2132b337e0816c14099c9c79ace8981669..c31296ef9b179246110481a9761c601338306ba1 100644 (file)
@@ -45,6 +45,7 @@ Note: some of the new features are only available with certain drivers.
 
 <ul>
 <li>GL_ARB_internalformat_query2 on i965</li>
+<li>GL_ARB_shader_atomic_counter_ops on nvc0</li>
 <li>GL_OES_texture_border_clamp and GL_EXT_texture_border_clamp on all drivers that support GL_ARB_texture_border_clamp</li>
 <li>GL_OES_shader_image_atomic on all drivers that support GL_ARB_shader_image_load_store</li>
 </ul>
index 42d347c66009460fdb7b6628e294f2d2f0397b8e..3666ece8ee7fe4f98b4b5e7f493f98d061e01270 100644 (file)
@@ -407,8 +407,10 @@ void st_init_limits(struct pipe_screen *screen,
          c->Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers;
    assert(c->MaxCombinedAtomicBuffers <= MAX_COMBINED_ATOMIC_BUFFERS);
 
-   if (c->MaxCombinedAtomicBuffers > 0)
+   if (c->MaxCombinedAtomicBuffers > 0) {
       extensions->ARB_shader_atomic_counters = GL_TRUE;
+      extensions->ARB_shader_atomic_counter_ops = GL_TRUE;
+   }
 
    c->MaxCombinedShaderOutputResources = c->MaxDrawBuffers;
    c->ShaderStorageBufferOffsetAlignment =
index 26e463e04373fe1f01780645d8b230f7b0f811d3..92cd775feff720b4c9b02b9d8d42e5bfda417fe8 100644 (file)
@@ -3158,8 +3158,8 @@ void
 glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
 {
    const char *callee = ir->callee->function_name();
-   ir_dereference *deref = static_cast<ir_dereference *>(
-      ir->actual_parameters.get_head());
+   exec_node *param = ir->actual_parameters.get_head();
+   ir_dereference *deref = static_cast<ir_dereference *>(param);
    ir_variable *location = deref->variable_referenced();
 
    st_src_reg buffer(
@@ -3188,17 +3188,56 @@ glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
 
    if (!strcmp("__intrinsic_atomic_read", callee)) {
       inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset);
-      inst->buffer = buffer;
    } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
       inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
                       st_src_reg_for_int(1));
-      inst->buffer = buffer;
    } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
       inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset,
                       st_src_reg_for_int(-1));
-      inst->buffer = buffer;
       emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1));
+   } else {
+      param = param->get_next();
+      ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();
+      val->accept(this);
+
+      st_src_reg data = this->result, data2 = undef_src;
+      unsigned opcode;
+      if (!strcmp("__intrinsic_atomic_add", callee))
+         opcode = TGSI_OPCODE_ATOMUADD;
+      else if (!strcmp("__intrinsic_atomic_min", callee))
+         opcode = TGSI_OPCODE_ATOMIMIN;
+      else if (!strcmp("__intrinsic_atomic_max", callee))
+         opcode = TGSI_OPCODE_ATOMIMAX;
+      else if (!strcmp("__intrinsic_atomic_and", callee))
+         opcode = TGSI_OPCODE_ATOMAND;
+      else if (!strcmp("__intrinsic_atomic_or", callee))
+         opcode = TGSI_OPCODE_ATOMOR;
+      else if (!strcmp("__intrinsic_atomic_xor", callee))
+         opcode = TGSI_OPCODE_ATOMXOR;
+      else if (!strcmp("__intrinsic_atomic_exchange", callee))
+         opcode = TGSI_OPCODE_ATOMXCHG;
+      else if (!strcmp("__intrinsic_atomic_comp_swap", callee)) {
+         opcode = TGSI_OPCODE_ATOMCAS;
+         param = param->get_next();
+         val = ((ir_instruction *)param)->as_rvalue();
+         val->accept(this);
+         data2 = this->result;
+      } else if (!strcmp("__intrinsic_atomic_sub", callee)) {
+         opcode = TGSI_OPCODE_ATOMUADD;
+         st_src_reg res = get_temp(glsl_type::uvec4_type);
+         st_dst_reg dstres = st_dst_reg(res);
+         dstres.writemask = dst.writemask;
+         emit_asm(ir, TGSI_OPCODE_INEG, dstres, data);
+         data = res;
+      } else {
+         assert(!"Unexpected intrinsic");
+         return;
+      }
+
+      inst = emit_asm(ir, opcode, dst, offset, data, data2);
    }
+
+   inst->buffer = buffer;
 }
 
 void
@@ -3591,7 +3630,16 @@ glsl_to_tgsi_visitor::visit(ir_call *ir)
    /* Filter out intrinsics */
    if (!strcmp("__intrinsic_atomic_read", callee) ||
        !strcmp("__intrinsic_atomic_increment", callee) ||
-       !strcmp("__intrinsic_atomic_predecrement", callee)) {
+       !strcmp("__intrinsic_atomic_predecrement", callee) ||
+       !strcmp("__intrinsic_atomic_add", callee) ||
+       !strcmp("__intrinsic_atomic_sub", callee) ||
+       !strcmp("__intrinsic_atomic_min", callee) ||
+       !strcmp("__intrinsic_atomic_max", callee) ||
+       !strcmp("__intrinsic_atomic_and", callee) ||
+       !strcmp("__intrinsic_atomic_or", callee) ||
+       !strcmp("__intrinsic_atomic_xor", callee) ||
+       !strcmp("__intrinsic_atomic_exchange", callee) ||
+       !strcmp("__intrinsic_atomic_comp_swap", callee)) {
       visit_atomic_counter_intrinsic(ir);
       return;
    }