enum tgsi_opcode_type type,
unsigned swizzle);
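+/* Assemble a double-precision value from two 32-bit dwords: place the low
+ * dword (ptr) in element 0 and the high dword (ptr2) in element 1 of an i32
+ * vector, then bitcast the vector to the double type.
+ */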
+LLVMValueRef
+radeon_llvm_emit_fetch_double(
+ struct lp_build_tgsi_context *bld_base,
+ LLVMValueRef ptr,
+ LLVMValueRef ptr2)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMValueRef result;
+
+ result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
+
+ result = LLVMBuildInsertElement(builder,
+ result,
+ bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
+ bld_base->int_bld.zero, "");
+ result = LLVMBuildInsertElement(builder,
+ result,
+ bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
+ bld_base->int_bld.one, "");
+ return bitcast(bld_base, TGSI_TYPE_DOUBLE, result);
+}
+
static LLVMValueRef
emit_array_fetch(
struct lp_build_tgsi_context *bld_base,
struct radeon_llvm_context * ctx = radeon_llvm_context(bld_base);
struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
- LLVMValueRef result = NULL, ptr;
+ LLVMValueRef result = NULL, ptr, ptr2;
if (swizzle == ~0) {
LLVMValueRef values[TGSI_NUM_CHANNELS];
switch(reg->Register.File) {
case TGSI_FILE_IMMEDIATE: {
LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
- return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
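+ /* Double immediates span two consecutive 32-bit immediate slots,
+ * so combine swizzle and swizzle + 1. */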
+ if (type == TGSI_TYPE_DOUBLE) {
+ result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
+ result = LLVMConstInsertElement(result,
+ bld->immediates[reg->Register.Index][swizzle],
+ bld_base->int_bld.zero);
+ result = LLVMConstInsertElement(result,
+ bld->immediates[reg->Register.Index][swizzle + 1],
+ bld_base->int_bld.one);
+ return LLVMConstBitCast(result, ctype);
+ } else {
+ return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
+ }
}
case TGSI_FILE_INPUT:
result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
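+ /* A double input occupies two consecutive channels; the high dword
+ * lives at swizzle + 1. */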
+ if (type == TGSI_TYPE_DOUBLE) {
+ ptr = result;
+ ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
+ return radeon_llvm_emit_fetch_double(bld_base, ptr, ptr2);
+ }
break;
case TGSI_FILE_TEMPORARY:
break;
}
ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
+ if (type == TGSI_TYPE_DOUBLE) {
+ ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
+ return radeon_llvm_emit_fetch_double(bld_base,
+ LLVMBuildLoad(builder, ptr, ""),
+ LLVMBuildLoad(builder, ptr2, ""));
+ }
result = LLVMBuildLoad(builder, ptr, "");
break;
case TGSI_FILE_OUTPUT:
ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
+ if (type == TGSI_TYPE_DOUBLE) {
+ ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
+ return radeon_llvm_emit_fetch_double(bld_base,
+ LLVMBuildLoad(builder, ptr, ""),
+ LLVMBuildLoad(builder, ptr2, ""));
+ }
result = LLVMBuildLoad(builder, ptr, "");
break;
struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
const struct tgsi_full_dst_register *reg = &inst->Dst[0];
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
- LLVMValueRef temp_ptr;
+ LLVMValueRef temp_ptr, temp_ptr2 = NULL;
unsigned chan, chan_index;
boolean is_vec_store = FALSE;
+ enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
if (dst[0]) {
LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
LLVMValueRef value = dst[chan_index];
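+ /* Each double result covers a channel pair (xy or zw); skip the odd
+ * channels, which are written together with the even ones below. */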
+ if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3))
+ continue;
if (inst->Instruction.Saturate)
value = radeon_llvm_saturate(bld_base, value);
LLVMBuildStore(builder, value, temp_ptr);
continue;
}
-
- value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
+
+ if (dtype != TGSI_TYPE_DOUBLE)
+ value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
if (reg->Register.Indirect) {
struct tgsi_declaration_range range = get_array_range(bld_base,
switch(reg->Register.File) {
case TGSI_FILE_OUTPUT:
temp_ptr = bld->outputs[reg->Register.Index][chan_index];
+ if (dtype == TGSI_TYPE_DOUBLE)
+ temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
break;
case TGSI_FILE_TEMPORARY:
break;
}
temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
+ if (dtype == TGSI_TYPE_DOUBLE)
+ temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
+
break;
default:
return;
}
- LLVMBuildStore(builder, value, temp_ptr);
+ if (dtype != TGSI_TYPE_DOUBLE)
+ LLVMBuildStore(builder, value, temp_ptr);
+ else {
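+ /* Split the 64-bit value into two 32-bit words and store them to
+ * the channel pair's pointers. */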
+ LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
+ LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
+ LLVMValueRef val2;
+ value = LLVMBuildExtractElement(builder, ptr,
+ bld_base->uint_bld.zero, "");
+ val2 = LLVMBuildExtractElement(builder, ptr,
+ bld_base->uint_bld.one, "");
+
+ LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
+ LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
+ }
}
}
}
emit_data->output[emit_data->chan] = v;
}
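+/* Double-precision comparisons (DSEQ/DSGE/DSLT/DSNE): compare the two
+ * operands and sign-extend the i1 result to the 0 / ~0 integer mask that
+ * TGSI expects.
+ */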
+static void emit_dcmp(
+ const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ LLVMContextRef context = bld_base->base.gallivm->context;
+ LLVMRealPredicate pred;
+
+ /* Use ordered comparisons for everything except DSNE, which uses an
+ * unordered NE, matching the usual convention for float comparisons.
+ */
+ switch (emit_data->inst->Instruction.Opcode) {
+ case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
+ case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
+ case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
+ case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
+ default: assert(!"unknown instruction"); pred = 0; break;
+ }
+
+ LLVMValueRef v = LLVMBuildFCmp(builder, pred,
+ emit_data->args[0], emit_data->args[1], "");
+
+ v = LLVMBuildSExtOrBitCast(builder, v,
+ LLVMInt32TypeInContext(context), "");
+
+ emit_data->output[emit_data->chan] = v;
+}
+
static void emit_not(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
emit_data->args[0], "");
}
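+/* DNEG: negate a double-precision operand. */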
+static void emit_dneg(
+ const struct lp_build_tgsi_action * action,
+ struct lp_build_tgsi_context * bld_base,
+ struct lp_build_emit_data * emit_data)
+{
+ LLVMBuilderRef builder = bld_base->base.gallivm->builder;
+ emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
+ emit_data->args[0], "");
+}
+
static void emit_f2i(
const struct lp_build_tgsi_action * action,
struct lp_build_tgsi_context * bld_base,
lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
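+ /* Build context for double-precision values: same vector length as the
+ * float context, but with twice the element width. */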
+ {
+ struct lp_type dbl_type;
+ dbl_type = type;
+ dbl_type.width *= 2;
+ lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
+ }
bld_base->soa = 1;
bld_base->emit_store = emit_store;
bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
+ bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "fabs";
+ bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
+ /* Assumed: DFRAC also needs an emit callback, matching the other
+ * double opcodes above. */
+ bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DFRAC].intr_name = "llvm.AMDIL.fraction.";
+ bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
+ bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
+ bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx";
bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args;
bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy";
bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args;
+ bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.AMDGPU.rsq.f64";
+ bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
+ bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
buf = reg->Register.Dimension ? reg->Dimension.Index : 0;
idx = reg->Register.Index * 4 + swizzle;
- if (!reg->Register.Indirect)
- return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
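+ /* A double constant occupies two consecutive 32-bit slots (idx and
+ * idx + 1); combine them into one value. */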
+ if (!reg->Register.Indirect) {
+ if (type != TGSI_TYPE_DOUBLE)
+ return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]);
+ else {
+ return radeon_llvm_emit_fetch_double(bld_base,
+ si_shader_ctx->constants[buf][idx],
+ si_shader_ctx->constants[buf][idx + 1]);
+ }
+ }
addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle];
addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg");
lp_build_const_int32(base->gallivm, idx * 4));
result = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
- addr, base->elem_type);
- return bitcast(bld_base, type, result);
+ addr, bld_base->base.elem_type);
+
+ if (type != TGSI_TYPE_DOUBLE)
+ result = bitcast(bld_base, type, result);
+ else {
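+ /* For the high dword, compute a second indirect address from the
+ * next swizzle channel and load another 32-bit word. */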
+ LLVMValueRef addr2, result2;
+ addr2 = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1];
+ addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2");
+ addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16);
+ addr2 = lp_build_add(&bld_base->uint_bld, addr2,
+ lp_build_const_int32(base->gallivm, idx * 4));
+ result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf],
+ addr2, bld_base->base.elem_type);
+
+ result = radeon_llvm_emit_fetch_double(bld_base,
+ result, result2);
+ }
+ return result;
}
/* Initialize arguments for the shader export intrinsic */