From: Dave Airlie Date: Wed, 1 Jul 2015 03:58:24 +0000 (+0100) Subject: radeonsi: ARB_gpu_shader_fp64 + ARB_vertex_attrib_64bit support. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4cbf0a0ccf2fb4545b206066b756fd9a07acab92;p=mesa.git radeonsi: ARB_gpu_shader_fp64 + ARB_vertex_attrib_64bit support. This adds the translation from TGSI to AMDGPU llvm backend, for the 64-bit opcodes. The backend pretty much handles everything for us fine. There is one patch required for SI DFRAC support, that I know of. [airlied: fixed missing comma, updated relnotes] Reviewed-by: Marek Olšák Signed-off-by: Dave Airlie --- diff --git a/docs/GL3.txt b/docs/GL3.txt index 94bbcd12dfc..33a282edd5b 100644 --- a/docs/GL3.txt +++ b/docs/GL3.txt @@ -109,7 +109,7 @@ GL 4.0, GLSL 4.00: - Enhanced per-sample shading DONE (r600, radeonsi) - Interpolation functions DONE (r600) - New overload resolution rules DONE - GL_ARB_gpu_shader_fp64 DONE (nvc0, llvmpipe, softpipe) + GL_ARB_gpu_shader_fp64 DONE (nvc0, radeonsi, llvmpipe, softpipe) GL_ARB_sample_shading DONE (i965, nv50, nvc0, r600, radeonsi) GL_ARB_shader_subroutine started (Dave) GL_ARB_tessellation_shader started (Chris, Ilia) @@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10: GL_ARB_get_program_binary DONE (0 binary formats) GL_ARB_separate_shader_objects DONE (all drivers) GL_ARB_shader_precision started (Micah) - GL_ARB_vertex_attrib_64bit DONE (nvc0, llvmpipe, softpipe) + GL_ARB_vertex_attrib_64bit DONE (nvc0, radeonsi, llvmpipe, softpipe) GL_ARB_viewport_array DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe) diff --git a/docs/relnotes/10.7.0.html b/docs/relnotes/10.7.0.html index 7f55b067a4a..42ea807e1ff 100644 --- a/docs/relnotes/10.7.0.html +++ b/docs/relnotes/10.7.0.html @@ -47,9 +47,9 @@ Note: some of the new features are only available with certain drivers.
 <li>GL_AMD_vertex_shader_viewport_index on radeonsi</li>
 <li>GL_ARB_fragment_layer_viewport on radeonsi</li>
 <li>GL_ARB_framebuffer_no_attachments on i965</li>
-<li>GL_ARB_gpu_shader_fp64 on llvmpipe</li>
+<li>GL_ARB_gpu_shader_fp64 on llvmpipe, radeonsi</li>
 <li>GL_ARB_shader_stencil_export on llvmpipe</li>
-<li>GL_ARB_vertex_attrib_64bit on llvmpipe</li>
+<li>GL_ARB_vertex_attrib_64bit on llvmpipe, radeonsi</li>
 <li>GL_ARB_viewport_array on radeonsi</li>
 <li>GLX_ARB_create_context_robustness on r600, radeonsi</li>
 <li>EGL_EXT_create_context_robustness on r600, radeonsi</li>
  • diff --git a/src/gallium/drivers/radeon/radeon_llvm.h b/src/gallium/drivers/radeon/radeon_llvm.h index 6a9557b0b73..591e698d482 100644 --- a/src/gallium/drivers/radeon/radeon_llvm.h +++ b/src/gallium/drivers/radeon/radeon_llvm.h @@ -146,6 +146,8 @@ static inline LLVMTypeRef tgsi2llvmtype( case TGSI_TYPE_UNSIGNED: case TGSI_TYPE_SIGNED: return LLVMInt32TypeInContext(ctx); + case TGSI_TYPE_DOUBLE: + return LLVMDoubleTypeInContext(ctx); case TGSI_TYPE_UNTYPED: case TGSI_TYPE_FLOAT: return LLVMFloatTypeInContext(ctx); @@ -205,6 +207,9 @@ build_tgsi_intrinsic_nomem( struct lp_build_tgsi_context * bld_base, struct lp_build_emit_data * emit_data); - +LLVMValueRef +radeon_llvm_emit_fetch_double(struct lp_build_tgsi_context *bld_base, + LLVMValueRef ptr, + LLVMValueRef ptr2); #endif /* RADEON_LLVM_H */ diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c index c8c980d9d32..444a41c01da 100644 --- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c +++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c @@ -116,6 +116,28 @@ emit_fetch( enum tgsi_opcode_type type, unsigned swizzle); +LLVMValueRef +radeon_llvm_emit_fetch_double( + struct lp_build_tgsi_context *bld_base, + LLVMValueRef ptr, + LLVMValueRef ptr2) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMValueRef result; + + result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2)); + + result = LLVMBuildInsertElement(builder, + result, + bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr), + bld_base->int_bld.zero, ""); + result = LLVMBuildInsertElement(builder, + result, + bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2), + bld_base->int_bld.one, ""); + return bitcast(bld_base, TGSI_TYPE_DOUBLE, result); +} + static LLVMValueRef emit_array_fetch( struct lp_build_tgsi_context *bld_base, @@ -160,7 +182,7 @@ emit_fetch( struct radeon_llvm_context * ctx = 
radeon_llvm_context(bld_base); struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); LLVMBuilderRef builder = bld_base->base.gallivm->builder; - LLVMValueRef result = NULL, ptr; + LLVMValueRef result = NULL, ptr, ptr2; if (swizzle == ~0) { LLVMValueRef values[TGSI_NUM_CHANNELS]; @@ -184,11 +206,27 @@ emit_fetch( switch(reg->Register.File) { case TGSI_FILE_IMMEDIATE: { LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type); - return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); + if (type == TGSI_TYPE_DOUBLE) { + result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2)); + result = LLVMConstInsertElement(result, + bld->immediates[reg->Register.Index][swizzle], + bld_base->int_bld.zero); + result = LLVMConstInsertElement(result, + bld->immediates[reg->Register.Index][swizzle + 1], + bld_base->int_bld.one); + return LLVMConstBitCast(result, ctype); + } else { + return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype); + } } case TGSI_FILE_INPUT: result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)]; + if (type == TGSI_TYPE_DOUBLE) { + ptr = result; + ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)]; + return radeon_llvm_emit_fetch_double(bld_base, ptr, ptr2); + } break; case TGSI_FILE_TEMPORARY: @@ -199,11 +237,23 @@ emit_fetch( break; } ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle]; + if (type == TGSI_TYPE_DOUBLE) { + ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1]; + return radeon_llvm_emit_fetch_double(bld_base, + LLVMBuildLoad(builder, ptr, ""), + LLVMBuildLoad(builder, ptr2, "")); + } result = LLVMBuildLoad(builder, ptr, ""); break; case TGSI_FILE_OUTPUT: ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle); + if (type == TGSI_TYPE_DOUBLE) { + ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1); + return 
radeon_llvm_emit_fetch_double(bld_base, + LLVMBuildLoad(builder, ptr, ""), + LLVMBuildLoad(builder, ptr2, "")); + } result = LLVMBuildLoad(builder, ptr, ""); break; @@ -348,9 +398,10 @@ emit_store( struct gallivm_state *gallivm = bld->bld_base.base.gallivm; const struct tgsi_full_dst_register *reg = &inst->Dst[0]; LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; - LLVMValueRef temp_ptr; + LLVMValueRef temp_ptr, temp_ptr2 = NULL; unsigned chan, chan_index; boolean is_vec_store = FALSE; + enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); if (dst[0]) { LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0])); @@ -371,6 +422,8 @@ emit_store( TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { LLVMValueRef value = dst[chan_index]; + if (dtype == TGSI_TYPE_DOUBLE && (chan_index == 1 || chan_index == 3)) + continue; if (inst->Instruction.Saturate) value = radeon_llvm_saturate(bld_base, value); @@ -379,8 +432,9 @@ emit_store( LLVMBuildStore(builder, value, temp_ptr); continue; } - - value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); + + if (dtype != TGSI_TYPE_DOUBLE) + value = bitcast(bld_base, TGSI_TYPE_FLOAT, value); if (reg->Register.Indirect) { struct tgsi_declaration_range range = get_array_range(bld_base, @@ -418,6 +472,8 @@ emit_store( switch(reg->Register.File) { case TGSI_FILE_OUTPUT: temp_ptr = bld->outputs[reg->Register.Index][chan_index]; + if (dtype == TGSI_TYPE_DOUBLE) + temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1]; break; case TGSI_FILE_TEMPORARY: @@ -428,12 +484,28 @@ emit_store( break; } temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index]; + if (dtype == TGSI_TYPE_DOUBLE) + temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1]; + break; default: return; } - LLVMBuildStore(builder, value, temp_ptr); + if (dtype != TGSI_TYPE_DOUBLE) + LLVMBuildStore(builder, value, temp_ptr); + else { + LLVMValueRef ptr = LLVMBuildBitCast(builder, value, + 
LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), ""); + LLVMValueRef val2; + value = LLVMBuildExtractElement(builder, ptr, + bld_base->uint_bld.zero, ""); + val2 = LLVMBuildExtractElement(builder, ptr, + bld_base->uint_bld.one, ""); + + LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr); + LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2); + } } } } @@ -996,6 +1068,35 @@ static void emit_fcmp( emit_data->output[emit_data->chan] = v; } +static void emit_dcmp( + const struct lp_build_tgsi_action *action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + LLVMContextRef context = bld_base->base.gallivm->context; + LLVMRealPredicate pred; + + /* Use ordered for everything but NE (which is usual for + * float comparisons) + */ + switch (emit_data->inst->Instruction.Opcode) { + case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break; + case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break; + case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break; + case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break; + default: assert(!"unknown instruction"); pred = 0; break; + } + + LLVMValueRef v = LLVMBuildFCmp(builder, pred, + emit_data->args[0], emit_data->args[1],""); + + v = LLVMBuildSExtOrBitCast(builder, v, + LLVMInt32TypeInContext(context), ""); + + emit_data->output[emit_data->chan] = v; +} + static void emit_not( const struct lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, @@ -1161,6 +1262,16 @@ static void emit_ineg( emit_data->args[0], ""); } +static void emit_dneg( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + LLVMBuilderRef builder = bld_base->base.gallivm->builder; + emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder, + emit_data->args[0], ""); +} + static void emit_f2i( const struct 
lp_build_tgsi_action * action, struct lp_build_tgsi_context * bld_base, @@ -1423,6 +1534,12 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) lp_build_context_init(&bld_base->base, &ctx->gallivm, type); lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type)); lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type)); + { + struct lp_type dbl_type; + dbl_type = type; + dbl_type.width *= 2; + lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type); + } bld_base->soa = 1; bld_base->emit_store = emit_store; @@ -1461,10 +1578,24 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx) bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit; bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem; bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32"; + bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "fabs"; + bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64"; + bld_base->op_actions[TGSI_OPCODE_DFRAC].intr_name = "llvm.AMDIL.fraction."; + bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg; + bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp; + bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp; + bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp; + bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp; bld_base->op_actions[TGSI_OPCODE_DDX].intr_name = "llvm.AMDGPU.ddx"; bld_base->op_actions[TGSI_OPCODE_DDX].fetch_args = tex_fetch_args; bld_base->op_actions[TGSI_OPCODE_DDY].intr_name = "llvm.AMDGPU.ddy"; bld_base->op_actions[TGSI_OPCODE_DDY].fetch_args = tex_fetch_args; + bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.AMDGPU.rsq.f64"; + 
bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem; + bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64"; bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit; bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c index 13b67d210fd..a9dce2cdd32 100644 --- a/src/gallium/drivers/radeonsi/si_pipe.c +++ b/src/gallium/drivers/radeonsi/si_pipe.c @@ -451,6 +451,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu case PIPE_SHADER_CAP_PREFERRED_IR: return PIPE_SHADER_IR_TGSI; case PIPE_SHADER_CAP_DOUBLES: + return HAVE_LLVM >= 0x0307; case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED: case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED: return 0; diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c index 753b238e2c0..75a29aeebc9 100644 --- a/src/gallium/drivers/radeonsi/si_shader.c +++ b/src/gallium/drivers/radeonsi/si_shader.c @@ -703,8 +703,15 @@ static LLVMValueRef fetch_constant( buf = reg->Register.Dimension ? 
reg->Dimension.Index : 0; idx = reg->Register.Index * 4 + swizzle; - if (!reg->Register.Indirect) - return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]); + if (!reg->Register.Indirect) { + if (type != TGSI_TYPE_DOUBLE) + return bitcast(bld_base, type, si_shader_ctx->constants[buf][idx]); + else { + return radeon_llvm_emit_fetch_double(bld_base, + si_shader_ctx->constants[buf][idx], + si_shader_ctx->constants[buf][idx + 1]); + } + } addr = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle]; addr = LLVMBuildLoad(base->gallivm->builder, addr, "load addr reg"); @@ -713,9 +720,25 @@ static LLVMValueRef fetch_constant( lp_build_const_int32(base->gallivm, idx * 4)); result = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf], - addr, base->elem_type); + addr, bld_base->base.elem_type); + + if (type != TGSI_TYPE_DOUBLE) + result = bitcast(bld_base, type, result); + else { + LLVMValueRef addr2, result2; + addr2 = si_shader_ctx->radeon_bld.soa.addr[ireg->Index][ireg->Swizzle + 1]; + addr2 = LLVMBuildLoad(base->gallivm->builder, addr2, "load addr reg2"); + addr2 = lp_build_mul_imm(&bld_base->uint_bld, addr2, 16); + addr2 = lp_build_add(&bld_base->uint_bld, addr2, + lp_build_const_int32(base->gallivm, idx * 4)); - return bitcast(bld_base, type, result); + result2 = buffer_load_const(base->gallivm->builder, si_shader_ctx->const_resource[buf], + addr2, bld_base->base.elem_type); + + result = radeon_llvm_emit_fetch_double(bld_base, + result, result2); + } + return result; } /* Initialize arguments for the shader export intrinsic */