From f402acdbe244e5de9b2b616e0a908f5d1416ce89 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Michel=20D=C3=A4nzer?= Date: Wed, 22 Aug 2012 18:15:36 +0200 Subject: [PATCH] radeonsi: Use FP16 shader export format when necessary / possible. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Fixes piglit fbo-blending-formats. Signed-off-by: Michel Dänzer Reviewed-by: Tom Stellard Reviewed-by: Christian König Reviewed-by: Alex Deucher --- src/gallium/drivers/radeon/SIInstructions.td | 4 +- src/gallium/drivers/radeon/SIIntrinsics.td | 1 + src/gallium/drivers/radeonsi/radeonsi_pipe.h | 3 +- .../drivers/radeonsi/radeonsi_shader.c | 51 +++++++++++--- src/gallium/drivers/radeonsi/si_state.c | 69 ++++++++++++++++++- src/gallium/drivers/radeonsi/si_state_draw.c | 4 -- 6 files changed, 114 insertions(+), 18 deletions(-) diff --git a/src/gallium/drivers/radeon/SIInstructions.td b/src/gallium/drivers/radeon/SIInstructions.td index f09d6042457..304732178c6 100644 --- a/src/gallium/drivers/radeon/SIInstructions.td +++ b/src/gallium/drivers/radeon/SIInstructions.td @@ -726,7 +726,9 @@ defm V_LDEXP_F32 : VOP2_32 <0x0000002b, "V_LDEXP_F32", []>; ////def V_CVT_PKACCUM_U8_F32 : VOP2_U8 <0x0000002c, "V_CVT_PKACCUM_U8_F32", []>; ////def V_CVT_PKNORM_I16_F32 : VOP2_I16 <0x0000002d, "V_CVT_PKNORM_I16_F32", []>; ////def V_CVT_PKNORM_U16_F32 : VOP2_U16 <0x0000002e, "V_CVT_PKNORM_U16_F32", []>; -////def V_CVT_PKRTZ_F16_F32 : VOP2_F16 <0x0000002f, "V_CVT_PKRTZ_F16_F32", []>; +defm V_CVT_PKRTZ_F16_F32 : VOP2_32 <0x0000002f, "V_CVT_PKRTZ_F16_F32", + [(set VReg_32:$dst, (int_SI_packf16 AllReg_32:$src0, VReg_32:$src1))] +>; ////def V_CVT_PK_U16_U32 : VOP2_U16 <0x00000030, "V_CVT_PK_U16_U32", []>; ////def V_CVT_PK_I16_I32 : VOP2_I16 <0x00000031, "V_CVT_PK_I16_I32", []>; def S_CMP_EQ_I32 : SOPC_32 <0x00000000, "S_CMP_EQ_I32", []>; diff --git a/src/gallium/drivers/radeon/SIIntrinsics.td b/src/gallium/drivers/radeon/SIIntrinsics.td index 6eadc94458f..b9544f10687 100644 --- a/src/gallium/drivers/radeon/SIIntrinsics.td +++ b/src/gallium/drivers/radeon/SIIntrinsics.td @@ -14,6 +14,7 @@ let TargetPrefix = "SI", isTarget = 1 in { + def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>; /* XXX: We may need a seperate intrinsic here for loading integer values */ def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>; diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.h b/src/gallium/drivers/radeonsi/radeonsi_pipe.h index 989bb49cbee..099b50916f6 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_pipe.h +++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.h @@ -134,7 +134,8 @@ struct r600_context { unsigned saved_render_cond_mode; /* shader information */ unsigned sprite_coord_enable; - boolean export_16bpc; + unsigned export_16bpc; + unsigned spi_shader_col_format; unsigned alpha_ref; boolean alpha_ref_dirty; struct r600_textures_info vs_samplers; diff --git a/src/gallium/drivers/radeonsi/radeonsi_shader.c b/src/gallium/drivers/radeonsi/radeonsi_shader.c index fd614dde388..98866c4ee1f 100644 --- a/src/gallium/drivers/radeonsi/radeonsi_shader.c +++ b/src/gallium/drivers/radeonsi/radeonsi_shader.c @@ -390,13 +390,47 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, unsigned compressed = 0; unsigned chan; - for (chan = 0; chan < 4; chan++ ) { - LLVMValueRef out_ptr = - si_shader_ctx->radeon_bld.soa.outputs[index][chan]; - /* +5 because the first output value will be - * the 6th argument to the intrinsic. */ - args[chan + 5] = LLVMBuildLoad(base->gallivm->builder, - out_ptr, ""); + if (si_shader_ctx->type == TGSI_PROCESSOR_FRAGMENT) { + int cbuf = target - V_008DFC_SQ_EXP_MRT; + + if (cbuf >= 0 && cbuf < 8) { + struct r600_context *rctx = si_shader_ctx->rctx; + compressed = (rctx->export_16bpc >> cbuf) & 0x1; + } + } + + if (compressed) { + /* Pixel shader needs to pack output values before export */ + for (chan = 0; chan < 2; chan++ ) { + LLVMValueRef *out_ptr = + si_shader_ctx->radeon_bld.soa.outputs[index]; + args[0] = LLVMBuildLoad(base->gallivm->builder, + out_ptr[2 * chan], ""); + args[1] = LLVMBuildLoad(base->gallivm->builder, + out_ptr[2 * chan + 1], ""); + args[chan + 5] = + build_intrinsic(base->gallivm->builder, + "llvm.SI.packf16", + LLVMInt32TypeInContext(base->gallivm->context), + args, 2, + LLVMReadNoneAttribute); + args[chan + 7] = args[chan + 5]; + } + + /* Set COMPR flag */ + args[4] = uint->one; + } else { + for (chan = 0; chan < 4; chan++ ) { + LLVMValueRef out_ptr = + si_shader_ctx->radeon_bld.soa.outputs[index][chan]; + /* +5 because the first output value will be + * the 6th argument to the intrinsic. */ + args[chan + 5] = LLVMBuildLoad(base->gallivm->builder, + out_ptr, ""); + } + + /* Clear COMPR flag */ + args[4] = uint->zero; } /* XXX: This controls which components of the output @@ -415,9 +449,6 @@ static void si_llvm_init_export_args(struct lp_build_tgsi_context *bld_base, /* Specify the target we are exporting */ args[3] = lp_build_const_int32(base->gallivm, target); - /* Set COMPR flag */ - args[4] = uint->zero; - /* XXX: We probably need to keep track of the output * values, so we know what we are passing to the next * stage. */ diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c index 5c2e7434ba3..fced24cc4ec 100644 --- a/src/gallium/drivers/radeonsi/si_state.c +++ b/src/gallium/drivers/radeonsi/si_state.c @@ -996,6 +996,53 @@ static uint32_t si_colorformat_endian_swap(uint32_t colorformat) } } +/* Returns the size in bits of the widest component of a CB format */ +static unsigned si_colorformat_max_comp_size(uint32_t colorformat) +{ + switch(colorformat) { + case V_028C70_COLOR_4_4_4_4: + return 4; + + case V_028C70_COLOR_1_5_5_5: + case V_028C70_COLOR_5_5_5_1: + return 5; + + case V_028C70_COLOR_5_6_5: + return 6; + + case V_028C70_COLOR_8: + case V_028C70_COLOR_8_8: + case V_028C70_COLOR_8_8_8_8: + return 8; + + case V_028C70_COLOR_10_10_10_2: + case V_028C70_COLOR_2_10_10_10: + return 10; + + case V_028C70_COLOR_10_11_11: + case V_028C70_COLOR_11_11_10: + return 11; + + case V_028C70_COLOR_16: + case V_028C70_COLOR_16_16: + case V_028C70_COLOR_16_16_16_16: + return 16; + + case V_028C70_COLOR_8_24: + case V_028C70_COLOR_24_8: + return 24; + + case V_028C70_COLOR_32: + case V_028C70_COLOR_32_32: + case V_028C70_COLOR_32_32_32_32: + case V_028C70_COLOR_X24_8_32_FLOAT: + return 32; + } + + assert(!"Unknown maximum component size"); + return 0; +} + static uint32_t si_translate_dbformat(enum pipe_format format) { switch (format) { @@ -1409,6 +1456,7 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4, const struct util_format_description *desc; int i; unsigned blend_clamp = 0, blend_bypass = 0; + unsigned max_comp_size; surf = (struct r600_surface *)state->cbufs[cb]; rtex = (struct r600_resource_texture*)state->cbufs[cb]->texture; @@ -1549,6 +1597,17 @@ static void si_cb(struct r600_context *rctx, struct si_pm4_state *pm4, } si_pm4_set_reg(pm4, R_028C70_CB_COLOR0_INFO + cb * 0x3C, color_info); si_pm4_set_reg(pm4, R_028C74_CB_COLOR0_ATTRIB + cb * 0x3C, color_attrib); + + /* Determine pixel shader export format */ + max_comp_size = si_colorformat_max_comp_size(format); + if (ntype == V_028C70_NUMBER_SRGB || + ((ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM) && + max_comp_size <= 10) || + (ntype == V_028C70_NUMBER_FLOAT && max_comp_size <= 16)) { + rctx->export_16bpc |= 1 << cb; + rctx->spi_shader_col_format |= V_028714_SPI_SHADER_FP16_ABGR << (4 * cb); + } else + rctx->spi_shader_col_format |= V_028714_SPI_SHADER_32_ABGR << (4 * cb); } static void si_db(struct r600_context *rctx, struct si_pm4_state *pm4, @@ -1667,9 +1726,12 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, /* build states */ rctx->have_depth_fb = 0; + rctx->export_16bpc = 0; + rctx->spi_shader_col_format = 0; for (int i = 0; i < state->nr_cbufs; i++) { si_cb(rctx, pm4, state, i); } + assert(!(rctx->export_16bpc & ~0xff)); si_db(rctx, pm4, state); shader_mask = 0; @@ -1706,6 +1768,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx, si_pm4_set_reg(pm4, R_028200_PA_SC_WINDOW_OFFSET, 0x00000000); si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA); si_pm4_set_reg(pm4, R_02823C_CB_SHADER_MASK, shader_mask); + si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, + rctx->spi_shader_col_format); si_pm4_set_reg(pm4, R_028BE0_PA_SC_AA_CONFIG, 0x00000000); si_pm4_set_state(rctx, framebuffer, pm4); @@ -1727,9 +1791,10 @@ static INLINE unsigned si_shader_selector_key(struct pipe_context *ctx, if (sel->type == PIPE_SHADER_FRAGMENT) { if (sel->fs_write_all) key |= rctx->framebuffer.nr_cbufs; + key |= rctx->export_16bpc << 4; /*if (rctx->queued.named.rasterizer) - key |= rctx->queued.named.rasterizer->flatshade << 4;*/ - /*key |== rctx->two_side << 5;*/ + key |= rctx->queued.named.rasterizer->flatshade << 12;*/ + /*key |== rctx->two_side << 13;*/ } return key; diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c index 95821dc5f5c..5f8e2118a15 100644 --- a/src/gallium/drivers/radeonsi/si_state_draw.c +++ b/src/gallium/drivers/radeonsi/si_state_draw.c @@ -186,10 +186,6 @@ static void si_pipe_shader_ps(struct pipe_context *ctx, struct si_pipe_shader *s /* XXX: Depends on Z buffer format? */ si_pm4_set_reg(pm4, R_028710_SPI_SHADER_Z_FORMAT, 0); - /* XXX: Depends on color buffer format? */ - si_pm4_set_reg(pm4, R_028714_SPI_SHADER_COL_FORMAT, - S_028714_COL0_EXPORT_FORMAT(V_028714_SPI_SHADER_32_ABGR)); - va = r600_resource_va(ctx->screen, (void *)shader->bo); si_pm4_add_bo(pm4, shader->bo, RADEON_USAGE_READ); si_pm4_set_reg(pm4, R_00B020_SPI_SHADER_PGM_LO_PS, va >> 8); -- 2.30.2