From 76837e29e3cb325eecd88bb0b8f9df9d70caf7de Mon Sep 17 00:00:00 2001 From: Gert Wollny Date: Wed, 15 Nov 2017 10:29:12 +0100 Subject: [PATCH] r600_shader: only load from LDS what is really used Use the destination write mask to determine which values are really to be read from LDS and load only these. Reviewed-by: Dave Airlie Signed-off-by: Gert Wollny --- src/gallium/drivers/r600/r600_shader.c | 33 ++++++++++++++++++++------ 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c index 9345291cef8..2f2e002115a 100644 --- a/src/gallium/drivers/r600/r600_shader.c +++ b/src/gallium/drivers/r600/r600_shader.c @@ -379,7 +379,7 @@ static void r600_bytecode_src(struct r600_bytecode_alu_src *bc_src, const struct r600_shader_src *shader_src, unsigned chan); static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, - unsigned dst_reg); + unsigned dst_reg, unsigned mask); static int tgsi_last_instruction(unsigned writemask) { @@ -1026,7 +1026,7 @@ static int tgsi_declaration(struct r600_shader_ctx *ctx) if (r) return r; - do_lds_fetch_values(ctx, temp_reg, dreg); + do_lds_fetch_values(ctx, temp_reg, dreg, 0xf); } else if (d->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) { /* MOV r1.x, r0.x; @@ -1744,14 +1744,18 @@ static int r600_get_byte_address(struct r600_shader_ctx *ctx, int temp_reg, } static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, - unsigned dst_reg) + unsigned dst_reg, unsigned mask) { struct r600_bytecode_alu alu; int r, i; if ((ctx->bc->cf_last->ndw>>1) >= 0x60) ctx->bc->force_add_cf = 1; + for (i = 1; i < 4; i++) { + if (!(mask & (1 << i))) + continue; + r = single_alu_op2(ctx, ALU_OP2_ADD_INT, temp_reg, i, temp_reg, 0, @@ -1760,6 +1764,9 @@ static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, return r; } for (i = 0; i < 4; i++) { + if (! (mask & (1 << i))) + continue; + /* emit an LDS_READ_RET */ memset(&alu, 0, sizeof(alu)); alu.op = LDS_OP1_LDS_READ_RET; @@ -1775,6 +1782,8 @@ static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, return r; } for (i = 0; i < 4; i++) { + if (! (mask & (1 << i))) + continue; /* then read from LDS_OQ_A_POP */ memset(&alu, 0, sizeof(alu)); @@ -1792,6 +1801,16 @@ static int do_lds_fetch_values(struct r600_shader_ctx *ctx, unsigned temp_reg, return 0; } +static int fetch_mask(struct tgsi_src_register *reg) +{ + int mask = 0; + mask |= 1 << reg->SwizzleX; + mask |= 1 << reg->SwizzleY; + mask |= 1 << reg->SwizzleZ; + mask |= 1 << reg->SwizzleW; + return mask; +} + static int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_register *src, unsigned int dst_reg) { int r; @@ -1808,7 +1827,7 @@ static int fetch_tes_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_reg if (r) return r; - r = do_lds_fetch_values(ctx, temp_reg, dst_reg); + r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register)); if (r) return r; return 0; @@ -1834,7 +1853,7 @@ static int fetch_tcs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_reg if (r) return r; - r = do_lds_fetch_values(ctx, temp_reg, dst_reg); + r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register)); if (r) return r; return 0; @@ -1856,7 +1875,7 @@ static int fetch_tcs_output(struct r600_shader_ctx *ctx, struct tgsi_full_src_re if (r) return r; - r = do_lds_fetch_values(ctx, temp_reg, dst_reg); + r = do_lds_fetch_values(ctx, temp_reg, dst_reg, fetch_mask(&src->Register)); if (r) return r; return 0; @@ -2795,7 +2814,7 @@ static int r600_tess_factor_read(struct r600_shader_ctx *ctx, if (r) return r; - do_lds_fetch_values(ctx, temp_reg, dreg); + do_lds_fetch_values(ctx, temp_reg, dreg, 0xf); return 0; } -- 2.30.2