From 382e3553af4257c4dff517bbe183a2c69a1bc8d7 Mon Sep 17 00:00:00 2001 From: Hyunjun Ko Date: Sat, 4 May 2019 13:23:03 +0000 Subject: [PATCH] freedreno/ir3: fix counting and printing for half registers. v2: define HALF_REG_ID (0x100) and use it for setting FS_OUTPUT_REG.HALF_PRECISION Signed-off-by: Rob Clark --- src/freedreno/ir3/ir3_shader.c | 14 +++++++++----- src/freedreno/ir3/ir3_shader.h | 11 +++++++++-- src/gallium/drivers/freedreno/a5xx/fd5_program.c | 2 +- src/gallium/drivers/freedreno/a6xx/fd6_program.c | 2 +- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/freedreno/ir3/ir3_shader.c b/src/freedreno/ir3/ir3_shader.c index dacccc1329e..34af4ff689e 100644 --- a/src/freedreno/ir3/ir3_shader.c +++ b/src/freedreno/ir3/ir3_shader.c @@ -156,7 +156,7 @@ assemble_variant(struct ir3_shader_variant *v) if (ir3_shader_debug & IR3_DBG_DISASM) { struct ir3_shader_key key = v->key; - printf("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}", v->type, + printf("disassemble: type=%d, k={bp=%u,cts=%u,hp=%u}\n", v->type, v->binning_pass, key.color_two_side, key.half_precision); ir3_shader_disasm(v, bin, stdout); } @@ -301,8 +301,11 @@ ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir) static void dump_reg(FILE *out, const char *name, uint32_t r) { - if (r != regid(63,0)) - fprintf(out, "; %s: r%d.%c\n", name, r >> 2, "xyzw"[r & 0x3]); + if (r != regid(63,0)) { + const char *reg_type = (r & HALF_REG_ID) ? "hr" : "r"; + fprintf(out, "; %s: %s%d.%c\n", name, reg_type, + (r & ~HALF_REG_ID) >> 2, "xyzw"[r & 0x3]); + } } static void dump_output(FILE *out, struct ir3_shader_variant *so, @@ -386,8 +389,9 @@ ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out) fprintf(out, "; %s: outputs:", type); for (i = 0; i < so->outputs_count; i++) { uint8_t regid = so->outputs[i].regid; - fprintf(out, " r%d.%c (%s)", - (regid >> 2), "xyzw"[regid & 0x3], + const char *reg_type = so->outputs[i].half ? 
"hr" : "r"; + fprintf(out, " %s%d.%c (%s)", + reg_type, (regid >> 2), "xyzw"[regid & 0x3], gl_frag_result_name(so->outputs[i].slot)); } fprintf(out, "\n"); diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h index 06336eda0da..01e079140f1 100644 --- a/src/freedreno/ir3/ir3_shader.h +++ b/src/freedreno/ir3/ir3_shader.h @@ -379,6 +379,9 @@ struct ir3_ibo_mapping { uint8_t tex_base; /* the number of real textures, ie. image/ssbo start here */ }; +/* Represents half register in regid */ +#define HALF_REG_ID 0x100 + struct ir3_shader_variant { struct fd_bo *bo; @@ -673,8 +676,12 @@ ir3_find_output_regid(const struct ir3_shader_variant *so, unsigned slot) { int j; for (j = 0; j < so->outputs_count; j++) - if (so->outputs[j].slot == slot) - return so->outputs[j].regid; + if (so->outputs[j].slot == slot) { + uint32_t regid = so->outputs[j].regid; + if (so->outputs[j].half) + regid |= HALF_REG_ID; + return regid; + } return regid(63, 0); } diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c index e20bb2f6fc9..e52ba900e05 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_program.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -606,7 +606,7 @@ fd5_program_emit(struct fd_context *ctx, struct fd_ringbuffer *ring, OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_REG(0), 8); for (i = 0; i < 8; i++) { OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) | - COND(s[FS].v->outputs[i].half, A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION)); + COND(color_regid[i] & HALF_REG_ID, A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION)); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index b73d2031ab3..e56425e6d2c 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -627,7 +627,7 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd6_program_state *state, OUT_PKT4(ring, 
REG_A6XX_SP_FS_OUTPUT_REG(0), 8); for (i = 0; i < 8; i++) { OUT_RING(ring, A6XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) | - COND(s[FS].v->outputs[i].half, A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION)); + COND(color_regid[i] & HALF_REG_ID, A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION)); } OUT_PKT4(ring, REG_A6XX_VPC_PACK, 1); -- 2.30.2