From: Rob Clark Date: Wed, 15 Oct 2014 17:08:00 +0000 (-0400) Subject: freedreno/ir3: large const support X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=652b8fbbbb0132c634c90e4d1fdbca9497b7cd94;p=mesa.git freedreno/ir3: large const support Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c index 8de0008d343..d674e0c0a81 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c @@ -186,6 +186,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) enum a3xx_instrbuffermode fpbuffer, vpbuffer; uint32_t fpbuffersz, vpbuffersz, fsoff; uint32_t pos_regid, posz_regid, psize_regid, color_regid; + int constmode; int i, j, k; vp = fd3_emit_get_vp(emit); @@ -241,6 +242,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) fsoff = 256 - fpbuffersz; } + /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */ + constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0; + pos_regid = find_output_regid(vp, ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); posz_regid = find_output_regid(fp, @@ -256,6 +260,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6); OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | + A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe * flush some caches? I think we only need to set those * bits if we have updated const or shader.. 
@@ -275,7 +280,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz)); OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); - OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) | + OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) | COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) | A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | A3XX_SP_SP_CTRL_REG_L0MODE(0)); @@ -381,11 +386,9 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit) A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) | A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63)); - /* NOTE: I believe VS.CONSTLEN should be <= FS.CONSTOBJOFFSET*/ - debug_assert(vp->constlen <= 128); - OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); - OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | + OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET( + MAX2(128, vp->constlen)) | A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff)); OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ } diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index eb2d954a97f..24f360b447c 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -354,7 +354,11 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader, case PIPE_SHADER_CAP_MAX_TEMPS: return 64; /* Max native temporaries. */ case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE: - return ((screen->gpu_id >= 300) ? 1024 : 64) * sizeof(float[4]); + /* NOTE: seems to be limit for a3xx is actually 512 but + * split between VS and FS. Use lower limit of 256 to + * avoid getting into impossible situations: + */ + return ((screen->gpu_id >= 300) ? 
256 : 64) * sizeof(float[4]); case PIPE_SHADER_CAP_MAX_CONST_BUFFERS: return 1; case PIPE_SHADER_CAP_MAX_PREDS: diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 3da10fb81e3..70d37ffdeb5 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -104,7 +104,7 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info, val.iim_val = reg->iim_val; } else { int8_t components = util_last_bit(reg->wrmask); - int8_t max = (reg->num + repeat + components - 1) >> 2; + int16_t max = (reg->num + repeat + components - 1) >> 2; val.comp = reg->num & 0x3; val.num = reg->num >> 2; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index b92a57a43f8..d2d3dcaadb9 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -47,7 +47,7 @@ struct ir3_info { */ int8_t max_reg; /* highest GPR # used by shader */ int8_t max_half_reg; - int8_t max_const; + int16_t max_const; }; struct ir3_register { diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c index 1a5119c074f..8c4ec88ccc0 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c @@ -600,11 +600,6 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx, struct ir3_register *reg; struct ir3_instruction *orig = NULL; - /* TODO we need to use a mov to temp for const >= 64.. or maybe - * we could use relative addressing.. - */ - compile_assert(ctx, src->Index < 64); - switch (src->File) { case TGSI_FILE_IMMEDIATE: /* TODO if possible, use actual immediate instead of const.. 
but @@ -632,6 +627,24 @@ add_src_reg_wrmask(struct ir3_compile_context *ctx, break; } + /* We seem to have 8 bits (6.2) for dst register always, so I think + * it is safe to assume GPR cannot be >=64 + * + * cat3 instructions only have 8 bits for src2, but cannot take a + * const for src2 + * + * cat5 and cat6 in some cases only have 8 bits, but cannot take a + * const for any src. + * + * Other than that we seem to have 12 bits to encode const src, + * except for cat1 which may only have 11 bits (but that seems like + * a bug); 11 bits is presumably 9.2, hence the 1 << 9 limit below + */ + if (flags & IR3_REG_CONST) + compile_assert(ctx, src->Index < (1 << 9)); + else + compile_assert(ctx, src->Index < (1 << 6)); + if (src->Absolute) flags |= IR3_REG_ABS; if (src->Negate)