From: Eric Anholt Date: Fri, 31 Jul 2020 16:54:09 +0000 (-0700) Subject: freedreno: Add more asserts for DST_OFF/NUM_UNIT in indirect const uploads. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=56ab105182e5a3129ee646cf6382a5699bf14bce;p=mesa.git freedreno: Add more asserts for DST_OFF/NUM_UNIT in indirect const uploads. These are just empirical alignment numbers from looking at dEQP traces of the blob driver (a330, a418, a540, a618, a630), with one exception noted in the comments. Part-of: --- diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c index 5d5be9d39e8..a689a794bdc 100644 --- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c +++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c @@ -79,13 +79,22 @@ fd3_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v uint32_t regid, uint32_t offset, uint32_t sizedwords, struct fd_bo *bo) { + uint32_t dst_off = regid / 2; + /* The blob driver aligns all const uploads dst_off to 64. We've been + * successfully aligning to 8 vec4s as const_upload_unit so far with no + * ill effects. + */ + assert(dst_off % 16 == 0); + uint32_t num_unit = sizedwords / 2; + assert(num_unit % 2 == 0); + emit_const_asserts(ring, v, regid, sizedwords); OUT_PKT3(ring, CP_LOAD_STATE, 2); - OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) | + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(dst_off) | CP_LOAD_STATE_0_STATE_SRC(SS_INDIRECT) | CP_LOAD_STATE_0_STATE_BLOCK(sb[v->type]) | - CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2)); + CP_LOAD_STATE_0_NUM_UNIT(num_unit)); OUT_RELOC(ring, bo, offset, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0); } diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c index 603a81f7536..9c9e2a1aba2 100644 --- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c +++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c @@ -74,13 +74,18 @@ fd4_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v uint32_t regid, uint32_t offset, uint32_t sizedwords, struct fd_bo *bo) { + uint32_t dst_off = regid / 4; + assert(dst_off % 4 == 0); + uint32_t num_unit = sizedwords / 4; + assert(num_unit % 4 == 0); + emit_const_asserts(ring, v, regid, sizedwords); OUT_PKT3(ring, CP_LOAD_STATE4, 2); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) | + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) | CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) | CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) | - CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4)); + CP_LOAD_STATE4_0_NUM_UNIT(num_unit)); OUT_RELOC(ring, bo, offset, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0); } diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c index 1a1037c3a0e..d1e86a50df2 100644 --- a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c @@ -77,13 +77,18 @@ static void fd5_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, uint32_t regid, uint32_t offset, uint32_t sizedwords, struct fd_bo *bo) { + uint32_t dst_off = regid / 4; + assert(dst_off % 4 == 0); + uint32_t num_unit = sizedwords / 4; + assert(num_unit % 4 == 0); + emit_const_asserts(ring, v, regid, sizedwords); OUT_PKT7(ring, CP_LOAD_STATE4, 3); - OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(regid/4) | + OUT_RING(ring, CP_LOAD_STATE4_0_DST_OFF(dst_off) | CP_LOAD_STATE4_0_STATE_SRC(SS4_INDIRECT) | CP_LOAD_STATE4_0_STATE_BLOCK(fd4_stage2shadersb(v->type)) | - CP_LOAD_STATE4_0_NUM_UNIT(sizedwords/4)); + CP_LOAD_STATE4_0_NUM_UNIT(num_unit)); OUT_RELOC(ring, bo, offset, CP_LOAD_STATE4_1_STATE_TYPE(ST4_CONSTANTS), 0); } diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_const.c b/src/gallium/drivers/freedreno/a6xx/fd6_const.c index 419a22d0708..68a24872639 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_const.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_const.c @@ -77,16 +77,21 @@ fd6_emit_const_bo(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v, uint32_t regid, uint32_t offset, uint32_t sizedwords, struct fd_bo *bo) { + uint32_t dst_off = regid / 4; + assert(dst_off % 4 == 0); + uint32_t num_unit = DIV_ROUND_UP(sizedwords, 4); + assert(num_unit % 4 == 0); + emit_const_asserts(ring, v, regid, sizedwords); if (fd6_geom_stage(v->type)) { OUT_PKT(ring, CP_LOAD_STATE6_GEOM, CP_LOAD_STATE6_0( - .dst_off = regid/4, + .dst_off = dst_off, .state_type = ST6_CONSTANTS, .state_src = SS6_INDIRECT, .state_block = fd6_stage2shadersb(v->type), - .num_unit = DIV_ROUND_UP(sizedwords, 4) + .num_unit = num_unit, ), CP_LOAD_STATE6_EXT_SRC_ADDR( .bo = bo, @@ -96,11 +101,11 @@ fd6_emit_const_bo(struct fd_ringbuffer *ring, } else { OUT_PKT(ring, CP_LOAD_STATE6_FRAG, CP_LOAD_STATE6_0( - .dst_off = regid/4, + .dst_off = dst_off, .state_type = ST6_CONSTANTS, .state_src = SS6_INDIRECT, .state_block = fd6_stage2shadersb(v->type), - .num_unit = DIV_ROUND_UP(sizedwords, 4) + .num_unit = num_unit, ), CP_LOAD_STATE6_EXT_SRC_ADDR( .bo = bo,