This prevents problems when calculating whether we overflow the shared constlen limit.
Note that on a6xx, the *_CNTL_CONSTLEN() register macros handle the assert for us.
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5607>
#include "util/u_atomic.h"
#include "util/u_string.h"
+#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/format/u_format.h"
*/
v->constlen = MAX2(v->constlen, v->info.max_const + 1);
+ /* On a4xx and newer, constlen must be a multiple of 16 dwords (4 vec4) even
+ * though uploads are in units of 4 dwords (1 vec4). constlen is counted in
+ * vec4, so round it up to 4 here to make calculations regarding the shared
+ * constlen simpler. */
+ if (gpu_id >= 400)
+ v->constlen = align(v->constlen, 4);
+
fixup_regfootprint(v);
return bin;
struct ir3_shader_variant vs = {
.type = MESA_SHADER_VERTEX,
.instrlen = 1,
- .constlen = 2,
+ .constlen = 4,
.info.max_reg = 1,
.inputs_count = 1,
.inputs[0] = {
struct ir3_shader_variant fs = {
.type = MESA_SHADER_FRAGMENT,
.instrlen = 1, /* max of 9 instructions with num_rts = 8 */
- .constlen = num_rts,
+ .constlen = align(num_rts, 4),
.info.max_reg = MAX2(num_rts, 1) - 1,
.total_in = blit ? 2 : 0,
.num_samp = blit ? 1 : 0,
struct ir3_shader_variant gs_shader = {
.type = MESA_SHADER_GEOMETRY,
.instrlen = 1,
- .constlen = 2,
+ .constlen = 4,
.info.max_reg = 1,
.inputs_count = 1,
.inputs[0] = {
tu_cs_emit(cs, xs->instrlen);
tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1);
- tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(align(xs->constlen, 4)) |
+ tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) |
A6XX_HLSQ_VS_CNTL_ENABLED);
/* emit program binary
if (s[i].v) {
s[i].i = &s[i].v->info;
/* constlen is in units of 4 * vec4: */
- s[i].constlen = align(s[i].v->constlen, 4) / 4;
+ assert(s[i].v->constlen % 4 == 0);
+ s[i].constlen = s[i].v->constlen / 4;
/* instrlen is already in units of 16 instr.. although
* probably we should ditch that and not make the compiler
* care about instruction group size of a3xx vs a4xx
A5XX_SP_CS_CONFIG_SHADEROBJOFFSET(0) |
A5XX_SP_CS_CONFIG_ENABLED);
- unsigned constlen = align(v->constlen, 4) / 4;
+ assert(v->constlen % 4 == 0);
+ unsigned constlen = v->constlen / 4;
OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONSTLEN, 2);
OUT_RING(ring, constlen); /* HLSQ_CS_CONSTLEN */
OUT_RING(ring, instrlen); /* HLSQ_CS_INSTRLEN */
if (s[i].v) {
s[i].i = &s[i].v->info;
/* constlen is in units of 4 * vec4: */
- s[i].constlen = align(s[i].v->constlen, 4) / 4;
+ assert(s[i].v->constlen % 4 == 0);
+ s[i].constlen = s[i].v->constlen / 4;
/* instrlen is already in units of 16 instr.. although
* probably we should ditch that and not make the compiler
* care about instruction group size of a3xx vs a5xx
OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
OUT_RING(ring, 0xff);
- unsigned constlen = align(v->constlen, 4);
OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
- OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(constlen) |
+ OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(v->constlen) |
A6XX_HLSQ_CS_CNTL_ENABLED);
OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2);
debug_assert(state->vs->constlen >= state->bs->constlen);
OUT_PKT4(ring, REG_A6XX_HLSQ_VS_CNTL, 4);
- OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(align(state->vs->constlen, 4)) |
+ OUT_RING(ring, A6XX_HLSQ_VS_CNTL_CONSTLEN(state->vs->constlen) |
A6XX_HLSQ_VS_CNTL_ENABLED);
OUT_RING(ring, COND(state->hs,
A6XX_HLSQ_HS_CNTL_ENABLED |
- A6XX_HLSQ_HS_CNTL_CONSTLEN(align(state->hs->constlen, 4))));
+ A6XX_HLSQ_HS_CNTL_CONSTLEN(state->hs->constlen)));
OUT_RING(ring, COND(state->ds,
A6XX_HLSQ_DS_CNTL_ENABLED |
- A6XX_HLSQ_DS_CNTL_CONSTLEN(align(state->ds->constlen, 4))));
+ A6XX_HLSQ_DS_CNTL_CONSTLEN(state->ds->constlen)));
OUT_RING(ring, COND(state->gs,
A6XX_HLSQ_GS_CNTL_ENABLED |
- A6XX_HLSQ_GS_CNTL_CONSTLEN(align(state->gs->constlen, 4))));
+ A6XX_HLSQ_GS_CNTL_CONSTLEN(state->gs->constlen)));
OUT_PKT4(ring, REG_A6XX_HLSQ_FS_CNTL, 1);
- OUT_RING(ring, A6XX_HLSQ_FS_CNTL_CONSTLEN(align(state->fs->constlen, 4)) |
+ OUT_RING(ring, A6XX_HLSQ_FS_CNTL_CONSTLEN(state->fs->constlen) |
A6XX_HLSQ_FS_CNTL_ENABLED);
OUT_PKT4(ring, REG_A6XX_SP_VS_CONFIG, 1);