From 47e2c195115854452a5560c59f82bebfc989694a Mon Sep 17 00:00:00 2001 From: "Kristian H. Kristensen" Date: Tue, 22 Oct 2019 20:03:07 -0700 Subject: [PATCH] freedreno/a6xx: Program state for tessellation stages Signed-off-by: Kristian H. Kristensen Acked-by: Eric Anholt Reviewed-by: Rob Clark --- src/freedreno/registers/a6xx.xml | 5 + src/gallium/drivers/freedreno/a6xx/fd6_emit.c | 1 - src/gallium/drivers/freedreno/a6xx/fd6_emit.h | 16 ++ .../drivers/freedreno/a6xx/fd6_program.c | 174 ++++++++++++++---- 4 files changed, 162 insertions(+), 34 deletions(-) diff --git a/src/freedreno/registers/a6xx.xml b/src/freedreno/registers/a6xx.xml index 7f464f1a480..747f071d27f 100644 --- a/src/freedreno/registers/a6xx.xml +++ b/src/freedreno/registers/a6xx.xml @@ -1856,6 +1856,7 @@ to upconvert to 32b float internally? + @@ -1936,6 +1937,8 @@ to upconvert to 32b float internally? + + @@ -2421,6 +2424,7 @@ to upconvert to 32b float internally? + @@ -2428,6 +2432,7 @@ to upconvert to 32b float internally? + diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c index 149b27bafc6..8a548953e9b 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.c @@ -1338,7 +1338,6 @@ fd6_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) WRITE(REG_A6XX_VPC_SO_OVERRIDE, A6XX_VPC_SO_OVERRIDE_SO_DISABLE); - WRITE(REG_A6XX_PC_UNKNOWN_9801, 0); WRITE(REG_A6XX_PC_UNKNOWN_9806, 0); WRITE(REG_A6XX_PC_UNKNOWN_9980, 0); diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h index f2f1a7f8085..81cd4d97f7f 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_emit.h +++ b/src/gallium/drivers/freedreno/a6xx/fd6_emit.h @@ -245,6 +245,22 @@ fd6_stage2shadersb(gl_shader_stage type) } } +static inline enum a6xx_tess_spacing +fd6_gl2spacing(enum gl_tess_spacing spacing) +{ + switch (spacing) { + case TESS_SPACING_EQUAL: + return TESS_EQUAL; + case TESS_SPACING_FRACTIONAL_ODD: + return TESS_FRACTIONAL_ODD; + case TESS_SPACING_FRACTIONAL_EVEN: + return TESS_FRACTIONAL_EVEN; + case TESS_SPACING_UNSPECIFIED: + default: + unreachable("spacing must be specified"); + } +} + bool fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring, enum pipe_shader_type type, struct fd_texture_stateobj *tex, unsigned bcolor_offset, diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_program.c b/src/gallium/drivers/freedreno/a6xx/fd6_program.c index 087059b3e37..5e54c7ea6d5 100644 --- a/src/gallium/drivers/freedreno/a6xx/fd6_program.c +++ b/src/gallium/drivers/freedreno/a6xx/fd6_program.c @@ -277,6 +277,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid; uint32_t smask_in_regid, smask_regid; uint32_t vertex_regid, instance_regid, layer_regid, primitive_regid; + uint32_t hs_invocation_regid; + uint32_t tess_coord_x_regid, tess_coord_y_regid, hs_patch_regid, ds_patch_regid; uint32_t ij_pix_regid, ij_samp_regid, ij_cent_regid, ij_size_regid; uint32_t gs_header_regid; enum a3xx_threadsize fssz; @@ -304,8 +306,25 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, vertex_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID); instance_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID); + if (hs) { + tess_coord_x_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD); + tess_coord_y_regid = next_regid(tess_coord_x_regid, 1); + hs_patch_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID); + ds_patch_regid = ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID); + hs_invocation_regid = ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3); + + pos_regid = ir3_find_output_regid(ds, VARYING_SLOT_POS); + psize_regid = ir3_find_output_regid(ds, VARYING_SLOT_PSIZ); + } else { + tess_coord_x_regid = regid(63, 0); + tess_coord_y_regid = regid(63, 0); + hs_patch_regid = regid(63, 0); + ds_patch_regid = regid(63, 0); + hs_invocation_regid = regid(63, 0); + } + if (gs) { - gs_header_regid = ir3_find_sysval_regid(vs, SYSTEM_VALUE_GS_HEADER_IR3); + gs_header_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3); primitive_regid = ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID); pos_regid = ir3_find_output_regid(gs, VARYING_SLOT_POS); psize_regid = ir3_find_output_regid(gs, VARYING_SLOT_PSIZ); @@ -399,14 +418,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, COND(vs->need_pixlod, A6XX_SP_VS_CTRL_REG0_PIXLODENABLE)); struct ir3_shader_linkage l = {0}; - if (gs) - ir3_link_shaders(&l, gs, fs); - else - ir3_link_shaders(&l, vs, fs); - - const struct ir3_shader_variant *so_shader = fd6_last_shader(state); - if (so_shader->shader->stream_output.num_outputs > 0) - link_stream_out(&l, so_shader); + const struct ir3_shader_variant *last_shader = fd6_last_shader(state); + ir3_link_shaders(&l, last_shader, fs); BITSET_DECLARE(varbs, 128) = {0}; uint32_t *varmask = (uint32_t *)varbs; @@ -421,6 +434,10 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_RING(ring, ~varmask[2]); /* VPC_VAR[2].DISABLE */ OUT_RING(ring, ~varmask[3]); /* VPC_VAR[3].DISABLE */ + /* Add stream out outputs after computing the VPC_VAR_DISABLE bitmask. */ + if (last_shader->shader->stream_output.num_outputs > 0) + link_stream_out(&l, last_shader); + if (VALIDREG(layer_regid)) { layer_loc = l.max_loc; ir3_link_add(&l, layer_regid, 0x1, l.max_loc); @@ -436,13 +453,15 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, ir3_link_add(&l, psize_regid, 0x1, l.max_loc); } - if (so_shader->shader->stream_output.num_outputs > 0) { - setup_stream_out(state, so_shader, &l); + if (last_shader->shader->stream_output.num_outputs > 0) { + setup_stream_out(state, last_shader, &l); } debug_assert(l.cnt < 32); if (gs) OUT_PKT4(ring, REG_A6XX_SP_GS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2)); + else if (ds) + OUT_PKT4(ring, REG_A6XX_SP_DS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2)); else OUT_PKT4(ring, REG_A6XX_SP_VS_OUT_REG(0), DIV_ROUND_UP(l.cnt, 2)); @@ -462,6 +481,8 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, if (gs) OUT_PKT4(ring, REG_A6XX_SP_GS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4)); + else if (ds) + OUT_PKT4(ring, REG_A6XX_SP_DS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4)); else OUT_PKT4(ring, REG_A6XX_SP_VS_VPC_DST_REG(0), DIV_ROUND_UP(l.cnt, 4)); @@ -477,9 +498,89 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, } fd6_emit_shader(ring, vs); + ir3_emit_immediates(screen, vs, ring); - OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1); - OUT_RING(ring, 0); + if (hs) { + OUT_PKT4(ring, REG_A6XX_SP_HS_CTRL_REG0, 1); + OUT_RING(ring, A6XX_SP_HS_CTRL_REG0_THREADSIZE(TWO_QUADS) | + A6XX_SP_HS_CTRL_REG0_FULLREGFOOTPRINT(hs->info.max_reg + 1) | + A6XX_SP_HS_CTRL_REG0_BRANCHSTACK(hs->branchstack) | + COND(hs->need_pixlod, A6XX_SP_HS_CTRL_REG0_PIXLODENABLE)); + + fd6_emit_shader(ring, hs); + ir3_emit_immediates(screen, hs, ring); + ir3_emit_link_map(screen, vs, hs, ring); + + OUT_PKT4(ring, REG_A6XX_SP_DS_CTRL_REG0, 1); + OUT_RING(ring, A6XX_SP_DS_CTRL_REG0_THREADSIZE(TWO_QUADS) | + A6XX_SP_DS_CTRL_REG0_FULLREGFOOTPRINT(ds->info.max_reg + 1) | + A6XX_SP_DS_CTRL_REG0_BRANCHSTACK(ds->branchstack) | + COND(ds->need_pixlod, A6XX_SP_DS_CTRL_REG0_PIXLODENABLE)); + + fd6_emit_shader(ring, ds); + ir3_emit_immediates(screen, ds, ring); + ir3_emit_link_map(screen, hs, ds, ring); + + shader_info *hs_info = &hs->shader->nir->info; + OUT_PKT4(ring, REG_A6XX_PC_TESS_NUM_VERTEX, 1); + OUT_RING(ring, hs_info->tess.tcs_vertices_out); + + /* Total attribute slots in HS incoming patch. */ + OUT_PKT4(ring, REG_A6XX_PC_UNKNOWN_9801, 1); + OUT_RING(ring, hs_info->tess.tcs_vertices_out * vs->shader->output_size / 4); + + OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1); + OUT_RING(ring, vs->shader->output_size); + + shader_info *ds_info = &ds->shader->nir->info; + OUT_PKT4(ring, REG_A6XX_PC_TESS_CNTL, 1); + uint32_t output; + if (ds_info->tess.point_mode) + output = TESS_POINTS; + else if (ds_info->tess.primitive_mode == GL_ISOLINES) + output = TESS_LINES; + else if (ds_info->tess.ccw) + output = TESS_CCW_TRIS; + else + output = TESS_CW_TRIS; + + OUT_RING(ring, A6XX_PC_TESS_CNTL_SPACING(fd6_gl2spacing(ds_info->tess.spacing)) | + A6XX_PC_TESS_CNTL_OUTPUT(output)); + + /* xxx: Misc tess unknowns: */ + OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9103, 1); + OUT_RING(ring, 0x00ffff00); + + OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9106, 1); + OUT_RING(ring, 0x0000ffff); + + OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_809D, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_GRAS_UNKNOWN_8002, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A6XX_VPC_PACK, 1); + OUT_RING(ring, A6XX_VPC_PACK_POSITIONLOC(pos_loc) | + A6XX_VPC_PACK_PSIZELOC(255) | + A6XX_VPC_PACK_STRIDE_IN_VPC(l.max_loc)); + + OUT_PKT4(ring, REG_A6XX_VPC_PACK_3, 1); + OUT_RING(ring, A6XX_VPC_PACK_3_POSITIONLOC(pos_loc) | + A6XX_VPC_PACK_3_PSIZELOC(psize_loc) | + A6XX_VPC_PACK_3_STRIDE_IN_VPC(l.max_loc)); + + OUT_PKT4(ring, REG_A6XX_SP_DS_PRIMITIVE_CNTL, 1); + OUT_RING(ring, A6XX_SP_DS_PRIMITIVE_CNTL_DSOUT(l.cnt)); + + OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_4, 1); + OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_4_STRIDE_IN_VPC(l.max_loc) | + CONDREG(psize_regid, 0x100)); + + } else { + OUT_PKT4(ring, REG_A6XX_SP_HS_UNKNOWN_A831, 1); + OUT_RING(ring, 0); + } OUT_PKT4(ring, REG_A6XX_SP_PRIMITIVE_CNTL, 1); OUT_RING(ring, A6XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt)); @@ -495,6 +596,9 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_1_STRIDE_IN_VPC(l.max_loc) | CONDREG(psize_regid, A6XX_PC_PRIMITIVE_CNTL_1_PSIZE)); + OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_3, 1); + OUT_RING(ring, 0); + OUT_PKT4(ring, REG_A6XX_HLSQ_CONTROL_1_REG, 5); OUT_RING(ring, 0x7); /* XXX */ OUT_RING(ring, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) | @@ -595,7 +699,10 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, fd6_emit_shader(ring, gs); ir3_emit_immediates(screen, gs, ring); - ir3_emit_link_map(screen, vs, gs, ring); + if (ds) + ir3_emit_link_map(screen, ds, gs, ring); + else + ir3_emit_link_map(screen, vs, gs, ring); OUT_PKT4(ring, REG_A6XX_VPC_PACK_GS, 1); OUT_RING(ring, A6XX_VPC_PACK_GS_POSITIONLOC(pos_loc) | @@ -646,19 +753,15 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9100, 1); OUT_RING(ring, 0xff); - OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9101, 1); - OUT_RING(ring, 0xffff00); - OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9102, 1); OUT_RING(ring, 0xffff00); - OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9107, 1); - OUT_RING(ring, 0); + const struct ir3_shader_variant *prev = state->ds ? state->ds : state->vs; /* Size of per-primitive alloction in ldlw memory in vec4s. */ uint32_t vec4_size = gs->shader->nir->info.gs.vertices_in * - DIV_ROUND_UP(vs->shader->output_size, 4); + DIV_ROUND_UP(prev->shader->output_size, 4); OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); OUT_RING(ring, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size)); @@ -666,9 +769,21 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_RING(ring, 0); OUT_PKT4(ring, REG_A6XX_SP_GS_UNKNOWN_A871, 1); - OUT_RING(ring, 3); + OUT_RING(ring, prev->shader->output_size); + } else { + OUT_PKT4(ring, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); + OUT_RING(ring, 0); + OUT_PKT4(ring, REG_A6XX_SP_GS_UNKNOWN_A871, 1); + OUT_RING(ring, 0); } + OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9101, 1); + OUT_RING(ring, 0xffff00); + + OUT_PKT4(ring, REG_A6XX_VPC_UNKNOWN_9107, 1); + OUT_RING(ring, 0); + + if (!binning_pass) { /* figure out VARYING_INTERP / VARYING_PS_REPL register values: */ for (j = -1; (j = ir3_next_varying(fs, j)) < (int)fs->inputs_count; ) { @@ -701,11 +816,11 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, A6XX_VFD_CONTROL_1_REGID4INST(instance_regid) | A6XX_VFD_CONTROL_1_REGID4PRIMID(primitive_regid) | 0xfc000000); - OUT_RING(ring, A6XX_VFD_CONTROL_2_REGID_HSPATCHID(regid(63,0)) | - A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(regid(63,0))); - OUT_RING(ring, A6XX_VFD_CONTROL_3_REGID_DSPATCHID(regid(63,0)) | - A6XX_VFD_CONTROL_3_REGID_TESSX(regid(63,0)) | - A6XX_VFD_CONTROL_3_REGID_TESSY(regid(63,0)) | + OUT_RING(ring, A6XX_VFD_CONTROL_2_REGID_HSPATCHID(hs_patch_regid) | + A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid)); + OUT_RING(ring, A6XX_VFD_CONTROL_3_REGID_DSPATCHID(ds_patch_regid) | + A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) | + A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) | 0xfc); OUT_RING(ring, 0x000000fc); /* VFD_CONTROL_4 */ OUT_RING(ring, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gs_header_regid) | @@ -720,13 +835,6 @@ setup_stateobj(struct fd_ringbuffer *ring, struct fd_screen *screen, OUT_PKT4(ring, REG_A6XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); OUT_RING(ring, COND(fragz, A6XX_GRAS_SU_DEPTH_PLANE_CNTL_FRAG_WRITES_Z)); - ir3_emit_immediates(screen, vs, ring); - - if (hs) { - ir3_emit_immediates(screen, hs, ring); - ir3_emit_immediates(screen, ds, ring); - } - if (!binning_pass) ir3_emit_immediates(screen, fs, ring); } -- 2.30.2