From 093c4137224a59536af9e8062f5c98c2dc136ba1 Mon Sep 17 00:00:00 2001 From: Jonathan Marek Date: Thu, 11 Jun 2020 08:38:18 -0400 Subject: [PATCH] turnip: share code between 3D blit/clear path and tu_pipeline Instead of filling out registers manually, fill out ir3 structs and re-use code from tu_pipeline. Signed-off-by: Jonathan Marek Part-of: --- src/freedreno/vulkan/tu_clear_blit.c | 214 +++++++++++---------------- src/freedreno/vulkan/tu_pipeline.c | 10 +- src/freedreno/vulkan/tu_private.h | 16 ++ src/freedreno/vulkan/tu_shader.c | 3 + 4 files changed, 108 insertions(+), 135 deletions(-) diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c index 373dbe13ef1..0a6fc2a933c 100644 --- a/src/freedreno/vulkan/tu_clear_blit.c +++ b/src/freedreno/vulkan/tu_clear_blit.c @@ -485,6 +485,59 @@ r2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs) static void r3d_pipeline(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_rts) { + struct ir3_shader dummy_shader = {}; + + struct ir3_shader_variant vs = { + .type = MESA_SHADER_VERTEX, + .instrlen = 1, + .constlen = 2, + .info.max_reg = 1, + .inputs_count = 1, + .inputs[0] = { + .slot = SYSTEM_VALUE_VERTEX_ID, + .regid = regid(0, 3), + .sysval = true, + }, + .outputs_count = blit ? 2 : 1, + .outputs[0] = { + .slot = VARYING_SLOT_POS, + .regid = regid(0, 0), + }, + .outputs[1] = { + .slot = VARYING_SLOT_VAR0, + .regid = regid(1, 0), + }, + .shader = &dummy_shader, + }; + + struct ir3_shader_variant fs = { + .type = MESA_SHADER_FRAGMENT, + .instrlen = 1, /* max of 9 instructions with num_rts = 8 */ + .constlen = num_rts, + .info.max_reg = MAX2(num_rts, 1) - 1, + .total_in = blit ? 2 : 0, + .num_samp = blit ? 1 : 0, + .inputs_count = blit ? 
2 : 0, + .inputs[0] = { + .slot = VARYING_SLOT_VAR0, + .inloc = 0, + .compmask = 3, + .bary = true, + }, + .inputs[1] = { + .slot = SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL, + .regid = regid(0, 0), + .sysval = 1, + }, + .num_sampler_prefetch = blit ? 1 : 0, + .sampler_prefetch[0] = { + .src = 0, + .wrmask = 0xf, + .cmd = 4, + }, + .shader = &dummy_shader, + }; + static const instr_t vs_code[] = { /* r0.xyz = r0.w ? c1.xyz : c0.xyz * r1.xy = r0.w ? c1.zw : c0.zw @@ -509,85 +562,6 @@ r3d_pipeline(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t nu #define FS_OFFSET (16 * sizeof(instr_t)) STATIC_ASSERT(sizeof(vs_code) <= FS_OFFSET); - /* vs inputs: only vtx id in r0.w */ - tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_0, 7); - tu_cs_emit(cs, 0x00000000); - tu_cs_emit(cs, 0xfcfcfc00 | A6XX_VFD_CONTROL_1_REGID4VTX(3)); - tu_cs_emit(cs, 0x0000fcfc); - tu_cs_emit(cs, 0xfcfcfcfc); - tu_cs_emit(cs, 0x000000fc); - tu_cs_emit(cs, 0x0000fcfc); - tu_cs_emit(cs, 0x00000000); - - /* vs outputs: position in r0.xyzw, blit coords in r1.xy */ - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4); - tu_cs_emit(cs, blit ? 0xffffffcf : 0xffffffff); - tu_cs_emit(cs, 0xffffffff); - tu_cs_emit(cs, 0xffffffff); - tu_cs_emit(cs, 0xffffffff); - - tu_cs_emit_regs(cs, A6XX_SP_VS_OUT_REG(0, - .a_regid = 0, .a_compmask = 0xf, - .b_regid = 4, .b_compmask = 0x3)); - tu_cs_emit_regs(cs, A6XX_SP_VS_VPC_DST_REG(0, .outloc0 = 0, .outloc1 = 4)); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1); - tu_cs_emit(cs, 0xff00ff00 | - COND(blit, A6XX_VPC_CNTL_0_VARYING) | - A6XX_VPC_CNTL_0_NUMNONPOSVAR(blit ? 8 : 0)); - - tu_cs_emit_regs(cs, A6XX_VPC_PACK( - .positionloc = 0, - .psizeloc = 0xff, - .stride_in_vpc = blit ? 6 : 4)); - tu_cs_emit_regs(cs, A6XX_SP_PRIMITIVE_CNTL(.vsout = blit ? 2 : 1)); - tu_cs_emit_regs(cs, - A6XX_PC_PRIMITIVE_CNTL_0(), - A6XX_PC_PRIMITIVE_CNTL_1(.stride_in_vpc = blit ? 6 : 4)); - - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8); - tu_cs_emit(cs, blit ? 
0xe000 : 0); // I think this can just be 0 - for (uint32_t i = 1; i < 8; i++) - tu_cs_emit(cs, 0); - - tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8); - for (uint32_t i = 0; i < 8; i++) - tu_cs_emit(cs, 0x99999999); - - /* fs inputs: none, prefetch in blit case */ - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + blit); - tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CNTL_COUNT(blit) | - A6XX_SP_FS_PREFETCH_CNTL_UNK4(0xfc) | - 0x7000); - if (blit) { - tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CMD_SRC(4) | - A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(0) | - A6XX_SP_FS_PREFETCH_CMD_TEX_ID(0) | - A6XX_SP_FS_PREFETCH_CMD_DST(0) | - A6XX_SP_FS_PREFETCH_CMD_WRMASK(0xf) | - A6XX_SP_FS_PREFETCH_CMD_CMD(0x4)); - } - - tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CONTROL_1_REG, 5); - tu_cs_emit(cs, 0x3); // XXX blob uses 3 in blit path - tu_cs_emit(cs, 0xfcfcfcfc); - tu_cs_emit(cs, A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_PIXEL(blit ? 0 : 0xfc) | - A6XX_HLSQ_CONTROL_3_REG_BARY_IJ_CENTROID(0xfc) | - 0xfc00fc00); - tu_cs_emit(cs, 0xfcfcfcfc); - tu_cs_emit(cs, 0xfcfc); - - tu_cs_emit_regs(cs, A6XX_HLSQ_UNKNOWN_B980(blit ? 
3 : 1)); - tu_cs_emit_regs(cs, A6XX_GRAS_CNTL(.varying = blit)); - tu_cs_emit_regs(cs, - A6XX_RB_RENDER_CONTROL0(.varying = blit, .unk10 = blit), - A6XX_RB_RENDER_CONTROL1()); - - tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_CNTL()); - tu_cs_emit_regs(cs, A6XX_GRAS_UNKNOWN_8101()); - tu_cs_emit_regs(cs, A6XX_GRAS_SAMPLE_CNTL()); - /* shaders */ struct ts_cs_memory shaders = { }; VkResult result = tu_cs_alloc(&cmd->sub_cs, 2, 16 * sizeof(instr_t), &shaders); @@ -595,67 +569,45 @@ r3d_pipeline(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t nu memcpy(shaders.map, vs_code, sizeof(vs_code)); - instr_t *fs = (instr_t*) ((uint8_t*) shaders.map + FS_OFFSET); + instr_t *fs_code = (instr_t*) ((uint8_t*) shaders.map + FS_OFFSET); for (uint32_t i = 0; i < num_rts; i++) { /* (rpt3)mov.s32s32 r0.x, (r)c[i].x */ - fs[i] = (instr_t) { .cat1 = { .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32, - .repeat = 3, .dst = i * 4, .src_c = 1, .src_r = 1, .src = i * 4 } }; + *fs_code++ = (instr_t) { .cat1 = { + .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32, + .repeat = 3, .dst = i * 4, .src_c = 1, .src_r = 1, .src = i * 4 + } }; + } + + /* " bary.f (ei)r63.x, 0, r0.x" note the blob doesn't have this in its + * blit path (it's not clear what allows it to not have it) + */ + if (blit) { + *fs_code++ = (instr_t) { .cat2 = { + .opc_cat = 2, .opc = OPC_BARY_F & 63, .ei = 1, .full = 1, + .dst = regid(63, 0), .src1_im = 1 + } }; } - fs[num_rts] = (instr_t) { .cat0 = { .opc = OPC_END } }; + *fs_code++ = (instr_t) { .cat0 = { .opc = OPC_END } }; /* note: assumed <= 16 instructions (MAX_RTS is 8) */ tu_cs_emit_regs(cs, A6XX_HLSQ_UPDATE_CNTL(0x7ffff)); - tu_cs_emit_regs(cs, - A6XX_HLSQ_VS_CNTL(.constlen = 8, .enabled = true), - A6XX_HLSQ_HS_CNTL(), - A6XX_HLSQ_DS_CNTL(), - A6XX_HLSQ_GS_CNTL()); - tu_cs_emit_regs(cs, A6XX_HLSQ_FS_CNTL(.constlen = 4 * num_rts, .enabled = true)); - tu_cs_emit_regs(cs, - A6XX_SP_VS_CONFIG(.enabled = true), - A6XX_SP_VS_INSTRLEN(1)); - 
tu_cs_emit_regs(cs, A6XX_SP_HS_CONFIG()); - tu_cs_emit_regs(cs, A6XX_SP_DS_CONFIG()); - tu_cs_emit_regs(cs, A6XX_SP_GS_CONFIG()); - tu_cs_emit_regs(cs, - A6XX_SP_FS_CONFIG(.enabled = true, .ntex = blit, .nsamp = blit), - A6XX_SP_FS_INSTRLEN(1)); - - tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0( - .threadsize = FOUR_QUADS, - .fullregfootprint = 2, - .mergedregs = true)); - tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0( - .varying = blit, - .threadsize = FOUR_QUADS, - /* could this be 0 in !blit && !num_rts case ? */ - .fullregfootprint = MAX2(1, num_rts), - .mergedregs = true)); /* note: tu_pipeline also sets 0x1000000 bit */ - - tu_cs_emit_regs(cs, A6XX_SP_IBO_COUNT(0)); - - tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(1)); - tu_cs_emit_qw(cs, shaders.iova); + tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs, shaders.iova); + tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL, 0); + tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL, 0); + tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL, 0); + tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs, shaders.iova + FS_OFFSET); - tu_cs_emit_pkt4(cs, REG_A6XX_SP_VS_OBJ_START_LO, 2); - tu_cs_emit_qw(cs, shaders.iova); + tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0()); + tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0()); - tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3); - tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | - CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | - CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | - CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) | - CP_LOAD_STATE6_0_NUM_UNIT(1)); - tu_cs_emit_qw(cs, shaders.iova + FS_OFFSET); - - tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OBJ_START_LO, 2); - tu_cs_emit_qw(cs, shaders.iova + FS_OFFSET); + tu6_emit_vpc(cs, &vs, NULL, &fs, NULL); + + /* REPL_MODE for varying with RECTLIST (2 vertices only) */ + 
tu_cs_emit_regs(cs, A6XX_VPC_VARYING_INTERP_MODE(0, 0)); + tu_cs_emit_regs(cs, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0)); + + tu6_emit_fs_inputs(cs, &fs); tu_cs_emit_regs(cs, A6XX_GRAS_CL_CNTL( diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c index 462e2018601..88bfd9a6caa 100644 --- a/src/freedreno/vulkan/tu_pipeline.c +++ b/src/freedreno/vulkan/tu_pipeline.c @@ -628,7 +628,7 @@ tu6_blend_op(VkBlendOp op) } } -static void +void tu6_emit_xs_config(struct tu_cs *cs, gl_shader_stage stage, /* xs->type, but xs may be NULL */ const struct ir3_shader_variant *xs, @@ -731,7 +731,9 @@ tu6_emit_xs_config(struct tu_cs *cs, COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) | COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) | COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_IBO) | - COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO)); + COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) | + A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) | + A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp)); tu_cs_emit(cs, xs->instrlen); tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1); @@ -1003,7 +1005,7 @@ gl_primitive_to_tess(uint16_t primitive) { } } -static void +void tu6_emit_vpc(struct tu_cs *cs, const struct ir3_shader_variant *vs, const struct ir3_shader_variant *gs, @@ -1268,7 +1270,7 @@ tu6_emit_vpc_varying_modes(struct tu_cs *cs, tu_cs_emit_array(cs, ps_repl_modes, 8); } -static void +void tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) { uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid; diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h index 76803423629..1e1fdc1c61a 100644 --- a/src/freedreno/vulkan/tu_private.h +++ b/src/freedreno/vulkan/tu_private.h @@ -1445,6 +1445,22 @@ void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_ void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1); +void +tu6_emit_xs_config(struct tu_cs 
*cs, + gl_shader_stage stage, + const struct ir3_shader_variant *xs, + uint64_t binary_iova); + +void +tu6_emit_vpc(struct tu_cs *cs, + const struct ir3_shader_variant *vs, + const struct ir3_shader_variant *gs, + const struct ir3_shader_variant *fs, + struct tu_streamout_state *tf); + +void +tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs); + struct tu_image_view; void diff --git a/src/freedreno/vulkan/tu_shader.c b/src/freedreno/vulkan/tu_shader.c index d26154a3b02..3d9bab5830a 100644 --- a/src/freedreno/vulkan/tu_shader.c +++ b/src/freedreno/vulkan/tu_shader.c @@ -741,6 +741,9 @@ tu_compile_shader_variant(struct ir3_shader *shader, if (ret) return NULL; + /* num_samp should be 0 for bindless, but we use it with blit shader */ + variant->num_samp = 0; + /* when assemble fails, we rely on tu_shader_destroy to clean up the * variant */ -- 2.30.2