From 08837ea3d238b88866e7a767923e05e754db8d5a Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 16 Jan 2020 15:38:37 -0800
Subject: [PATCH] turnip: Port krh's packing macros from freedreno to tu.

This introduces some minor unpacking of the temporary fd_reg_pair structs to
code that previously was packing a whole register field.

In the pack wrapper in tu_cs.h, I added some explanatory docs, dropped the
relocs handling since we don't need it, and removed the extra regs[] in the
__ONE_REG() macro (which was causing gcc's optimizer to fall on its face in my
release build).

Part-of:
---
Review notes: tu_cs_emit_regs() now applies STATIC_ASSERT to ARRAY_SIZE(regs),
which is an integer constant expression, instead of the runtime local `count`,
and parenthesizes its `cs` argument.  __assert_eq() and __ONE_REG() gained
comments, and the tu_reg_value comment now names a6xx-pack.xml.h.

 src/freedreno/vulkan/tu_cmd_buffer.c | 13 +++--
 src/freedreno/vulkan/tu_cs.h         | 93 ++++++++++++++++++++++++++++++++++
 src/freedreno/vulkan/tu_pipeline.c   | 20 +++----
 src/freedreno/vulkan/tu_private.h    | 13 +++++
 4 files changed, 122 insertions(+), 17 deletions(-)

diff --git a/src/freedreno/vulkan/tu_cmd_buffer.c b/src/freedreno/vulkan/tu_cmd_buffer.c
index e8098dd72e1..0158cca0912 100644
--- a/src/freedreno/vulkan/tu_cmd_buffer.c
+++ b/src/freedreno/vulkan/tu_cmd_buffer.c
@@ -29,7 +29,6 @@
 
 #include "registers/adreno_pm4.xml.h"
 #include "registers/adreno_common.xml.h"
-#include "registers/a6xx.xml.h"
 
 #include "vk_format.h"
 
@@ -405,8 +404,8 @@ tu6_emit_zs(struct tu_cmd_buffer *cmd,
 
    tu_cs_emit_pkt4(cs, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
    tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
-   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip)));
-   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layout.layer_size));
+   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_PITCH(tu_image_stride(iview->image, iview->base_mip)).value);
+   tu_cs_emit(cs, A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(iview->image->layout.layer_size).value);
    tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
    tu_cs_emit(cs, cmd->state.pass->attachments[a].gmem_offset);
 
@@ -460,8 +459,8 @@ tu6_emit_mrt(struct tu_cmd_buffer *cmd,
       tu_cs_emit(cs, A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format->rb) |
                         A6XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
                         A6XX_RB_MRT_BUF_INFO_COLOR_SWAP(format->swap));
-      tu_cs_emit(cs, A6XX_RB_MRT_PITCH(tu_image_stride(iview->image, iview->base_mip)));
-      tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(iview->image->layout.layer_size));
+      tu_cs_emit(cs, A6XX_RB_MRT_PITCH(i, tu_image_stride(iview->image, iview->base_mip)).value);
+      tu_cs_emit(cs, A6XX_RB_MRT_ARRAY_PITCH(i, iview->image->layout.layer_size).value);
       tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
       tu_cs_emit(cs, cmd->state.pass->attachments[a].gmem_offset);
 
@@ -612,8 +611,8 @@ tu6_emit_blit_info(struct tu_cmd_buffer *cmd,
                      COND(iview->image->layout.ubwc_size,
                           A6XX_RB_BLIT_DST_INFO_FLAGS));
    tu_cs_emit_qw(cs, tu_image_base(iview->image, iview->base_mip, iview->base_layer));
-   tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)));
-   tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layout.layer_size));
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_PITCH(tu_image_stride(iview->image, iview->base_mip)).value);
+   tu_cs_emit(cs, A6XX_RB_BLIT_DST_ARRAY_PITCH(iview->image->layout.layer_size).value);
 
    if (iview->image->layout.ubwc_size) {
       tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST_LO, 3);
diff --git a/src/freedreno/vulkan/tu_cs.h b/src/freedreno/vulkan/tu_cs.h
index e5c47d005be..e61b82e3c7d 100644
--- a/src/freedreno/vulkan/tu_cs.h
+++ b/src/freedreno/vulkan/tu_cs.h
@@ -207,4 +207,97 @@ tu_cs_emit_call(struct tu_cs *cs, const struct tu_cs *target)
       tu_cs_emit_ib(cs, target->entries + i);
 }
 
+#define fd_reg_pair tu_reg_value
+#define __bo_type struct tu_bo *
+
+#include "a6xx.xml.h"
+#include "a6xx-pack.xml.h"
+
+/* Like assert((a) == (b)), but first prints both values in hex to stderr so
+ * that a mismatch is visible in the failure output.
+ */
+#define __assert_eq(a, b)                                           \
+   do {                                                             \
+      if ((a) != (b)) {                                             \
+         fprintf(stderr, "assert failed: " #a " (0x%x) != " #b " (0x%x)\n", a, b); \
+         assert((a) == (b));                                        \
+      }                                                             \
+   } while (0)
+
+/* Writes regs[i] at the dword cursor `p` declared by tu_cs_emit_regs(), after
+ * checking that its offset is regs[0].reg + i.  Indices past the end of
+ * regs[] and entries whose .reg is 0 emit nothing.  An entry with a BO emits
+ * two dwords of ((bo->iova + bo_offset) >> bo_shift) | value; any other entry
+ * emits one dword, plus the high 32 bits of .value when .is_address is set.
+ */
+#define __ONE_REG(i, regs)                                          \
+   do {                                                             \
+      if (i < ARRAY_SIZE(regs) && regs[i].reg > 0) {                \
+         __assert_eq(regs[0].reg + i, regs[i].reg);                 \
+         if (regs[i].bo) {                                          \
+            uint64_t v = regs[i].bo->iova + regs[i].bo_offset;      \
+            v >>= regs[i].bo_shift;                                 \
+            v |= regs[i].value;                                     \
+                                                                    \
+            *p++ = v;                                               \
+            *p++ = v >> 32;                                         \
+         } else {                                                   \
+            *p++ = regs[i].value;                                   \
+            if (regs[i].is_address)                                 \
+               *p++ = regs[i].value >> 32;                          \
+         }                                                          \
+      }                                                             \
+   } while (0)
+
+/* Emits a sequence of register writes in order using a pkt4.  This will check
+ * (at runtime on a !NDEBUG build) that the registers were actually set up in
+ * order in the code.
+ *
+ * Note that references to buffers aren't automatically added to the CS,
+ * unlike in freedreno.  We are clever in various places to avoid duplicating
+ * the reference add work.
+ *
+ * Also, 64-bit address registers don't have a way (currently) to set a 64-bit
+ * address without having a reference to a BO, since the .dword field in the
+ * register's struct is only 32-bit wide.  We should fix this in the pack
+ * codegen later.
+ *
+ * At most 16 registers can be passed, matching the unrolled __ONE_REG() list
+ * below; the count is checked with STATIC_ASSERT on ARRAY_SIZE(regs).
+ *
+ * NOTE(review): the dwords are written at cs->cur without any space check in
+ * this macro; presumably callers reserve space beforehand -- confirm.
+ */
+#define tu_cs_emit_regs(cs, ...) do {                               \
+   const struct fd_reg_pair regs[] = { __VA_ARGS__ };               \
+   unsigned count = ARRAY_SIZE(regs);                               \
+                                                                    \
+   STATIC_ASSERT(ARRAY_SIZE(regs) > 0);                             \
+   STATIC_ASSERT(ARRAY_SIZE(regs) <= 16);                           \
+                                                                    \
+   uint32_t *p = (cs)->cur;                                         \
+   *p++ = CP_TYPE4_PKT | count |                                    \
+      (tu_odd_parity_bit(count) << 7) |                             \
+      ((regs[0].reg & 0x3ffff) << 8) |                              \
+      ((tu_odd_parity_bit(regs[0].reg) << 27));                     \
+                                                                    \
+   __ONE_REG( 0, regs);                                             \
+   __ONE_REG( 1, regs);                                             \
+   __ONE_REG( 2, regs);                                             \
+   __ONE_REG( 3, regs);                                             \
+   __ONE_REG( 4, regs);                                             \
+   __ONE_REG( 5, regs);                                             \
+   __ONE_REG( 6, regs);                                             \
+   __ONE_REG( 7, regs);                                             \
+   __ONE_REG( 8, regs);                                             \
+   __ONE_REG( 9, regs);                                             \
+   __ONE_REG(10, regs);                                             \
+   __ONE_REG(11, regs);                                             \
+   __ONE_REG(12, regs);                                             \
+   __ONE_REG(13, regs);                                             \
+   __ONE_REG(14, regs);                                             \
+   __ONE_REG(15, regs);                                             \
+   (cs)->cur = p;                                                   \
+   } while (0)
+
 #endif /* TU_CS_H */
diff --git a/src/freedreno/vulkan/tu_pipeline.c b/src/freedreno/vulkan/tu_pipeline.c
index c6624dd4932..8fe9b974db2 100644
--- a/src/freedreno/vulkan/tu_pipeline.c
+++ b/src/freedreno/vulkan/tu_pipeline.c
@@ -1185,12 +1185,12 @@ tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport)
    guardband_adj.height = tu6_guardband_adj(max.y - min.y);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET_0, 6);
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0]));
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0]));
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1]));
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1]));
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2]));
-   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2]));
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XOFFSET_0(offsets[0]).value);
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_XSCALE_0(scales[0]).value);
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YOFFSET_0(offsets[1]).value);
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_YSCALE_0(scales[1]).value);
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZOFFSET_0(offsets[2]).value);
+   tu_cs_emit(cs, A6XX_GRAS_CL_VPORT_ZSCALE_0(scales[2]).value);
 
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2);
    tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(min.x) |
@@ -1237,7 +1237,7 @@ tu6_emit_point_size(struct tu_cs *cs)
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POINT_MINMAX, 2);
    tu_cs_emit(cs, A6XX_GRAS_SU_POINT_MINMAX_MIN(1.0f / 16.0f) |
                      A6XX_GRAS_SU_POINT_MINMAX_MAX(4092.0f));
-   tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f));
+   tu_cs_emit(cs, A6XX_GRAS_SU_POINT_SIZE(1.0f).value);
 }
 
 static uint32_t
@@ -1284,9 +1284,9 @@ tu6_emit_depth_bias(struct tu_cs *cs,
                     float slope_factor)
 {
    tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3);
-   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor));
-   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor));
-   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp));
+   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor).value);
+   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor).value);
+   tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value);
 }
 
 static void
diff --git a/src/freedreno/vulkan/tu_private.h b/src/freedreno/vulkan/tu_private.h
index 79de5dc82de..43f676dc7a4 100644
--- a/src/freedreno/vulkan/tu_private.h
+++ b/src/freedreno/vulkan/tu_private.h
@@ -955,6 +955,19 @@ struct tu_cmd_buffer
    bool wait_for_idle;
 };
 
+/* Temporary struct for tracking a register state to be written, used by
+ * a6xx-pack.xml.h and tu_cs_emit_regs()
+ */
+struct tu_reg_value {
+   uint32_t reg;
+   uint64_t value;
+   bool is_address;
+   struct tu_bo *bo;
+   bool bo_write;
+   uint32_t bo_offset;
+   uint32_t bo_shift;
+};
+
 unsigned
 tu6_emit_event_write(struct tu_cmd_buffer *cmd,
                      struct tu_cs *cs,
-- 
2.30.2