From: Eric Anholt Date: Thu, 7 May 2020 22:40:52 +0000 (-0700) Subject: freedreno: Deduplicate ringbuffer macros with computerator/fdperf X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=6c688ae81f4a6249cdccf1d218da5bebaf23e4f4;p=mesa.git freedreno: Deduplicate ringbuffer macros with computerator/fdperf They're sugar around freedreno_ringbuffer.h, so put them there and reuse them. Part-of: --- diff --git a/src/freedreno/computerator/main.h b/src/freedreno/computerator/main.h index 57b1ac07cb6..d72e143a3ae 100644 --- a/src/freedreno/computerator/main.h +++ b/src/freedreno/computerator/main.h @@ -80,102 +80,6 @@ to_ ## _to(struct _from *f) \ struct backend *a6xx_init(struct fd_device *dev, uint32_t gpu_id); -/* - * cmdstream helpers: - */ - -static inline void -BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) -{ - if (ring->cur + ndwords > ring->end) - fd_ringbuffer_grow(ring, ndwords); -} - -static inline void -OUT_RING(struct fd_ringbuffer *ring, uint32_t data) -{ - fd_ringbuffer_emit(ring, data); -} - -static inline unsigned -_odd_parity_bit(unsigned val) -{ - /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel - * note that we want odd parity so 0x6996 is inverted. 
- */ - val ^= val >> 16; - val ^= val >> 8; - val ^= val >> 4; - val &= 0xf; - return (~0x6996 >> val) & 1; -} - -static inline void -OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) -{ - BEGIN_RING(ring, cnt+1); - OUT_RING(ring, CP_TYPE4_PKT | cnt | - (_odd_parity_bit(cnt) << 7) | - ((regindx & 0x3ffff) << 8) | - ((_odd_parity_bit(regindx) << 27))); -} - -static inline void -OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) -{ - BEGIN_RING(ring, cnt+1); - OUT_RING(ring, CP_TYPE7_PKT | cnt | - (_odd_parity_bit(cnt) << 15) | - ((opcode & 0x7f) << 16) | - ((_odd_parity_bit(opcode) << 23))); -} - -/* - * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+ - */ - -static inline void -__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift, uint32_t flags) -{ - debug_assert(offset < fd_bo_size(bo)); - fd_ringbuffer_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .flags = flags, - .offset = offset, - .or = or, - .shift = shift, - .orhi = or >> 32, - }); -} - -static inline void -OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift) -{ - __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ); -} - -static inline void -OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift) -{ - __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE); -} - -static inline void -OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift) -{ - __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP); -} - -static inline void -OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) -{ - fd_ringbuffer_emit_reloc_ring_full(ring, target, 0); -} - /* for conditionally setting boolean flag(s): */ #define COND(bool, val) ((bool) ? 
(val) : 0) diff --git a/src/freedreno/drm/freedreno_ringbuffer.h b/src/freedreno/drm/freedreno_ringbuffer.h index 3b7078b9747..b484817e6f7 100644 --- a/src/freedreno/drm/freedreno_ringbuffer.h +++ b/src/freedreno/drm/freedreno_ringbuffer.h @@ -27,9 +27,13 @@ #ifndef FREEDRENO_RINGBUFFER_H_ #define FREEDRENO_RINGBUFFER_H_ +#include <stdio.h> #include "util/u_debug.h" +#include "util/u_dynarray.h" #include "freedreno_drmif.h" +#include "adreno_common.xml.h" +#include "adreno_pm4.xml.h" struct fd_submit; struct fd_ringbuffer; @@ -209,5 +213,143 @@ fd_ringbuffer_size(struct fd_ringbuffer *ring) return offset_bytes(ring->cur, ring->start); } +#define LOG_DWORDS 0 + +static inline void +OUT_RING(struct fd_ringbuffer *ring, uint32_t data) +{ + if (LOG_DWORDS) { + fprintf(stderr, "ring[%p]: OUT_RING %04x: %08x", ring, + (uint32_t)(ring->cur - ring->start), data); + } + fd_ringbuffer_emit(ring, data); +} + +/* + * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+ + */ + +static inline void +__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift, uint32_t flags) +{ + if (LOG_DWORDS) { + fprintf(stderr, "ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, + (uint32_t)(ring->cur - ring->start), bo, offset, shift); + } + debug_assert(offset < fd_bo_size(bo)); + fd_ringbuffer_reloc(ring, &(struct fd_reloc){ + .bo = bo, + .flags = flags, + .offset = offset, + .or = or, + .shift = shift, + .orhi = or >> 32, + }); +} + +static inline void +OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift) +{ + __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ); +} + +static inline void +OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift) +{ + __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE); +} + +static inline void +OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo, + uint32_t offset, uint64_t or, int32_t shift) +{ + 
__out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP); +} + +static inline void +OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) +{ + fd_ringbuffer_emit_reloc_ring_full(ring, target, 0); +} + +static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) +{ + if (unlikely(ring->cur + ndwords > ring->end)) + fd_ringbuffer_grow(ring, ndwords); +} + +static inline void +OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); +} + +static inline void +OUT_PKT2(struct fd_ringbuffer *ring) +{ + BEGIN_RING(ring, 1); + OUT_RING(ring, CP_TYPE2_PKT); +} + +static inline void +OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); +} + +/* + * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3 + */ + +static inline unsigned +_odd_parity_bit(unsigned val) +{ + /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel + * note that we want odd parity so 0x6996 is inverted. 
+ */ + val ^= val >> 16; + val ^= val >> 8; + val ^= val >> 4; + val &= 0xf; + return (~0x6996 >> val) & 1; +} + +static inline void +OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE4_PKT | cnt | + (_odd_parity_bit(cnt) << 7) | + ((regindx & 0x3ffff) << 8) | + ((_odd_parity_bit(regindx) << 27))); +} + +static inline void +OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE7_PKT | cnt | + (_odd_parity_bit(cnt) << 15) | + ((opcode & 0x7f) << 16) | + ((_odd_parity_bit(opcode) << 23))); +} + +static inline void +OUT_WFI(struct fd_ringbuffer *ring) +{ + OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); + OUT_RING(ring, 0x00000000); +} + +static inline void +OUT_WFI5(struct fd_ringbuffer *ring) +{ + OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); +} #endif /* FREEDRENO_RINGBUFFER_H_ */ diff --git a/src/freedreno/drm/meson.build b/src/freedreno/drm/meson.build index 1ebfdfba67a..b29ea9d71f6 100644 --- a/src/freedreno/drm/meson.build +++ b/src/freedreno/drm/meson.build @@ -37,7 +37,10 @@ libfreedreno_drm_files = files( libfreedreno_drm = static_library( 'freedreno_drm', - libfreedreno_drm_files, + [ + libfreedreno_drm_files, + freedreno_xml_header_files, + ], include_directories : [ inc_freedreno, inc_include, diff --git a/src/freedreno/meson.build b/src/freedreno/meson.build index f8977ab348a..942fa3c444d 100644 --- a/src/freedreno/meson.build +++ b/src/freedreno/meson.build @@ -20,9 +20,9 @@ inc_freedreno = include_directories(['.', './registers']) +subdir('registers') subdir('drm') subdir('ir3') -subdir('registers') subdir('fdl') subdir('perfcntrs') subdir('computerator') diff --git a/src/freedreno/perfcntrs/fdperf.c b/src/freedreno/perfcntrs/fdperf.c index 3eb2f827a8d..cc5e90c39d9 100644 --- a/src/freedreno/perfcntrs/fdperf.c +++ b/src/freedreno/perfcntrs/fdperf.c @@ -155,70 +155,6 @@ delta(uint32_t a, uint32_t b) return b - a; } -/* - * TODO 
de-duplicate OUT_RING() and friends - */ - -#define CP_WAIT_FOR_IDLE 38 -#define CP_TYPE0_PKT 0x00000000 -#define CP_TYPE3_PKT 0xc0000000 -#define CP_TYPE4_PKT 0x40000000 -#define CP_TYPE7_PKT 0x70000000 - -static inline void -OUT_RING(struct fd_ringbuffer *ring, uint32_t data) -{ - *(ring->cur++) = data; -} - -static inline void -OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) -{ - OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); -} - -static inline void -OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) -{ - OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); -} - - -/* - * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3 - */ - -static inline unsigned -_odd_parity_bit(unsigned val) -{ - /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel - * note that we want odd parity so 0x6996 is inverted. - */ - val ^= val >> 16; - val ^= val >> 8; - val ^= val >> 4; - val &= 0xf; - return (~0x6996 >> val) & 1; -} - -static inline void -OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) -{ - OUT_RING(ring, CP_TYPE4_PKT | cnt | - (_odd_parity_bit(cnt) << 7) | - ((regindx & 0x3ffff) << 8) | - ((_odd_parity_bit(regindx) << 27))); -} - -static inline void -OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) -{ - OUT_RING(ring, CP_TYPE7_PKT | cnt | - (_odd_parity_bit(cnt) << 15) | - ((opcode & 0x7f) << 16) | - ((_odd_parity_bit(opcode) << 23))); -} - /* * code to find stuff in /proc/device-tree: * diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 6b1024b8d3a..0557f0ffca2 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -216,20 +216,8 @@ fd_calc_guardband(unsigned x) return 511 - ((l - 8) * 65); } -#define LOG_DWORDS 0 - static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx); -static 
inline void -OUT_RING(struct fd_ringbuffer *ring, uint32_t data) -{ - if (LOG_DWORDS) { - DBG("ring[%p]: OUT_RING %04x: %08x", ring, - (uint32_t)(ring->cur - ring->start), data); - } - fd_ringbuffer_emit(ring, data); -} - /* like OUT_RING() but appends a cmdstream patch point to 'buf' */ static inline void OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, @@ -245,133 +233,6 @@ OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, })); } -/* - * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+ - */ - -static inline void -__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift, uint32_t flags) -{ - if (LOG_DWORDS) { - DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, - (uint32_t)(ring->cur - ring->start), bo, offset, shift); - } - debug_assert(offset < fd_bo_size(bo)); - fd_ringbuffer_reloc(ring, &(struct fd_reloc){ - .bo = bo, - .flags = flags, - .offset = offset, - .or = or, - .shift = shift, - .orhi = or >> 32, - }); -} - -static inline void -OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift) -{ - __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ); -} - -static inline void -OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift) -{ - __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE); -} - -static inline void -OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint64_t or, int32_t shift) -{ - __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP); -} - -static inline void -OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) -{ - fd_ringbuffer_emit_reloc_ring_full(ring, target, 0); -} - -static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) -{ - if (unlikely(ring->cur + ndwords > ring->end)) - fd_ringbuffer_grow(ring, ndwords); -} - -static inline void -OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t 
cnt) -{ - BEGIN_RING(ring, cnt+1); - OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); -} - -static inline void -OUT_PKT2(struct fd_ringbuffer *ring) -{ - BEGIN_RING(ring, 1); - OUT_RING(ring, CP_TYPE2_PKT); -} - -static inline void -OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) -{ - BEGIN_RING(ring, cnt+1); - OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); -} - -/* - * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3 - */ - -static inline unsigned -_odd_parity_bit(unsigned val) -{ - /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel - * note that we want odd parity so 0x6996 is inverted. - */ - val ^= val >> 16; - val ^= val >> 8; - val ^= val >> 4; - val &= 0xf; - return (~0x6996 >> val) & 1; -} - -static inline void -OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) -{ - BEGIN_RING(ring, cnt+1); - OUT_RING(ring, CP_TYPE4_PKT | cnt | - (_odd_parity_bit(cnt) << 7) | - ((regindx & 0x3ffff) << 8) | - ((_odd_parity_bit(regindx) << 27))); -} - -static inline void -OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) -{ - BEGIN_RING(ring, cnt+1); - OUT_RING(ring, CP_TYPE7_PKT | cnt | - (_odd_parity_bit(cnt) << 15) | - ((opcode & 0x7f) << 16) | - ((_odd_parity_bit(opcode) << 23))); -} - -static inline void -OUT_WFI(struct fd_ringbuffer *ring) -{ - OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1); - OUT_RING(ring, 0x00000000); -} - -static inline void -OUT_WFI5(struct fd_ringbuffer *ring) -{ - OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); -} - static inline void __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target) {