X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Fvc4%2Fvc4_cl.h;h=39d1d347bbad22164a886de97d1fc254b44e7a6b;hb=cbfc13b27c4ab30037c42b75bf6f7db17ff3d338;hp=4a50e790942177fe35a18858ca32f315bc400fa2;hpb=5fbbec9aae8185b96aa4cf6d778901dea44fefa4;p=mesa.git

diff --git a/src/gallium/drivers/vc4/vc4_cl.h b/src/gallium/drivers/vc4/vc4_cl.h
index 4a50e790942..39d1d347bba 100644
--- a/src/gallium/drivers/vc4/vc4_cl.h
+++ b/src/gallium/drivers/vc4/vc4_cl.h
@@ -29,157 +29,262 @@
 #include "util/u_math.h"
 #include "util/macros.h"
 
-#include "kernel/vc4_packet.h"
-
 struct vc4_bo;
+struct vc4_job;
+struct vc4_cl;
+
+/**
+ * Undefined structure, used for typechecking that you're passing the pointers
+ * to these functions correctly.
+ */
+struct vc4_cl_out;
+
+/** A reference to a BO used in the CL packing functions */
+struct vc4_cl_reloc {
+        struct vc4_bo *bo;
+        uint32_t offset;
+};
+
+static inline void cl_pack_emit_reloc(struct vc4_cl *cl, const struct vc4_cl_reloc *);
+
+#define __gen_user_data struct vc4_cl
+#define __gen_address_type struct vc4_cl_reloc
+#define __gen_address_offset(reloc) ((reloc)->offset)
+#define __gen_emit_reloc cl_pack_emit_reloc
+
+#include "kernel/vc4_packet.h"
+#include "broadcom/cle/v3d_packet_v21_pack.h"
 
 struct vc4_cl {
         void *base;
-        void *next;
+        struct vc4_job *job;
+        struct vc4_cl_out *next;
+        struct vc4_cl_out *reloc_next;
         uint32_t size;
-        uint32_t reloc_next;
+#ifndef NDEBUG
         uint32_t reloc_count;
+#endif
 };
 
-void vc4_init_cl(struct vc4_context *vc4, struct vc4_cl *cl);
+void vc4_init_cl(struct vc4_job *job, struct vc4_cl *cl);
 void vc4_reset_cl(struct vc4_cl *cl);
-void vc4_dump_cl(void *cl, uint32_t size, bool is_render);
-uint32_t vc4_gem_hindex(struct vc4_context *vc4, struct vc4_bo *bo);
+uint32_t vc4_gem_hindex(struct vc4_job *job, struct vc4_bo *bo);
 
 struct PACKED unaligned_16 { uint16_t x; };
 struct PACKED unaligned_32 { uint32_t x; };
 
-static inline void
-put_unaligned_32(void *ptr, uint32_t val)
+static inline uint32_t cl_offset(struct vc4_cl *cl)
 {
-        struct unaligned_32 *p = ptr;
-        p->x = val;
+        return (char *)cl->next - (char *)cl->base;
 }
 
 static inline void
-put_unaligned_16(void *ptr, uint16_t val)
+cl_advance(struct vc4_cl_out **cl, uint32_t n)
 {
-        struct unaligned_16 *p = ptr;
-        p->x = val;
+        (*cl) = (struct vc4_cl_out *)((char *)(*cl) + n);
 }
 
-static inline void
-cl_u8(struct vc4_cl *cl, uint8_t n)
+static inline struct vc4_cl_out *
+cl_start(struct vc4_cl *cl)
 {
-        assert((cl->next - cl->base) + 1 <= cl->size);
-
-        *(uint8_t *)cl->next = n;
-        cl->next++;
+        return cl->next;
 }
 
 static inline void
-cl_u16(struct vc4_cl *cl, uint16_t n)
+cl_end(struct vc4_cl *cl, struct vc4_cl_out *next)
 {
-        assert((cl->next - cl->base) + 2 <= cl->size);
-
-        put_unaligned_16(cl->next, n);
-        cl->next += 2;
+        cl->next = next;
+        assert(cl_offset(cl) <= cl->size);
 }
 
+
 static inline void
-cl_u32(struct vc4_cl *cl, uint32_t n)
+put_unaligned_32(struct vc4_cl_out *ptr, uint32_t val)
 {
-        assert((cl->next - cl->base) + 4 <= cl->size);
-
-        put_unaligned_32(cl->next, n);
-        cl->next += 4;
+        struct unaligned_32 *p = (void *)ptr;
+        p->x = val;
 }
 
 static inline void
-cl_aligned_u32(struct vc4_cl *cl, uint32_t n)
+put_unaligned_16(struct vc4_cl_out *ptr, uint16_t val)
 {
-        assert((cl->next - cl->base) + 4 <= cl->size);
-
-        *(uint32_t *)cl->next = n;
-        cl->next += 4;
+        struct unaligned_16 *p = (void *)ptr;
+        p->x = val;
 }
 
 static inline void
-cl_ptr(struct vc4_cl *cl, void *ptr)
+cl_u8(struct vc4_cl_out **cl, uint8_t n)
 {
-        assert((cl->next - cl->base) + sizeof(void *) <= cl->size);
+        *(uint8_t *)(*cl) = n;
+        cl_advance(cl, 1);
+}
 
-        *(void **)cl->next = ptr;
-        cl->next += sizeof(void *);
+static inline void
+cl_u16(struct vc4_cl_out **cl, uint16_t n)
+{
+        put_unaligned_16(*cl, n);
+        cl_advance(cl, 2);
 }
 
 static inline void
-cl_f(struct vc4_cl *cl, float f)
+cl_u32(struct vc4_cl_out **cl, uint32_t n)
 {
-        cl_u32(cl, fui(f));
+        put_unaligned_32(*cl, n);
+        cl_advance(cl, 4);
 }
 
 static inline void
-cl_aligned_f(struct vc4_cl *cl, float f)
+cl_aligned_u32(struct vc4_cl_out **cl, uint32_t n)
 {
-        cl_aligned_u32(cl, fui(f));
+        *(uint32_t *)(*cl) = n;
+        cl_advance(cl, 4);
 }
 
 static inline void
-cl_start_reloc(struct vc4_cl *cl, uint32_t n)
+cl_ptr(struct vc4_cl_out **cl, void *ptr)
 {
-        assert(n == 1 || n == 2);
-        assert(cl->reloc_count == 0);
-        cl->reloc_count = n;
+        *(struct vc4_cl_out **)(*cl) = ptr;
+        cl_advance(cl, sizeof(void *));
+}
 
-        cl_u8(cl, VC4_PACKET_GEM_HANDLES);
-        cl->reloc_next = cl->next - cl->base;
-        cl_u32(cl, 0); /* Space where hindex will be written. */
-        cl_u32(cl, 0); /* Space where hindex will be written. */
+static inline void
+cl_f(struct vc4_cl_out **cl, float f)
+{
+        cl_u32(cl, fui(f));
 }
 
 static inline void
+cl_aligned_f(struct vc4_cl_out **cl, float f)
+{
+        cl_aligned_u32(cl, fui(f));
+}
+
+static inline struct vc4_cl_out *
 cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
 {
         assert(cl->reloc_count == 0);
+#ifndef NDEBUG
         cl->reloc_count = n;
-        cl->reloc_next = cl->next - cl->base;
+#endif
+        cl->reloc_next = cl->next;
 
-        /* Space where hindex will be written. */
-        cl->next += n * 4;
+        /* Reserve the space where hindex will be written. */
+        cl_advance(&cl->next, n * 4);
+
+        return cl->next;
 }
 
 static inline void
-cl_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
+cl_reloc(struct vc4_job *job, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
+         struct vc4_bo *bo, uint32_t offset)
 {
-        *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
-        cl->reloc_next += 4;
+        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
+        cl_advance(&cl->reloc_next, 4);
 
+#ifndef NDEBUG
         cl->reloc_count--;
+#endif
 
-        cl_u32(cl, offset);
+        cl_u32(cl_out, offset);
 }
 
 static inline void
-cl_aligned_reloc_hindex(struct vc4_cl *cl, uint32_t hindex, uint32_t offset)
+cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
+                 struct vc4_cl_out **cl_out,
+                 struct vc4_bo *bo, uint32_t offset)
 {
-        *(uint32_t *)(cl->base + cl->reloc_next) = hindex;
-        cl->reloc_next += 4;
+        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
+        cl_advance(&cl->reloc_next, 4);
 
+#ifndef NDEBUG
         cl->reloc_count--;
+#endif
 
-        cl_aligned_u32(cl, offset);
+        cl_aligned_u32(cl_out, offset);
 }
 
-static inline void
-cl_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
-         struct vc4_bo *bo, uint32_t offset)
+/**
+ * Reference to a BO with its associated offset, used in the pack process.
+ */
+static inline struct vc4_cl_reloc
+cl_address(struct vc4_bo *bo, uint32_t offset)
 {
-        cl_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
+        struct vc4_cl_reloc reloc = {
+                .bo = bo,
+                .offset = offset,
+        };
+        return reloc;
 }
 
-static inline void
-cl_aligned_reloc(struct vc4_context *vc4, struct vc4_cl *cl,
-                 struct vc4_bo *bo, uint32_t offset)
+void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
+
+#define cl_packet_header(packet) V3D21_ ## packet ## _header
+#define cl_packet_length(packet) V3D21_ ## packet ## _length
+#define cl_packet_pack(packet) V3D21_ ## packet ## _pack
+#define cl_packet_struct(packet) V3D21_ ## packet
+
+static inline void *
+cl_get_emit_space(struct vc4_cl_out **cl, size_t size)
 {
-        cl_aligned_reloc_hindex(cl, vc4_gem_hindex(vc4, bo), offset);
+        void *addr = *cl;
+        cl_advance(cl, size);
+        return addr;
 }
 
-void cl_ensure_space(struct vc4_cl *cl, uint32_t size);
+/* Macro for setting up an emit of a CL struct.  A temporary unpacked struct
+ * is created, which you get to set fields in of the form:
+ *
+ * cl_emit(bcl, FLAT_SHADE_FLAGS, flags) {
+ *         .flags.flat_shade_flags = 1 << 2,
+ * }
+ *
+ * or default values only can be emitted with just:
+ *
+ * cl_emit(bcl, FLAT_SHADE_FLAGS, flags);
+ *
+ * The trick here is that we make a for loop that will execute the body
+ * (either the block or the ';' after the macro invocation) exactly once.
+ * Also, *dst is actually of the wrong type, it's the
+ * uint8_t[cl_packet_length()] in the CL, not a cl_packet_struct(packet).
+ */
+#define cl_emit(cl, packet, name)                                       \
+        for (struct cl_packet_struct(packet) name = {                   \
+                cl_packet_header(packet)                                \
+        },                                                              \
+        *_loop_terminate = &name;                                       \
+        __builtin_expect(_loop_terminate != NULL, 1);                   \
+        ({                                                              \
+                struct vc4_cl_out *cl_out = cl_start(cl);               \
+                cl_packet_pack(packet)(cl, (uint8_t *)cl_out, &name);   \
+                VG(VALGRIND_CHECK_MEM_IS_DEFINED(cl_out,                \
+                                                 cl_packet_length(packet))); \
+                cl_advance(&cl_out, cl_packet_length(packet));          \
+                cl_end(cl, cl_out);                                     \
+                _loop_terminate = NULL;                                 \
+        }))                                                             \
+
+#define cl_emit_prepacked(cl, packet) do {                              \
+        memcpy((cl)->next, packet, sizeof(*packet));                    \
+        cl_advance(&(cl)->next, sizeof(*packet));                       \
+} while (0)
+
+/**
+ * Helper function called by the XML-generated pack functions for filling in
+ * an address field in shader records.
+ *
+ * Relocations for shader recs and texturing involve the packet (or uniforms
+ * stream) being preceded by the handles to the BOs, and the offset within the
+ * BO being in the stream (the output of this function).
+ */
+static inline void
+cl_pack_emit_reloc(struct vc4_cl *cl, const struct vc4_cl_reloc *reloc)
+{
+        *(uint32_t *)cl->reloc_next = vc4_gem_hindex(cl->job, reloc->bo);
+        cl_advance(&cl->reloc_next, 4);
+
+#ifndef NDEBUG
+        cl->reloc_count--;
+#endif
+}
 
 #endif /* VC4_CL_H */
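Usage sketch (not part of the patch above): with this header, a caller packs packets through cl_emit() against the XML-generated V3D21 definitions instead of hand-rolling cl_u8()/cl_u32() sequences. The fragment below is only a minimal sketch; it assumes a struct vc4_job whose binner CL is a struct vc4_cl member named bcl, and it borrows the FLAT_SHADE_FLAGS packet and flat_shade_flags field names from the comment in the header, which may not match the exact names generated into v3d_packet_v21_pack.h.

/* Illustrative only: emit one packet into a job's binner CL with the
 * pack-based API.  "job->bcl" and the FLAT_SHADE_FLAGS field name are
 * assumptions taken from the header comment, not verified against the
 * generated header.
 */
static void
emit_flat_shade_flags_example(struct vc4_job *job)
{
        /* Grow the CL if needed so the pack below has room to land. */
        cl_ensure_space(&job->bcl, 64);

        /* The block after cl_emit() runs exactly once; fields of the
         * temporary unpacked struct "flags" are filled in, then the macro
         * packs it into the CL and advances cl->next via cl_end().
         */
        cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) {
                flags.flat_shade_flags = 1 << 2;
        }

        /* Default values only would be:
         * cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags);
         */
}

The for-loop trick in cl_emit() is what makes this shape possible: the caller's block (or the bare ';') is the loop body, so the unpacked struct is in scope exactly once and no explicit begin/end pair is needed.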