A standalone tool to compile and run compute shaders from ir3 assembly,
mostly intended as an easy way to experiment with instructions.
Signed-off-by: Rob Clark <robdclark@chromium.org>
Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3926>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3926>
--- /dev/null
+Overview
+========
+
+Computerator is a tool to launch compute shaders written in ir3 assembly.
+The main purpose is to have an easy way to experiment with instructions
+without dealing with the entire compiler stack, which makes it difficult
+to control the order of instructions, the registers chosen, and so on.
+Compute shaders were chosen simply because they require far less state
+setup.
+
+Headers
+-------
+
+The shader assembly can be prefixed with headers to control state setup:
+
+* ``@localsize X, Y, Z`` - configures the local workgroup size
+* ``@buf SZ`` - configures an SSBO of the specified size (in dwords).
+  The order of the ``@buf`` headers determines the index, i.e. the first
+  ``@buf`` header is ``g[0]``, the second ``g[1]``, and so on (see the
+  two-buffer sketch below)
+* ``@const(cN.c)`` configures a const vec4 starting at the specified
+  const register, i.e. ``@const(c1.x) 1.0, 2.0, 3.0, 4.0`` will populate
+  ``c1.xyzw`` with ``vec4(1.0, 2.0, 3.0, 4.0)``
+* ``@invocationid(rN.c)`` will populate a vec3 starting at the specified
+  register with the local invocation-id
+* ``@wgid(rN.c)`` will populate a vec3 starting at the specified register
+  with the workgroup-id (must be a high register, i.e. ``r48.x`` or above)
+* ``@numwg(cN.c)`` will populate a vec3 starting at the specified const
+  register with the number of workgroups
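+
+For instance, a hypothetical setup with two buffers (sizes chosen only
+for illustration) maps them to ``g[0]`` and ``g[1]`` in the order the
+headers appear:
+
+```
+@buf 16 ; g[0]
+@buf 64 ; g[1]
+```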
+
+Example
+-------
+
+```
+@localsize 32, 1, 1
+@buf 32 ; g[0]
+@const(c0.x) 0.0, 0.0, 0.0, 0.0
+@const(c1.x) 1.0, 2.0, 3.0, 4.0
+@wgid(r48.x) ; r48.xyz
+@invocationid(r0.x) ; r0.xyz
+@numwg(c2.x) ; c2.xyz
+mov.u32u32 r0.y, r0.x
+(rpt5)nop
+stib.untyped.1d.u32.1 g[0] + r0.y, r0.x
+end
+nop
+```
+
+Usage
+-----
+
+```
+cat myshader.asm | ./computerator --disasm --groups=4,4,4
+```
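+
+The assembly can also be read from a file instead of stdin, and if
+``--groups`` is omitted the shader is only assembled (and, with
+``--disasm``, disassembled) without being launched:
+
+```
+./computerator --file=myshader.asm --disasm
+```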
+
--- /dev/null
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir3/ir3_compiler.h"
+
+#include "util/u_math.h"
+
+#include "registers/adreno_pm4.xml.h"
+#include "registers/adreno_common.xml.h"
+#include "registers/a6xx.xml.h"
+
+#include "main.h"
+#include "ir3_asm.h"
+
+struct a6xx_backend {
+ struct backend base;
+
+ struct ir3_compiler *compiler;
+ struct fd_device *dev;
+
+ unsigned seqno;
+ struct fd_bo *control_mem;
+};
+define_cast(backend, a6xx_backend);
+
+/* This struct defines the layout of the fd6_context::control buffer: */
+struct fd6_control {
+ uint32_t seqno; /* seqno for async CP_EVENT_WRITE, etc */
+ uint32_t _pad0;
+ volatile uint32_t vsc_overflow;
+ uint32_t _pad1;
+ /* flag set from cmdstream when VSC overflow detected: */
+ uint32_t vsc_scratch;
+ uint32_t _pad2;
+ uint32_t _pad3;
+ uint32_t _pad4;
+
+ /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, start on 32 byte boundary. */
+ struct {
+ uint32_t offset;
+ uint32_t pad[7];
+ } flush_base[4];
+};
+
+#define control_ptr(a6xx_backend, member) \
+ (a6xx_backend)->control_mem, offsetof(struct fd6_control, member), 0, 0
+
+static struct kernel *
+a6xx_assemble(struct backend *b, FILE *in)
+{
+ struct a6xx_backend *a6xx_backend = to_a6xx_backend(b);
+ struct ir3_kernel *ir3_kernel =
+ ir3_asm_assemble(a6xx_backend->compiler, in);
+ ir3_kernel->backend = b;
+ return &ir3_kernel->base;
+}
+
+static void
+a6xx_disassemble(struct kernel *kernel, FILE *out)
+{
+ ir3_asm_disassemble(to_ir3_kernel(kernel), out);
+}
+
+static void
+cs_program_emit(struct fd_ringbuffer *ring, struct kernel *kernel)
+{
+ struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel);
+ struct ir3_shader_variant *v = ir3_kernel->v;
+ const struct ir3_info *i = &v->info;
+ enum a3xx_threadsize thrsz = FOUR_QUADS;
+
+ OUT_PKT4(ring, REG_A6XX_HLSQ_UPDATE_CNTL, 1);
+ OUT_RING(ring, 0xff);
+
+ unsigned constlen = align(v->constlen, 4);
+ OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL, 1);
+ OUT_RING(ring, A6XX_HLSQ_CS_CNTL_CONSTLEN(constlen) |
+ A6XX_HLSQ_CS_CNTL_ENABLED);
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_CONFIG, 2);
+ OUT_RING(ring, A6XX_SP_CS_CONFIG_ENABLED |
+ A6XX_SP_CS_CONFIG_NIBO(kernel->num_bufs) |
+ A6XX_SP_CS_CONFIG_NTEX(v->num_samp) |
+		A6XX_SP_CS_CONFIG_NSAMP(v->num_samp));	/* SP_CS_CONFIG */
+	OUT_RING(ring, v->instrlen);		/* SP_CS_INSTRLEN */
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_CTRL_REG0, 1);
+ OUT_RING(ring, A6XX_SP_CS_CTRL_REG0_THREADSIZE(thrsz) |
+ A6XX_SP_CS_CTRL_REG0_FULLREGFOOTPRINT(i->max_reg + 1) |
+ A6XX_SP_CS_CTRL_REG0_MERGEDREGS |
+ A6XX_SP_CS_CTRL_REG0_BRANCHSTACK(v->branchstack) |
+ COND(v->need_pixlod, A6XX_SP_CS_CTRL_REG0_PIXLODENABLE));
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1);
+ OUT_RING(ring, 0x41);
+
+ uint32_t local_invocation_id, work_group_id;
+ local_invocation_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
+ work_group_id = ir3_find_sysval_regid(v, SYSTEM_VALUE_WORK_GROUP_ID);
+
+ OUT_PKT4(ring, REG_A6XX_HLSQ_CS_CNTL_0, 2);
+ OUT_RING(ring, A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) |
+ A6XX_HLSQ_CS_CNTL_0_UNK0(regid(63, 0)) |
+ A6XX_HLSQ_CS_CNTL_0_UNK1(regid(63, 0)) |
+ A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id));
+ OUT_RING(ring, 0x2fc); /* HLSQ_CS_UNKNOWN_B998 */
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START_LO, 2);
+ OUT_RELOC(ring, v->bo, 0, 0, 0); /* SP_CS_OBJ_START_LO/HI */
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_INSTRLEN, 1);
+ OUT_RING(ring, v->instrlen);
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_OBJ_START_LO, 2);
+ OUT_RELOC(ring, v->bo, 0, 0, 0);
+
+ OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3);
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
+ CP_LOAD_STATE6_0_NUM_UNIT(v->instrlen));
+ OUT_RELOCD(ring, v->bo, 0, 0, 0);
+}
+
+static void
+emit_const(struct fd_ringbuffer *ring, uint32_t regid,
+ uint32_t sizedwords, const uint32_t *dwords)
+{
+ uint32_t align_sz;
+
+ debug_assert((regid % 4) == 0);
+
+ align_sz = align(sizedwords, 4);
+
+ OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3 + align_sz);
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(regid/4) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
+ CP_LOAD_STATE6_0_NUM_UNIT(DIV_ROUND_UP(sizedwords, 4)));
+ OUT_RING(ring, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
+ OUT_RING(ring, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
+
+ for (uint32_t i = 0; i < sizedwords; i++) {
+ OUT_RING(ring, dwords[i]);
+ }
+
+ /* Zero-pad to multiple of 4 dwords */
+ for (uint32_t i = sizedwords; i < align_sz; i++) {
+ OUT_RING(ring, 0);
+ }
+}
+
+
+static void
+cs_const_emit(struct fd_ringbuffer *ring, struct kernel *kernel, uint32_t grid[3])
+{
+ struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel);
+ struct ir3_shader_variant *v = ir3_kernel->v;
+
+	struct ir3_const_state *const_state = &v->shader->const_state;
+ uint32_t base = const_state->offsets.immediate;
+ int size = const_state->immediates_count;
+
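+	/* If the @numwg header reserved an immediate const slot, plug the
+	 * actual workgroup count (grid size) into it before uploading:
+	 */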
+ if (ir3_kernel->numwg != INVALID_REG) {
+ assert((ir3_kernel->numwg & 0x3) == 0);
+ int idx = ir3_kernel->numwg >> 2;
+ const_state->immediates[idx].val[0] = grid[0];
+ const_state->immediates[idx].val[1] = grid[1];
+ const_state->immediates[idx].val[2] = grid[2];
+ }
+
+ /* truncate size to avoid writing constants that shader
+ * does not use:
+ */
+ size = MIN2(size + base, v->constlen) - base;
+
+ /* convert out of vec4: */
+ base *= 4;
+ size *= 4;
+
+ if (size > 0) {
+ emit_const(ring, base, size, const_state->immediates[0].val);
+ }
+}
+
+static void
+cs_ibo_emit(struct fd_ringbuffer *ring, struct fd_submit *submit,
+ struct kernel *kernel)
+{
+ struct fd_ringbuffer *state =
+ fd_submit_new_ringbuffer(submit,
+ kernel->num_bufs * 16 * 4,
+ FD_RINGBUFFER_STREAMING);
+
+ for (unsigned i = 0; i < kernel->num_bufs; i++) {
+ /* size is encoded with low 15b in WIDTH and high bits in HEIGHT,
+ * in units of elements:
+ */
+ unsigned sz = kernel->buf_sizes[i];
+ unsigned width = sz & MASK(15);
+ unsigned height = sz >> 15;
+
+ OUT_RING(state, A6XX_IBO_0_FMT(FMT6_32_UINT) |
+ A6XX_IBO_0_TILE_MODE(0));
+ OUT_RING(state, A6XX_IBO_1_WIDTH(width) |
+ A6XX_IBO_1_HEIGHT(height));
+ OUT_RING(state, A6XX_IBO_2_PITCH(0) |
+ A6XX_IBO_2_UNK4 | A6XX_IBO_2_UNK31 |
+ A6XX_IBO_2_TYPE(A6XX_TEX_1D));
+ OUT_RING(state, A6XX_IBO_3_ARRAY_PITCH(0));
+ OUT_RELOCW(state, kernel->bufs[i], 0, 0, 0);
+ OUT_RING(state, 0x00000000);
+ OUT_RING(state, 0x00000000);
+ OUT_RING(state, 0x00000000);
+ OUT_RING(state, 0x00000000);
+ OUT_RING(state, 0x00000000);
+ OUT_RING(state, 0x00000000);
+ OUT_RING(state, 0x00000000);
+ OUT_RING(state, 0x00000000);
+ OUT_RING(state, 0x00000000);
+ OUT_RING(state, 0x00000000);
+ }
+
+ OUT_PKT7(ring, CP_LOAD_STATE6_FRAG, 3);
+ OUT_RING(ring, CP_LOAD_STATE6_0_DST_OFF(0) |
+ CP_LOAD_STATE6_0_STATE_TYPE(ST6_IBO) |
+ CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
+ CP_LOAD_STATE6_0_STATE_BLOCK(SB6_CS_SHADER) |
+ CP_LOAD_STATE6_0_NUM_UNIT(kernel->num_bufs));
+ OUT_RB(ring, state);
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_LO, 2);
+ OUT_RB(ring, state);
+
+ OUT_PKT4(ring, REG_A6XX_SP_CS_IBO_COUNT, 1);
+ OUT_RING(ring, kernel->num_bufs);
+
+ fd_ringbuffer_del(state);
+}
+
+static inline unsigned
+event_write(struct fd_ringbuffer *ring, struct kernel *kernel,
+ enum vgt_event_type evt, bool timestamp)
+{
+ unsigned seqno = 0;
+
+ OUT_PKT7(ring, CP_EVENT_WRITE, timestamp ? 4 : 1);
+ OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(evt));
+ if (timestamp) {
+ struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel);
+ struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend);
+ seqno = ++a6xx_backend->seqno;
+ OUT_RELOCW(ring, control_ptr(a6xx_backend, seqno)); /* ADDR_LO/HI */
+ OUT_RING(ring, seqno);
+ }
+
+ return seqno;
+}
+
+static inline void
+cache_flush(struct fd_ringbuffer *ring, struct kernel *kernel)
+{
+ struct ir3_kernel *ir3_kernel = to_ir3_kernel(kernel);
+ struct a6xx_backend *a6xx_backend = to_a6xx_backend(ir3_kernel->backend);
+ unsigned seqno;
+
+ seqno = event_write(ring, kernel, CACHE_FLUSH_AND_INV_EVENT, true);
+
+ OUT_PKT7(ring, CP_WAIT_REG_MEM, 6);
+ OUT_RING(ring, CP_WAIT_REG_MEM_0_FUNCTION(WRITE_EQ) |
+ CP_WAIT_REG_MEM_0_POLL_MEMORY);
+ OUT_RELOC(ring, control_ptr(a6xx_backend, seqno));
+ OUT_RING(ring, CP_WAIT_REG_MEM_3_REF(seqno));
+ OUT_RING(ring, CP_WAIT_REG_MEM_4_MASK(~0));
+ OUT_RING(ring, CP_WAIT_REG_MEM_5_DELAY_LOOP_CYCLES(16));
+
+ seqno = event_write(ring, kernel, CACHE_FLUSH_TS, true);
+
+ OUT_PKT7(ring, CP_WAIT_MEM_GTE, 4);
+ OUT_RING(ring, CP_WAIT_MEM_GTE_0_RESERVED(0));
+ OUT_RELOC(ring, control_ptr(a6xx_backend, seqno));
+ OUT_RING(ring, CP_WAIT_MEM_GTE_3_REF(seqno));
+}
+
+static void
+a6xx_emit_grid(struct kernel *kernel, uint32_t grid[3], struct fd_submit *submit)
+{
+ struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0,
+ FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);
+
+ cs_program_emit(ring, kernel);
+ cs_const_emit(ring, kernel, grid);
+ cs_ibo_emit(ring, submit, kernel);
+
+ OUT_PKT7(ring, CP_SET_MARKER, 1);
+ OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_COMPUTE));
+
+ const unsigned *local_size = kernel->local_size;
+ const unsigned *num_groups = grid;
+
+ unsigned work_dim = 0;
+ for (int i = 0; i < 3; i++) {
+ if (!grid[i])
+ break;
+ work_dim++;
+ }
+
+ OUT_PKT4(ring, REG_A6XX_HLSQ_CS_NDRANGE_0, 7);
+ OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_0_KERNELDIM(work_dim) |
+ A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEX(local_size[0] - 1) |
+ A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEY(local_size[1] - 1) |
+ A6XX_HLSQ_CS_NDRANGE_0_LOCALSIZEZ(local_size[2] - 1));
+ OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_1_GLOBALSIZE_X(local_size[0] * num_groups[0]));
+ OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_2_GLOBALOFF_X */
+ OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_3_GLOBALSIZE_Y(local_size[1] * num_groups[1]));
+ OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_4_GLOBALOFF_Y */
+ OUT_RING(ring, A6XX_HLSQ_CS_NDRANGE_5_GLOBALSIZE_Z(local_size[2] * num_groups[2]));
+ OUT_RING(ring, 0); /* HLSQ_CS_NDRANGE_6_GLOBALOFF_Z */
+
+ OUT_PKT4(ring, REG_A6XX_HLSQ_CS_KERNEL_GROUP_X, 3);
+ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_X */
+ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Y */
+ OUT_RING(ring, 1); /* HLSQ_CS_KERNEL_GROUP_Z */
+
+ OUT_PKT7(ring, CP_EXEC_CS, 4);
+ OUT_RING(ring, 0x00000000);
+ OUT_RING(ring, CP_EXEC_CS_1_NGROUPS_X(grid[0]));
+ OUT_RING(ring, CP_EXEC_CS_2_NGROUPS_Y(grid[1]));
+ OUT_RING(ring, CP_EXEC_CS_3_NGROUPS_Z(grid[2]));
+
+ OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);
+
+ cache_flush(ring, kernel);
+}
+
+struct backend *
+a6xx_init(struct fd_device *dev, uint32_t gpu_id)
+{
+ struct a6xx_backend *a6xx_backend = calloc(1, sizeof(*a6xx_backend));
+
+ a6xx_backend->base = (struct backend) {
+ .assemble = a6xx_assemble,
+ .disassemble = a6xx_disassemble,
+ .emit_grid = a6xx_emit_grid,
+ };
+
+ a6xx_backend->compiler = ir3_compiler_create(dev, gpu_id);
+ a6xx_backend->dev = dev;
+
+ a6xx_backend->control_mem = fd_bo_new(dev, 0x1000,
+ DRM_FREEDRENO_GEM_TYPE_KMEM, "control");
+
+ return &a6xx_backend->base;
+}
--- /dev/null
+@localsize 32, 1, 1
+@buf 32 ; g[0]
+@const(c0.x) 0.0, 0.0, 0.0, 0.0
+@const(c1.x) 1.0, 2.0, 3.0, 4.0
+@wgid(r48.x) ; r48.xyz
+@invocationid(r0.x) ; r0.xyz
+@numwg(c2.x) ; c2.xyz
+mov.u32u32 r0.y, r0.x
+(rpt5)nop
+stib.untyped.1d.u32.1 g[0] + r0.y, r0.x
+end
+nop
+
--- /dev/null
+@localsize 1, 1, 1
+@buf 4 ; g[0]
+@const(c0.x) 0.0, 0.0, 0.0, 0.0
+@const(c1.x) 1.0, 2.0, 3.0, 4.0
+@wgid(r48.x) ; r48.xyz
+@invocationid(r0.x) ; r0.xyz
+@numwg(c2.x) ; c2.xyz
+mov.f32f32 r2.x, c0.y
+mov.u32u32 r0.x, 0x12345678
+mov.u32u32 r0.y, 0x12345678
+mov.u32u32 r0.z, 0x12345678
+add.u r2.x, c0.x, r2.x
+mov.u32u32 r0.w, 0x12345678
+mov.u32u32 r1.x, 0x12345678
+mov.u32u32 r1.y, 0x12345678
+cov.u32s16 hr4.x, r2.x
+mov.u32u32 r1.z, 0x12345678
+mov.u32u32 r1.w, 0x12345678
+nop
+mova a0.x, hr4.x
+(rpt5)nop
+(ul)mov.u32u32 r0.x, r<a0.x>
+mov.u32u32 r0.y, 0x00000000
+(rpt5)nop
+stib.untyped.1d.u32.1 g[0] + r0.y, r0.x
+end
+nop
+
--- /dev/null
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir3/ir3_compiler.h"
+
+#include "ir3_asm.h"
+#include "ir3_parser.h"
+
+struct ir3_kernel *
+ir3_asm_assemble(struct ir3_compiler *c, FILE *in)
+{
+ struct ir3_kernel *kernel = calloc(1, sizeof(*kernel));
+
+ struct ir3_shader *shader = calloc(1, sizeof(*shader));
+ shader->compiler = c;
+ shader->type = MESA_SHADER_COMPUTE;
+
+ struct ir3_shader_variant *v = calloc(1, sizeof(*v));
+ v->type = MESA_SHADER_COMPUTE;
+ v->shader = shader;
+
+ kernel->v = v;
+
+ kernel->numwg = INVALID_REG;
+
+ v->ir = ir3_parse(kernel, in);
+ if (!v->ir)
+ errx(-1, "parse failed");
+
+ ir3_debug_print(v->ir, "AFTER PARSING");
+
+ kernel->bin = ir3_shader_assemble(v, c->gpu_id);
+
+ unsigned sz = v->info.sizedwords * 4;
+
+ v->bo = fd_bo_new(c->dev, sz,
+ DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
+ DRM_FREEDRENO_GEM_TYPE_KMEM,
+ "%s", ir3_shader_stage(v));
+
+ memcpy(fd_bo_map(v->bo), kernel->bin, sz);
+
+ return kernel;
+}
+
+void
+ir3_asm_disassemble(struct ir3_kernel *k, FILE *out)
+{
+ ir3_shader_disasm(k->v, k->bin, out);
+}
--- /dev/null
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __IR3_ASM_H__
+#define __IR3_ASM_H__
+
+#include "main.h"
+
+#include "ir3/ir3_shader.h"
+
+struct ir3_kernel {
+ struct kernel base;
+ struct backend *backend;
+ struct ir3_shader_variant *v;
+ void *bin;
+
+ /* driver-param uniforms: */
+ unsigned numwg;
+};
+define_cast(kernel, ir3_kernel);
+
+struct ir3_kernel *ir3_asm_assemble(struct ir3_compiler *c, FILE *in);
+void ir3_asm_disassemble(struct ir3_kernel *k, FILE *out);
+
+#endif /* __IR3_ASM_H__ */
"@localsize" return TOKEN(T_A_LOCALSIZE);
"@const" return TOKEN(T_A_CONST);
"@buf" return TOKEN(T_A_BUF);
+"@invocationid" return TOKEN(T_A_INVOCATIONID);
+"@wgid" return TOKEN(T_A_WGID);
+"@numwg" return TOKEN(T_A_NUMWG);
"(sy)" return TOKEN(T_SY);
"(ss)" return TOKEN(T_SS);
"(absneg)" return TOKEN(T_ABSNEG);
* SOFTWARE.
*/
+%code requires {
+struct ir3_kernel;
+struct ir3 * ir3_parse(struct ir3_kernel *k, FILE *f);
+}
+
%{
#define YYDEBUG 0
#define IR3_REG_ABS IR3_REG_FABS
#define IR3_REG_NEGATE IR3_REG_FNEG
+static struct ir3_kernel *kernel;
static struct ir3_shader_variant *variant;
/* NOTE the assembler doesn't really use the ir3_block construction
* like the compiler does. Everything is treated as one large block.
return new_reg(0, 0);
}
-static void const_create(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsigned c3)
+static void add_const(unsigned reg, unsigned c0, unsigned c1, unsigned c2, unsigned c3)
{
struct ir3_const_state *const_state = &variant->shader->const_state;
assert((reg & 0x7) == 0);
const_state->immediate_idx++;
}
+static void add_sysval(unsigned reg, unsigned compmask, gl_system_value sysval)
+{
+ unsigned n = variant->inputs_count++;
+ variant->inputs[n].regid = reg;
+ variant->inputs[n].sysval = true;
+ variant->inputs[n].slot = sysval;
+ variant->inputs[n].compmask = compmask;
+ variant->inputs[n].interpolate = INTERP_MODE_FLAT;
+ variant->total_in++;
+}
+
#ifdef YYDEBUG
int yydebug;
#endif
%token <tok> T_A_LOCALSIZE
%token <tok> T_A_CONST
%token <tok> T_A_BUF
+%token <tok> T_A_INVOCATIONID
+%token <tok> T_A_WGID
+%token <tok> T_A_NUMWG
/* todo, re-add @sampler/@uniform/@varying if needed someday */
/* src register flags */
header: localsize_header
| const_header
| buf_header
+| invocationid_header
+| wgid_header
+| numwg_header
const_val: T_FLOAT { $$ = fui($1); }
| T_INT { $$ = $1; }
}
const_header: T_A_CONST '(' T_CONSTANT ')' const_val ',' const_val ',' const_val ',' const_val {
- const_create($3, $5, $7, $9, $11);
+ add_const($3, $5, $7, $9, $11);
}
buf_header: T_A_BUF const_val {
k->buf_sizes[idx] = $2;
}
+invocationid_header: T_A_INVOCATIONID '(' T_REGISTER ')' {
+ assert(($3 & 0x1) == 0); /* half-reg not allowed */
+ unsigned reg = $3 >> 1;
+ add_sysval(reg, 0x7, SYSTEM_VALUE_LOCAL_INVOCATION_ID);
+}
+
+wgid_header: T_A_WGID '(' T_REGISTER ')' {
+ assert(($3 & 0x1) == 0); /* half-reg not allowed */
+ unsigned reg = $3 >> 1;
+ assert(reg >= regid(48, 0)); /* must be a high reg */
+ add_sysval(reg, 0x7, SYSTEM_VALUE_WORK_GROUP_ID);
+}
+
+numwg_header: T_A_NUMWG '(' T_CONSTANT ')' {
+ assert(($3 & 0x1) == 0); /* half-reg not allowed */
+ unsigned reg = $3 >> 1;
+ kernel->numwg = reg;
+ /* reserve space in immediates for the actual value to be plugged in later: */
+ add_const($3, 0, 0, 0, 0);
+}
+
iflag: T_SY { iflags.flags |= IR3_INSTR_SY; }
| T_SS { iflags.flags |= IR3_INSTR_SS; }
| T_JP { iflags.flags |= IR3_INSTR_JP; }
--- /dev/null
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <getopt.h>
+#include <xf86drm.h>
+
+#include "util/u_math.h"
+
+#include "main.h"
+
+
+static void
+dump_float(void *buf, int sz)
+{
+ uint8_t *ptr = (uint8_t *)buf;
+ uint8_t *end = ptr + sz - 3;
+ int i = 0;
+
+ while (ptr < end) {
+ uint32_t d = 0;
+
+ printf((i % 8) ? " " : "\t");
+
+ d |= *(ptr++) << 0;
+ d |= *(ptr++) << 8;
+ d |= *(ptr++) << 16;
+ d |= *(ptr++) << 24;
+
+ printf("%8f", uif(d));
+
+ if ((i % 8) == 7) {
+ printf("\n");
+ }
+
+ i++;
+ }
+
+ if (i % 8) {
+ printf("\n");
+ }
+}
+
+static void
+dump_hex(void *buf, int sz)
+{
+ uint8_t *ptr = (uint8_t *)buf;
+ uint8_t *end = ptr + sz;
+ int i = 0;
+
+ while (ptr < end) {
+ uint32_t d = 0;
+
+ printf((i % 8) ? " " : "\t");
+
+ d |= *(ptr++) << 0;
+ d |= *(ptr++) << 8;
+ d |= *(ptr++) << 16;
+ d |= *(ptr++) << 24;
+
+ printf("%08x", d);
+
+ if ((i % 8) == 7) {
+ printf("\n");
+ }
+
+ i++;
+ }
+
+ if (i % 8) {
+ printf("\n");
+ }
+}
+
+static const char *shortopts = "df:g:h";
+
+static const struct option longopts[] = {
+ {"disasm", no_argument, 0, 'd'},
+ {"file", required_argument, 0, 'f'},
+ {"groups", required_argument, 0, 'g'},
+ {"help", no_argument, 0, 'h'},
+ {0, 0, 0, 0}
+};
+
+static void
+usage(const char *name)
+{
+ printf("Usage: %s [-dfgh]\n"
+ "\n"
+ "options:\n"
+ " -d, --disasm print disassembled shader\n"
+ " -f, --file=FILE read shader from file (instead of stdin)\n"
+ " -g, --groups=X,Y,Z use specified group size\n"
+ " -h, --help show this message\n"
+ ,
+ name);
+}
+
+int
+main(int argc, char **argv)
+{
+ FILE *in = stdin;
+ bool disasm = false;
+ uint32_t grid[3] = {0};
+ int opt, ret;
+
+ while ((opt = getopt_long_only(argc, argv, shortopts, longopts, NULL)) != -1) {
+ switch (opt) {
+ case 'd':
+ disasm = true;
+ break;
+ case 'f':
+ in = fopen(optarg, "r");
+ if (!in)
+ err(1, "could not open '%s'", optarg);
+ break;
+ case 'g':
+ ret = sscanf(optarg, "%u,%u,%u", &grid[0], &grid[1], &grid[2]);
+ if (ret != 3)
+ goto usage;
+ break;
+ case 'h':
+ goto usage;
+ default:
+ printf("unrecognized arg: %c\n", opt);
+ goto usage;
+ }
+ }
+
+ int fd = drmOpen("msm", NULL);
+ if (fd < 0)
+ err(1, "could not open drm device");
+
+ struct fd_device *dev = fd_device_new(fd);
+ struct fd_pipe *pipe = fd_pipe_new(dev, FD_PIPE_3D);
+
+ uint64_t val;
+ fd_pipe_get_param(pipe, FD_GPU_ID, &val);
+ uint32_t gpu_id = val;
+
+ printf("got gpu_id: %u\n", gpu_id);
+
+ struct backend *backend;
+ switch (gpu_id) {
+ case 600 ... 699:
+ backend = a6xx_init(dev, gpu_id);
+ break;
+ default:
+ err(1, "unsupported gpu: a%u", gpu_id);
+ }
+
+ struct kernel *kernel = backend->assemble(backend, in);
+ printf("localsize: %dx%dx%d\n", kernel->local_size[0],
+ kernel->local_size[1], kernel->local_size[2]);
+ for (int i = 0; i < kernel->num_bufs; i++) {
+ printf("buf[%d]: size=%u\n", i, kernel->buf_sizes[i]);
+ kernel->bufs[i] = fd_bo_new(dev, kernel->buf_sizes[i] * 4,
+ DRM_FREEDRENO_GEM_TYPE_KMEM, "buf[%d]", i);
+ }
+
+ if (disasm)
+ backend->disassemble(kernel, stdout);
+
+ if (grid[0] == 0)
+ return 0;
+
+ struct fd_submit *submit = fd_submit_new(pipe);
+
+ backend->emit_grid(kernel, grid, submit);
+
+ fd_submit_flush(submit, -1, NULL, NULL);
+
+ for (int i = 0; i < kernel->num_bufs; i++) {
+ fd_bo_cpu_prep(kernel->bufs[i], pipe, DRM_FREEDRENO_PREP_READ);
+ void *map = fd_bo_map(kernel->bufs[i]);
+
+ printf("buf[%d]:\n", i);
+ dump_hex(map, kernel->buf_sizes[i] * 4);
+ dump_float(map, kernel->buf_sizes[i] * 4);
+ }
+
+ return 0;
+
+usage:
+ usage(argv[0]);
+ return -1;
+}
--- /dev/null
+/*
+ * Copyright © 2020 Google, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef __MAIN_H__
+#define __MAIN_H__
+
+#include <err.h>
+#include <stdint.h>
+#include <stdio.h>
+
+#include "drm/freedreno_drmif.h"
+#include "drm/freedreno_ringbuffer.h"
+
+#include "registers/adreno_pm4.xml.h"
+#include "registers/adreno_common.xml.h"
+
+#define MAX_BUFS 4
+
+struct kernel {
+ /* filled in by backend when shader is assembled: */
+ uint32_t local_size[3];
+ uint32_t num_bufs;
+ uint32_t buf_sizes[MAX_BUFS]; /* size in dwords */
+
+ /* filled in by frontend before launching grid: */
+ struct fd_bo *bufs[MAX_BUFS];
+};
+
+/* per-generation entry-points: */
+struct backend {
+ struct kernel *(*assemble)(struct backend *b, FILE *in);
+ void (*disassemble)(struct kernel *kernel, FILE *out);
+ void (*emit_grid)(struct kernel *kernel, uint32_t grid[3],
+ struct fd_submit *submit);
+};
+
+#define define_cast(_from, _to) \
+static inline struct _to * \
+to_ ## _to(struct _from *f) \
+{ return (struct _to *)f; }
+
+struct backend *a6xx_init(struct fd_device *dev, uint32_t gpu_id);
+
+/*
+ * cmdstream helpers:
+ */
+
+static inline void
+BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
+{
+ if (ring->cur + ndwords > ring->end)
+ fd_ringbuffer_grow(ring, ndwords);
+}
+
+static inline void
+OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
+{
+ fd_ringbuffer_emit(ring, data);
+}
+
+static inline unsigned
+_odd_parity_bit(unsigned val)
+{
+ /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel
+ * note that we want odd parity so 0x6996 is inverted.
+ */
+ val ^= val >> 16;
+ val ^= val >> 8;
+ val ^= val >> 4;
+ val &= 0xf;
+ return (~0x6996 >> val) & 1;
+}
+
+static inline void
+OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
+{
+ BEGIN_RING(ring, cnt+1);
+ OUT_RING(ring, CP_TYPE4_PKT | cnt |
+ (_odd_parity_bit(cnt) << 7) |
+ ((regindx & 0x3ffff) << 8) |
+ ((_odd_parity_bit(regindx) << 27)));
+}
+
+static inline void
+OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
+{
+ BEGIN_RING(ring, cnt+1);
+ OUT_RING(ring, CP_TYPE7_PKT | cnt |
+ (_odd_parity_bit(cnt) << 15) |
+ ((opcode & 0x7f) << 16) |
+ ((_odd_parity_bit(opcode) << 23)));
+}
+
+/*
+ * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+
+ */
+
+static inline void
+__out_reloc(struct fd_ringbuffer *ring, struct fd_bo *bo,
+ uint32_t offset, uint64_t or, int32_t shift, uint32_t flags)
+{
+ debug_assert(offset < fd_bo_size(bo));
+ fd_ringbuffer_reloc(ring, &(struct fd_reloc){
+ .bo = bo,
+ .flags = flags,
+ .offset = offset,
+ .or = or,
+ .shift = shift,
+ .orhi = or >> 32,
+ });
+}
+
+static inline void
+OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
+ uint32_t offset, uint64_t or, int32_t shift)
+{
+ __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ);
+}
+
+static inline void
+OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo,
+ uint32_t offset, uint64_t or, int32_t shift)
+{
+ __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_WRITE);
+}
+
+static inline void
+OUT_RELOCD(struct fd_ringbuffer *ring, struct fd_bo *bo,
+ uint32_t offset, uint64_t or, int32_t shift)
+{
+ __out_reloc(ring, bo, offset, or, shift, FD_RELOC_READ | FD_RELOC_DUMP);
+}
+
+static inline void
+OUT_RB(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
+{
+ fd_ringbuffer_emit_reloc_ring_full(ring, target, 0);
+}
+
+/* for conditionally setting boolean flag(s): */
+#define COND(bool, val) ((bool) ? (val) : 0)
+
+#endif /* __MAIN_H__ */
--- /dev/null
+# Copyright © 2020 Google, Inc
+
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+ir3_parser = custom_target(
+ 'ir3_parser.[ch]',
+ input: 'ir3_parser.y',
+ output: ['ir3_parser.c', 'ir3_parser.h'],
+ command: [
+ prog_bison, '@INPUT@', '--name-prefix=ir3_yy', '--defines=@OUTPUT1@', '--output=@OUTPUT0@'
+ ]
+)
+
+ir3_lexer = custom_target(
+ 'ir3_lexer.c',
+ input: 'ir3_lexer.l',
+ output: 'ir3_lexer.c',
+ command: [
+ prog_flex, '-o', '@OUTPUT@', '@INPUT@'
+ ]
+)
+
+computerator_files = [
+ 'a6xx.c',
+ 'ir3_asm.c',
+ 'main.c',
+ ir3_parser[0],
+ ir3_parser[1],
+ ir3_lexer
+]
+
+computerator = executable(
+ 'computerator',
+ computerator_files,
+ include_directories : [
+ inc_common,
+ inc_freedreno,
+ ],
+ link_with : [
+ libfreedreno_drm,
+ libfreedreno_ir3,
+ ],
+ dependencies : [
+ dep_libdrm,
+ idep_mesautil,
+ # We don't actually use nir, but ir3 wants some nir headers:
+ idep_nir,
+ ],
+ build_by_default : with_tools.contains('freedreno'),
+ install : with_tools.contains('freedreno'),
+)
subdir('ir3')
subdir('registers')
subdir('perfcntrs')
+subdir('computerator')
if with_tools.contains('drm-shim')
subdir('drm-shim')