panfrost/midgard: Add load/store opcodes
author Alyssa Rosenzweig <alyssa@rosenzweig.io>
Tue, 14 May 2019 04:11:36 +0000 (04:11 +0000)
committer Alyssa Rosenzweig <alyssa@rosenzweig.io>
Thu, 16 May 2019 01:25:25 +0000 (01:25 +0000)
This commit adds a number of new load/store opcodes, largely related to
OpenCL, and renames the existing opcodes to be more uniform (the
load_*/store_* prefixes become ld_*/st_*). The immediate effect is that
compute shaders are substantially easier to interpret.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
src/gallium/drivers/panfrost/midgard/disassemble.c
src/gallium/drivers/panfrost/midgard/helpers.h
src/gallium/drivers/panfrost/midgard/midgard.h
src/gallium/drivers/panfrost/midgard/midgard_compile.c

index c893bc89a6cacfb0d278af316fcde0501e86ee4d..a9e443fa67c499425f37aa5785f253a367b2ff93 100644 (file)
@@ -855,10 +855,10 @@ static bool
 is_op_varying(unsigned op)
 {
         switch (op) {
-        case midgard_op_store_vary_16:
-        case midgard_op_store_vary_32:
-        case midgard_op_load_vary_16:
-        case midgard_op_load_vary_32:
+        case midgard_op_st_vary_16:
+        case midgard_op_st_vary_32:
+        case midgard_op_ld_vary_16:
+        case midgard_op_ld_vary_32:
                 return true;
         }
 
@@ -881,7 +881,7 @@ print_load_store_instr(uint64_t data,
 
         int address = word->address;
 
-        if (word->op == midgard_op_load_uniform_32) {
+        if (word->op == midgard_op_ld_uniform_32) {
                 /* Uniforms use their own addressing scheme */
 
                 int lo = word->varying_parameters >> 7;
index 441c72858875ddcd526057b530c58b83f3c5136c..f32a683233aa181742986f5b12ebdfd82da81c94 100644 (file)
  */
 
 #define OP_IS_STORE_VARY(op) (\
-               op == midgard_op_store_vary_16 || \
-               op == midgard_op_store_vary_32 \
+               op == midgard_op_st_vary_16 || \
+               op == midgard_op_st_vary_32 \
        )
 
 #define OP_IS_STORE(op) (\
                 OP_IS_STORE_VARY(op) || \
-                op == midgard_op_store_cubemap_coords \
+                op == midgard_op_st_cubemap_coords \
        )
 
 #define OP_IS_MOVE(op) ( \
index 91d1c075f96d17802248e755e230dd322554d74b..4a4ec0e454280fe8adc7e9de1d83842bf52493a0 100644 (file)
@@ -345,20 +345,63 @@ typedef enum {
         /* Unclear why this is on the L/S unit, but (with an address of 0,
          * appropriate swizzle, magic constant 0x24, and xy mask?) moves fp32 cube
          * map coordinates in r27 to its cube map texture coordinate
-         * destination (e.g r29). 0x4 magic for loading from fp16 instead */
-
-        midgard_op_store_cubemap_coords = 0x0E,
-
-        midgard_op_load_attr_16 = 0x95,
-        midgard_op_load_attr_32 = 0x94,
-        midgard_op_load_vary_16 = 0x99,
-        midgard_op_load_vary_32 = 0x98,
-        midgard_op_load_color_buffer_16 = 0x9D,
-        midgard_op_load_color_buffer_8 = 0xBA,
-        midgard_op_load_uniform_16 = 0xAC,
-        midgard_op_load_uniform_32 = 0xB0,
-        midgard_op_store_vary_16 = 0xD5,
-        midgard_op_store_vary_32 = 0xD4
+         * destination (e.g r29). 0x4 magic for loading from fp16 instead */
+
+        midgard_op_st_cubemap_coords = 0x0E,
+
+        /* Used in OpenCL. Probably can load other things as well */
+        midgard_op_ld_global_id = 0x10,
+
+        /* val in r27.y, address embedded, outputs result to argument. Invert val for sub. Let val = +-1 for inc/dec. */
+        midgard_op_atomic_add = 0x40,
+        midgard_op_atomic_and = 0x44,
+        midgard_op_atomic_or = 0x48,
+        midgard_op_atomic_xor = 0x4C,
+
+        midgard_op_atomic_imin = 0x50,
+        midgard_op_atomic_umin = 0x54,
+        midgard_op_atomic_imax = 0x58,
+        midgard_op_atomic_umax = 0x5C,
+
+        midgard_op_atomic_xchg = 0x60,
+
+        /* Used for compute shader's __global arguments, __local variables (or
+         * for register spilling) */
+
+        midgard_op_ld_char = 0x81,
+        midgard_op_ld_char2 = 0x84,
+        midgard_op_ld_short = 0x85,
+        midgard_op_ld_char4 = 0x88, /* short2, int, float */
+        midgard_op_ld_short4 = 0x8C, /* int2, float2, long */
+        midgard_op_ld_int4 = 0x90, /* float4, long2 */
+
+        midgard_op_ld_attr_32 = 0x94,
+        midgard_op_ld_attr_16 = 0x95,
+        midgard_op_ld_attr_32i = 0x97,
+        midgard_op_ld_vary_32 = 0x98,
+        midgard_op_ld_vary_16 = 0x99,
+        midgard_op_ld_vary_32i = 0x9B,
+        midgard_op_ld_color_buffer_16 = 0x9D,
+
+        midgard_op_ld_uniform_16 = 0xAC,
+
+        midgard_op_ld_uniform_32 = 0xB0,
+        midgard_op_ld_color_buffer_8 = 0xBA,
+
+        midgard_op_st_char = 0xC0,
+        midgard_op_st_char2 = 0xC4, /* short */
+        midgard_op_st_char4 = 0xC8, /* short2, int, float */
+        midgard_op_st_short4 = 0xCC, /* int2, float2, long */
+        midgard_op_st_int4 = 0xD0, /* float4, long2 */
+
+        midgard_op_st_vary_32 = 0xD4,
+        midgard_op_st_vary_16 = 0xD5,
+        midgard_op_st_vary_32i = 0xD7,
+
+        /* Value to store in r27, location r26.w as short2 */
+        midgard_op_st_image_f = 0xD8,
+        midgard_op_st_image_ui = 0xDA,
+        midgard_op_st_image_i = 0xDB,
 } midgard_load_store_op;
 
 typedef enum {
@@ -494,17 +537,53 @@ __attribute__((__packed__))
 midgard_texture_word;
 
 static char *load_store_opcode_names[256] = {
-        [midgard_op_store_cubemap_coords] = "st_cubemap_coords",
-        [midgard_op_load_attr_16] = "ld_attr_16",
-        [midgard_op_load_attr_32] = "ld_attr_32",
-        [midgard_op_load_vary_16] = "ld_vary_16",
-        [midgard_op_load_vary_32] = "ld_vary_32",
-        [midgard_op_load_uniform_16] = "ld_uniform_16",
-        [midgard_op_load_uniform_32] = "ld_uniform_32",
-        [midgard_op_load_color_buffer_8] = "ld_color_buffer_8",
-        [midgard_op_load_color_buffer_16] = "ld_color_buffer_16",
-        [midgard_op_store_vary_16] = "st_vary_16",
-        [midgard_op_store_vary_32] = "st_vary_32"
+        [midgard_op_st_cubemap_coords] = "st_cubemap_coords",
+        [midgard_op_ld_global_id] = "ld_global_id",
+
+        [midgard_op_atomic_add] = "atomic_add",
+        [midgard_op_atomic_and] = "atomic_and",
+        [midgard_op_atomic_or] = "atomic_or",
+        [midgard_op_atomic_xor] = "atomic_xor",
+        [midgard_op_atomic_imin] = "atomic_imin",
+        [midgard_op_atomic_umin] = "atomic_umin",
+        [midgard_op_atomic_imax] = "atomic_imax",
+        [midgard_op_atomic_umax] = "atomic_umax",
+        [midgard_op_atomic_umax] = "atomic_xchg",
+
+        [midgard_op_ld_char] = "ld_char",
+        [midgard_op_ld_char2] = "ld_char2",
+        [midgard_op_ld_short] = "ld_short",
+        [midgard_op_ld_char4] = "ld_char4",
+        [midgard_op_ld_short4] = "ld_short4",
+        [midgard_op_ld_int4] = "ld_int4",
+
+        [midgard_op_ld_attr_32] = "ld_attr_32",
+        [midgard_op_ld_attr_16] = "ld_attr_16",
+        [midgard_op_ld_attr_32i] = "ld_attr_32i",
+
+        [midgard_op_ld_vary_32] = "ld_vary_32",
+        [midgard_op_ld_vary_16] = "ld_vary_16",
+        [midgard_op_ld_vary_32i] = "ld_vary_32i",
+
+        [midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
+
+        [midgard_op_ld_uniform_16] = "ld_uniform_16",
+        [midgard_op_ld_uniform_32] = "ld_uniform_32",
+        [midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
+
+        [midgard_op_st_char] = "st_char",
+        [midgard_op_st_char2] = "st_char2",
+        [midgard_op_st_char4] = "st_char4",
+        [midgard_op_st_short4] = "st_short4",
+        [midgard_op_st_int4] = "st_int4",
+
+        [midgard_op_st_vary_32] = "st_vary_32",
+        [midgard_op_st_vary_16] = "st_vary_16",
+        [midgard_op_st_vary_32i] = "st_vary_32i",
+
+        [midgard_op_st_image_f] = "st_image_f",
+        [midgard_op_st_image_ui] = "st_image_ui",
+        [midgard_op_st_image_i] = "st_image_i",
 };
 
 #endif
index 421a3343a71670cdce111d5edffb615b1b1e434b..61fa610e8128e7156a1bca740094e991ac9bbbb0 100644 (file)
@@ -323,16 +323,16 @@ v_fmov(unsigned src, midgard_vector_alu_src mod, unsigned dest)
  * don't support half-floats -- this requires changes in other parts of the
  * compiler -- therefore the 16-bit versions are commented out. */
 
-//M_LOAD(load_attr_16);
-M_LOAD(load_attr_32);
-//M_LOAD(load_vary_16);
-M_LOAD(load_vary_32);
-//M_LOAD(load_uniform_16);
-M_LOAD(load_uniform_32);
-M_LOAD(load_color_buffer_8);
-//M_STORE(store_vary_16);
-M_STORE(store_vary_32);
-M_STORE(store_cubemap_coords);
+//M_LOAD(ld_attr_16);
+M_LOAD(ld_attr_32); /* presumably defines the m_ld_attr_32() constructor called from emit_intrinsic; the M_LOAD definition is outside this hunk -- confirm */
+//M_LOAD(ld_vary_16);
+M_LOAD(ld_vary_32);
+//M_LOAD(ld_uniform_16);
+M_LOAD(ld_uniform_32);
+M_LOAD(ld_color_buffer_8);
+//M_STORE(st_vary_16);
+M_STORE(st_vary_32); /* presumably defines m_st_vary_32(), likewise; the argument must match a midgard_op_* suffix after the rename -- confirm against M_STORE */
+M_STORE(st_cubemap_coords);
 
 static midgard_instruction
 v_alu_br_compact_cond(midgard_jmp_writeout_op op, unsigned tag, signed offset, unsigned cond)
@@ -1487,7 +1487,7 @@ emit_uniform_read(compiler_context *ctx, unsigned dest, unsigned offset, nir_src
                  * higher-indexed uniforms, at a performance cost. More
                  * generally, we're emitting a UBO read instruction. */
 
-                midgard_instruction ins = m_load_uniform_32(dest, offset);
+                midgard_instruction ins = m_ld_uniform_32(dest, offset);
 
                 /* TODO: Don't split */
                 ins.load_store.varying_parameters = (offset & 7) << 7;
@@ -1560,7 +1560,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                         /* XXX: Half-floats? */
                         /* TODO: swizzle, mask */
 
-                        midgard_instruction ins = m_load_vary_32(reg, offset);
+                        midgard_instruction ins = m_ld_vary_32(reg, offset);
 
                         midgard_varying_parameter p = {
                                 .is_varying = 1,
@@ -1615,7 +1615,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                         } else if (out->data.location == VARYING_SLOT_COL1) {
                                 /* Destination color must be read from framebuffer */
 
-                                midgard_instruction ins = m_load_color_buffer_8(reg, 0);
+                                midgard_instruction ins = m_ld_color_buffer_8(reg, 0);
                                 ins.load_store.swizzle = 0; /* xxxx */
 
                                 /* Read each component sequentially */
@@ -1682,7 +1682,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                                 assert(0);
                         }
                 } else if (ctx->stage == MESA_SHADER_VERTEX) {
-                        midgard_instruction ins = m_load_attr_32(reg, offset);
+                        midgard_instruction ins = m_ld_attr_32(reg, offset);
                         ins.load_store.unknown = 0x1E1E; /* XXX: What is this? */
                         ins.load_store.mask = (1 << instr->num_components) - 1;
                         emit_mir_instruction(ctx, ins);
@@ -1745,7 +1745,7 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                                 attach_constants(ctx, &ins, constant_value, reg + 1);
                                 emit_mir_instruction(ctx, ins);
 
-                                midgard_instruction st = m_store_vary_32(SSA_FIXED_REGISTER(0), offset);
+                                midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(0), offset);
                                 st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
                                 emit_mir_instruction(ctx, st);
                         } else {
@@ -1842,7 +1842,7 @@ emit_tex(compiler_context *ctx, nir_tex_instr *instr)
                                 midgard_instruction move = v_fmov(index, alu_src, SSA_FIXED_REGISTER(27));
                                 emit_mir_instruction(ctx, move);
 
-                                midgard_instruction st = m_store_cubemap_coords(reg, 0);
+                                midgard_instruction st = m_st_cubemap_coords(reg, 0);
                                 st.load_store.unknown = 0x24; /* XXX: What is this? */
                                 st.load_store.mask = 0x3; /* xy? */
                                 st.load_store.swizzle = alu_src.swizzle;
@@ -2126,7 +2126,7 @@ install_registers(compiler_context *ctx, struct ra_graph *g)
 
                         case TAG_LOAD_STORE_4: {
                                 if (OP_IS_STORE_VARY(ins->load_store.op)) {
-                                        /* TODO: use ssa_args for store_vary */
+                                        /* TODO: use ssa_args for st_vary */
                                         ins->load_store.reg = 0;
                                 } else {
                                         bool has_dest = args.dest >= 0;
@@ -2239,7 +2239,7 @@ allocate_registers(compiler_context *ctx)
                 mir_foreach_instr_in_block(block, ins) {
                         if (ins->compact_branch) continue;
 
-                        /* Dest is < 0 for store_vary instructions, which break
+                        /* Dest is < 0 for st_vary instructions, which break
                          * the usual SSA conventions. Liveness analysis doesn't
                          * make sense on these instructions, so skip them to
                          * avoid memory corruption */
@@ -3432,7 +3432,7 @@ midgard_emit_store(compiler_context *ctx, midgard_block *block) {
 
                 midgard_instruction mov = v_fmov(idx, blank_alu_src, SSA_FIXED_REGISTER(REGISTER_VARYING_BASE + high_varying_register));
 
-                midgard_instruction st = m_store_vary_32(SSA_FIXED_REGISTER(high_varying_register), varying);
+                midgard_instruction st = m_st_vary_32(SSA_FIXED_REGISTER(high_varying_register), varying);
                 st.load_store.unknown = 0x1E9E; /* XXX: What is this? */
 
                 mir_insert_instruction_before(mir_next_op(ins), st);