panfrost/midgard: Split up midgard_compile.c (RA)
author: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Sun, 19 May 2019 23:20:34 +0000 (23:20 +0000)
committer: Alyssa Rosenzweig <alyssa@rosenzweig.io>
Sun, 19 May 2019 23:37:45 +0000 (23:37 +0000)
This commit moves the register allocator out of midgard_compile.c and
into its own midgard_ra.c file. In doing so, a number of dependencies
are identified and moved into their own files in turn. midgard_compile.c
is still fairly monolithic, but this should help.

Code churn, but no functional changes should be introduced by this
commit.

Signed-off-by: Alyssa Rosenzweig <alyssa@rosenzweig.io>
src/gallium/drivers/panfrost/meson.build
src/gallium/drivers/panfrost/midgard/compiler.h [new file with mode: 0644]
src/gallium/drivers/panfrost/midgard/disassemble.c
src/gallium/drivers/panfrost/midgard/helpers.h
src/gallium/drivers/panfrost/midgard/midgard.h
src/gallium/drivers/panfrost/midgard/midgard_compile.c
src/gallium/drivers/panfrost/midgard/midgard_liveness.c [new file with mode: 0644]
src/gallium/drivers/panfrost/midgard/midgard_ops.c [new file with mode: 0644]
src/gallium/drivers/panfrost/midgard/midgard_ops.h [new file with mode: 0644]
src/gallium/drivers/panfrost/midgard/midgard_print.c [new file with mode: 0644]
src/gallium/drivers/panfrost/midgard/midgard_ra.c [new file with mode: 0644]

index 93640a29c4cfaa4003a81b6aea2e839c2bb5d327..075afa05cd946b348fa8573065610faf2ca7fc9b 100644 (file)
@@ -27,6 +27,11 @@ files_panfrost = files(
   'pan_resource.h',
 
   'midgard/midgard_compile.c',
+  'midgard/midgard_print.c',
+  'midgard/midgard_ra.c',
+  'midgard/midgard_liveness.c',
+  'midgard/midgard_ops.c',
+
   'midgard/nir_lower_blend.c',
   'midgard/cppwrap.cpp',
   'midgard/disassemble.c',
@@ -97,6 +102,10 @@ driver_panfrost = declare_dependency(
 
 files_midgard = files(
   'midgard/midgard_compile.c',
+  'midgard/midgard_print.c',
+  'midgard/midgard_ra.c',
+  'midgard/midgard_liveness.c',
+  'midgard/midgard_ops.c',
   'midgard/cppwrap.cpp',
   'midgard/disassemble.c',
   'midgard/cmdline.c',
@@ -153,6 +162,7 @@ files_pandecode = files(
   'pan_pretty_print.c',
 
   'midgard/disassemble.c',
+  'midgard/midgard_ops.c',
   'bifrost/disassemble.c',
 )
 
diff --git a/src/gallium/drivers/panfrost/midgard/compiler.h b/src/gallium/drivers/panfrost/midgard/compiler.h
new file mode 100644 (file)
index 0000000..48c6db5
--- /dev/null
@@ -0,0 +1,359 @@
+/*
+ * Copyright (C) 2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef _MDG_COMPILER_H
+#define _MDG_COMPILER_H
+
+#include "midgard.h"
+#include "helpers.h"
+#include "midgard_compile.h"
+
+#include "util/hash_table.h"
+#include "util/u_dynarray.h"
+#include "util/set.h"
+#include "util/list.h"
+
+#include "main/mtypes.h"
+#include "compiler/nir_types.h"
+#include "compiler/nir/nir.h"
+
+/* Forward declare */
+struct midgard_block;
+
+/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to
+ * the hardware), hence why that must be zero. TARGET_DISCARD signals this
+ * instruction is actually a discard op. */
+
+#define TARGET_GOTO 0
+#define TARGET_BREAK 1
+#define TARGET_CONTINUE 2
+#define TARGET_DISCARD 3
+
+typedef struct midgard_branch {
+        /* If conditional, the condition is specified in r31.w */
+        bool conditional;
+
+        /* For conditionals, if this is true, we branch on FALSE. If false, we branch on TRUE. */
+        bool invert_conditional;
+
+        /* Branch targets: the start of a block, the start of a loop (continue), the end of a loop (break). Value is one of the TARGET_* constants */
+        unsigned target_type;
+
+        /* The actual target */
+        union {
+                int target_block;
+                int target_break;
+                int target_continue;
+        };
+} midgard_branch;
+
+/* Instruction arguments represented as block-local SSA indices, rather than
+ * registers. Negative values mean unused. */
+
+typedef struct {
+        int src0;
+        int src1;
+        int dest;
+
+        /* src1 is -not- SSA but instead a 16-bit inline constant to be smudged
+         * in. Only valid for ALU ops. */
+        bool inline_constant;
+} ssa_args;
+
+/* Generic in-memory data type representing a single logical instruction, rather
+ * than a single instruction group. This is the preferred form for code gen.
+ * Multiple midgard_instructions will later be combined during scheduling,
+ * though this is not represented in this structure.  Its format bridges
+ * the low-level binary representation with the higher level semantic meaning.
+ *
+ * Notably, it allows registers to be specified as block local SSA, for code
+ * emitted before the register allocation pass.
+ */
+
+typedef struct midgard_instruction {
+        /* Must be first for casting */
+        struct list_head link;
+
+        unsigned type; /* ALU, load/store, texture */
+
+        /* If the register allocator has not run yet... */
+        ssa_args ssa_args;
+
+        /* Special fields for an ALU instruction */
+        midgard_reg_info registers;
+
+        /* I.e. (1 << alu_bit) */
+        int unit;
+
+        /* When emitting bundle, should this instruction have a break forced
+         * before it? Used for r31 writes which are valid only within a single
+         * bundle and *need* to happen as early as possible... this is a hack,
+         * TODO remove when we have a scheduler */
+        bool precede_break;
+
+        bool has_constants;
+        float constants[4];
+        uint16_t inline_constant;
+        bool has_blend_constant;
+
+        bool compact_branch;
+        bool writeout;
+        bool prepacked_branch;
+
+        union {
+                midgard_load_store_word load_store;
+                midgard_vector_alu alu;
+                midgard_texture_word texture;
+                midgard_branch_extended branch_extended;
+                uint16_t br_compact;
+
+                /* General branch, rather than packed br_compact. Higher level
+                 * than the other components */
+                midgard_branch branch;
+        };
+} midgard_instruction;
+
+typedef struct midgard_block {
+        /* Link to next block. Must be first for mir_get_block */
+        struct list_head link;
+
+        /* List of midgard_instructions emitted for the current block */
+        struct list_head instructions;
+
+        bool is_scheduled;
+
+        /* List of midgard_bundles emitted (after the scheduler has run) */
+        struct util_dynarray bundles;
+
+        /* Number of quadwords _actually_ emitted, as determined after scheduling */
+        unsigned quadword_count;
+
+        /* Successors: always one forward (the block after us), maybe
+         * one backwards (for a backward branch). No need for a second
+         * forward, since graph traversal would get there eventually
+         * anyway */
+        struct midgard_block *successors[2];
+        unsigned nr_successors;
+
+        /* The successor pointers form a graph, and in the case of
+         * complex control flow, this graph has cycles. To aid
+         * traversal during liveness analysis, we have a visited
+         * boolean for passes to use as they see fit, provided they
+         * clean up later */
+        bool visited;
+} midgard_block;
+
+typedef struct midgard_bundle {
+        /* Tag for the overall bundle */
+        int tag;
+
+        /* Instructions contained by the bundle */
+        int instruction_count;
+        midgard_instruction instructions[5];
+
+        /* Bundle-wide ALU configuration */
+        int padding;
+        int control;
+        bool has_embedded_constants;
+        float constants[4];
+        bool has_blend_constant;
+
+        uint16_t register_words[8];
+        int register_words_count;
+
+        uint64_t body_words[8];
+        size_t body_size[8];
+        int body_words_count;
+} midgard_bundle;
+
+typedef struct compiler_context {
+        nir_shader *nir;
+        gl_shader_stage stage;
+
+        /* Is internally a blend shader? Depends on stage == FRAGMENT */
+        bool is_blend;
+
+        /* Tracking for blend constant patching */
+        int blend_constant_offset;
+
+        /* Current NIR function */
+        nir_function *func;
+
+        /* Unordered list of midgard_blocks */
+        int block_count;
+        struct list_head blocks;
+
+        midgard_block *initial_block;
+        midgard_block *previous_source_block;
+        midgard_block *final_block;
+
+        /* The block currently being emitted into; new midgard_instructions are appended to its list */
+        midgard_block *current_block;
+
+        /* The current "depth" of the loop, for disambiguating breaks/continues
+         * when using nested loops */
+        int current_loop_depth;
+
+        /* Constants which have been loaded, for later inlining */
+        struct hash_table_u64 *ssa_constants;
+
+        /* SSA indices to be outputted to corresponding varying offset */
+        struct hash_table_u64 *ssa_varyings;
+
+        /* SSA values / registers which have been aliased. Naively, these
+         * demand a fmov output; instead, we alias them in a later pass to
+         * avoid the wasted op.
+         *
+         * A note on encoding: to avoid dynamic memory management here, rather
+         * than mapping to a pointer, we map to the source index; the key
+         * itself is just the destination index. */
+
+        struct hash_table_u64 *ssa_to_alias;
+        struct set *leftover_ssa_to_alias;
+
+        /* Actual SSA-to-register for RA */
+        struct hash_table_u64 *ssa_to_register;
+
+        /* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */
+        struct hash_table_u64 *hash_to_temp;
+        int temp_count;
+        int max_hash;
+
+        /* Just the count of the max register used. Higher count => higher
+         * register pressure */
+        int work_registers;
+
+        /* Used for cont/last hinting. Increase when a tex op is added.
+         * Decrease when a tex op is removed. */
+        int texture_op_count;
+
+        /* Mapping of texture register -> SSA index for unaliasing */
+        int texture_index[2];
+
+        /* If any path hits a discard instruction */
+        bool can_discard;
+
+        /* The number of uniforms allowable for the fast path */
+        int uniform_cutoff;
+
+        /* Count of instructions emitted from NIR overall, across all blocks */
+        int instruction_count;
+
+        /* Alpha ref value passed in */
+        float alpha_ref;
+
+        /* The index corresponding to the fragment output */
+        unsigned fragment_output;
+
+        /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
+        unsigned sysvals[MAX_SYSVAL_COUNT];
+        unsigned sysval_count;
+        struct hash_table_u64 *sysval_to_id;
+} compiler_context;
+
+/* Helpers for manipulating the above structures (forming the driver IR) */
+
+/* Copy an instruction to the heap; the helpers below append it to the current block or insert it before another */
+
+static inline midgard_instruction *
+mir_upload_ins(struct midgard_instruction ins)
+{
+        midgard_instruction *heap = malloc(sizeof(ins));
+        memcpy(heap, &ins, sizeof(ins));
+        return heap;
+}
+
+static inline void
+emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins)
+{
+        list_addtail(&(mir_upload_ins(ins))->link, &ctx->current_block->instructions);
+}
+
+static inline void
+mir_insert_instruction_before(struct midgard_instruction *tag, struct midgard_instruction ins)
+{
+        list_addtail(&(mir_upload_ins(ins))->link, &tag->link);
+}
+
+static inline void
+mir_remove_instruction(struct midgard_instruction *ins)
+{
+        list_del(&ins->link);
+}
+
+static inline midgard_instruction*
+mir_prev_op(struct midgard_instruction *ins)
+{
+        return list_last_entry(&(ins->link), midgard_instruction, link);
+}
+
+static inline midgard_instruction*
+mir_next_op(struct midgard_instruction *ins)
+{
+        return list_first_entry(&(ins->link), midgard_instruction, link);
+}
+
+#define mir_foreach_block(ctx, v) list_for_each_entry(struct midgard_block, v, &ctx->blocks, link) 
+#define mir_foreach_block_from(ctx, from, v) list_for_each_entry_from(struct midgard_block, v, from, &ctx->blocks, link)
+
+#define mir_foreach_instr(ctx, v) list_for_each_entry(struct midgard_instruction, v, &ctx->current_block->instructions, link) 
+#define mir_foreach_instr_safe(ctx, v) list_for_each_entry_safe(struct midgard_instruction, v, &ctx->current_block->instructions, link) 
+#define mir_foreach_instr_in_block(block, v) list_for_each_entry(struct midgard_instruction, v, &block->instructions, link) 
+#define mir_foreach_instr_in_block_safe(block, v) list_for_each_entry_safe(struct midgard_instruction, v, &block->instructions, link) 
+#define mir_foreach_instr_in_block_safe_rev(block, v) list_for_each_entry_safe_rev(struct midgard_instruction, v, &block->instructions, link) 
+#define mir_foreach_instr_in_block_from(block, v, from) list_for_each_entry_from(struct midgard_instruction, v, from, &block->instructions, link) 
+#define mir_foreach_instr_in_block_from_rev(block, v, from) list_for_each_entry_from_rev(struct midgard_instruction, v, from, &block->instructions, link) 
+
+
+static inline midgard_instruction *
+mir_last_in_block(struct midgard_block *block)
+{
+        return list_last_entry(&block->instructions, struct midgard_instruction, link);
+}
+
+static inline midgard_block *
+mir_get_block(compiler_context *ctx, int idx)
+{
+        struct list_head *lst = &ctx->blocks;
+
+        while ((idx--) + 1)
+                lst = lst->next;
+
+        return (struct midgard_block *) lst;
+}
+
+/* MIR printing */
+
+void mir_print_instruction(midgard_instruction *ins);
+void mir_print_block(midgard_block *block);
+void mir_print_shader(compiler_context *ctx);
+
+/* Register allocation */
+
+struct ra_graph;
+
+struct ra_graph* allocate_registers(compiler_context *ctx);
+void install_registers(compiler_context *ctx, struct ra_graph *g);
+bool mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src);
+
+#endif
index a9e443fa67c499425f37aa5785f253a367b2ff93..c467e94fc29f220a4d289bc7bc005c95e25142c1 100644 (file)
@@ -31,6 +31,7 @@
 #include <string.h>
 #include "midgard.h"
 #include "midgard-parse.h"
+#include "midgard_ops.h"
 #include "disassemble.h"
 #include "helpers.h"
 #include "util/half_float.h"
index f32a683233aa181742986f5b12ebdfd82da81c94..9d287259a8a267fa20e4f8719ae033e19f31670d 100644 (file)
@@ -1,7 +1,4 @@
-/* Author(s):
- *  Alyssa Rosenzweig
- *
- * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io)
+/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io)
  *
  * Permission is hereby granted, free of charge, to any person obtaining a copy
  * of this software and associated documentation files (the "Software"), to deal
@@ -22,6 +19,9 @@
  * THE SOFTWARE.
  */
 
+#ifndef __MDG_HELPERS_H
+#define __MDG_HELPERS_H
+
 #define OP_IS_STORE_VARY(op) (\
                op == midgard_op_st_vary_16 || \
                op == midgard_op_st_vary_32 \
 #define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD)
 #define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT)
 
-/* Table of mapping opcodes to accompanying properties relevant to
- * scheduling/emission/etc */
-
-static struct {
+struct mir_op_props {
         const char *name;
         unsigned props;
-} alu_opcode_props[256] = {
-        [midgard_alu_op_fadd]           = {"fadd", UNITS_ADD | OP_COMMUTES},
-        [midgard_alu_op_fmul]           = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES},
-        [midgard_alu_op_fmin]           = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
-        [midgard_alu_op_fmax]           = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
-        [midgard_alu_op_imin]           = {"imin", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_imax]           = {"imax", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_umin]           = {"umin", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_umax]           = {"umax", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_fmov]           = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24},
-        [midgard_alu_op_fround]          = {"fround", UNITS_ADD},
-        [midgard_alu_op_froundeven]      = {"froundeven", UNITS_ADD},
-        [midgard_alu_op_ftrunc]          = {"ftrunc", UNITS_ADD},
-        [midgard_alu_op_ffloor]                 = {"ffloor", UNITS_ADD},
-        [midgard_alu_op_fceil]          = {"fceil", UNITS_ADD},
-        [midgard_alu_op_ffma]           = {"ffma", UNIT_VLUT},
-
-        /* Though they output a scalar, they need to run on a vector unit
-         * since they process vectors */
-        [midgard_alu_op_fdot3]          = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
-        [midgard_alu_op_fdot3r]                 = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
-        [midgard_alu_op_fdot4]          = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES},
-
-        /* Incredibly, iadd can run on vmul, etc */
-        [midgard_alu_op_iadd]           = {"iadd", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_iabs]           = {"iabs", UNITS_ADD},
-        [midgard_alu_op_isub]           = {"isub", UNITS_MOST},
-        [midgard_alu_op_imul]           = {"imul", UNITS_MUL | OP_COMMUTES},
-        [midgard_alu_op_imov]           = {"imov", UNITS_MOST | QUIRK_FLIPPED_R24},
-
-        /* For vector comparisons, use ball etc */
-        [midgard_alu_op_feq]            = {"feq", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_fne]            = {"fne", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_fle]            = {"fle", UNITS_MOST},
-        [midgard_alu_op_flt]            = {"flt", UNITS_MOST},
-        [midgard_alu_op_ieq]            = {"ieq", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_ine]            = {"ine", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_ilt]            = {"ilt", UNITS_MOST},
-        [midgard_alu_op_ile]            = {"ile", UNITS_MOST},
-        [midgard_alu_op_ult]            = {"ult", UNITS_MOST},
-        [midgard_alu_op_ule]            = {"ule", UNITS_MOST},
-
-        [midgard_alu_op_icsel]          = {"icsel", UNITS_ADD},
-        [midgard_alu_op_icsel_v]         = {"icsel_v", UNITS_ADD},
-        [midgard_alu_op_fcsel_v]        = {"fcsel_v", UNITS_ADD},
-        [midgard_alu_op_fcsel]          = {"fcsel", UNITS_ADD | UNIT_SMUL},
-
-        [midgard_alu_op_frcp]           = {"frcp", UNIT_VLUT},
-        [midgard_alu_op_frsqrt]                 = {"frsqrt", UNIT_VLUT},
-        [midgard_alu_op_fsqrt]          = {"fsqrt", UNIT_VLUT},
-        [midgard_alu_op_fpow_pt1]       = {"fpow_pt1", UNIT_VLUT},
-        [midgard_alu_op_fexp2]          = {"fexp2", UNIT_VLUT},
-        [midgard_alu_op_flog2]          = {"flog2", UNIT_VLUT},
-
-        [midgard_alu_op_f2i]            = {"f2i", UNITS_ADD | OP_TYPE_CONVERT},
-        [midgard_alu_op_f2u]            = {"f2u", UNITS_ADD | OP_TYPE_CONVERT},
-        [midgard_alu_op_f2u8]           = {"f2u8", UNITS_ADD | OP_TYPE_CONVERT},
-        [midgard_alu_op_i2f]            = {"i2f", UNITS_ADD | OP_TYPE_CONVERT},
-        [midgard_alu_op_u2f]            = {"u2f", UNITS_ADD | OP_TYPE_CONVERT},
-
-        [midgard_alu_op_fsin]           = {"fsin", UNIT_VLUT},
-        [midgard_alu_op_fcos]           = {"fcos", UNIT_VLUT},
-
-        /* XXX: Test case where it's right on smul but not sadd */
-        [midgard_alu_op_iand]           = {"iand", UNITS_MOST | OP_COMMUTES}, 
-        [midgard_alu_op_iandnot]         = {"iandnot", UNITS_MOST},
-
-        [midgard_alu_op_ior]            = {"ior", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_iornot]                 = {"iornot", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_inor]           = {"inor", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_ixor]           = {"ixor", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_inxor]          = {"inxor", UNITS_MOST | OP_COMMUTES},
-        [midgard_alu_op_iclz]           = {"iclz", UNITS_ADD},
-        [midgard_alu_op_ibitcount8]     = {"ibitcount8", UNITS_ADD},
-        [midgard_alu_op_inand]          = {"inand", UNITS_MOST},
-        [midgard_alu_op_ishl]           = {"ishl", UNITS_ADD},
-        [midgard_alu_op_iasr]           = {"iasr", UNITS_ADD},
-        [midgard_alu_op_ilsr]           = {"ilsr", UNITS_ADD},
-
-        [midgard_alu_op_fball_eq]       = {"fball_eq", UNITS_VECTOR | OP_COMMUTES},
-        [midgard_alu_op_fbany_neq]      = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES},
-        [midgard_alu_op_iball_eq]       = {"iball_eq", UNITS_VECTOR | OP_COMMUTES},
-        [midgard_alu_op_iball_neq]      = {"iball_neq", UNITS_VECTOR | OP_COMMUTES},
-        [midgard_alu_op_ibany_eq]       = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES},
-        [midgard_alu_op_ibany_neq]      = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES},
-
-        /* These instructions are not yet emitted by the compiler, so
-         * don't speculate about units yet */ 
-        [midgard_alu_op_ishladd]        = {"ishladd", 0},
-
-        [midgard_alu_op_uball_lt]       = {"uball_lt", 0},
-        [midgard_alu_op_uball_lte]      = {"uball_lte", 0},
-        [midgard_alu_op_iball_lt]       = {"iball_lt", 0},
-        [midgard_alu_op_iball_lte]      = {"iball_lte", 0},
-        [midgard_alu_op_ubany_lt]       = {"ubany_lt", 0},
-        [midgard_alu_op_ubany_lte]      = {"ubany_lte", 0},
-        [midgard_alu_op_ibany_lt]       = {"ibany_lt", 0},
-        [midgard_alu_op_ibany_lte]      = {"ibany_lte", 0},
-
-        [midgard_alu_op_freduce]        = {"freduce", 0},
-        [midgard_alu_op_bball_eq]       = {"bball_eq", 0 | OP_COMMUTES},
-        [midgard_alu_op_bbany_neq]      = {"bball_eq", 0 | OP_COMMUTES},
-        [midgard_alu_op_fatan2_pt1]     = {"fatan2_pt1", 0},
-        [midgard_alu_op_fatan_pt2]      = {"fatan_pt2", 0},
 };
 
-/* Is this opcode that of an integer (regardless of signedness)? Instruction
- * names authoritatively determine types */
-
-static inline bool
-midgard_is_integer_op(int op)
-{
-        const char *name = alu_opcode_props[op].name;
-
-        if (!name)
-                return false;
-
-        return (name[0] == 'i') || (name[0] == 'u');
-}
-
-/* Does this opcode *write* an integer? Same as is_integer_op, unless it's a
- * conversion between int<->float in which case we do the opposite */
-
-static inline bool
-midgard_is_integer_out_op(int op)
-{
-        bool is_int = midgard_is_integer_op(op);
-        bool is_conversion = alu_opcode_props[op].props & OP_TYPE_CONVERT;
+/* This file is common, so don't define the tables themselves. #include
+ * midgard_ops.h if you need that, or edit midgard_ops.c directly */
 
-        return is_int ^ is_conversion;
-}
+#endif
index d5d6c12f78c558a39711995791dabbc7bce4151e..f5cd59cbfb1f5c2a13f1e36781ecf60aaa2f509e 100644 (file)
@@ -536,54 +536,4 @@ __attribute__((__packed__))
 }
 midgard_texture_word;
 
-static char *load_store_opcode_names[256] = {
-        [midgard_op_st_cubemap_coords] = "st_cubemap_coords",
-        [midgard_op_ld_global_id] = "ld_global_id",
-
-        [midgard_op_atomic_add] = "atomic_add",
-        [midgard_op_atomic_and] = "atomic_and",
-        [midgard_op_atomic_or] = "atomic_or",
-        [midgard_op_atomic_xor] = "atomic_xor",
-        [midgard_op_atomic_imin] = "atomic_imin",
-        [midgard_op_atomic_umin] = "atomic_umin",
-        [midgard_op_atomic_imax] = "atomic_imax",
-        [midgard_op_atomic_umax] = "atomic_umax",
-        [midgard_op_atomic_xchg] = "atomic_xchg",
-
-        [midgard_op_ld_char] = "ld_char",
-        [midgard_op_ld_char2] = "ld_char2",
-        [midgard_op_ld_short] = "ld_short",
-        [midgard_op_ld_char4] = "ld_char4",
-        [midgard_op_ld_short4] = "ld_short4",
-        [midgard_op_ld_int4] = "ld_int4",
-
-        [midgard_op_ld_attr_32] = "ld_attr_32",
-        [midgard_op_ld_attr_16] = "ld_attr_16",
-        [midgard_op_ld_attr_32i] = "ld_attr_32i",
-
-        [midgard_op_ld_vary_32] = "ld_vary_32",
-        [midgard_op_ld_vary_16] = "ld_vary_16",
-        [midgard_op_ld_vary_32i] = "ld_vary_32i",
-
-        [midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
-
-        [midgard_op_ld_uniform_16] = "ld_uniform_16",
-        [midgard_op_ld_uniform_32] = "ld_uniform_32",
-        [midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
-
-        [midgard_op_st_char] = "st_char",
-        [midgard_op_st_char2] = "st_char2",
-        [midgard_op_st_char4] = "st_char4",
-        [midgard_op_st_short4] = "st_short4",
-        [midgard_op_st_int4] = "st_int4",
-
-        [midgard_op_st_vary_32] = "st_vary_32",
-        [midgard_op_st_vary_16] = "st_vary_16",
-        [midgard_op_st_vary_32i] = "st_vary_32i",
-
-        [midgard_op_st_image_f] = "st_image_f",
-        [midgard_op_st_image_ui] = "st_image_ui",
-        [midgard_op_st_image_i] = "st_image_i",
-};
-
 #endif
index fa74b3f9046279617bb9b130268981505d1afd57..640e4a5fb86eb427ea6ccb2133e210da53719f03 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2018 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -36,7 +36,6 @@
 #include "main/imports.h"
 #include "compiler/nir/nir_builder.h"
 #include "util/half_float.h"
-#include "util/register_allocate.h"
 #include "util/u_debug.h"
 #include "util/u_dynarray.h"
 #include "util/list.h"
@@ -45,7 +44,9 @@
 #include "midgard.h"
 #include "midgard_nir.h"
 #include "midgard_compile.h"
+#include "midgard_ops.h"
 #include "helpers.h"
+#include "compiler.h"
 
 #include "disassemble.h"
 
@@ -64,138 +65,12 @@ int midgard_debug = 0;
                        fprintf(stderr, "%s:%d: "fmt, \
                                __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
 
-/* Instruction arguments represented as block-local SSA indices, rather than
- * registers. Negative values mean unused. */
-
-typedef struct {
-        int src0;
-        int src1;
-        int dest;
-
-        /* src1 is -not- SSA but instead a 16-bit inline constant to be smudged
-         * in. Only valid for ALU ops. */
-        bool inline_constant;
-} ssa_args;
-
-/* Forward declare so midgard_branch can reference */
-struct midgard_block;
-
-/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to
- * the hardware), hence why that must be zero. TARGET_DISCARD signals this
- * instruction is actually a discard op. */
-
-#define TARGET_GOTO 0
-#define TARGET_BREAK 1
-#define TARGET_CONTINUE 2
-#define TARGET_DISCARD 3
-
-typedef struct midgard_branch {
-        /* If conditional, the condition is specified in r31.w */
-        bool conditional;
-
-        /* For conditionals, if this is true, we branch on FALSE. If false, we  branch on TRUE. */
-        bool invert_conditional;
-
-        /* Branch targets: the start of a block, the start of a loop (continue), the end of a loop (break). Value is one of TARGET_ */
-        unsigned target_type;
-
-        /* The actual target */
-        union {
-                int target_block;
-                int target_break;
-                int target_continue;
-        };
-} midgard_branch;
-
 static bool
 midgard_is_branch_unit(unsigned unit)
 {
         return (unit == ALU_ENAB_BRANCH) || (unit == ALU_ENAB_BR_COMPACT);
 }
 
-/* Generic in-memory data type repesenting a single logical instruction, rather
- * than a single instruction group. This is the preferred form for code gen.
- * Multiple midgard_insturctions will later be combined during scheduling,
- * though this is not represented in this structure.  Its format bridges
- * the low-level binary representation with the higher level semantic meaning.
- *
- * Notably, it allows registers to be specified as block local SSA, for code
- * emitted before the register allocation pass.
- */
-
-typedef struct midgard_instruction {
-        /* Must be first for casting */
-        struct list_head link;
-
-        unsigned type; /* ALU, load/store, texture */
-
-        /* If the register allocator has not run yet... */
-        ssa_args ssa_args;
-
-        /* Special fields for an ALU instruction */
-        midgard_reg_info registers;
-
-        /* I.e. (1 << alu_bit) */
-        int unit;
-
-        /* When emitting bundle, should this instruction have a break forced
-         * before it? Used for r31 writes which are valid only within a single
-         * bundle and *need* to happen as early as possible... this is a hack,
-         * TODO remove when we have a scheduler */
-        bool precede_break;
-
-        bool has_constants;
-        float constants[4];
-        uint16_t inline_constant;
-        bool has_blend_constant;
-
-        bool compact_branch;
-        bool writeout;
-        bool prepacked_branch;
-
-        union {
-                midgard_load_store_word load_store;
-                midgard_vector_alu alu;
-                midgard_texture_word texture;
-                midgard_branch_extended branch_extended;
-                uint16_t br_compact;
-
-                /* General branch, rather than packed br_compact. Higher level
-                 * than the other components */
-                midgard_branch branch;
-        };
-} midgard_instruction;
-
-typedef struct midgard_block {
-        /* Link to next block. Must be first for mir_get_block */
-        struct list_head link;
-
-        /* List of midgard_instructions emitted for the current block */
-        struct list_head instructions;
-
-        bool is_scheduled;
-
-        /* List of midgard_bundles emitted (after the scheduler has run) */
-        struct util_dynarray bundles;
-
-        /* Number of quadwords _actually_ emitted, as determined after scheduling */
-        unsigned quadword_count;
-
-        /* Successors: always one forward (the block after us), maybe
-         * one backwards (for a backward branch). No need for a second
-         * forward, since graph traversal would get there eventually
-         * anyway */
-        struct midgard_block *successors[2];
-        unsigned nr_successors;
-
-        /* The successors pointer form a graph, and in the case of
-         * complex control flow, this graph has a cycles. To aid
-         * traversal during liveness analysis, we have a visited?
-         * boolean for passes to use as they see fit, provided they
-         * clean up later */
-        bool visited;
-} midgard_block;
-
 static void
 midgard_block_add_successor(midgard_block *block, midgard_block *successor)
 {
@@ -404,267 +279,6 @@ midgard_create_branch_extended( midgard_condition cond,
         return branch;
 }
 
-typedef struct midgard_bundle {
-        /* Tag for the overall bundle */
-        int tag;
-
-        /* Instructions contained by the bundle */
-        int instruction_count;
-        midgard_instruction instructions[5];
-
-        /* Bundle-wide ALU configuration */
-        int padding;
-        int control;
-        bool has_embedded_constants;
-        float constants[4];
-        bool has_blend_constant;
-
-        uint16_t register_words[8];
-        int register_words_count;
-
-        uint64_t body_words[8];
-        size_t body_size[8];
-        int body_words_count;
-} midgard_bundle;
-
-typedef struct compiler_context {
-        nir_shader *nir;
-        gl_shader_stage stage;
-
-        /* Is internally a blend shader? Depends on stage == FRAGMENT */
-        bool is_blend;
-
-        /* Tracking for blend constant patching */
-        int blend_constant_offset;
-
-        /* Current NIR function */
-        nir_function *func;
-
-        /* Unordered list of midgard_blocks */
-        int block_count;
-        struct list_head blocks;
-
-        midgard_block *initial_block;
-        midgard_block *previous_source_block;
-        midgard_block *final_block;
-
-        /* List of midgard_instructions emitted for the current block */
-        midgard_block *current_block;
-
-        /* The current "depth" of the loop, for disambiguating breaks/continues
-         * when using nested loops */
-        int current_loop_depth;
-
-        /* Constants which have been loaded, for later inlining */
-        struct hash_table_u64 *ssa_constants;
-
-        /* SSA indices to be outputted to corresponding varying offset */
-        struct hash_table_u64 *ssa_varyings;
-
-        /* SSA values / registers which have been aliased. Naively, these
-         * demand a fmov output; instead, we alias them in a later pass to
-         * avoid the wasted op.
-         *
-         * A note on encoding: to avoid dynamic memory management here, rather
-         * than ampping to a pointer, we map to the source index; the key
-         * itself is just the destination index. */
-
-        struct hash_table_u64 *ssa_to_alias;
-        struct set *leftover_ssa_to_alias;
-
-        /* Actual SSA-to-register for RA */
-        struct hash_table_u64 *ssa_to_register;
-
-        /* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */
-        struct hash_table_u64 *hash_to_temp;
-        int temp_count;
-        int max_hash;
-
-        /* Just the count of the max register used. Higher count => higher
-         * register pressure */
-        int work_registers;
-
-        /* Used for cont/last hinting. Increase when a tex op is added.
-         * Decrease when a tex op is removed. */
-        int texture_op_count;
-
-        /* Mapping of texture register -> SSA index for unaliasing */
-        int texture_index[2];
-
-        /* If any path hits a discard instruction */
-        bool can_discard;
-
-        /* The number of uniforms allowable for the fast path */
-        int uniform_cutoff;
-
-        /* Count of instructions emitted from NIR overall, across all blocks */
-        int instruction_count;
-
-        /* Alpha ref value passed in */
-        float alpha_ref;
-
-        /* The index corresponding to the fragment output */
-        unsigned fragment_output;
-
-        /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */
-        unsigned sysvals[MAX_SYSVAL_COUNT];
-        unsigned sysval_count;
-        struct hash_table_u64 *sysval_to_id;
-} compiler_context;
-
-/* Append instruction to end of current block */
-
-static midgard_instruction *
-mir_upload_ins(struct midgard_instruction ins)
-{
-        midgard_instruction *heap = malloc(sizeof(ins));
-        memcpy(heap, &ins, sizeof(ins));
-        return heap;
-}
-
-static void
-emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins)
-{
-        list_addtail(&(mir_upload_ins(ins))->link, &ctx->current_block->instructions);
-}
-
-static void
-mir_insert_instruction_before(struct midgard_instruction *tag, struct midgard_instruction ins)
-{
-        list_addtail(&(mir_upload_ins(ins))->link, &tag->link);
-}
-
-static void
-mir_remove_instruction(struct midgard_instruction *ins)
-{
-        list_del(&ins->link);
-}
-
-static midgard_instruction*
-mir_prev_op(struct midgard_instruction *ins)
-{
-        return list_last_entry(&(ins->link), midgard_instruction, link);
-}
-
-static midgard_instruction*
-mir_next_op(struct midgard_instruction *ins)
-{
-        return list_first_entry(&(ins->link), midgard_instruction, link);
-}
-
-#define mir_foreach_block(ctx, v) list_for_each_entry(struct midgard_block, v, &ctx->blocks, link) 
-#define mir_foreach_block_from(ctx, from, v) list_for_each_entry_from(struct midgard_block, v, from, &ctx->blocks, link)
-
-#define mir_foreach_instr(ctx, v) list_for_each_entry(struct midgard_instruction, v, &ctx->current_block->instructions, link) 
-#define mir_foreach_instr_safe(ctx, v) list_for_each_entry_safe(struct midgard_instruction, v, &ctx->current_block->instructions, link) 
-#define mir_foreach_instr_in_block(block, v) list_for_each_entry(struct midgard_instruction, v, &block->instructions, link) 
-#define mir_foreach_instr_in_block_safe(block, v) list_for_each_entry_safe(struct midgard_instruction, v, &block->instructions, link) 
-#define mir_foreach_instr_in_block_safe_rev(block, v) list_for_each_entry_safe_rev(struct midgard_instruction, v, &block->instructions, link) 
-#define mir_foreach_instr_in_block_from(block, v, from) list_for_each_entry_from(struct midgard_instruction, v, from, &block->instructions, link) 
-#define mir_foreach_instr_in_block_from_rev(block, v, from) list_for_each_entry_from_rev(struct midgard_instruction, v, from, &block->instructions, link) 
-
-
-static midgard_instruction *
-mir_last_in_block(struct midgard_block *block)
-{
-        return list_last_entry(&block->instructions, struct midgard_instruction, link);
-}
-
-static midgard_block *
-mir_get_block(compiler_context *ctx, int idx)
-{
-        struct list_head *lst = &ctx->blocks;
-
-        while ((idx--) + 1)
-                lst = lst->next;
-
-        return (struct midgard_block *) lst;
-}
-
-/* Pretty printer for internal Midgard IR */
-
-static void
-print_mir_source(int source)
-{
-        if (source >= SSA_FIXED_MINIMUM) {
-                /* Specific register */
-                int reg = SSA_REG_FROM_FIXED(source);
-
-                /* TODO: Moving threshold */
-                if (reg > 16 && reg < 24)
-                        printf("u%d", 23 - reg);
-                else
-                        printf("r%d", reg);
-        } else {
-                printf("%d", source);
-        }
-}
-
-static void
-print_mir_instruction(midgard_instruction *ins)
-{
-        printf("\t");
-
-        switch (ins->type) {
-        case TAG_ALU_4: {
-                midgard_alu_op op = ins->alu.op;
-                const char *name = alu_opcode_props[op].name;
-
-                if (ins->unit)
-                        printf("%d.", ins->unit);
-
-                printf("%s", name ? name : "??");
-                break;
-        }
-
-        case TAG_LOAD_STORE_4: {
-                midgard_load_store_op op = ins->load_store.op;
-                const char *name = load_store_opcode_names[op];
-
-                assert(name);
-                printf("%s", name);
-                break;
-        }
-
-        case TAG_TEXTURE_4: {
-                printf("texture");
-                break;
-        }
-
-        default:
-                assert(0);
-        }
-
-        ssa_args *args = &ins->ssa_args;
-
-        printf(" %d, ", args->dest);
-
-        print_mir_source(args->src0);
-        printf(", ");
-
-        if (args->inline_constant)
-                printf("#%d", ins->inline_constant);
-        else
-                print_mir_source(args->src1);
-
-        if (ins->has_constants)
-                printf(" <%f, %f, %f, %f>", ins->constants[0], ins->constants[1], ins->constants[2], ins->constants[3]);
-
-        printf("\n");
-}
-
-static void
-print_mir_block(midgard_block *block)
-{
-        printf("{\n");
-
-        mir_foreach_instr_in_block(block, ins) {
-                print_mir_instruction(ins);
-        }
-
-        printf("}\n");
-}
-
 static void
 attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constants, int name)
 {
@@ -975,26 +589,6 @@ effective_writemask(midgard_vector_alu *alu)
         return squeeze_writemask(alu->mask);
 }
 
-static unsigned
-find_or_allocate_temp(compiler_context *ctx, unsigned hash)
-{
-        if ((hash < 0) || (hash >= SSA_FIXED_MINIMUM))
-                return hash;
-
-        unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
-
-        if (temp)
-                return temp - 1;
-
-        /* If no temp is find, allocate one */
-        temp = ctx->temp_count++;
-        ctx->max_hash = MAX2(ctx->max_hash, hash);
-
-        _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
-
-        return temp;
-}
-
 static unsigned
 nir_src_index(compiler_context *ctx, nir_src *src)
 {
@@ -1983,338 +1577,6 @@ emit_instr(compiler_context *ctx, struct nir_instr *instr)
         }
 }
 
-/* Determine the actual hardware from the index based on the RA results or special values */
-
-static int
-dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg)
-{
-        if (reg >= SSA_FIXED_MINIMUM)
-                return SSA_REG_FROM_FIXED(reg);
-
-        if (reg >= 0) {
-                assert(reg < maxreg);
-                assert(g);
-                int r = ra_get_node_reg(g, reg);
-                ctx->work_registers = MAX2(ctx->work_registers, r);
-                return r;
-        }
-
-        switch (reg) {
-        /* fmov style unused */
-        case SSA_UNUSED_0:
-                return REGISTER_UNUSED;
-
-        /* lut style unused */
-        case SSA_UNUSED_1:
-                return REGISTER_UNUSED;
-
-        default:
-                DBG("Unknown SSA register alias %d\n", reg);
-                assert(0);
-                return 31;
-        }
-}
-
-static unsigned int
-midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
-{
-        /* Choose the first available register to minimise reported register pressure */
-
-        for (int i = 0; i < 16; ++i) {
-                if (BITSET_TEST(regs, i)) {
-                        return i;
-                }
-        }
-
-        assert(0);
-        return 0;
-}
-
-static bool
-midgard_is_live_in_instr(midgard_instruction *ins, int src)
-{
-        if (ins->ssa_args.src0 == src) return true;
-        if (ins->ssa_args.src1 == src) return true;
-
-        return false;
-}
-
-/* Determine if a variable is live in the successors of a block */
-static bool
-is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src)
-{
-        for (unsigned i = 0; i < bl->nr_successors; ++i) {
-                midgard_block *succ = bl->successors[i];
-
-                /* If we already visited, the value we're seeking
-                 * isn't down this path (or we would have short
-                 * circuited */
-
-                if (succ->visited) continue;
-
-                /* Otherwise (it's visited *now*), check the block */
-
-                succ->visited = true;
-
-                mir_foreach_instr_in_block(succ, ins) {
-                        if (midgard_is_live_in_instr(ins, src))
-                                return true;
-                }
-
-                /* ...and also, check *its* successors */
-                if (is_live_after_successors(ctx, succ, src))
-                        return true;
-
-        }
-
-        /* Welp. We're really not live. */
-
-        return false;
-}
-
-static bool
-is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src)
-{
-        /* Check the rest of the block for liveness */
-
-        mir_foreach_instr_in_block_from(block, ins, mir_next_op(start)) {
-                if (midgard_is_live_in_instr(ins, src))
-                        return true;
-        }
-
-        /* Check the rest of the blocks for liveness recursively */
-
-        bool succ = is_live_after_successors(ctx, block, src);
-
-        mir_foreach_block(ctx, block) {
-                block->visited = false;
-        }
-
-        return succ;
-}
-
-/* Once registers have been decided via register allocation
- * (allocate_registers), we need to rewrite the MIR to use registers instead of
- * SSA */
-
-static void
-install_registers(compiler_context *ctx, struct ra_graph *g)
-{
-        mir_foreach_block(ctx, block) {
-                mir_foreach_instr_in_block(block, ins) {
-                        if (ins->compact_branch) continue;
-
-                        ssa_args args = ins->ssa_args;
-
-                        switch (ins->type) {
-                        case TAG_ALU_4:
-                                ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count);
-
-                                ins->registers.src2_imm = args.inline_constant;
-
-                                if (args.inline_constant) {
-                                        /* Encode inline 16-bit constant as a vector by default */
-
-                                        ins->registers.src2_reg = ins->inline_constant >> 11;
-
-                                        int lower_11 = ins->inline_constant & ((1 << 12) - 1);
-
-                                        uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3);
-                                        ins->alu.src2 = imm << 2;
-                                } else {
-                                        ins->registers.src2_reg = dealias_register(ctx, g, args.src1, ctx->temp_count);
-                                }
-
-                                ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count);
-
-                                break;
-
-                        case TAG_LOAD_STORE_4: {
-                                if (OP_IS_STORE_VARY(ins->load_store.op)) {
-                                        /* TODO: use ssa_args for st_vary */
-                                        ins->load_store.reg = 0;
-                                } else {
-                                        bool has_dest = args.dest >= 0;
-                                        int ssa_arg = has_dest ? args.dest : args.src0;
-
-                                        ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count);
-                                }
-
-                                break;
-                        }
-
-                        default:
-                                break;
-                        }
-                }
-        }
-
-}
-
-/* This routine performs the actual register allocation. It should be succeeded
- * by install_registers */
-
-static struct ra_graph *
-allocate_registers(compiler_context *ctx)
-{
-        /* First, initialize the RA */
-        struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true);
-
-        /* Create a primary (general purpose) class, as well as special purpose
-         * pipeline register classes */
-
-        int primary_class = ra_alloc_reg_class(regs);
-        int varying_class  = ra_alloc_reg_class(regs);
-
-        /* Add the full set of work registers */
-        int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
-        for (int i = 0; i < work_count; ++i)
-                ra_class_add_reg(regs, primary_class, i);
-
-        /* Add special registers */
-        ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE);
-        ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1);
-
-        /* We're done setting up */
-        ra_set_finalize(regs, NULL);
-
-        /* Transform the MIR into squeezed index form */
-        mir_foreach_block(ctx, block) {
-                mir_foreach_instr_in_block(block, ins) {
-                        if (ins->compact_branch) continue;
-
-                        ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0);
-                        ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1);
-                        ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest);
-                }
-               if (midgard_debug & MIDGARD_DBG_SHADERS)
-                       print_mir_block(block);
-        }
-
-        /* No register allocation to do with no SSA */
-
-        if (!ctx->temp_count)
-                return NULL;
-
-        /* Let's actually do register allocation */
-        int nodes = ctx->temp_count;
-        struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);
-
-        /* Set everything to the work register class, unless it has somewhere
-         * special to go */
-
-        mir_foreach_block(ctx, block) {
-                mir_foreach_instr_in_block(block, ins) {
-                        if (ins->compact_branch) continue;
-
-                        if (ins->ssa_args.dest < 0) continue;
-
-                        if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
-
-                        int class = primary_class;
-
-                        ra_set_node_class(g, ins->ssa_args.dest, class);
-                }
-        }
-
-        for (int index = 0; index <= ctx->max_hash; ++index) {
-                unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_register, index + 1);
-
-                if (temp) {
-                        unsigned reg = temp - 1;
-                        int t = find_or_allocate_temp(ctx, index);
-                        ra_set_node_reg(g, t, reg);
-                }
-        }
-
-        /* Determine liveness */
-
-        int *live_start = malloc(nodes * sizeof(int));
-        int *live_end = malloc(nodes * sizeof(int));
-
-        /* Initialize as non-existent */
-
-        for (int i = 0; i < nodes; ++i) {
-                live_start[i] = live_end[i] = -1;
-        }
-
-        int d = 0;
-
-        mir_foreach_block(ctx, block) {
-                mir_foreach_instr_in_block(block, ins) {
-                        if (ins->compact_branch) continue;
-
-                        /* Dest is < 0 for st_vary instructions, which break
-                         * the usual SSA conventions. Liveness analysis doesn't
-                         * make sense on these instructions, so skip them to
-                         * avoid memory corruption */
-
-                        if (ins->ssa_args.dest < 0) continue;
-
-                        if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
-                                /* If this destination is not yet live, it is now since we just wrote it */
-
-                                int dest = ins->ssa_args.dest;
-
-                                if (live_start[dest] == -1)
-                                        live_start[dest] = d;
-                        }
-
-                        /* Since we just used a source, the source might be
-                         * dead now. Scan the rest of the block for
-                         * invocations, and if there are none, the source dies
-                         * */
-
-                        int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
-
-                        for (int src = 0; src < 2; ++src) {
-                                int s = sources[src];
-
-                                if (s < 0) continue;
-
-                                if (s >= SSA_FIXED_MINIMUM) continue;
-
-                                if (!is_live_after(ctx, block, ins, s)) {
-                                        live_end[s] = d;
-                                }
-                        }
-
-                        ++d;
-                }
-        }
-
-        /* If a node still hasn't been killed, kill it now */
-
-        for (int i = 0; i < nodes; ++i) {
-                /* live_start == -1 most likely indicates a pinned output */
-
-                if (live_end[i] == -1)
-                        live_end[i] = d;
-        }
-
-        /* Setup interference between nodes that are live at the same time */
-
-        for (int i = 0; i < nodes; ++i) {
-                for (int j = i + 1; j < nodes; ++j) {
-                        if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i]))
-                                ra_add_node_interference(g, i, j);
-                }
-        }
-
-        ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL);
-
-        if (!ra_allocate(g)) {
-                DBG("Error allocating registers\n");
-                assert(0);
-        }
-
-        /* Cleanup */
-        free(live_start);
-        free(live_end);
-
-        return g;
-}
-
 /* Midgard IR only knows vector ALU types, but we sometimes need to actually
  * use scalar ALU instructions, for functional or performance reasons. To do
  * this, we just demote vector ALU payloads to scalar. */
@@ -3247,7 +2509,7 @@ midgard_opt_dead_code_eliminate(compiler_context *ctx, midgard_block *block)
 
                 if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
                 if (midgard_is_pinned(ctx, ins->ssa_args.dest)) continue;
-                if (is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue;
+                if (mir_is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue;
 
                 mir_remove_instruction(ins);
                 progress = true;
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_liveness.c b/src/gallium/drivers/panfrost/midgard/midgard_liveness.c
new file mode 100644 (file)
index 0000000..ab36583
--- /dev/null
@@ -0,0 +1,92 @@
+/*
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+/* mir_is_live_after performs liveness analysis on the MIR, used primarily
+ * as part of register allocation. TODO: Algorithmic improvements for
+ * compiler performance (this is the worst algorithm possible -- see
+ * backlog with Connor on IRC) */
+
+#include "compiler.h"
+
+static bool
+midgard_is_live_in_instr(midgard_instruction *ins, int src)
+{
+        /* The value is read here iff one of the two SSA source slots names it */
+        bool in_src0 = (ins->ssa_args.src0 == src);
+        bool in_src1 = (ins->ssa_args.src1 == src);
+        return in_src0 || in_src1;
+}
+
+/* Depth-first search of the blocks reachable from bl for a read of src */
+static bool
+is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src)
+{
+        unsigned idx = 0;
+
+        while (idx < bl->nr_successors) {
+                midgard_block *next = bl->successors[idx++];
+
+                /* A marked block has already had its own instructions
+                 * checked (a read there would have ended the search) and
+                 * its successors belong to the call that marked it */
+                if (next->visited)
+                        continue;
+
+                /* Mark before descending so cycles terminate */
+                next->visited = true;
+
+                /* Is src read anywhere inside this block? */
+                mir_foreach_instr_in_block(next, use) {
+                        if (midgard_is_live_in_instr(use, src))
+                                return true;
+                }
+
+                /* If not, keep looking further down the graph */
+                if (is_live_after_successors(ctx, next, src))
+                        return true;
+        }
+
+        /* Every reachable block was searched without finding a read */
+        return false;
+}
+
+bool
+mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src)
+{
+        /* First look for a read of src later in this block, starting
+         * with the instruction that follows start */
+        mir_foreach_instr_in_block_from(block, ins, mir_next_op(start)) {
+                if (midgard_is_live_in_instr(ins, src))
+                        return true;
+        }
+
+        /* Then search the blocks reachable from here. That search leaves
+         * visited flags set, so reset every block's flag before returning */
+        bool succ = is_live_after_successors(ctx, block, src);
+
+        mir_foreach_block(ctx, b) {
+                b->visited = false;
+        }
+
+        return succ;
+}
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ops.c b/src/gallium/drivers/panfrost/midgard/midgard_ops.c
new file mode 100644 (file)
index 0000000..cffa3c2
--- /dev/null
@@ -0,0 +1,188 @@
+/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "midgard.h"
+
+/* Include the definitions of the macros and such */
+
+#define MIDGARD_OPS_TABLE
+#include "helpers.h"
+#undef MIDGARD_OPS_TABLE
+
+/* Table mapping ALU opcodes to their name and properties, shared by the
+ * disassembler and the compiler. It is defined once in a .c file to avoid
+ * duplication in the binary; opcodes without an entry keep a NULL name */
+
+struct mir_op_props alu_opcode_props[256] = {
+        [midgard_alu_op_fadd]           = {"fadd", UNITS_ADD | OP_COMMUTES},
+        [midgard_alu_op_fmul]           = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES},
+        [midgard_alu_op_fmin]           = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
+        [midgard_alu_op_fmax]           = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES},
+        [midgard_alu_op_imin]           = {"imin", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_imax]           = {"imax", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_umin]           = {"umin", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_umax]           = {"umax", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_fmov]           = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24},
+        [midgard_alu_op_fround]          = {"fround", UNITS_ADD},
+        [midgard_alu_op_froundeven]      = {"froundeven", UNITS_ADD},
+        [midgard_alu_op_ftrunc]          = {"ftrunc", UNITS_ADD},
+        [midgard_alu_op_ffloor]                 = {"ffloor", UNITS_ADD},
+        [midgard_alu_op_fceil]          = {"fceil", UNITS_ADD},
+        [midgard_alu_op_ffma]           = {"ffma", UNIT_VLUT},
+
+        /* Though they output a scalar, they need to run on a vector unit
+         * since they process vectors */
+        [midgard_alu_op_fdot3]          = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
+        [midgard_alu_op_fdot3r]                 = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES},
+        [midgard_alu_op_fdot4]          = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES},
+
+        /* Incredibly, iadd can run on vmul, etc */
+        [midgard_alu_op_iadd]           = {"iadd", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_iabs]           = {"iabs", UNITS_ADD},
+        [midgard_alu_op_isub]           = {"isub", UNITS_MOST},
+        [midgard_alu_op_imul]           = {"imul", UNITS_MUL | OP_COMMUTES},
+        [midgard_alu_op_imov]           = {"imov", UNITS_MOST | QUIRK_FLIPPED_R24},
+
+        /* For vector comparisons, use ball etc */
+        [midgard_alu_op_feq]            = {"feq", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_fne]            = {"fne", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_fle]            = {"fle", UNITS_MOST},
+        [midgard_alu_op_flt]            = {"flt", UNITS_MOST},
+        [midgard_alu_op_ieq]            = {"ieq", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_ine]            = {"ine", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_ilt]            = {"ilt", UNITS_MOST},
+        [midgard_alu_op_ile]            = {"ile", UNITS_MOST},
+        [midgard_alu_op_ult]            = {"ult", UNITS_MOST},
+        [midgard_alu_op_ule]            = {"ule", UNITS_MOST},
+
+        [midgard_alu_op_icsel]          = {"icsel", UNITS_ADD},
+        [midgard_alu_op_icsel_v]         = {"icsel_v", UNITS_ADD},
+        [midgard_alu_op_fcsel_v]        = {"fcsel_v", UNITS_ADD},
+        [midgard_alu_op_fcsel]          = {"fcsel", UNITS_ADD | UNIT_SMUL},
+
+        [midgard_alu_op_frcp]           = {"frcp", UNIT_VLUT},
+        [midgard_alu_op_frsqrt]                 = {"frsqrt", UNIT_VLUT},
+        [midgard_alu_op_fsqrt]          = {"fsqrt", UNIT_VLUT},
+        [midgard_alu_op_fpow_pt1]       = {"fpow_pt1", UNIT_VLUT},
+        [midgard_alu_op_fexp2]          = {"fexp2", UNIT_VLUT},
+        [midgard_alu_op_flog2]          = {"flog2", UNIT_VLUT},
+
+        [midgard_alu_op_f2i]            = {"f2i", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_f2u]            = {"f2u", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_f2u8]           = {"f2u8", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_i2f]            = {"i2f", UNITS_ADD | OP_TYPE_CONVERT},
+        [midgard_alu_op_u2f]            = {"u2f", UNITS_ADD | OP_TYPE_CONVERT},
+
+        [midgard_alu_op_fsin]           = {"fsin", UNIT_VLUT},
+        [midgard_alu_op_fcos]           = {"fcos", UNIT_VLUT},
+
+        /* XXX: Test case where it's right on smul but not sadd */
+        [midgard_alu_op_iand]           = {"iand", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_iandnot]         = {"iandnot", UNITS_MOST},
+
+        [midgard_alu_op_ior]            = {"ior", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_iornot]                 = {"iornot", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_inor]           = {"inor", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_ixor]           = {"ixor", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_inxor]          = {"inxor", UNITS_MOST | OP_COMMUTES},
+        [midgard_alu_op_iclz]           = {"iclz", UNITS_ADD},
+        [midgard_alu_op_ibitcount8]     = {"ibitcount8", UNITS_ADD},
+        [midgard_alu_op_inand]          = {"inand", UNITS_MOST},
+        [midgard_alu_op_ishl]           = {"ishl", UNITS_ADD},
+        [midgard_alu_op_iasr]           = {"iasr", UNITS_ADD},
+        [midgard_alu_op_ilsr]           = {"ilsr", UNITS_ADD},
+
+        [midgard_alu_op_fball_eq]       = {"fball_eq", UNITS_VECTOR | OP_COMMUTES},
+        [midgard_alu_op_fbany_neq]      = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES},
+        [midgard_alu_op_iball_eq]       = {"iball_eq", UNITS_VECTOR | OP_COMMUTES},
+        [midgard_alu_op_iball_neq]      = {"iball_neq", UNITS_VECTOR | OP_COMMUTES},
+        [midgard_alu_op_ibany_eq]       = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES},
+        [midgard_alu_op_ibany_neq]      = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES},
+
+        /* These instructions are not yet emitted by the compiler, so
+         * don't speculate about units yet */
+        [midgard_alu_op_ishladd]        = {"ishladd", 0},
+
+        [midgard_alu_op_uball_lt]       = {"uball_lt", 0},
+        [midgard_alu_op_uball_lte]      = {"uball_lte", 0},
+        [midgard_alu_op_iball_lt]       = {"iball_lt", 0},
+        [midgard_alu_op_iball_lte]      = {"iball_lte", 0},
+        [midgard_alu_op_ubany_lt]       = {"ubany_lt", 0},
+        [midgard_alu_op_ubany_lte]      = {"ubany_lte", 0},
+        [midgard_alu_op_ibany_lt]       = {"ibany_lt", 0},
+        [midgard_alu_op_ibany_lte]      = {"ibany_lte", 0},
+
+        [midgard_alu_op_freduce]        = {"freduce", 0},
+        [midgard_alu_op_bball_eq]       = {"bball_eq", 0 | OP_COMMUTES},
+        [midgard_alu_op_bbany_neq]      = {"bbany_neq", 0 | OP_COMMUTES},
+        [midgard_alu_op_fatan2_pt1]     = {"fatan2_pt1", 0},
+        [midgard_alu_op_fatan_pt2]      = {"fatan_pt2", 0},
+};
+
+const char *load_store_opcode_names[256] = { /* indexed by load/store opcode; NULL if unnamed */
+        [midgard_op_st_cubemap_coords] = "st_cubemap_coords",
+        [midgard_op_ld_global_id] = "ld_global_id",
+
+        [midgard_op_atomic_add] = "atomic_add",
+        [midgard_op_atomic_and] = "atomic_and",
+        [midgard_op_atomic_or] = "atomic_or",
+        [midgard_op_atomic_xor] = "atomic_xor",
+        [midgard_op_atomic_imin] = "atomic_imin",
+        [midgard_op_atomic_umin] = "atomic_umin",
+        [midgard_op_atomic_imax] = "atomic_imax",
+        [midgard_op_atomic_umax] = "atomic_umax",
+        [midgard_op_atomic_xchg] = "atomic_xchg",
+
+        [midgard_op_ld_char] = "ld_char",
+        [midgard_op_ld_char2] = "ld_char2",
+        [midgard_op_ld_short] = "ld_short",
+        [midgard_op_ld_char4] = "ld_char4",
+        [midgard_op_ld_short4] = "ld_short4",
+        [midgard_op_ld_int4] = "ld_int4",
+
+        [midgard_op_ld_attr_32] = "ld_attr_32",
+        [midgard_op_ld_attr_16] = "ld_attr_16",
+        [midgard_op_ld_attr_32i] = "ld_attr_32i",
+
+        [midgard_op_ld_vary_32] = "ld_vary_32",
+        [midgard_op_ld_vary_16] = "ld_vary_16",
+        [midgard_op_ld_vary_32i] = "ld_vary_32i",
+
+        [midgard_op_ld_color_buffer_16] = "ld_color_buffer_16",
+
+        [midgard_op_ld_uniform_16] = "ld_uniform_16",
+        [midgard_op_ld_uniform_32] = "ld_uniform_32",
+        [midgard_op_ld_color_buffer_8] = "ld_color_buffer_8",
+
+        [midgard_op_st_char] = "st_char",
+        [midgard_op_st_char2] = "st_char2",
+        [midgard_op_st_char4] = "st_char4",
+        [midgard_op_st_short4] = "st_short4",
+        [midgard_op_st_int4] = "st_int4",
+
+        [midgard_op_st_vary_32] = "st_vary_32",
+        [midgard_op_st_vary_16] = "st_vary_16",
+        [midgard_op_st_vary_32i] = "st_vary_32i",
+
+        [midgard_op_st_image_f] = "st_image_f",
+        [midgard_op_st_image_ui] = "st_image_ui",
+        [midgard_op_st_image_i] = "st_image_i",
+};
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ops.h b/src/gallium/drivers/panfrost/midgard/midgard_ops.h
new file mode 100644 (file)
index 0000000..8b36352
--- /dev/null
@@ -0,0 +1,53 @@
+/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io)
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#ifndef MIDGARD_OPS_H
+#define MIDGARD_OPS_H
+
+#include "helpers.h"
+
+/* Opcode tables defined elsewhere; declared here so the inline helpers below
+ * (and other users of this header) can index them by raw opcode value */
+
+extern struct mir_op_props alu_opcode_props[256];
+extern const char *load_store_opcode_names[256];
+
+/* Is this opcode that of an integer (regardless of signedness)? Instruction
+ * names authoritatively determine types: a name starting with 'i' or 'u' is
+ * treated as an integer op. Opcodes without a name in alu_opcode_props report
+ * false. op must be a valid index into alu_opcode_props; it is not
+ * range-checked here */
+
+static inline bool
+midgard_is_integer_op(int op)
+{
+        const char *name = alu_opcode_props[op].name;
+
+        if (!name)
+                return false;
+
+        return (name[0] == 'i') || (name[0] == 'u');
+}
+
+/* Does this opcode *write* an integer? Same as is_integer_op, unless it's a
+ * conversion between int<->float (props has OP_TYPE_CONVERT set) in which
+ * case we do the opposite */
+
+static inline bool
+midgard_is_integer_out_op(int op)
+{
+        bool is_int = midgard_is_integer_op(op);
+        bool is_conversion = alu_opcode_props[op].props & OP_TYPE_CONVERT;
+
+        return is_int ^ is_conversion;
+}
+
+#endif
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_print.c b/src/gallium/drivers/panfrost/midgard/midgard_print.c
new file mode 100644 (file)
index 0000000..348650e
--- /dev/null
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler.h"
+#include "helpers.h"
+#include "midgard_ops.h"
+
+/* Pretty printer for Midgard IR, for use debugging compiler-internal
+ * passes like register allocation. The output superficially resembles
+ * Midgard assembly, with the exception that unit information and such is
+ * (normally) omitted, and generic indices are usually used instead of
+ * registers */
+
+/* Prints one source operand. Indices below SSA_FIXED_MINIMUM are printed as
+ * plain numbers; anything else is decoded with SSA_REG_FROM_FIXED and printed
+ * as a register name */
+
+static void
+mir_print_source(int source)
+{
+        /* Ordinary index: print verbatim */
+        if (source < SSA_FIXED_MINIMUM) {
+                printf("%d", source);
+                return;
+        }
+
+        /* Specific register */
+        int hw_reg = SSA_REG_FROM_FIXED(source);
+
+        /* TODO: Moving threshold */
+        /* Registers 17..23 are shown as u6..u0; everything else as rN.
+         * NOTE(review): r16 falls on the "r" side of this test -- confirm
+         * that is intended */
+        if (hw_reg <= 16 || hw_reg >= 24)
+                printf("r%d", hw_reg);
+        else
+                printf("u%d", 23 - hw_reg);
+}
+
+/* Prints one MIR instruction as a tab-indented line: the mnemonic, the
+ * destination index, src0, then either the inline constant (as #N) or src1,
+ * followed by the four embedded constants when ins->has_constants is set.
+ *
+ * ALU mnemonics are prefixed with "<unit>." when ins->unit is nonzero.
+ * Texture instructions print as "texture". Opcodes with no name in the
+ * corresponding table print as "??". Any other ins->type trips assert(0). */
+
+void
+mir_print_instruction(midgard_instruction *ins)
+{
+        printf("\t");
+
+        switch (ins->type) {
+        case TAG_ALU_4: {
+                midgard_alu_op op = ins->alu.op;
+                const char *name = alu_opcode_props[op].name;
+
+                if (ins->unit)
+                        printf("%d.", ins->unit);
+
+                printf("%s", name ? name : "??");
+                break;
+        }
+
+        case TAG_LOAD_STORE_4: {
+                midgard_load_store_op op = ins->load_store.op;
+                const char *name = load_store_opcode_names[op];
+
+                /* Fall back exactly like the ALU case instead of asserting:
+                 * this is a debug printer, and with NDEBUG an unnamed opcode
+                 * would otherwise hand NULL to printf("%s") */
+                printf("%s", name ? name : "??");
+                break;
+        }
+
+        case TAG_TEXTURE_4: {
+                printf("texture");
+                break;
+        }
+
+        default:
+                assert(0);
+        }
+
+        ssa_args *args = &ins->ssa_args;
+
+        printf(" %d, ", args->dest);
+
+        mir_print_source(args->src0);
+        printf(", ");
+
+        /* The second operand is either an inline constant or a regular source */
+        if (args->inline_constant)
+                printf("#%d", ins->inline_constant);
+        else
+                mir_print_source(args->src1);
+
+        if (ins->has_constants) {
+                printf(" <%f, %f, %f, %f>",
+                       ins->constants[0], ins->constants[1],
+                       ins->constants[2], ins->constants[3]);
+        }
+
+        printf("\n");
+}
+
+/* Dumps MIR for a block or entire shader respectively */
+
+/* Prints each instruction of the block via mir_print_instruction, wrapped
+ * between a "{" line and a "}" line */
+void
+mir_print_block(midgard_block *block)
+{
+        printf("{\n");
+
+        mir_foreach_instr_in_block(block, instr)
+                mir_print_instruction(instr);
+
+        printf("}\n");
+}
+
+/* Prints every block of the shader held by ctx, one mir_print_block listing
+ * per block */
+void
+mir_print_shader(compiler_context *ctx)
+{
+        mir_foreach_block(ctx, blk)
+                mir_print_block(blk);
+}
diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ra.c b/src/gallium/drivers/panfrost/midgard/midgard_ra.c
new file mode 100644 (file)
index 0000000..594cafe
--- /dev/null
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "compiler.h"
+#include "util/register_allocate.h"
+
+/* When we're 'squeezing down' the values in the IR, we maintain a hash
+ * as such: ctx->hash_to_temp maps (hash + 1) to (temp + 1). Both sides are
+ * biased by one so that a zero lookup result can mean "no entry yet".
+ *
+ * Returns the squeezed temp index for hash, allocating the next free one
+ * (ctx->temp_count) on first sight and tracking the largest hash seen in
+ * ctx->max_hash. Values at or above SSA_FIXED_MINIMUM are returned
+ * unchanged. */
+
+static unsigned
+find_or_allocate_temp(compiler_context *ctx, unsigned hash)
+{
+        /* hash is unsigned, so there is no "negative" case to test for:
+         * negative indices passed by callers arrive here as very large
+         * values (presumably above SSA_FIXED_MINIMUM -- confirm against its
+         * definition) and are passed through by this same check */
+        if (hash >= SSA_FIXED_MINIMUM)
+                return hash;
+
+        unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1);
+
+        if (temp)
+                return temp - 1;
+
+        /* If no temp is found, allocate one */
+        temp = ctx->temp_count++;
+        ctx->max_hash = MAX2(ctx->max_hash, hash);
+
+        _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1));
+
+        return temp;
+}
+
+/* Register selection callback handed to the allocator; trivial policy for
+ * now. Scans bits 0..15 of regs and returns the index of the first one that
+ * is set. g and data are unused. */
+
+static unsigned int
+midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data)
+{
+        /* Choose the first available register to minimise reported register pressure */
+        int candidate = 0;
+
+        while (candidate < 16 && !BITSET_TEST(regs, candidate))
+                ++candidate;
+
+        if (candidate < 16)
+                return candidate;
+
+        /* None of the 16 candidates was offered */
+        assert(0);
+        return 0;
+}
+
+/* Determine the actual hardware register from the index, based on the RA
+ * results or special values:
+ *
+ *  - indices at or above SSA_FIXED_MINIMUM decode directly to a register;
+ *  - SSA_UNUSED_0 / SSA_UNUSED_1 map to REGISTER_UNUSED;
+ *  - any other negative index is a bug;
+ *  - everything else is a temp, looked up in the solved graph g (which must
+ *    be non-NULL) and must be below maxreg.
+ *
+ * Looking up a temp also raises ctx->work_registers to the highest register
+ * handed out so far. */
+
+static int
+dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg)
+{
+        /* Fixed register: no allocation involved */
+        if (reg >= SSA_FIXED_MINIMUM)
+                return SSA_REG_FROM_FIXED(reg);
+
+        /* Special (negative) aliases */
+        if (reg < 0) {
+                if (reg == SSA_UNUSED_0 || reg == SSA_UNUSED_1)
+                        return REGISTER_UNUSED;
+
+                unreachable("Unknown SSA register alias");
+        }
+
+        /* Ordinary temp: ask the allocator */
+        assert(reg < maxreg);
+        assert(g);
+
+        int hw = ra_get_node_reg(g, reg);
+        ctx->work_registers = MAX2(ctx->work_registers, hw);
+
+        return hw;
+}
+
+/* This routine performs the actual register allocation. It should be succeeded
+ * by install_registers.
+ *
+ * The MIR is first rewritten in place so that src0/src1/dest of every
+ * non-branch instruction use squeezed temp indices (find_or_allocate_temp).
+ * One graph node is then created per temp, live ranges are computed with a
+ * single linear walk over the shader, overlapping ranges are marked as
+ * interfering, and the allocator is run.
+ *
+ * Returns the solved interference graph, or NULL if the shader uses no temps
+ * at all, in which case no allocator state is created. */
+
+struct ra_graph *
+allocate_registers(compiler_context *ctx)
+{
+        /* Transform the MIR into squeezed index form. This happens before
+         * any allocator state exists so that the early return below has
+         * nothing to clean up (the register set used to be created first and
+         * was leaked on that path) */
+        mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block(block, ins) {
+                        if (ins->compact_branch) continue;
+
+                        ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0);
+                        ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1);
+                        ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest);
+                }
+        }
+
+        /* No register allocation to do with no SSA */
+
+        if (!ctx->temp_count)
+                return NULL;
+
+        /* Now initialize the RA */
+        struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true);
+
+        /* Create a primary (general purpose) class, as well as special purpose
+         * pipeline register classes */
+
+        int primary_class = ra_alloc_reg_class(regs);
+        int varying_class  = ra_alloc_reg_class(regs);
+
+        /* Add the full set of work registers. Each unit of uniform_cutoff
+         * above 8 removes one of the 16 registers from the class */
+        int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0);
+        for (int i = 0; i < work_count; ++i)
+                ra_class_add_reg(regs, primary_class, i);
+
+        /* Add special registers */
+        ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE);
+        ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1);
+
+        /* We're done setting up */
+        ra_set_finalize(regs, NULL);
+
+        /* Let's actually do register allocation */
+        int nodes = ctx->temp_count;
+        struct ra_graph *g = ra_alloc_interference_graph(regs, nodes);
+
+        /* Set everything to the work register class, unless it has somewhere
+         * special to go */
+
+        mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block(block, ins) {
+                        if (ins->compact_branch) continue;
+
+                        if (ins->ssa_args.dest < 0) continue;
+
+                        if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue;
+
+                        int class = primary_class;
+
+                        ra_set_node_class(g, ins->ssa_args.dest, class);
+                }
+        }
+
+        /* Pin every original index that has an entry in ctx->ssa_to_register
+         * to that register. Entries are stored biased by one (key index + 1,
+         * value reg + 1) so a zero lookup result means "no entry".
+         *
+         * NOTE(review): find_or_allocate_temp would hand out a fresh temp
+         * (>= nodes) for an index that no instruction referenced; presumably
+         * every pinned index was already squeezed above -- confirm */
+
+        for (int index = 0; index <= ctx->max_hash; ++index) {
+                unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_register, index + 1);
+
+                if (temp) {
+                        unsigned reg = temp - 1;
+                        int t = find_or_allocate_temp(ctx, index);
+                        ra_set_node_reg(g, t, reg);
+                }
+        }
+
+        /* Determine liveness */
+
+        int *live_start = malloc(nodes * sizeof(int));
+        int *live_end = malloc(nodes * sizeof(int));
+
+        /* Initialize as non-existent */
+
+        for (int i = 0; i < nodes; ++i) {
+                live_start[i] = live_end[i] = -1;
+        }
+
+        /* d is the running instruction counter used as the time axis for
+         * live ranges; it only advances for instructions that reach the end
+         * of the loop body */
+        int d = 0;
+
+        mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block(block, ins) {
+                        if (ins->compact_branch) continue;
+
+                        /* Dest is < 0 for st_vary instructions, which break
+                         * the usual SSA conventions. Liveness analysis doesn't
+                         * make sense on these instructions, so skip them to
+                         * avoid memory corruption */
+
+                        if (ins->ssa_args.dest < 0) continue;
+
+                        if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) {
+                                /* If this destination is not yet live, it is now since we just wrote it */
+
+                                int dest = ins->ssa_args.dest;
+
+                                if (live_start[dest] == -1)
+                                        live_start[dest] = d;
+                        }
+
+                        /* Since we just used a source, the source might be
+                         * dead now. Scan the rest of the block for
+                         * invocations, and if there are none, the source dies
+                         * */
+
+                        int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 };
+
+                        for (int src = 0; src < 2; ++src) {
+                                int s = sources[src];
+
+                                if (s < 0) continue;
+
+                                if (s >= SSA_FIXED_MINIMUM) continue;
+
+                                if (!mir_is_live_after(ctx, block, ins, s)) {
+                                        live_end[s] = d;
+                                }
+                        }
+
+                        ++d;
+                }
+        }
+
+        /* If a node still hasn't been killed, kill it now */
+
+        for (int i = 0; i < nodes; ++i) {
+                /* live_start == -1 most likely indicates a pinned output */
+
+                if (live_end[i] == -1)
+                        live_end[i] = d;
+        }
+
+        /* Setup interference between nodes that are live at the same time,
+         * i.e. whose [start, end) ranges overlap */
+
+        for (int i = 0; i < nodes; ++i) {
+                for (int j = i + 1; j < nodes; ++j) {
+                        if (live_start[i] < live_end[j] && live_start[j] < live_end[i])
+                                ra_add_node_interference(g, i, j);
+                }
+        }
+
+        ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL);
+
+        if (!ra_allocate(g)) {
+                unreachable("Error allocating registers\n");
+        }
+
+        /* Cleanup */
+        free(live_start);
+        free(live_end);
+
+        return g;
+}
+
+/* Once registers have been decided via register allocation
+ * (allocate_registers), we need to rewrite the MIR to use registers instead of
+ * SSA.
+ *
+ * For every non-branch instruction the ssa_args indices are translated with
+ * dealias_register and stored in the instruction's hardware fields. ALU
+ * instructions fill registers.src1_reg/src2_reg/out_reg (or encode the inline
+ * constant), load/store instructions fill load_store.reg, and every other
+ * instruction type is left untouched.
+ *
+ * g is the graph returned by allocate_registers; dealias_register asserts it
+ * is non-NULL whenever an actual temp has to be looked up. */
+
+void
+install_registers(compiler_context *ctx, struct ra_graph *g)
+{
+        mir_foreach_block(ctx, block) {
+                mir_foreach_instr_in_block(block, ins) {
+                        if (ins->compact_branch) continue;
+
+                        ssa_args args = ins->ssa_args;
+
+                        switch (ins->type) {
+                        case TAG_ALU_4:
+                                ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count);
+
+                                ins->registers.src2_imm = args.inline_constant;
+
+                                if (args.inline_constant) {
+                                        /* Encode inline 16-bit constant as a vector by default:
+                                         * everything above the low 11 bits goes in src2_reg, the
+                                         * low 11 bits are shuffled into alu.src2 */
+
+                                        ins->registers.src2_reg = ins->inline_constant >> 11;
+
+                                        /* Mask exactly 11 bits to match the >> 11 split above.
+                                         * The previous 12-bit mask gave the same result only
+                                         * because bit 11 is dropped by the & 0x7 below */
+                                        int lower_11 = ins->inline_constant & ((1 << 11) - 1);
+
+                                        /* Bits 8..10 land in imm[2:0], bits 0..7 in imm[10:3] */
+                                        uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3);
+                                        ins->alu.src2 = imm << 2;
+                                } else {
+                                        ins->registers.src2_reg = dealias_register(ctx, g, args.src1, ctx->temp_count);
+                                }
+
+                                ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count);
+
+                                break;
+
+                        case TAG_LOAD_STORE_4: {
+                                if (OP_IS_STORE_VARY(ins->load_store.op)) {
+                                        /* TODO: use ssa_args for st_vary */
+                                        ins->load_store.reg = 0;
+                                } else {
+                                        /* Use the destination when there is one, otherwise
+                                         * fall back to src0 */
+                                        bool has_dest = args.dest >= 0;
+                                        int ssa_arg = has_dest ? args.dest : args.src0;
+
+                                        ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count);
+                                }
+
+                                break;
+                        }
+
+                        default:
+                                break;
+                        }
+                }
+        }
+}