From 1155446c198f43fcfc7afcb01917f5b3517081c2 Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Sun, 19 May 2019 23:20:34 +0000 Subject: [PATCH] panfrost/midgard: Split up midgard_compile.c (RA) This commit moves the register allocator out of midgard_compile.c and into its own midgard_ra.c file. In doing so, a number of dependencies are identified and moved into their own files in turn. midgard_compile.c is still fairly monolithic, but this should help. Code churn, but no functional changes should be introduced by this commit. Signed-off-by: Alyssa Rosenzweig --- src/gallium/drivers/panfrost/meson.build | 10 + .../drivers/panfrost/midgard/compiler.h | 359 +++++++++ .../drivers/panfrost/midgard/disassemble.c | 1 + .../drivers/panfrost/midgard/helpers.h | 144 +--- .../drivers/panfrost/midgard/midgard.h | 50 -- .../panfrost/midgard/midgard_compile.c | 746 +----------------- .../panfrost/midgard/midgard_liveness.c | 92 +++ .../drivers/panfrost/midgard/midgard_ops.c | 188 +++++ .../drivers/panfrost/midgard/midgard_ops.h | 53 ++ .../drivers/panfrost/midgard/midgard_print.c | 124 +++ .../drivers/panfrost/midgard/midgard_ra.c | 310 ++++++++ 11 files changed, 1149 insertions(+), 928 deletions(-) create mode 100644 src/gallium/drivers/panfrost/midgard/compiler.h create mode 100644 src/gallium/drivers/panfrost/midgard/midgard_liveness.c create mode 100644 src/gallium/drivers/panfrost/midgard/midgard_ops.c create mode 100644 src/gallium/drivers/panfrost/midgard/midgard_ops.h create mode 100644 src/gallium/drivers/panfrost/midgard/midgard_print.c create mode 100644 src/gallium/drivers/panfrost/midgard/midgard_ra.c diff --git a/src/gallium/drivers/panfrost/meson.build b/src/gallium/drivers/panfrost/meson.build index 93640a29c4c..075afa05cd9 100644 --- a/src/gallium/drivers/panfrost/meson.build +++ b/src/gallium/drivers/panfrost/meson.build @@ -27,6 +27,11 @@ files_panfrost = files( 'pan_resource.h', 'midgard/midgard_compile.c', + 'midgard/midgard_print.c', + 
'midgard/midgard_ra.c', + 'midgard/midgard_liveness.c', + 'midgard/midgard_ops.c', + 'midgard/nir_lower_blend.c', 'midgard/cppwrap.cpp', 'midgard/disassemble.c', @@ -97,6 +102,10 @@ driver_panfrost = declare_dependency( files_midgard = files( 'midgard/midgard_compile.c', + 'midgard/midgard_print.c', + 'midgard/midgard_ra.c', + 'midgard/midgard_liveness.c', + 'midgard/midgard_ops.c', 'midgard/cppwrap.cpp', 'midgard/disassemble.c', 'midgard/cmdline.c', @@ -153,6 +162,7 @@ files_pandecode = files( 'pan_pretty_print.c', 'midgard/disassemble.c', + 'midgard/midgard_ops.c', 'bifrost/disassemble.c', ) diff --git a/src/gallium/drivers/panfrost/midgard/compiler.h b/src/gallium/drivers/panfrost/midgard/compiler.h new file mode 100644 index 00000000000..48c6db542a5 --- /dev/null +++ b/src/gallium/drivers/panfrost/midgard/compiler.h @@ -0,0 +1,359 @@ +/* + * Copyright (C) 2019 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef _MDG_COMPILER_H +#define _MDG_COMPILER_H + +#include "midgard.h" +#include "helpers.h" +#include "midgard_compile.h" + +#include "util/hash_table.h" +#include "util/u_dynarray.h" +#include "util/set.h" +#include "util/list.h" + +#include "main/mtypes.h" +#include "compiler/nir_types.h" +#include "compiler/nir/nir.h" + +/* Forward declare */ +struct midgard_block; + +/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to + * the hardware), hence why that must be zero. TARGET_DISCARD signals this + * instruction is actually a discard op. */ + +#define TARGET_GOTO 0 +#define TARGET_BREAK 1 +#define TARGET_CONTINUE 2 +#define TARGET_DISCARD 3 + +typedef struct midgard_branch { + /* If conditional, the condition is specified in r31.w */ + bool conditional; + + /* For conditionals, if this is true, we branch on FALSE. If false, we branch on TRUE. */ + bool invert_conditional; + + /* Branch targets: the start of a block, the start of a loop (continue), the end of a loop (break). Value is one of TARGET_ */ + unsigned target_type; + + /* The actual target */ + union { + int target_block; + int target_break; + int target_continue; + }; +} midgard_branch; + +/* Instruction arguments represented as block-local SSA indices, rather than + * registers. Negative values mean unused. */ + +typedef struct { + int src0; + int src1; + int dest; + + /* src1 is -not- SSA but instead a 16-bit inline constant to be smudged + * in. Only valid for ALU ops. */ + bool inline_constant; +} ssa_args; + +/* Generic in-memory data type representing a single logical instruction, rather + * than a single instruction group. This is the preferred form for code gen.
+ * Multiple midgard_instructions will later be combined during scheduling, + * though this is not represented in this structure. Its format bridges + * the low-level binary representation with the higher level semantic meaning. + * + * Notably, it allows registers to be specified as block local SSA, for code + * emitted before the register allocation pass. + */ + +typedef struct midgard_instruction { + /* Must be first for casting */ + struct list_head link; + + unsigned type; /* ALU, load/store, texture */ + + /* If the register allocator has not run yet... */ + ssa_args ssa_args; + + /* Special fields for an ALU instruction */ + midgard_reg_info registers; + + /* I.e. (1 << alu_bit) */ + int unit; + + /* When emitting bundle, should this instruction have a break forced + * before it? Used for r31 writes which are valid only within a single + * bundle and *need* to happen as early as possible... this is a hack, + * TODO remove when we have a scheduler */ + bool precede_break; + + bool has_constants; + float constants[4]; + uint16_t inline_constant; + bool has_blend_constant; + + bool compact_branch; + bool writeout; + bool prepacked_branch; + + union { + midgard_load_store_word load_store; + midgard_vector_alu alu; + midgard_texture_word texture; + midgard_branch_extended branch_extended; + uint16_t br_compact; + + /* General branch, rather than packed br_compact. Higher level + * than the other components */ + midgard_branch branch; + }; +} midgard_instruction; + +typedef struct midgard_block { + /* Link to next block.
Must be first for mir_get_block */ + struct list_head link; + + /* List of midgard_instructions emitted for the current block */ + struct list_head instructions; + + bool is_scheduled; + + /* List of midgard_bundles emitted (after the scheduler has run) */ + struct util_dynarray bundles; + + /* Number of quadwords _actually_ emitted, as determined after scheduling */ + unsigned quadword_count; + + /* Successors: always one forward (the block after us), maybe + * one backwards (for a backward branch). No need for a second + * forward, since graph traversal would get there eventually + * anyway */ + struct midgard_block *successors[2]; + unsigned nr_successors; + + /* The successor pointers form a graph, and in the case of + * complex control flow, this graph has cycles. To aid + * traversal during liveness analysis, we have a visited? + * boolean for passes to use as they see fit, provided they + * clean up later */ + bool visited; +} midgard_block; + +typedef struct midgard_bundle { + /* Tag for the overall bundle */ + int tag; + + /* Instructions contained by the bundle */ + int instruction_count; + midgard_instruction instructions[5]; + + /* Bundle-wide ALU configuration */ + int padding; + int control; + bool has_embedded_constants; + float constants[4]; + bool has_blend_constant; + + uint16_t register_words[8]; + int register_words_count; + + uint64_t body_words[8]; + size_t body_size[8]; + int body_words_count; +} midgard_bundle; + +typedef struct compiler_context { + nir_shader *nir; + gl_shader_stage stage; + + /* Is internally a blend shader?
Depends on stage == FRAGMENT */ + bool is_blend; + + /* Tracking for blend constant patching */ + int blend_constant_offset; + + /* Current NIR function */ + nir_function *func; + + /* Unordered list of midgard_blocks */ + int block_count; + struct list_head blocks; + + midgard_block *initial_block; + midgard_block *previous_source_block; + midgard_block *final_block; + + /* List of midgard_instructions emitted for the current block */ + midgard_block *current_block; + + /* The current "depth" of the loop, for disambiguating breaks/continues + * when using nested loops */ + int current_loop_depth; + + /* Constants which have been loaded, for later inlining */ + struct hash_table_u64 *ssa_constants; + + /* SSA indices to be outputted to corresponding varying offset */ + struct hash_table_u64 *ssa_varyings; + + /* SSA values / registers which have been aliased. Naively, these + * demand a fmov output; instead, we alias them in a later pass to + * avoid the wasted op. + * + * A note on encoding: to avoid dynamic memory management here, rather + * than mapping to a pointer, we map to the source index; the key + * itself is just the destination index. */ + + struct hash_table_u64 *ssa_to_alias; + struct set *leftover_ssa_to_alias; + + /* Actual SSA-to-register for RA */ + struct hash_table_u64 *ssa_to_register; + + /* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */ + struct hash_table_u64 *hash_to_temp; + int temp_count; + int max_hash; + + /* Just the count of the max register used. Higher count => higher + * register pressure */ + int work_registers; + + /* Used for cont/last hinting. Increase when a tex op is added. + * Decrease when a tex op is removed.
*/ + int texture_op_count; + + /* Mapping of texture register -> SSA index for unaliasing */ + int texture_index[2]; + + /* If any path hits a discard instruction */ + bool can_discard; + + /* The number of uniforms allowable for the fast path */ + int uniform_cutoff; + + /* Count of instructions emitted from NIR overall, across all blocks */ + int instruction_count; + + /* Alpha ref value passed in */ + float alpha_ref; + + /* The index corresponding to the fragment output */ + unsigned fragment_output; + + /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */ + unsigned sysvals[MAX_SYSVAL_COUNT]; + unsigned sysval_count; + struct hash_table_u64 *sysval_to_id; +} compiler_context; + +/* Helpers for manipulating the above structures (forming the driver IR) */ + +/* Append instruction to end of current block */ + +static inline midgard_instruction * +mir_upload_ins(struct midgard_instruction ins) +{ + midgard_instruction *heap = malloc(sizeof(ins)); + memcpy(heap, &ins, sizeof(ins)); + return heap; +} + +static inline void +emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins) +{ + list_addtail(&(mir_upload_ins(ins))->link, &ctx->current_block->instructions); +} + +static inline void +mir_insert_instruction_before(struct midgard_instruction *tag, struct midgard_instruction ins) +{ + list_addtail(&(mir_upload_ins(ins))->link, &tag->link); +} + +static inline void +mir_remove_instruction(struct midgard_instruction *ins) +{ + list_del(&ins->link); +} + +static inline midgard_instruction* +mir_prev_op(struct midgard_instruction *ins) +{ + return list_last_entry(&(ins->link), midgard_instruction, link); +} + +static inline midgard_instruction* +mir_next_op(struct midgard_instruction *ins) +{ + return list_first_entry(&(ins->link), midgard_instruction, link); +} + +#define mir_foreach_block(ctx, v) list_for_each_entry(struct midgard_block, v, &ctx->blocks, link) +#define mir_foreach_block_from(ctx, from, v) 
list_for_each_entry_from(struct midgard_block, v, from, &ctx->blocks, link) + +#define mir_foreach_instr(ctx, v) list_for_each_entry(struct midgard_instruction, v, &ctx->current_block->instructions, link) +#define mir_foreach_instr_safe(ctx, v) list_for_each_entry_safe(struct midgard_instruction, v, &ctx->current_block->instructions, link) +#define mir_foreach_instr_in_block(block, v) list_for_each_entry(struct midgard_instruction, v, &block->instructions, link) +#define mir_foreach_instr_in_block_safe(block, v) list_for_each_entry_safe(struct midgard_instruction, v, &block->instructions, link) +#define mir_foreach_instr_in_block_safe_rev(block, v) list_for_each_entry_safe_rev(struct midgard_instruction, v, &block->instructions, link) +#define mir_foreach_instr_in_block_from(block, v, from) list_for_each_entry_from(struct midgard_instruction, v, from, &block->instructions, link) +#define mir_foreach_instr_in_block_from_rev(block, v, from) list_for_each_entry_from_rev(struct midgard_instruction, v, from, &block->instructions, link) + + +static inline midgard_instruction * +mir_last_in_block(struct midgard_block *block) +{ + return list_last_entry(&block->instructions, struct midgard_instruction, link); +} + +static inline midgard_block * +mir_get_block(compiler_context *ctx, int idx) +{ + struct list_head *lst = &ctx->blocks; + + while ((idx--) + 1) + lst = lst->next; + + return (struct midgard_block *) lst; +} + +/* MIR printing */ + +void mir_print_instruction(midgard_instruction *ins); +void mir_print_block(midgard_block *block); +void mir_print_shader(compiler_context *ctx); + +/* Register allocation */ + +struct ra_graph; + +struct ra_graph* allocate_registers(compiler_context *ctx); +void install_registers(compiler_context *ctx, struct ra_graph *g); +bool mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src); + +#endif diff --git a/src/gallium/drivers/panfrost/midgard/disassemble.c 
b/src/gallium/drivers/panfrost/midgard/disassemble.c index a9e443fa67c..c467e94fc29 100644 --- a/src/gallium/drivers/panfrost/midgard/disassemble.c +++ b/src/gallium/drivers/panfrost/midgard/disassemble.c @@ -31,6 +31,7 @@ #include #include "midgard.h" #include "midgard-parse.h" +#include "midgard_ops.h" #include "disassemble.h" #include "helpers.h" #include "util/half_float.h" diff --git a/src/gallium/drivers/panfrost/midgard/helpers.h b/src/gallium/drivers/panfrost/midgard/helpers.h index f32a683233a..9d287259a8a 100644 --- a/src/gallium/drivers/panfrost/midgard/helpers.h +++ b/src/gallium/drivers/panfrost/midgard/helpers.h @@ -1,7 +1,4 @@ -/* Author(s): - * Alyssa Rosenzweig - * - * Copyright (c) 2018 Alyssa Rosenzweig (alyssa@rosenzweig.io) +/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io) * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -22,6 +19,9 @@ * THE SOFTWARE. 
*/ +#ifndef __MDG_HELPERS_H +#define __MDG_HELPERS_H + #define OP_IS_STORE_VARY(op) (\ op == midgard_op_st_vary_16 || \ op == midgard_op_st_vary_32 \ @@ -150,140 +150,12 @@ #define UNITS_VECTOR (UNIT_VMUL | UNIT_VADD) #define UNITS_ANY_VECTOR (UNITS_VECTOR | UNIT_VLUT) -/* Table of mapping opcodes to accompanying properties relevant to - * scheduling/emission/etc */ - -static struct { +struct mir_op_props { const char *name; unsigned props; -} alu_opcode_props[256] = { - [midgard_alu_op_fadd] = {"fadd", UNITS_ADD | OP_COMMUTES}, - [midgard_alu_op_fmul] = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES}, - [midgard_alu_op_fmin] = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES}, - [midgard_alu_op_fmax] = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES}, - [midgard_alu_op_imin] = {"imin", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_imax] = {"imax", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_umin] = {"umin", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_umax] = {"umax", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_fmov] = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24}, - [midgard_alu_op_fround] = {"fround", UNITS_ADD}, - [midgard_alu_op_froundeven] = {"froundeven", UNITS_ADD}, - [midgard_alu_op_ftrunc] = {"ftrunc", UNITS_ADD}, - [midgard_alu_op_ffloor] = {"ffloor", UNITS_ADD}, - [midgard_alu_op_fceil] = {"fceil", UNITS_ADD}, - [midgard_alu_op_ffma] = {"ffma", UNIT_VLUT}, - - /* Though they output a scalar, they need to run on a vector unit - * since they process vectors */ - [midgard_alu_op_fdot3] = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES}, - [midgard_alu_op_fdot3r] = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES}, - [midgard_alu_op_fdot4] = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES}, - - /* Incredibly, iadd can run on vmul, etc */ - [midgard_alu_op_iadd] = {"iadd", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_iabs] = {"iabs", UNITS_ADD}, - [midgard_alu_op_isub] = {"isub", UNITS_MOST}, - [midgard_alu_op_imul] = {"imul", UNITS_MUL | 
OP_COMMUTES}, - [midgard_alu_op_imov] = {"imov", UNITS_MOST | QUIRK_FLIPPED_R24}, - - /* For vector comparisons, use ball etc */ - [midgard_alu_op_feq] = {"feq", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_fne] = {"fne", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_fle] = {"fle", UNITS_MOST}, - [midgard_alu_op_flt] = {"flt", UNITS_MOST}, - [midgard_alu_op_ieq] = {"ieq", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_ine] = {"ine", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_ilt] = {"ilt", UNITS_MOST}, - [midgard_alu_op_ile] = {"ile", UNITS_MOST}, - [midgard_alu_op_ult] = {"ult", UNITS_MOST}, - [midgard_alu_op_ule] = {"ule", UNITS_MOST}, - - [midgard_alu_op_icsel] = {"icsel", UNITS_ADD}, - [midgard_alu_op_icsel_v] = {"icsel_v", UNITS_ADD}, - [midgard_alu_op_fcsel_v] = {"fcsel_v", UNITS_ADD}, - [midgard_alu_op_fcsel] = {"fcsel", UNITS_ADD | UNIT_SMUL}, - - [midgard_alu_op_frcp] = {"frcp", UNIT_VLUT}, - [midgard_alu_op_frsqrt] = {"frsqrt", UNIT_VLUT}, - [midgard_alu_op_fsqrt] = {"fsqrt", UNIT_VLUT}, - [midgard_alu_op_fpow_pt1] = {"fpow_pt1", UNIT_VLUT}, - [midgard_alu_op_fexp2] = {"fexp2", UNIT_VLUT}, - [midgard_alu_op_flog2] = {"flog2", UNIT_VLUT}, - - [midgard_alu_op_f2i] = {"f2i", UNITS_ADD | OP_TYPE_CONVERT}, - [midgard_alu_op_f2u] = {"f2u", UNITS_ADD | OP_TYPE_CONVERT}, - [midgard_alu_op_f2u8] = {"f2u8", UNITS_ADD | OP_TYPE_CONVERT}, - [midgard_alu_op_i2f] = {"i2f", UNITS_ADD | OP_TYPE_CONVERT}, - [midgard_alu_op_u2f] = {"u2f", UNITS_ADD | OP_TYPE_CONVERT}, - - [midgard_alu_op_fsin] = {"fsin", UNIT_VLUT}, - [midgard_alu_op_fcos] = {"fcos", UNIT_VLUT}, - - /* XXX: Test case where it's right on smul but not sadd */ - [midgard_alu_op_iand] = {"iand", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_iandnot] = {"iandnot", UNITS_MOST}, - - [midgard_alu_op_ior] = {"ior", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_iornot] = {"iornot", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_inor] = {"inor", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_ixor] = {"ixor", UNITS_MOST | 
OP_COMMUTES}, - [midgard_alu_op_inxor] = {"inxor", UNITS_MOST | OP_COMMUTES}, - [midgard_alu_op_iclz] = {"iclz", UNITS_ADD}, - [midgard_alu_op_ibitcount8] = {"ibitcount8", UNITS_ADD}, - [midgard_alu_op_inand] = {"inand", UNITS_MOST}, - [midgard_alu_op_ishl] = {"ishl", UNITS_ADD}, - [midgard_alu_op_iasr] = {"iasr", UNITS_ADD}, - [midgard_alu_op_ilsr] = {"ilsr", UNITS_ADD}, - - [midgard_alu_op_fball_eq] = {"fball_eq", UNITS_VECTOR | OP_COMMUTES}, - [midgard_alu_op_fbany_neq] = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES}, - [midgard_alu_op_iball_eq] = {"iball_eq", UNITS_VECTOR | OP_COMMUTES}, - [midgard_alu_op_iball_neq] = {"iball_neq", UNITS_VECTOR | OP_COMMUTES}, - [midgard_alu_op_ibany_eq] = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES}, - [midgard_alu_op_ibany_neq] = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES}, - - /* These instructions are not yet emitted by the compiler, so - * don't speculate about units yet */ - [midgard_alu_op_ishladd] = {"ishladd", 0}, - - [midgard_alu_op_uball_lt] = {"uball_lt", 0}, - [midgard_alu_op_uball_lte] = {"uball_lte", 0}, - [midgard_alu_op_iball_lt] = {"iball_lt", 0}, - [midgard_alu_op_iball_lte] = {"iball_lte", 0}, - [midgard_alu_op_ubany_lt] = {"ubany_lt", 0}, - [midgard_alu_op_ubany_lte] = {"ubany_lte", 0}, - [midgard_alu_op_ibany_lt] = {"ibany_lt", 0}, - [midgard_alu_op_ibany_lte] = {"ibany_lte", 0}, - - [midgard_alu_op_freduce] = {"freduce", 0}, - [midgard_alu_op_bball_eq] = {"bball_eq", 0 | OP_COMMUTES}, - [midgard_alu_op_bbany_neq] = {"bball_eq", 0 | OP_COMMUTES}, - [midgard_alu_op_fatan2_pt1] = {"fatan2_pt1", 0}, - [midgard_alu_op_fatan_pt2] = {"fatan_pt2", 0}, }; -/* Is this opcode that of an integer (regardless of signedness)? Instruction - * names authoritatively determine types */ - -static inline bool -midgard_is_integer_op(int op) -{ - const char *name = alu_opcode_props[op].name; - - if (!name) - return false; - - return (name[0] == 'i') || (name[0] == 'u'); -} - -/* Does this opcode *write* an integer? 
Same as is_integer_op, unless it's a - * conversion between int<->float in which case we do the opposite */ - -static inline bool -midgard_is_integer_out_op(int op) -{ - bool is_int = midgard_is_integer_op(op); - bool is_conversion = alu_opcode_props[op].props & OP_TYPE_CONVERT; +/* This file is common, so don't define the tables themselves. #include + * midgard_ops.h if you need that, or edit midgard_ops.c directly */ - return is_int ^ is_conversion; -} +#endif diff --git a/src/gallium/drivers/panfrost/midgard/midgard.h b/src/gallium/drivers/panfrost/midgard/midgard.h index d5d6c12f78c..f5cd59cbfb1 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard.h +++ b/src/gallium/drivers/panfrost/midgard/midgard.h @@ -536,54 +536,4 @@ __attribute__((__packed__)) } midgard_texture_word; -static char *load_store_opcode_names[256] = { - [midgard_op_st_cubemap_coords] = "st_cubemap_coords", - [midgard_op_ld_global_id] = "ld_global_id", - - [midgard_op_atomic_add] = "atomic_add", - [midgard_op_atomic_and] = "atomic_and", - [midgard_op_atomic_or] = "atomic_or", - [midgard_op_atomic_xor] = "atomic_xor", - [midgard_op_atomic_imin] = "atomic_imin", - [midgard_op_atomic_umin] = "atomic_umin", - [midgard_op_atomic_imax] = "atomic_imax", - [midgard_op_atomic_umax] = "atomic_umax", - [midgard_op_atomic_xchg] = "atomic_xchg", - - [midgard_op_ld_char] = "ld_char", - [midgard_op_ld_char2] = "ld_char2", - [midgard_op_ld_short] = "ld_short", - [midgard_op_ld_char4] = "ld_char4", - [midgard_op_ld_short4] = "ld_short4", - [midgard_op_ld_int4] = "ld_int4", - - [midgard_op_ld_attr_32] = "ld_attr_32", - [midgard_op_ld_attr_16] = "ld_attr_16", - [midgard_op_ld_attr_32i] = "ld_attr_32i", - - [midgard_op_ld_vary_32] = "ld_vary_32", - [midgard_op_ld_vary_16] = "ld_vary_16", - [midgard_op_ld_vary_32i] = "ld_vary_32i", - - [midgard_op_ld_color_buffer_16] = "ld_color_buffer_16", - - [midgard_op_ld_uniform_16] = "ld_uniform_16", - [midgard_op_ld_uniform_32] = "ld_uniform_32", -
[midgard_op_ld_color_buffer_8] = "ld_color_buffer_8", - - [midgard_op_st_char] = "st_char", - [midgard_op_st_char2] = "st_char2", - [midgard_op_st_char4] = "st_char4", - [midgard_op_st_short4] = "st_short4", - [midgard_op_st_int4] = "st_int4", - - [midgard_op_st_vary_32] = "st_vary_32", - [midgard_op_st_vary_16] = "st_vary_16", - [midgard_op_st_vary_32i] = "st_vary_32i", - - [midgard_op_st_image_f] = "st_image_f", - [midgard_op_st_image_ui] = "st_image_ui", - [midgard_op_st_image_i] = "st_image_i", -}; - #endif diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c index fa74b3f9046..640e4a5fb86 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2018 Alyssa Rosenzweig + * Copyright (C) 2018-2019 Alyssa Rosenzweig * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -36,7 +36,6 @@ #include "main/imports.h" #include "compiler/nir/nir_builder.h" #include "util/half_float.h" -#include "util/register_allocate.h" #include "util/u_debug.h" #include "util/u_dynarray.h" #include "util/list.h" @@ -45,7 +44,9 @@ #include "midgard.h" #include "midgard_nir.h" #include "midgard_compile.h" +#include "midgard_ops.h" #include "helpers.h" +#include "compiler.h" #include "disassemble.h" @@ -64,138 +65,12 @@ int midgard_debug = 0; fprintf(stderr, "%s:%d: "fmt, \ __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) -/* Instruction arguments represented as block-local SSA indices, rather than - * registers. Negative values mean unused. */ - -typedef struct { - int src0; - int src1; - int dest; - - /* src1 is -not- SSA but instead a 16-bit inline constant to be smudged - * in. Only valid for ALU ops. 
*/ - bool inline_constant; -} ssa_args; - -/* Forward declare so midgard_branch can reference */ -struct midgard_block; - -/* Target types. Defaults to TARGET_GOTO (the type corresponding directly to - * the hardware), hence why that must be zero. TARGET_DISCARD signals this - * instruction is actually a discard op. */ - -#define TARGET_GOTO 0 -#define TARGET_BREAK 1 -#define TARGET_CONTINUE 2 -#define TARGET_DISCARD 3 - -typedef struct midgard_branch { - /* If conditional, the condition is specified in r31.w */ - bool conditional; - - /* For conditionals, if this is true, we branch on FALSE. If false, we branch on TRUE. */ - bool invert_conditional; - - /* Branch targets: the start of a block, the start of a loop (continue), the end of a loop (break). Value is one of TARGET_ */ - unsigned target_type; - - /* The actual target */ - union { - int target_block; - int target_break; - int target_continue; - }; -} midgard_branch; - static bool midgard_is_branch_unit(unsigned unit) { return (unit == ALU_ENAB_BRANCH) || (unit == ALU_ENAB_BR_COMPACT); } -/* Generic in-memory data type repesenting a single logical instruction, rather - * than a single instruction group. This is the preferred form for code gen. - * Multiple midgard_insturctions will later be combined during scheduling, - * though this is not represented in this structure. Its format bridges - * the low-level binary representation with the higher level semantic meaning. - * - * Notably, it allows registers to be specified as block local SSA, for code - * emitted before the register allocation pass. - */ - -typedef struct midgard_instruction { - /* Must be first for casting */ - struct list_head link; - - unsigned type; /* ALU, load/store, texture */ - - /* If the register allocator has not run yet... */ - ssa_args ssa_args; - - /* Special fields for an ALU instruction */ - midgard_reg_info registers; - - /* I.e. 
(1 << alu_bit) */ - int unit; - - /* When emitting bundle, should this instruction have a break forced - * before it? Used for r31 writes which are valid only within a single - * bundle and *need* to happen as early as possible... this is a hack, - * TODO remove when we have a scheduler */ - bool precede_break; - - bool has_constants; - float constants[4]; - uint16_t inline_constant; - bool has_blend_constant; - - bool compact_branch; - bool writeout; - bool prepacked_branch; - - union { - midgard_load_store_word load_store; - midgard_vector_alu alu; - midgard_texture_word texture; - midgard_branch_extended branch_extended; - uint16_t br_compact; - - /* General branch, rather than packed br_compact. Higher level - * than the other components */ - midgard_branch branch; - }; -} midgard_instruction; - -typedef struct midgard_block { - /* Link to next block. Must be first for mir_get_block */ - struct list_head link; - - /* List of midgard_instructions emitted for the current block */ - struct list_head instructions; - - bool is_scheduled; - - /* List of midgard_bundles emitted (after the scheduler has run) */ - struct util_dynarray bundles; - - /* Number of quadwords _actually_ emitted, as determined after scheduling */ - unsigned quadword_count; - - /* Successors: always one forward (the block after us), maybe - * one backwards (for a backward branch). No need for a second - * forward, since graph traversal would get there eventually - * anyway */ - struct midgard_block *successors[2]; - unsigned nr_successors; - - /* The successors pointer form a graph, and in the case of - * complex control flow, this graph has a cycles. To aid - * traversal during liveness analysis, we have a visited? 
- * boolean for passes to use as they see fit, provided they - * clean up later */ - bool visited; -} midgard_block; - static void midgard_block_add_successor(midgard_block *block, midgard_block *successor) { @@ -404,267 +279,6 @@ midgard_create_branch_extended( midgard_condition cond, return branch; } -typedef struct midgard_bundle { - /* Tag for the overall bundle */ - int tag; - - /* Instructions contained by the bundle */ - int instruction_count; - midgard_instruction instructions[5]; - - /* Bundle-wide ALU configuration */ - int padding; - int control; - bool has_embedded_constants; - float constants[4]; - bool has_blend_constant; - - uint16_t register_words[8]; - int register_words_count; - - uint64_t body_words[8]; - size_t body_size[8]; - int body_words_count; -} midgard_bundle; - -typedef struct compiler_context { - nir_shader *nir; - gl_shader_stage stage; - - /* Is internally a blend shader? Depends on stage == FRAGMENT */ - bool is_blend; - - /* Tracking for blend constant patching */ - int blend_constant_offset; - - /* Current NIR function */ - nir_function *func; - - /* Unordered list of midgard_blocks */ - int block_count; - struct list_head blocks; - - midgard_block *initial_block; - midgard_block *previous_source_block; - midgard_block *final_block; - - /* List of midgard_instructions emitted for the current block */ - midgard_block *current_block; - - /* The current "depth" of the loop, for disambiguating breaks/continues - * when using nested loops */ - int current_loop_depth; - - /* Constants which have been loaded, for later inlining */ - struct hash_table_u64 *ssa_constants; - - /* SSA indices to be outputted to corresponding varying offset */ - struct hash_table_u64 *ssa_varyings; - - /* SSA values / registers which have been aliased. Naively, these - * demand a fmov output; instead, we alias them in a later pass to - * avoid the wasted op. 
- * - * A note on encoding: to avoid dynamic memory management here, rather - * than ampping to a pointer, we map to the source index; the key - * itself is just the destination index. */ - - struct hash_table_u64 *ssa_to_alias; - struct set *leftover_ssa_to_alias; - - /* Actual SSA-to-register for RA */ - struct hash_table_u64 *ssa_to_register; - - /* Mapping of hashes computed from NIR indices to the sequential temp indices ultimately used in MIR */ - struct hash_table_u64 *hash_to_temp; - int temp_count; - int max_hash; - - /* Just the count of the max register used. Higher count => higher - * register pressure */ - int work_registers; - - /* Used for cont/last hinting. Increase when a tex op is added. - * Decrease when a tex op is removed. */ - int texture_op_count; - - /* Mapping of texture register -> SSA index for unaliasing */ - int texture_index[2]; - - /* If any path hits a discard instruction */ - bool can_discard; - - /* The number of uniforms allowable for the fast path */ - int uniform_cutoff; - - /* Count of instructions emitted from NIR overall, across all blocks */ - int instruction_count; - - /* Alpha ref value passed in */ - float alpha_ref; - - /* The index corresponding to the fragment output */ - unsigned fragment_output; - - /* The mapping of sysvals to uniforms, the count, and the off-by-one inverse */ - unsigned sysvals[MAX_SYSVAL_COUNT]; - unsigned sysval_count; - struct hash_table_u64 *sysval_to_id; -} compiler_context; - -/* Append instruction to end of current block */ - -static midgard_instruction * -mir_upload_ins(struct midgard_instruction ins) -{ - midgard_instruction *heap = malloc(sizeof(ins)); - memcpy(heap, &ins, sizeof(ins)); - return heap; -} - -static void -emit_mir_instruction(struct compiler_context *ctx, struct midgard_instruction ins) -{ - list_addtail(&(mir_upload_ins(ins))->link, &ctx->current_block->instructions); -} - -static void -mir_insert_instruction_before(struct midgard_instruction *tag, struct 
midgard_instruction ins) -{ - list_addtail(&(mir_upload_ins(ins))->link, &tag->link); -} - -static void -mir_remove_instruction(struct midgard_instruction *ins) -{ - list_del(&ins->link); -} - -static midgard_instruction* -mir_prev_op(struct midgard_instruction *ins) -{ - return list_last_entry(&(ins->link), midgard_instruction, link); -} - -static midgard_instruction* -mir_next_op(struct midgard_instruction *ins) -{ - return list_first_entry(&(ins->link), midgard_instruction, link); -} - -#define mir_foreach_block(ctx, v) list_for_each_entry(struct midgard_block, v, &ctx->blocks, link) -#define mir_foreach_block_from(ctx, from, v) list_for_each_entry_from(struct midgard_block, v, from, &ctx->blocks, link) - -#define mir_foreach_instr(ctx, v) list_for_each_entry(struct midgard_instruction, v, &ctx->current_block->instructions, link) -#define mir_foreach_instr_safe(ctx, v) list_for_each_entry_safe(struct midgard_instruction, v, &ctx->current_block->instructions, link) -#define mir_foreach_instr_in_block(block, v) list_for_each_entry(struct midgard_instruction, v, &block->instructions, link) -#define mir_foreach_instr_in_block_safe(block, v) list_for_each_entry_safe(struct midgard_instruction, v, &block->instructions, link) -#define mir_foreach_instr_in_block_safe_rev(block, v) list_for_each_entry_safe_rev(struct midgard_instruction, v, &block->instructions, link) -#define mir_foreach_instr_in_block_from(block, v, from) list_for_each_entry_from(struct midgard_instruction, v, from, &block->instructions, link) -#define mir_foreach_instr_in_block_from_rev(block, v, from) list_for_each_entry_from_rev(struct midgard_instruction, v, from, &block->instructions, link) - - -static midgard_instruction * -mir_last_in_block(struct midgard_block *block) -{ - return list_last_entry(&block->instructions, struct midgard_instruction, link); -} - -static midgard_block * -mir_get_block(compiler_context *ctx, int idx) -{ - struct list_head *lst = &ctx->blocks; - - while ((idx--) + 1) - 
lst = lst->next; - - return (struct midgard_block *) lst; -} - -/* Pretty printer for internal Midgard IR */ - -static void -print_mir_source(int source) -{ - if (source >= SSA_FIXED_MINIMUM) { - /* Specific register */ - int reg = SSA_REG_FROM_FIXED(source); - - /* TODO: Moving threshold */ - if (reg > 16 && reg < 24) - printf("u%d", 23 - reg); - else - printf("r%d", reg); - } else { - printf("%d", source); - } -} - -static void -print_mir_instruction(midgard_instruction *ins) -{ - printf("\t"); - - switch (ins->type) { - case TAG_ALU_4: { - midgard_alu_op op = ins->alu.op; - const char *name = alu_opcode_props[op].name; - - if (ins->unit) - printf("%d.", ins->unit); - - printf("%s", name ? name : "??"); - break; - } - - case TAG_LOAD_STORE_4: { - midgard_load_store_op op = ins->load_store.op; - const char *name = load_store_opcode_names[op]; - - assert(name); - printf("%s", name); - break; - } - - case TAG_TEXTURE_4: { - printf("texture"); - break; - } - - default: - assert(0); - } - - ssa_args *args = &ins->ssa_args; - - printf(" %d, ", args->dest); - - print_mir_source(args->src0); - printf(", "); - - if (args->inline_constant) - printf("#%d", ins->inline_constant); - else - print_mir_source(args->src1); - - if (ins->has_constants) - printf(" <%f, %f, %f, %f>", ins->constants[0], ins->constants[1], ins->constants[2], ins->constants[3]); - - printf("\n"); -} - -static void -print_mir_block(midgard_block *block) -{ - printf("{\n"); - - mir_foreach_instr_in_block(block, ins) { - print_mir_instruction(ins); - } - - printf("}\n"); -} - static void attach_constants(compiler_context *ctx, midgard_instruction *ins, void *constants, int name) { @@ -975,26 +589,6 @@ effective_writemask(midgard_vector_alu *alu) return squeeze_writemask(alu->mask); } -static unsigned -find_or_allocate_temp(compiler_context *ctx, unsigned hash) -{ - if ((hash < 0) || (hash >= SSA_FIXED_MINIMUM)) - return hash; - - unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, 
hash + 1); - - if (temp) - return temp - 1; - - /* If no temp is find, allocate one */ - temp = ctx->temp_count++; - ctx->max_hash = MAX2(ctx->max_hash, hash); - - _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1)); - - return temp; -} - static unsigned nir_src_index(compiler_context *ctx, nir_src *src) { @@ -1983,338 +1577,6 @@ emit_instr(compiler_context *ctx, struct nir_instr *instr) } } -/* Determine the actual hardware from the index based on the RA results or special values */ - -static int -dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg) -{ - if (reg >= SSA_FIXED_MINIMUM) - return SSA_REG_FROM_FIXED(reg); - - if (reg >= 0) { - assert(reg < maxreg); - assert(g); - int r = ra_get_node_reg(g, reg); - ctx->work_registers = MAX2(ctx->work_registers, r); - return r; - } - - switch (reg) { - /* fmov style unused */ - case SSA_UNUSED_0: - return REGISTER_UNUSED; - - /* lut style unused */ - case SSA_UNUSED_1: - return REGISTER_UNUSED; - - default: - DBG("Unknown SSA register alias %d\n", reg); - assert(0); - return 31; - } -} - -static unsigned int -midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data) -{ - /* Choose the first available register to minimise reported register pressure */ - - for (int i = 0; i < 16; ++i) { - if (BITSET_TEST(regs, i)) { - return i; - } - } - - assert(0); - return 0; -} - -static bool -midgard_is_live_in_instr(midgard_instruction *ins, int src) -{ - if (ins->ssa_args.src0 == src) return true; - if (ins->ssa_args.src1 == src) return true; - - return false; -} - -/* Determine if a variable is live in the successors of a block */ -static bool -is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src) -{ - for (unsigned i = 0; i < bl->nr_successors; ++i) { - midgard_block *succ = bl->successors[i]; - - /* If we already visited, the value we're seeking - * isn't down this path (or we would have short - * circuited */ - - if 
(succ->visited) continue; - - /* Otherwise (it's visited *now*), check the block */ - - succ->visited = true; - - mir_foreach_instr_in_block(succ, ins) { - if (midgard_is_live_in_instr(ins, src)) - return true; - } - - /* ...and also, check *its* successors */ - if (is_live_after_successors(ctx, succ, src)) - return true; - - } - - /* Welp. We're really not live. */ - - return false; -} - -static bool -is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src) -{ - /* Check the rest of the block for liveness */ - - mir_foreach_instr_in_block_from(block, ins, mir_next_op(start)) { - if (midgard_is_live_in_instr(ins, src)) - return true; - } - - /* Check the rest of the blocks for liveness recursively */ - - bool succ = is_live_after_successors(ctx, block, src); - - mir_foreach_block(ctx, block) { - block->visited = false; - } - - return succ; -} - -/* Once registers have been decided via register allocation - * (allocate_registers), we need to rewrite the MIR to use registers instead of - * SSA */ - -static void -install_registers(compiler_context *ctx, struct ra_graph *g) -{ - mir_foreach_block(ctx, block) { - mir_foreach_instr_in_block(block, ins) { - if (ins->compact_branch) continue; - - ssa_args args = ins->ssa_args; - - switch (ins->type) { - case TAG_ALU_4: - ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count); - - ins->registers.src2_imm = args.inline_constant; - - if (args.inline_constant) { - /* Encode inline 16-bit constant as a vector by default */ - - ins->registers.src2_reg = ins->inline_constant >> 11; - - int lower_11 = ins->inline_constant & ((1 << 12) - 1); - - uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3); - ins->alu.src2 = imm << 2; - } else { - ins->registers.src2_reg = dealias_register(ctx, g, args.src1, ctx->temp_count); - } - - ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count); - - break; - - case TAG_LOAD_STORE_4: { - if 
(OP_IS_STORE_VARY(ins->load_store.op)) { - /* TODO: use ssa_args for st_vary */ - ins->load_store.reg = 0; - } else { - bool has_dest = args.dest >= 0; - int ssa_arg = has_dest ? args.dest : args.src0; - - ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count); - } - - break; - } - - default: - break; - } - } - } - -} - -/* This routine performs the actual register allocation. It should be succeeded - * by install_registers */ - -static struct ra_graph * -allocate_registers(compiler_context *ctx) -{ - /* First, initialize the RA */ - struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true); - - /* Create a primary (general purpose) class, as well as special purpose - * pipeline register classes */ - - int primary_class = ra_alloc_reg_class(regs); - int varying_class = ra_alloc_reg_class(regs); - - /* Add the full set of work registers */ - int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0); - for (int i = 0; i < work_count; ++i) - ra_class_add_reg(regs, primary_class, i); - - /* Add special registers */ - ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE); - ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1); - - /* We're done setting up */ - ra_set_finalize(regs, NULL); - - /* Transform the MIR into squeezed index form */ - mir_foreach_block(ctx, block) { - mir_foreach_instr_in_block(block, ins) { - if (ins->compact_branch) continue; - - ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0); - ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1); - ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest); - } - if (midgard_debug & MIDGARD_DBG_SHADERS) - print_mir_block(block); - } - - /* No register allocation to do with no SSA */ - - if (!ctx->temp_count) - return NULL; - - /* Let's actually do register allocation */ - int nodes = ctx->temp_count; - struct ra_graph *g = ra_alloc_interference_graph(regs, nodes); - - /* Set everything to the work register class, unless it has 
somewhere - * special to go */ - - mir_foreach_block(ctx, block) { - mir_foreach_instr_in_block(block, ins) { - if (ins->compact_branch) continue; - - if (ins->ssa_args.dest < 0) continue; - - if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue; - - int class = primary_class; - - ra_set_node_class(g, ins->ssa_args.dest, class); - } - } - - for (int index = 0; index <= ctx->max_hash; ++index) { - unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->ssa_to_register, index + 1); - - if (temp) { - unsigned reg = temp - 1; - int t = find_or_allocate_temp(ctx, index); - ra_set_node_reg(g, t, reg); - } - } - - /* Determine liveness */ - - int *live_start = malloc(nodes * sizeof(int)); - int *live_end = malloc(nodes * sizeof(int)); - - /* Initialize as non-existent */ - - for (int i = 0; i < nodes; ++i) { - live_start[i] = live_end[i] = -1; - } - - int d = 0; - - mir_foreach_block(ctx, block) { - mir_foreach_instr_in_block(block, ins) { - if (ins->compact_branch) continue; - - /* Dest is < 0 for st_vary instructions, which break - * the usual SSA conventions. Liveness analysis doesn't - * make sense on these instructions, so skip them to - * avoid memory corruption */ - - if (ins->ssa_args.dest < 0) continue; - - if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) { - /* If this destination is not yet live, it is now since we just wrote it */ - - int dest = ins->ssa_args.dest; - - if (live_start[dest] == -1) - live_start[dest] = d; - } - - /* Since we just used a source, the source might be - * dead now. 
Scan the rest of the block for - * invocations, and if there are none, the source dies - * */ - - int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 }; - - for (int src = 0; src < 2; ++src) { - int s = sources[src]; - - if (s < 0) continue; - - if (s >= SSA_FIXED_MINIMUM) continue; - - if (!is_live_after(ctx, block, ins, s)) { - live_end[s] = d; - } - } - - ++d; - } - } - - /* If a node still hasn't been killed, kill it now */ - - for (int i = 0; i < nodes; ++i) { - /* live_start == -1 most likely indicates a pinned output */ - - if (live_end[i] == -1) - live_end[i] = d; - } - - /* Setup interference between nodes that are live at the same time */ - - for (int i = 0; i < nodes; ++i) { - for (int j = i + 1; j < nodes; ++j) { - if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i])) - ra_add_node_interference(g, i, j); - } - } - - ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL); - - if (!ra_allocate(g)) { - DBG("Error allocating registers\n"); - assert(0); - } - - /* Cleanup */ - free(live_start); - free(live_end); - - return g; -} - /* Midgard IR only knows vector ALU types, but we sometimes need to actually * use scalar ALU instructions, for functional or performance reasons. To do * this, we just demote vector ALU payloads to scalar. 
*/ @@ -3247,7 +2509,7 @@ midgard_opt_dead_code_eliminate(compiler_context *ctx, midgard_block *block) if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue; if (midgard_is_pinned(ctx, ins->ssa_args.dest)) continue; - if (is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue; + if (mir_is_live_after(ctx, block, ins, ins->ssa_args.dest)) continue; mir_remove_instruction(ins); progress = true; diff --git a/src/gallium/drivers/panfrost/midgard/midgard_liveness.c b/src/gallium/drivers/panfrost/midgard/midgard_liveness.c new file mode 100644 index 00000000000..ab36583ef39 --- /dev/null +++ b/src/gallium/drivers/panfrost/midgard/midgard_liveness.c @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2018-2019 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* mir_is_live_after performs liveness analysis on the MIR, used primarily + * as part of register allocation. 
TODO: Algorithmic improvements for + * compiler performance (this is the worst algorithm possible -- see + * backlog with Connor on IRC) */ + +#include "compiler.h" + +static bool +midgard_is_live_in_instr(midgard_instruction *ins, int src) +{ + if (ins->ssa_args.src0 == src) return true; + if (ins->ssa_args.src1 == src) return true; + + return false; +} + +/* Determine if a variable is live in the successors of a block */ +static bool +is_live_after_successors(compiler_context *ctx, midgard_block *bl, int src) +{ + for (unsigned i = 0; i < bl->nr_successors; ++i) { + midgard_block *succ = bl->successors[i]; + + /* If we already visited, the value we're seeking + * isn't down this path (or we would have short + * circuited */ + + if (succ->visited) continue; + + /* Otherwise (it's visited *now*), check the block */ + + succ->visited = true; + + mir_foreach_instr_in_block(succ, ins) { + if (midgard_is_live_in_instr(ins, src)) + return true; + } + + /* ...and also, check *its* successors */ + if (is_live_after_successors(ctx, succ, src)) + return true; + + } + + /* Welp. We're really not live. 
*/ + + return false; +} + +bool +mir_is_live_after(compiler_context *ctx, midgard_block *block, midgard_instruction *start, int src) +{ + /* Check the rest of the block for liveness */ + + mir_foreach_instr_in_block_from(block, ins, mir_next_op(start)) { + if (midgard_is_live_in_instr(ins, src)) + return true; + } + + /* Check the rest of the blocks for liveness recursively */ + + bool succ = is_live_after_successors(ctx, block, src); + + mir_foreach_block(ctx, block) { + block->visited = false; + } + + return succ; +} diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ops.c b/src/gallium/drivers/panfrost/midgard/midgard_ops.c new file mode 100644 index 00000000000..cffa3c20fdf --- /dev/null +++ b/src/gallium/drivers/panfrost/midgard/midgard_ops.c @@ -0,0 +1,188 @@ +/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "midgard.h" + +/* Include the definitions of the macros and such */ + +#define MIDGARD_OPS_TABLE +#include "helpers.h" +#undef MIDGARD_OPS_TABLE + +/* Table of mapping opcodes to accompanying properties. This is used for both + * the disassembler and the compiler. It is placed in a .c file like this to + * avoid duplications in the binary */ + +struct mir_op_props alu_opcode_props[256] = { + [midgard_alu_op_fadd] = {"fadd", UNITS_ADD | OP_COMMUTES}, + [midgard_alu_op_fmul] = {"fmul", UNITS_MUL | UNIT_VLUT | OP_COMMUTES}, + [midgard_alu_op_fmin] = {"fmin", UNITS_MUL | UNITS_ADD | OP_COMMUTES}, + [midgard_alu_op_fmax] = {"fmax", UNITS_MUL | UNITS_ADD | OP_COMMUTES}, + [midgard_alu_op_imin] = {"imin", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_imax] = {"imax", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_umin] = {"umin", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_umax] = {"umax", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_fmov] = {"fmov", UNITS_ALL | QUIRK_FLIPPED_R24}, + [midgard_alu_op_fround] = {"fround", UNITS_ADD}, + [midgard_alu_op_froundeven] = {"froundeven", UNITS_ADD}, + [midgard_alu_op_ftrunc] = {"ftrunc", UNITS_ADD}, + [midgard_alu_op_ffloor] = {"ffloor", UNITS_ADD}, + [midgard_alu_op_fceil] = {"fceil", UNITS_ADD}, + [midgard_alu_op_ffma] = {"ffma", UNIT_VLUT}, + + /* Though they output a scalar, they need to run on a vector unit + * since they process vectors */ + [midgard_alu_op_fdot3] = {"fdot3", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES}, + [midgard_alu_op_fdot3r] = {"fdot3r", UNIT_VMUL | OP_CHANNEL_COUNT(3) | OP_COMMUTES}, + [midgard_alu_op_fdot4] = {"fdot4", UNIT_VMUL | OP_CHANNEL_COUNT(4) | OP_COMMUTES}, + + /* Incredibly, iadd can run on vmul, etc */ + [midgard_alu_op_iadd] = {"iadd", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_iabs] = {"iabs", UNITS_ADD}, + [midgard_alu_op_isub] = {"isub", UNITS_MOST}, + [midgard_alu_op_imul] = {"imul", UNITS_MUL | OP_COMMUTES}, + [midgard_alu_op_imov] = {"imov", UNITS_MOST | 
QUIRK_FLIPPED_R24}, + + /* For vector comparisons, use ball etc */ + [midgard_alu_op_feq] = {"feq", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_fne] = {"fne", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_fle] = {"fle", UNITS_MOST}, + [midgard_alu_op_flt] = {"flt", UNITS_MOST}, + [midgard_alu_op_ieq] = {"ieq", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_ine] = {"ine", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_ilt] = {"ilt", UNITS_MOST}, + [midgard_alu_op_ile] = {"ile", UNITS_MOST}, + [midgard_alu_op_ult] = {"ult", UNITS_MOST}, + [midgard_alu_op_ule] = {"ule", UNITS_MOST}, + + [midgard_alu_op_icsel] = {"icsel", UNITS_ADD}, + [midgard_alu_op_icsel_v] = {"icsel_v", UNITS_ADD}, + [midgard_alu_op_fcsel_v] = {"fcsel_v", UNITS_ADD}, + [midgard_alu_op_fcsel] = {"fcsel", UNITS_ADD | UNIT_SMUL}, + + [midgard_alu_op_frcp] = {"frcp", UNIT_VLUT}, + [midgard_alu_op_frsqrt] = {"frsqrt", UNIT_VLUT}, + [midgard_alu_op_fsqrt] = {"fsqrt", UNIT_VLUT}, + [midgard_alu_op_fpow_pt1] = {"fpow_pt1", UNIT_VLUT}, + [midgard_alu_op_fexp2] = {"fexp2", UNIT_VLUT}, + [midgard_alu_op_flog2] = {"flog2", UNIT_VLUT}, + + [midgard_alu_op_f2i] = {"f2i", UNITS_ADD | OP_TYPE_CONVERT}, + [midgard_alu_op_f2u] = {"f2u", UNITS_ADD | OP_TYPE_CONVERT}, + [midgard_alu_op_f2u8] = {"f2u8", UNITS_ADD | OP_TYPE_CONVERT}, + [midgard_alu_op_i2f] = {"i2f", UNITS_ADD | OP_TYPE_CONVERT}, + [midgard_alu_op_u2f] = {"u2f", UNITS_ADD | OP_TYPE_CONVERT}, + + [midgard_alu_op_fsin] = {"fsin", UNIT_VLUT}, + [midgard_alu_op_fcos] = {"fcos", UNIT_VLUT}, + + /* XXX: Test case where it's right on smul but not sadd */ + [midgard_alu_op_iand] = {"iand", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_iandnot] = {"iandnot", UNITS_MOST}, + + [midgard_alu_op_ior] = {"ior", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_iornot] = {"iornot", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_inor] = {"inor", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_ixor] = {"ixor", UNITS_MOST | OP_COMMUTES}, + [midgard_alu_op_inxor] = {"inxor", UNITS_MOST | 
OP_COMMUTES}, + [midgard_alu_op_iclz] = {"iclz", UNITS_ADD}, + [midgard_alu_op_ibitcount8] = {"ibitcount8", UNITS_ADD}, + [midgard_alu_op_inand] = {"inand", UNITS_MOST}, + [midgard_alu_op_ishl] = {"ishl", UNITS_ADD}, + [midgard_alu_op_iasr] = {"iasr", UNITS_ADD}, + [midgard_alu_op_ilsr] = {"ilsr", UNITS_ADD}, + + [midgard_alu_op_fball_eq] = {"fball_eq", UNITS_VECTOR | OP_COMMUTES}, + [midgard_alu_op_fbany_neq] = {"fbany_neq", UNITS_VECTOR | OP_COMMUTES}, + [midgard_alu_op_iball_eq] = {"iball_eq", UNITS_VECTOR | OP_COMMUTES}, + [midgard_alu_op_iball_neq] = {"iball_neq", UNITS_VECTOR | OP_COMMUTES}, + [midgard_alu_op_ibany_eq] = {"ibany_eq", UNITS_VECTOR | OP_COMMUTES}, + [midgard_alu_op_ibany_neq] = {"ibany_neq", UNITS_VECTOR | OP_COMMUTES}, + + /* These instructions are not yet emitted by the compiler, so + * don't speculate about units yet */ + [midgard_alu_op_ishladd] = {"ishladd", 0}, + + [midgard_alu_op_uball_lt] = {"uball_lt", 0}, + [midgard_alu_op_uball_lte] = {"uball_lte", 0}, + [midgard_alu_op_iball_lt] = {"iball_lt", 0}, + [midgard_alu_op_iball_lte] = {"iball_lte", 0}, + [midgard_alu_op_ubany_lt] = {"ubany_lt", 0}, + [midgard_alu_op_ubany_lte] = {"ubany_lte", 0}, + [midgard_alu_op_ibany_lt] = {"ibany_lt", 0}, + [midgard_alu_op_ibany_lte] = {"ibany_lte", 0}, + + [midgard_alu_op_freduce] = {"freduce", 0}, + [midgard_alu_op_bball_eq] = {"bball_eq", 0 | OP_COMMUTES}, + [midgard_alu_op_bbany_neq] = {"bbany_neq", 0 | OP_COMMUTES}, + [midgard_alu_op_fatan2_pt1] = {"fatan2_pt1", 0}, + [midgard_alu_op_fatan_pt2] = {"fatan_pt2", 0}, +}; + +const char *load_store_opcode_names[256] = { + [midgard_op_st_cubemap_coords] = "st_cubemap_coords", + [midgard_op_ld_global_id] = "ld_global_id", + + [midgard_op_atomic_add] = "atomic_add", + [midgard_op_atomic_and] = "atomic_and", + [midgard_op_atomic_or] = "atomic_or", + [midgard_op_atomic_xor] = "atomic_xor", + [midgard_op_atomic_imin] = "atomic_imin", + [midgard_op_atomic_umin] = "atomic_umin", + [midgard_op_atomic_imax] = 
"atomic_imax", + [midgard_op_atomic_umax] = "atomic_umax", + [midgard_op_atomic_xchg] = "atomic_xchg", + + [midgard_op_ld_char] = "ld_char", + [midgard_op_ld_char2] = "ld_char2", + [midgard_op_ld_short] = "ld_short", + [midgard_op_ld_char4] = "ld_char4", + [midgard_op_ld_short4] = "ld_short4", + [midgard_op_ld_int4] = "ld_int4", + + [midgard_op_ld_attr_32] = "ld_attr_32", + [midgard_op_ld_attr_16] = "ld_attr_16", + [midgard_op_ld_attr_32i] = "ld_attr_32i", + + [midgard_op_ld_vary_32] = "ld_vary_32", + [midgard_op_ld_vary_16] = "ld_vary_16", + [midgard_op_ld_vary_32i] = "ld_vary_32i", + + [midgard_op_ld_color_buffer_16] = "ld_color_buffer_16", + + [midgard_op_ld_uniform_16] = "ld_uniform_16", + [midgard_op_ld_uniform_32] = "ld_uniform_32", + [midgard_op_ld_color_buffer_8] = "ld_color_buffer_8", + + [midgard_op_st_char] = "st_char", + [midgard_op_st_char2] = "st_char2", + [midgard_op_st_char4] = "st_char4", + [midgard_op_st_short4] = "st_short4", + [midgard_op_st_int4] = "st_int4", + + [midgard_op_st_vary_32] = "st_vary_32", + [midgard_op_st_vary_16] = "st_vary_16", + [midgard_op_st_vary_32i] = "st_vary_32i", + + [midgard_op_st_image_f] = "st_image_f", + [midgard_op_st_image_ui] = "st_image_ui", + [midgard_op_st_image_i] = "st_image_i", +}; diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ops.h b/src/gallium/drivers/panfrost/midgard/midgard_ops.h new file mode 100644 index 00000000000..8b363529aa9 --- /dev/null +++ b/src/gallium/drivers/panfrost/midgard/midgard_ops.h @@ -0,0 +1,53 @@ +/* Copyright (c) 2018-2019 Alyssa Rosenzweig (alyssa@rosenzweig.io) + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished 
to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "helpers.h" + +/* Forward declare */ + +extern struct mir_op_props alu_opcode_props[256]; +extern const char *load_store_opcode_names[256]; + +/* Is this opcode that of an integer (regardless of signedness)? Instruction + * names authoritatively determine types */ + +static inline bool +midgard_is_integer_op(int op) +{ + const char *name = alu_opcode_props[op].name; + + if (!name) + return false; + + return (name[0] == 'i') || (name[0] == 'u'); +} + +/* Does this opcode *write* an integer? 
Same as is_integer_op, unless it's a + * conversion between int<->float in which case we do the opposite */ + +static inline bool +midgard_is_integer_out_op(int op) +{ + bool is_int = midgard_is_integer_op(op); + bool is_conversion = alu_opcode_props[op].props & OP_TYPE_CONVERT; + + return is_int ^ is_conversion; +} diff --git a/src/gallium/drivers/panfrost/midgard/midgard_print.c b/src/gallium/drivers/panfrost/midgard/midgard_print.c new file mode 100644 index 00000000000..348650ecf30 --- /dev/null +++ b/src/gallium/drivers/panfrost/midgard/midgard_print.c @@ -0,0 +1,124 @@ +/* + * Copyright (C) 2018-2019 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "compiler.h" +#include "helpers.h" +#include "midgard_ops.h" + +/* Pretty printer for Midgard IR, for use debugging compiler-internal + * passes like register allocation. 
The output superficially resembles + * Midgard assembly, with the exception that unit information and such is + * (normally) omitted, and generic indices are usually used instead of + * registers */ + +static void +mir_print_source(int source) +{ + if (source >= SSA_FIXED_MINIMUM) { + /* Specific register */ + int reg = SSA_REG_FROM_FIXED(source); + + /* TODO: Moving threshold */ + if (reg > 16 && reg < 24) + printf("u%d", 23 - reg); + else + printf("r%d", reg); + } else { + printf("%d", source); + } +} + +void +mir_print_instruction(midgard_instruction *ins) +{ + printf("\t"); + + switch (ins->type) { + case TAG_ALU_4: { + midgard_alu_op op = ins->alu.op; + const char *name = alu_opcode_props[op].name; + + if (ins->unit) + printf("%d.", ins->unit); + + printf("%s", name ? name : "??"); + break; + } + + case TAG_LOAD_STORE_4: { + midgard_load_store_op op = ins->load_store.op; + const char *name = load_store_opcode_names[op]; + + assert(name); + printf("%s", name); + break; + } + + case TAG_TEXTURE_4: { + printf("texture"); + break; + } + + default: + assert(0); + } + + ssa_args *args = &ins->ssa_args; + + printf(" %d, ", args->dest); + + mir_print_source(args->src0); + printf(", "); + + if (args->inline_constant) + printf("#%d", ins->inline_constant); + else + mir_print_source(args->src1); + + if (ins->has_constants) + printf(" <%f, %f, %f, %f>", ins->constants[0], ins->constants[1], ins->constants[2], ins->constants[3]); + + printf("\n"); +} + +/* Dumps MIR for a block or entire shader respectively */ + +void +mir_print_block(midgard_block *block) +{ + printf("{\n"); + + mir_foreach_instr_in_block(block, ins) { + mir_print_instruction(ins); + } + + printf("}\n"); +} + +void +mir_print_shader(compiler_context *ctx) +{ + mir_foreach_block(ctx, block) { + mir_print_block(block); + } +} diff --git a/src/gallium/drivers/panfrost/midgard/midgard_ra.c b/src/gallium/drivers/panfrost/midgard/midgard_ra.c new file mode 100644 index 00000000000..594cafe45ae --- /dev/null +++ 
b/src/gallium/drivers/panfrost/midgard/midgard_ra.c @@ -0,0 +1,310 @@ +/* + * Copyright (C) 2018-2019 Alyssa Rosenzweig + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "compiler.h" +#include "util/register_allocate.h" + +/* When we're 'squeezing down' the values in the IR, we maintain a hash + * as such */ + +static unsigned +find_or_allocate_temp(compiler_context *ctx, unsigned hash) +{ + if ((hash < 0) || (hash >= SSA_FIXED_MINIMUM)) + return hash; + + unsigned temp = (uintptr_t) _mesa_hash_table_u64_search(ctx->hash_to_temp, hash + 1); + + if (temp) + return temp - 1; + + /* If no temp is find, allocate one */ + temp = ctx->temp_count++; + ctx->max_hash = MAX2(ctx->max_hash, hash); + + _mesa_hash_table_u64_insert(ctx->hash_to_temp, hash + 1, (void *) ((uintptr_t) temp + 1)); + + return temp; +} + +/* Callback for register allocation selection, trivial default for now */ + +static unsigned int +midgard_ra_select_callback(struct ra_graph *g, BITSET_WORD *regs, void *data) +{ + /* Choose the first available register to minimise reported register pressure */ + + for (int i = 0; i < 16; ++i) { + if (BITSET_TEST(regs, i)) { + return i; + } + } + + assert(0); + return 0; +} + +/* Determine the actual hardware from the index based on the RA results or special values */ + +static int +dealias_register(compiler_context *ctx, struct ra_graph *g, int reg, int maxreg) +{ + if (reg >= SSA_FIXED_MINIMUM) + return SSA_REG_FROM_FIXED(reg); + + if (reg >= 0) { + assert(reg < maxreg); + assert(g); + int r = ra_get_node_reg(g, reg); + ctx->work_registers = MAX2(ctx->work_registers, r); + return r; + } + + switch (reg) { + case SSA_UNUSED_0: + case SSA_UNUSED_1: + return REGISTER_UNUSED; + + default: + unreachable("Unknown SSA register alias"); + } +} + +/* This routine performs the actual register allocation. 
It should be succeeded + * by install_registers */ + +struct ra_graph * +allocate_registers(compiler_context *ctx) +{ + /* First, initialize the RA */ + struct ra_regs *regs = ra_alloc_reg_set(NULL, 32, true); + + /* Create a primary (general purpose) class, as well as special purpose + * pipeline register classes */ + + int primary_class = ra_alloc_reg_class(regs); + int varying_class = ra_alloc_reg_class(regs); + + /* Add the full set of work registers */ + int work_count = 16 - MAX2((ctx->uniform_cutoff - 8), 0); + for (int i = 0; i < work_count; ++i) + ra_class_add_reg(regs, primary_class, i); + + /* Add special registers */ + ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE); + ra_class_add_reg(regs, varying_class, REGISTER_VARYING_BASE + 1); + + /* We're done setting up */ + ra_set_finalize(regs, NULL); + + /* Transform the MIR into squeezed index form */ + mir_foreach_block(ctx, block) { + mir_foreach_instr_in_block(block, ins) { + if (ins->compact_branch) continue; + + ins->ssa_args.src0 = find_or_allocate_temp(ctx, ins->ssa_args.src0); + ins->ssa_args.src1 = find_or_allocate_temp(ctx, ins->ssa_args.src1); + ins->ssa_args.dest = find_or_allocate_temp(ctx, ins->ssa_args.dest); + } + } + + /* No register allocation to do with no SSA */ + + if (!ctx->temp_count) + return NULL; + + /* Let's actually do register allocation */ + int nodes = ctx->temp_count; + struct ra_graph *g = ra_alloc_interference_graph(regs, nodes); + + /* Set everything to the work register class, unless it has somewhere + * special to go */ + + mir_foreach_block(ctx, block) { + mir_foreach_instr_in_block(block, ins) { + if (ins->compact_branch) continue; + + if (ins->ssa_args.dest < 0) continue; + + if (ins->ssa_args.dest >= SSA_FIXED_MINIMUM) continue; + + int class = primary_class; + + ra_set_node_class(g, ins->ssa_args.dest, class); + } + } + + for (int index = 0; index <= ctx->max_hash; ++index) { + unsigned temp = (uintptr_t) 
_mesa_hash_table_u64_search(ctx->ssa_to_register, index + 1); + + if (temp) { + unsigned reg = temp - 1; + int t = find_or_allocate_temp(ctx, index); + ra_set_node_reg(g, t, reg); + } + } + + /* Determine liveness */ + + int *live_start = malloc(nodes * sizeof(int)); + int *live_end = malloc(nodes * sizeof(int)); + + /* Initialize as non-existent */ + + for (int i = 0; i < nodes; ++i) { + live_start[i] = live_end[i] = -1; + } + + int d = 0; + + mir_foreach_block(ctx, block) { + mir_foreach_instr_in_block(block, ins) { + if (ins->compact_branch) continue; + + /* Dest is < 0 for st_vary instructions, which break + * the usual SSA conventions. Liveness analysis doesn't + * make sense on these instructions, so skip them to + * avoid memory corruption */ + + if (ins->ssa_args.dest < 0) continue; + + if (ins->ssa_args.dest < SSA_FIXED_MINIMUM) { + /* If this destination is not yet live, it is now since we just wrote it */ + + int dest = ins->ssa_args.dest; + + if (live_start[dest] == -1) + live_start[dest] = d; + } + + /* Since we just used a source, the source might be + * dead now. 
Scan the rest of the block for + * invocations, and if there are none, the source dies + * */ + + int sources[2] = { ins->ssa_args.src0, ins->ssa_args.src1 }; + + for (int src = 0; src < 2; ++src) { + int s = sources[src]; + + if (s < 0) continue; + + if (s >= SSA_FIXED_MINIMUM) continue; + + if (!mir_is_live_after(ctx, block, ins, s)) { + live_end[s] = d; + } + } + + ++d; + } + } + + /* If a node still hasn't been killed, kill it now */ + + for (int i = 0; i < nodes; ++i) { + /* live_start == -1 most likely indicates a pinned output */ + + if (live_end[i] == -1) + live_end[i] = d; + } + + /* Setup interference between nodes that are live at the same time */ + + for (int i = 0; i < nodes; ++i) { + for (int j = i + 1; j < nodes; ++j) { + if (!(live_start[i] >= live_end[j] || live_start[j] >= live_end[i])) + ra_add_node_interference(g, i, j); + } + } + + ra_set_select_reg_callback(g, midgard_ra_select_callback, NULL); + + if (!ra_allocate(g)) { + unreachable("Error allocating registers\n"); + } + + /* Cleanup */ + free(live_start); + free(live_end); + + return g; +} + +/* Once registers have been decided via register allocation + * (allocate_registers), we need to rewrite the MIR to use registers instead of + * SSA */ + +void +install_registers(compiler_context *ctx, struct ra_graph *g) +{ + mir_foreach_block(ctx, block) { + mir_foreach_instr_in_block(block, ins) { + if (ins->compact_branch) continue; + + ssa_args args = ins->ssa_args; + + switch (ins->type) { + case TAG_ALU_4: + ins->registers.src1_reg = dealias_register(ctx, g, args.src0, ctx->temp_count); + + ins->registers.src2_imm = args.inline_constant; + + if (args.inline_constant) { + /* Encode inline 16-bit constant as a vector by default */ + + ins->registers.src2_reg = ins->inline_constant >> 11; + + int lower_11 = ins->inline_constant & ((1 << 12) - 1); + + uint16_t imm = ((lower_11 >> 8) & 0x7) | ((lower_11 & 0xFF) << 3); + ins->alu.src2 = imm << 2; + } else { + ins->registers.src2_reg = 
dealias_register(ctx, g, args.src1, ctx->temp_count); + } + + ins->registers.out_reg = dealias_register(ctx, g, args.dest, ctx->temp_count); + + break; + + case TAG_LOAD_STORE_4: { + if (OP_IS_STORE_VARY(ins->load_store.op)) { + /* TODO: use ssa_args for st_vary */ + ins->load_store.reg = 0; + } else { + bool has_dest = args.dest >= 0; + int ssa_arg = has_dest ? args.dest : args.src0; + + ins->load_store.reg = dealias_register(ctx, g, ssa_arg, ctx->temp_count); + } + + break; + } + + default: + break; + } + } + } + +} -- 2.30.2