X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Fpanfrost%2Fbifrost%2Fcompiler.h;h=32361cc37e1c44d16776bb90eb64332f92c1a7f9;hp=4178352f3a8372625ca3da5d8be8b3065ce4d00e;hb=a7b2317d0a06c0c4c7f0fb2d7ab0c72f68ce6383;hpb=7fe3c145d9728480106e8c5b4e97b289104e50e8 diff --git a/src/panfrost/bifrost/compiler.h b/src/panfrost/bifrost/compiler.h index 4178352f3a8..32361cc37e1 100644 --- a/src/panfrost/bifrost/compiler.h +++ b/src/panfrost/bifrost/compiler.h @@ -60,7 +60,7 @@ enum bi_class { BI_FMA, BI_FMOV, BI_FREXP, - BI_ISUB, + BI_IMATH, BI_LOAD, BI_LOAD_UNIFORM, BI_LOAD_ATTR, @@ -70,13 +70,13 @@ enum bi_class { BI_MOV, BI_REDUCE_FMA, BI_SELECT, - BI_SHIFT, BI_STORE, BI_STORE_VAR, BI_SPECIAL, /* _FAST on supported GPUs */ BI_TABLE, BI_TEX, BI_ROUND, + BI_IMUL, BI_NUM_CLASSES }; @@ -86,10 +86,8 @@ extern unsigned bi_class_props[BI_NUM_CLASSES]; /* abs/neg/outmod valid for a float op */ #define BI_MODS (1 << 0) -/* Generic enough that little class-specific information is required. In other - * words, it acts as a "normal" ALU op, even if the encoding ends up being - * irregular enough to warrant a separate class */ -#define BI_GENERIC (1 << 1) +/* Accepts a bi_cond */ +#define BI_CONDITIONAL (1 << 1) /* Accepts a bifrost_roundmode */ #define BI_ROUNDMODE (1 << 2) @@ -157,15 +155,6 @@ enum bi_cond { BI_COND_NE, }; -struct bi_branch { - /* Types are specified in src_types and must be compatible (either both - * int, or both float, 16/32, and same size or 32/16 if float. Types - * ignored if BI_COND_ALWAYS is set for an unconditional branch. */ - - enum bi_cond cond; - struct bi_block *target; -}; - /* Opcodes within a class */ enum bi_minmax_op { BI_MINMAX_MIN, @@ -178,6 +167,15 @@ enum bi_bitwise_op { BI_BITWISE_XOR }; +enum bi_imath_op { + BI_IMATH_ADD, + BI_IMATH_SUB, +}; + +enum bi_imul_op { + BI_IMUL_IMUL, +}; + enum bi_table_op { /* fp32 log2() with low precision, suitable for GL or half_log2() in * CL. In the first argument, takes x. Letting u be such that x = @@ -206,6 +204,7 @@ enum bi_special_op { * exp2() in GL. In the first argument, it takes f2i_rte(x * 2^24). In * the second, it takes x itself. */ BI_SPECIAL_EXP2_LOW, + BI_SPECIAL_IABS, }; enum bi_tex_op { @@ -214,11 +213,22 @@ enum bi_tex_op { BI_TEX_DUAL }; +struct bi_bitwise { + bool src_invert[2]; + bool rshift; /* false for lshift */ +}; + +struct bi_texture { + /* Constant indices. Indirect would need to be in src[..] like normal, + * we can reserve some sentinels there for that for future. */ + unsigned texture_index, sampler_index; +}; + typedef struct { struct list_head link; /* Must be first */ enum bi_class type; - /* Indices, see bir_ssa_index etc. Note zero is special cased + /* Indices, see pan_ssa_index etc. Note zero is special cased * to "no argument" */ unsigned dest; unsigned src[BIR_SRC_COUNT]; @@ -262,6 +272,9 @@ typedef struct { /* For VECTOR ops, how many channels are written? */ unsigned vector_channels; + /* The comparison op. BI_COND_ALWAYS may not be valid. */ + enum bi_cond cond; + /* A class-specific op from which the actual opcode can be derived * (along with the above information) */ @@ -273,6 +286,8 @@ typedef struct { enum bi_table_op table; enum bi_frexp_op frexp; enum bi_tex_op texture; + enum bi_imath_op imath; + enum bi_imul_op imul; /* For FMA/ADD, should we add a biased exponent? */ bool mscale; @@ -282,45 +297,63 @@ typedef struct { union { enum bifrost_minmax_mode minmax; struct bi_load_vary load_vary; - struct bi_branch branch; - - /* For CSEL, the comparison op. BI_COND_ALWAYS doesn't make - * sense here but you can always just use a move for that */ - enum bi_cond cond; + struct bi_block *branch_target; /* For BLEND -- the location 0-7 */ unsigned blend_location; + + struct bi_bitwise bitwise; + struct bi_texture texture; }; } bi_instruction; -/* Scheduling takes place in two steps. Step 1 groups instructions within a - * block into distinct clauses (bi_clause). Step 2 schedules instructions - * within a clause into FMA/ADD pairs (bi_bundle). - * - * A bi_bundle contains two paired instruction pointers. If a slot is unfilled, - * leave it NULL; the emitter will fill in a nop. +/* Represents the assignment of ports for a given bi_bundle */ + +typedef struct { + /* Register to assign to each port */ + unsigned port[4]; + + /* Read ports can be disabled */ + bool enabled[2]; + + /* Should we write FMA? what about ADD? If only a single port is + * enabled it is in port 2, else ADD/FMA is 2/3 respectively */ + bool write_fma, write_add; + + /* Should we read with port 3? */ + bool read_port3; + + /* Packed uniform/constant */ + uint8_t uniform_constant; + + /* Whether writes are actually for the last instruction */ + bool first_instruction; +} bi_registers; + +/* A bi_bundle contains two paired instruction pointers. If a slot is unfilled, + * leave it NULL; the emitter will fill in a nop. Instructions reference + * registers via ports which are assigned per bundle. */ typedef struct { + bi_registers regs; bi_instruction *fma; bi_instruction *add; } bi_bundle; +struct bi_block; + typedef struct { struct list_head link; + /* Link back up for branch calculations */ + struct bi_block *block; + /* A clause can have 8 instructions in bundled FMA/ADD sense, so there - * can be 8 bundles. But each bundle can have both an FMA and an ADD, - * so a clause can have up to 16 bi_instructions. Whether bundles or - * instructions are used depends on where in scheduling we are. */ + * can be 8 bundles. */ - unsigned instruction_count; unsigned bundle_count; - - union { - bi_instruction *instructions[16]; - bi_bundle bundles[8]; - }; + bi_bundle bundles[8]; /* For scoreboarding -- the clause ID (this is not globally unique!) * and its dependencies in terms of other clauses, computed during @@ -344,10 +377,22 @@ typedef struct { /* Corresponds to the usual bit but shifted by a clause */ bool data_register_write_barrier; - /* Constants read by this clause. ISA limit. */ + /* Constants read by this clause. ISA limit. Must satisfy: + * + * constant_count + bundle_count <= 13 + * + * Also implicitly constant_count <= bundle_count since a bundle only + * reads a single constant. + */ uint64_t constants[8]; unsigned constant_count; + /* Branches encode a constant offset relative to the program counter + * with some magic flags. By convention, if there is a branch, its + * constant will be last. Set this flag to indicate this is required. + */ + bool branch_constant; + /* What type of high latency instruction is here, basically */ unsigned clause_type; } bi_clause; @@ -370,7 +415,6 @@ typedef struct { /* During NIR->BIR */ nir_function_impl *impl; bi_block *current_block; - unsigned block_name_count; bi_block *after_block; bi_block *break_block; bi_block *continue_block; @@ -412,10 +456,6 @@ bi_remove_instruction(bi_instruction *ins) list_del(&ins->link); } -/* So we can distinguish between SSA/reg/sentinel quickly */ -#define BIR_NO_ARG (0) -#define BIR_IS_REG (1) - /* If high bits are set, instead of SSA/registers, we have specials indexed by * the low bits if necessary. * @@ -453,36 +493,7 @@ bi_make_temp(bi_context *ctx) static inline unsigned bi_make_temp_reg(bi_context *ctx) { - return ((ctx->impl->reg_alloc + ctx->temp_alloc++) << 1) | BIR_IS_REG; -} - -static inline unsigned -bir_ssa_index(nir_ssa_def *ssa) -{ - /* Off-by-one ensures BIR_NO_ARG is skipped */ - return ((ssa->index + 1) << 1) | 0; -} - -static inline unsigned -bir_src_index(nir_src *src) -{ - if (src->is_ssa) - return bir_ssa_index(src->ssa); - else { - assert(!src->reg.indirect); - return (src->reg.reg->index << 1) | BIR_IS_REG; - } -} - -static inline unsigned -bir_dest_index(nir_dest *dst) -{ - if (dst->is_ssa) - return bir_ssa_index(&dst->ssa); - else { - assert(!dst->reg.indirect); - return (dst->reg.reg->index << 1) | BIR_IS_REG; - } + return ((ctx->impl->reg_alloc + ctx->temp_alloc++) << 1) | PAN_IS_REG; } /* Iterators for Bifrost IR */ @@ -493,6 +504,9 @@ bir_dest_index(nir_dest *dst) #define bi_foreach_block_from(ctx, from, v) \ list_for_each_entry_from(pan_block, v, from, &ctx->blocks, link) +#define bi_foreach_block_from_rev(ctx, from, v) \ + list_for_each_entry_from_rev(pan_block, v, from, &ctx->blocks, link) + #define bi_foreach_instr_in_block(block, v) \ list_for_each_entry(bi_instruction, v, &(block)->base.instructions, link) @@ -514,6 +528,12 @@ bir_dest_index(nir_dest *dst) #define bi_foreach_clause_in_block(block, v) \ list_for_each_entry(bi_clause, v, &(block)->clauses, link) +#define bi_foreach_clause_in_block_from(block, v, from) \ + list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link) + +#define bi_foreach_clause_in_block_from_rev(block, v, from) \ + list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link) + #define bi_foreach_instr_global(ctx, v) \ bi_foreach_block(ctx, v_block) \ bi_foreach_instr_in_block((bi_block *) v_block, v) @@ -586,6 +606,12 @@ void bi_liveness_ins_update(uint16_t *live, bi_instruction *ins, unsigned max); void bi_invalidate_liveness(bi_context *ctx); bool bi_is_live_after(bi_context *ctx, bi_block *block, bi_instruction *start, int src); +/* Layout */ + +bool bi_can_insert_bundle(bi_clause *clause, bool constant); +unsigned bi_clause_quadwords(bi_clause *clause); +signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target); + /* Code emit */ void bi_pack(bi_context *ctx, struct util_dynarray *emission);