X-Git-Url: https://git.libre-soc.org/?p=mesa.git;a=blobdiff_plain;f=src%2Famd%2Fcompiler%2Faco_builder_h.py;h=8d541cbd72f2a15a4d2856498728d8e8189f01e0;hp=eb655471c902deb937d64adb50e662d5cc87d1b3;hb=fc9f502a5bd853128a9c2932c793180035883efc;hpb=f31c9b4edf6e8d972f26461c49c5e193bf6d9a13

# Summary of this patch to the ACO builder template (aco_builder_h.py):
#   * adds dpp_row_rr(amount), built like dpp_row_sr but OR-ing into _dpp_row_rr;
#   * declares the external table int8_mul_table[512];
#   * adds the Builder flags is_precise / is_nuw plus precise() / nuw(), which
#     return a copy of the builder with the corresponding flag set;
#   * makes the one-argument Builder constructor accept a null Program by
#     falling back to s2 for the lane mask, as the other constructors already do;
#   * relaxes the vcc/exec asserts from "regClass() == lm" to "SGPR and at most
#     8 bytes";
#   * adds copy() paths for 1-byte constants and for 2-byte non-literal
#     constants, both emitted as SDWA instructions with dst_preserve set;
#   * gives each entry of `formats` an optional fifth element, extra_field_setup
#     (default ''), emitted just before `return insert(instr);`;
#   * copies is_precise / is_nuw onto every definition of a built instruction.
#
# NOTE(review): the source of this text had lost all line breaks and
# indentation. Line breaks were rebuilt so that every hunk matches its
# "@@ -a,b +c,d @@" counts; indentation is conventional, not recovered, so the
# hunks may need whitespace-insensitive application. Template arguments that had
# been stripped (<Instruction>, <SDWA_instruction>) were restored from usage --
# confirm against the upstream commit before applying.

diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index eb655471c90..8d541cbd72f 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -69,6 +69,13 @@ dpp_row_sr(unsigned amount)
     return (dpp_ctrl)(((unsigned) _dpp_row_sr) | amount);
 }
 
+inline dpp_ctrl
+dpp_row_rr(unsigned amount)
+{
+    assert(amount > 0 && amount < 16);
+    return (dpp_ctrl)(((unsigned) _dpp_row_rr) | amount);
+}
+
 inline unsigned
 ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
 {
@@ -78,6 +85,8 @@ ds_pattern_bitmode(unsigned and_mask, unsigned or_mask, unsigned xor_mask)
 
 aco_ptr<Instruction> create_s_mov(Definition dst, Operand src);
 
+extern uint8_t int8_mul_table[512];
+
 enum sendmsg {
    sendmsg_none = 0,
    _sendmsg_gs = 2,
@@ -166,11 +175,25 @@ public:
 
    std::vector<aco_ptr<Instruction>> *instructions;
    std::vector<aco_ptr<Instruction>>::iterator it;
+   bool is_precise = false;
+   bool is_nuw = false;
 
-   Builder(Program *pgm) : program(pgm), use_iterator(false), start(false), lm(pgm->lane_mask), instructions(NULL) {}
+   Builder(Program *pgm) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(NULL) {}
    Builder(Program *pgm, Block *block) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(&block->instructions) {}
    Builder(Program *pgm, std::vector<aco_ptr<Instruction>> *instrs) : program(pgm), use_iterator(false), start(false), lm(pgm ? pgm->lane_mask : s2), instructions(instrs) {}
 
+   Builder precise() const {
+      Builder res = *this;
+      res.is_precise = true;
+      return res;
+   };
+
+   Builder nuw() const {
+      Builder res = *this;
+      res.is_nuw = true;
+      return res;
+   }
+
    void moveEnd(Block *block) {
       instructions = &block->instructions;
    }
@@ -294,7 +317,8 @@ public:
 % for fixed in ['m0', 'vcc', 'exec', 'scc']:
    Operand ${fixed}(Temp tmp) {
        % if fixed == 'vcc' or fixed == 'exec':
-          assert(tmp.regClass() == lm);
+          //vcc_hi and exec_hi can still be used in wave32
+          assert(tmp.type() == RegType::sgpr && tmp.bytes() <= 8);
        % endif
        Operand op(tmp);
        op.setFixed(aco::${fixed});
@@ -303,7 +327,8 @@ public:
 
    Definition ${fixed}(Definition def) {
        % if fixed == 'vcc' or fixed == 'exec':
-          assert(def.regClass() == lm);
+          //vcc_hi and exec_hi can still be used in wave32
+          assert(def.regClass().type() == RegType::sgpr && def.bytes() <= 8);
        % endif
        def.setFixed(aco::${fixed});
        return def;
@@ -311,7 +336,8 @@ public:
 
    Definition hint_${fixed}(Definition def) {
        % if fixed == 'vcc' or fixed == 'exec':
-          assert(def.regClass() == lm);
+          //vcc_hi and exec_hi can still be used in wave32
+          assert(def.regClass().type() == RegType::sgpr && def.bytes() <= 8);
        % endif
        def.setHint(aco::${fixed});
        return def;
@@ -381,6 +407,36 @@ public:
          return vop1(aco_opcode::v_mov_b32, dst, op);
       } else if (op.bytes() > 2) {
          return pseudo(aco_opcode::p_create_vector, dst, op);
+      } else if (op.bytes() == 1 && op.isConstant()) {
+         uint8_t val = op.constantValue();
+         Operand op32((uint32_t)val | (val & 0x80u ? 0xffffff00u : 0u));
+         aco_ptr<SDWA_instruction> sdwa;
+         if (op32.isLiteral()) {
+            sdwa.reset(create_instruction<SDWA_instruction>(aco_opcode::v_mul_u32_u24, asSDWA(Format::VOP2), 2, 1));
+            uint32_t a = (uint32_t)int8_mul_table[val * 2];
+            uint32_t b = (uint32_t)int8_mul_table[val * 2 + 1];
+            sdwa->operands[0] = Operand(a | (a & 0x80u ? 0xffffff00u : 0x0u));
+            sdwa->operands[1] = Operand(b | (b & 0x80u ? 0xffffff00u : 0x0u));
+         } else {
+            sdwa.reset(create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1));
+            sdwa->operands[0] = op32;
+         }
+         sdwa->definitions[0] = dst;
+         sdwa->sel[0] = sdwa_udword;
+         sdwa->sel[1] = sdwa_udword;
+         sdwa->dst_sel = sdwa_ubyte;
+         sdwa->dst_preserve = true;
+         return insert(std::move(sdwa));
+      } else if (op.bytes() == 2 && op.isConstant() && !op.isLiteral()) {
+         aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_add_f16, asSDWA(Format::VOP2), 2, 1)};
+         sdwa->operands[0] = op;
+         sdwa->operands[1] = Operand(0u);
+         sdwa->definitions[0] = dst;
+         sdwa->sel[0] = sdwa_uword;
+         sdwa->sel[1] = sdwa_udword;
+         sdwa->dst_sel = dst.bytes() == 1 ? sdwa_ubyte : sdwa_uword;
+         sdwa->dst_preserve = true;
+         return insert(std::move(sdwa));
       } else if (dst.regClass().is_subdword()) {
          if (program->chip_class >= GFX8) {
             aco_ptr<SDWA_instruction> sdwa{create_instruction<SDWA_instruction>(aco_opcode::v_mov_b32, asSDWA(Format::VOP1), 1, 1)};
@@ -506,8 +562,9 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
            ("vopc_e64", [Format.VOPC, Format.VOP3A], 'VOP3A_instruction', itertools.product([1, 2], [2])),
            ("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]),
            ("global", [Format.GLOBAL], 'FLAT_instruction', [(0, 3), (1, 2)])]
+formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
 %>\\
-% for name, formats, struct, shapes in formats:
+% for name, formats, struct, shapes, extra_field_setup in formats:
     % for num_definitions, num_operands in shapes:
         <%
         args = ['aco_opcode opcode']
@@ -524,6 +581,8 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
       ${struct} *instr = create_instruction<${struct}>(opcode, (Format)(${'|'.join('(int)Format::%s' % f.name for f in formats)}), ${num_operands}, ${num_definitions});
         % for i in range(num_definitions):
             instr->definitions[${i}] = def${i};
+            instr->definitions[${i}].setPrecise(is_precise);
+            instr->definitions[${i}].setNUW(is_nuw);
         % endfor
         % for i in range(num_operands):
             instr->operands[${i}] = op${i}.op;
@@ -534,6 +593,7 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
             % endfor
             ${f.get_builder_initialization(num_operands)}
         % endfor
+       ${extra_field_setup}
       return insert(instr);
    }
 