From 7f1cf046cd1fb8a3af0e24b622179e4adb398764 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Mon, 29 Oct 2018 15:06:14 -0500 Subject: [PATCH] intel/fs: Add a generic SEND opcode Reviewed-by: Iago Toral Quiroga --- src/intel/compiler/brw_eu_defines.h | 7 ++++ src/intel/compiler/brw_fs.cpp | 13 +++++++ src/intel/compiler/brw_fs.h | 6 ++++ src/intel/compiler/brw_fs_cse.cpp | 5 +++ src/intel/compiler/brw_fs_generator.cpp | 35 ++++++++++++++++++- src/intel/compiler/brw_fs_reg_allocate.cpp | 6 ++-- .../compiler/brw_schedule_instructions.cpp | 7 ++++ src/intel/compiler/brw_shader.cpp | 9 +++++ src/intel/compiler/brw_shader.h | 6 ++++ 9 files changed, 91 insertions(+), 3 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 4640c98140f..e52f1b505e9 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -315,6 +315,13 @@ enum opcode { SHADER_OPCODE_SIN, SHADER_OPCODE_COS, + /** + * A generic "send" opcode. The first two sources are the message + * descriptor and extended message descriptor respectively. The third + * and optional fourth sources are the message payload. + */ + SHADER_OPCODE_SEND, + /** * Texture sampling opcodes. 
* diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index b3e3980f0d0..b4e07be14bd 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -216,6 +216,7 @@ bool fs_inst::is_send_from_grf() const { switch (opcode) { + case SHADER_OPCODE_SEND: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case SHADER_OPCODE_SHADER_TIME_ADD: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: @@ -848,6 +849,14 @@ unsigned fs_inst::size_read(int arg) const { switch (opcode) { + case SHADER_OPCODE_SEND: + if (arg == 2) { + return mlen * REG_SIZE; + } else if (arg == 3) { + return ex_mlen * REG_SIZE; + } + break; + case FS_OPCODE_FB_WRITE: case FS_OPCODE_REP_FB_WRITE: if (arg == 0) { @@ -6025,6 +6034,10 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "(mlen: %d) ", inst->mlen); } + if (inst->ex_mlen) { + fprintf(file, "(ex_mlen: %d) ", inst->ex_mlen); + } + if (inst->eot) { fprintf(file, "(EOT) "); } diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 68287bcdcea..093a5751e2d 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -406,6 +406,12 @@ private: struct brw_reg payload, struct brw_reg implied_header, GLuint nr); + void generate_send(fs_inst *inst, + struct brw_reg dst, + struct brw_reg desc, + struct brw_reg ex_desc, + struct brw_reg payload, + struct brw_reg payload2); void generate_fb_write(fs_inst *inst, struct brw_reg payload); void generate_fb_read(fs_inst *inst, struct brw_reg dst, struct brw_reg payload); diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index 6859733d58c..19089246c4f 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -184,8 +184,13 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) a->dst.type == b->dst.type && a->offset == b->offset && a->mlen == b->mlen && + a->ex_mlen == b->ex_mlen && + a->sfid == b->sfid && + a->desc == b->desc && 
a->size_written == b->size_written && a->base_mrf == b->base_mrf && + a->check_tdr == b->check_tdr && + a->send_has_side_effects == b->send_has_side_effects && a->eot == b->eot && a->header_size == b->header_size && a->shadow_compare == b->shadow_compare && diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 5fc6cf5f8cc..17578fe5ff6 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -250,6 +250,33 @@ fs_generator::patch_discard_jumps_to_fb_writes() return true; } +void +fs_generator::generate_send(fs_inst *inst, + struct brw_reg dst, + struct brw_reg desc, + struct brw_reg ex_desc, + struct brw_reg payload, + struct brw_reg payload2) +{ + /* SENDS not yet supported */ + assert(ex_desc.file == BRW_IMMEDIATE_VALUE && ex_desc.d == 0); + assert(payload2.file == BRW_ARCHITECTURE_REGISTER_FILE && + payload2.nr == BRW_ARF_NULL); + + const bool dst_is_null = dst.file == BRW_ARCHITECTURE_REGISTER_FILE && + dst.nr == BRW_ARF_NULL; + const unsigned rlen = dst_is_null ? 
0 : inst->size_written / REG_SIZE; + + uint32_t desc_imm = inst->desc | + brw_message_desc(devinfo, inst->mlen, rlen, inst->header_size); + + brw_send_indirect_message(p, inst->sfid, dst, payload, desc, desc_imm); + + brw_inst_set_eot(p->devinfo, brw_last_inst, inst->eot); + if (inst->check_tdr) + brw_inst_set_opcode(p->devinfo, brw_last_inst, BRW_OPCODE_SENDC); +} + void fs_generator::fire_fb_write(fs_inst *inst, struct brw_reg payload, @@ -1807,7 +1834,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg); foreach_block_and_inst (block, fs_inst, inst, cfg) { - struct brw_reg src[3], dst; + struct brw_reg src[4], dst; unsigned int last_insn_offset = p->next_insn_offset; bool multiple_instructions_emitted = false; @@ -2130,6 +2157,12 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) src[0].subnr = 4 * type_sz(src[0].type); brw_MOV(p, dst, stride(src[0], 8, 4, 1)); break; + + case SHADER_OPCODE_SEND: + generate_send(inst, dst, src[0], src[1], src[2], + inst->ex_mlen > 0 ? src[3] : brw_null_reg()); + break; + case SHADER_OPCODE_GET_BUFFER_SIZE: generate_get_buffer_size(inst, dst, src[0], src[1]); break; diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index 678afe6bab4..5db5242452e 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -617,7 +617,9 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all) * highest register that works. */ if (inst->eot) { - int size = alloc.sizes[inst->src[0].nr]; + const int vgrf = inst->opcode == SHADER_OPCODE_SEND ? 
+ inst->src[2].nr : inst->src[0].nr; + int size = alloc.sizes[vgrf]; int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1; /* If something happened to spill, we want to push the EOT send @@ -626,7 +628,7 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all) */ reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf; - ra_set_node_reg(g, inst->src[0].nr, reg); + ra_set_node_reg(g, vgrf, reg); break; } } diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 9b279df5cf1..bc4c2dc5cdb 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -414,6 +414,13 @@ schedule_node::set_latency_gen7(bool is_haswell) latency = is_haswell ? 300 : 600; break; + case SHADER_OPCODE_SEND: + switch (inst->sfid) { + default: + unreachable("Unknown SFID"); + } + break; + default: /* 2 cycles: * mul(8) g4<1>F g2<0,1,0>F 0.5F { align1 WE_normal 1Q }; diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 97966c951a1..f1037fcda22 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -206,6 +206,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case SHADER_OPCODE_COS: return "cos"; + case SHADER_OPCODE_SEND: + return "send"; + case SHADER_OPCODE_TEX: return "tex"; case SHADER_OPCODE_TEX_LOGICAL: @@ -997,6 +1000,9 @@ bool backend_instruction::has_side_effects() const { switch (opcode) { + case SHADER_OPCODE_SEND: + return send_has_side_effects; + case SHADER_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: @@ -1033,6 +1039,9 @@ bool backend_instruction::is_volatile() const { switch (opcode) { + case SHADER_OPCODE_SEND: + return send_is_volatile; + case SHADER_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: case SHADER_OPCODE_TYPED_SURFACE_READ: diff --git 
a/src/intel/compiler/brw_shader.h b/src/intel/compiler/brw_shader.h index e1d598b8781..45ff19832b7 100644 --- a/src/intel/compiler/brw_shader.h +++ b/src/intel/compiler/brw_shader.h @@ -156,8 +156,11 @@ struct backend_instruction { uint32_t offset; /**< spill/unspill offset or texture offset bitfield */ uint8_t mlen; /**< SEND message length */ + uint8_t ex_mlen; /**< SENDS extended message length */ int8_t base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */ uint8_t target; /**< MRT target. */ + uint8_t sfid; /**< SFID for SEND instructions */ + uint32_t desc; /**< SEND[S] message descriptor immediate */ unsigned size_written; /**< Data written to the destination register in bytes. */ enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */ @@ -170,6 +173,9 @@ struct backend_instruction { bool no_dd_check:1; bool saturate:1; bool shadow_compare:1; + bool check_tdr:1; /**< Only valid for SEND; turns it into a SENDC */ + bool send_has_side_effects:1; /**< Only valid for SHADER_OPCODE_SEND */ + bool send_is_volatile:1; /**< Only valid for SHADER_OPCODE_SEND */ bool eot:1; /* Chooses which flag subregister (f0.0 to f1.1) is used for conditional -- 2.30.2