From b284d222dbbe7a106dd1e52af7a826dc9855fc3c Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 1 Nov 2018 16:04:01 -0500 Subject: [PATCH] intel/fs: Use SHADER_OPCODE_SEND for varying UBO pulls on gen7+ Reviewed-by: Iago Toral Quiroga --- src/intel/compiler/brw_eu_defines.h | 1 - src/intel/compiler/brw_fs.cpp | 31 ++++++-- src/intel/compiler/brw_fs.h | 4 - src/intel/compiler/brw_fs_cse.cpp | 1 - src/intel/compiler/brw_fs_generator.cpp | 73 ------------------- .../compiler/brw_schedule_instructions.cpp | 1 - src/intel/compiler/brw_shader.cpp | 2 - 7 files changed, 25 insertions(+), 88 deletions(-) diff --git a/src/intel/compiler/brw_eu_defines.h b/src/intel/compiler/brw_eu_defines.h index 4445f388d38..d3dfd6dc7e8 100644 --- a/src/intel/compiler/brw_eu_defines.h +++ b/src/intel/compiler/brw_eu_defines.h @@ -526,7 +526,6 @@ enum opcode { FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4, - FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7, FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL, FS_OPCODE_DISCARD_JUMP, FS_OPCODE_SET_SAMPLE_ID, diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index d9c339b1f08..9ecabd58763 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -217,7 +217,6 @@ fs_inst::is_send_from_grf() const { switch (opcode) { case SHADER_OPCODE_SEND: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case SHADER_OPCODE_SHADER_TIME_ADD: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: @@ -5196,16 +5195,37 @@ lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst) const gen_device_info *devinfo = bld.shader->devinfo; if (devinfo->gen >= 7) { + fs_reg index = inst->src[0]; /* We are switching the instruction from an ALU-like instruction to a * send-from-grf instruction. Since sends can't handle strides or * source modifiers, we have to make a copy of the offset source. */ - fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD); - bld.MOV(tmp, inst->src[1]); - inst->src[1] = tmp; + fs_reg offset = bld.vgrf(BRW_REGISTER_TYPE_UD); + bld.MOV(offset, inst->src[1]); - inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7; + const unsigned simd_mode = + inst->exec_size <= 8 ? BRW_SAMPLER_SIMD_MODE_SIMD8 : + BRW_SAMPLER_SIMD_MODE_SIMD16; + + inst->opcode = SHADER_OPCODE_SEND; inst->mlen = inst->exec_size / 8; + inst->resize_sources(3); + + inst->sfid = BRW_SFID_SAMPLER; + inst->desc = brw_sampler_desc(devinfo, 0, 0, + GEN5_SAMPLER_MESSAGE_SAMPLE_LD, + simd_mode, 0); + if (index.file == IMM) { + inst->desc |= index.ud & 0xff; + inst->src[0] = brw_imm_ud(0); + } else { + const fs_builder ubld = bld.exec_all().group(1, 0); + fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD); + ubld.AND(tmp, index, brw_imm_ud(0xff)); + inst->src[0] = component(tmp, 0); + } + inst->src[1] = brw_imm_ud(0); /* ex_desc */ + inst->src[2] = offset; /* payload */ } else { const fs_reg payload(MRF, FIRST_PULL_LOAD_MRF(devinfo->gen), BRW_REGISTER_TYPE_UD); @@ -5727,7 +5747,6 @@ get_lowered_simd_width(const struct gen_device_info *devinfo, case FS_OPCODE_DDX_FINE: case FS_OPCODE_DDY_COARSE: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_PACK_HALF_2x16_SPLIT: case FS_OPCODE_INTERPOLATE_AT_SAMPLE: case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 4e913eb8d80..6467b4c6d95 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -444,10 +444,6 @@ private: void generate_varying_pull_constant_load_gen4(fs_inst *inst, struct brw_reg dst, struct brw_reg index); - void generate_varying_pull_constant_load_gen7(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset); void generate_mov_dispatch_to_flags(fs_inst *inst); void generate_pixel_interpolator_query(fs_inst *inst, diff --git a/src/intel/compiler/brw_fs_cse.cpp b/src/intel/compiler/brw_fs_cse.cpp index 19089246c4f..9b897804032 100644 --- a/src/intel/compiler/brw_fs_cse.cpp +++ b/src/intel/compiler/brw_fs_cse.cpp @@ -74,7 +74,6 @@ is_expression(const fs_visitor *v, const fs_inst *const inst) case FS_OPCODE_FB_READ_LOGICAL: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_LINTERP: case SHADER_OPCODE_FIND_LIVE_CHANNEL: case SHADER_OPCODE_BROADCAST: diff --git a/src/intel/compiler/brw_fs_generator.cpp b/src/intel/compiler/brw_fs_generator.cpp index 35762b43615..544d19826f4 100644 --- a/src/intel/compiler/brw_fs_generator.cpp +++ b/src/intel/compiler/brw_fs_generator.cpp @@ -1514,75 +1514,6 @@ fs_generator::generate_varying_pull_constant_load_gen4(fs_inst *inst, msg_type, simd_mode, return_format)); } -void -fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, - struct brw_reg dst, - struct brw_reg index, - struct brw_reg offset) -{ - assert(devinfo->gen >= 7); - /* Varying-offset pull constant loads are treated as a normal expression on - * gen7, so the fact that it's a send message is hidden at the IR level. - */ - assert(inst->header_size == 0); - assert(inst->mlen); - assert(index.type == BRW_REGISTER_TYPE_UD); - - uint32_t simd_mode, rlen; - if (inst->exec_size == 16) { - rlen = 8; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; - } else { - assert(inst->exec_size == 8); - rlen = 4; - simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; - } - - if (index.file == BRW_IMMEDIATE_VALUE) { - - uint32_t surf_index = index.ud; - - brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND); - brw_inst_set_sfid(devinfo, send, BRW_SFID_SAMPLER); - brw_set_dest(p, send, retype(dst, BRW_REGISTER_TYPE_UW)); - brw_set_src0(p, send, offset); - brw_set_desc(p, send, - brw_message_desc(devinfo, inst->mlen, rlen, false) | - brw_sampler_desc(devinfo, surf_index, - 0, /* LD message ignores sampler unit */ - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - simd_mode, 0)); - - } else { - - struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD)); - - brw_push_insn_state(p); - brw_set_default_mask_control(p, BRW_MASK_DISABLE); - - /* a0.0 = surf_index & 0xff */ - brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND); - brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1); - brw_set_dest(p, insn_and, addr); - brw_set_src0(p, insn_and, vec1(retype(index, BRW_REGISTER_TYPE_UD))); - brw_set_src1(p, insn_and, brw_imm_ud(0x0ff)); - - brw_pop_insn_state(p); - - /* dst = send(offset, a0.0 | ) */ - brw_send_indirect_message( - p, BRW_SFID_SAMPLER, retype(dst, BRW_REGISTER_TYPE_UW), - offset, addr, - brw_message_desc(devinfo, inst->mlen, rlen, false) | - brw_sampler_desc(devinfo, - 0 /* surface */, - 0 /* sampler */, - GEN5_SAMPLER_MESSAGE_SAMPLE_LD, - simd_mode, - 0)); - } -} - void fs_generator::generate_pixel_interpolator_query(fs_inst *inst, struct brw_reg dst, @@ -2133,10 +2064,6 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) generate_varying_pull_constant_load_gen4(inst, dst, src[0]); break; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]); - break; - case FS_OPCODE_REP_FB_WRITE: case FS_OPCODE_FB_WRITE: generate_fb_write(inst, src[0]); diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 46d3111045d..be57802b227 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -323,7 +323,6 @@ schedule_node::set_latency_gen7(bool is_haswell) break; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: case VS_OPCODE_PULL_CONSTANT_LOAD: diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 5b1d50052ff..b4c74871a48 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -407,8 +407,6 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "uniform_pull_const_gen7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: return "varying_pull_const_gen4"; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - return "varying_pull_const_gen7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: return "varying_pull_const_logical"; -- 2.30.2