From c20dc9b8363b5b497d4f6af5144590e8c2d940c0 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Fri, 27 Dec 2019 16:38:26 -0800 Subject: [PATCH] intel/fs: Make implied_mrf_writes() an fs_inst method. This will be convenient in a later commit enabling SIMD32 fragment shaders, and happens to fix the calculation for MATH instructions which is currently inaccurate for SIMD-lowered instructions on Gen4-5 platforms (all of them on Gen4 in SIMD16 mode), since it was based on the shader's dispatch width rather than on the actual execution size of the instruction. This causes some shader-db noise on Gen4 due to the more compact register allocation interacting with the SEND dependency workarounds, but otherwise no major changes. Reviewed-by: Kenneth Graunke --- src/intel/compiler/brw_fs.cpp | 22 +++++++++---------- src/intel/compiler/brw_fs.h | 2 -- src/intel/compiler/brw_fs_bank_conflicts.cpp | 2 +- src/intel/compiler/brw_fs_reg_allocate.cpp | 2 +- src/intel/compiler/brw_fs_scoreboard.cpp | 2 +- src/intel/compiler/brw_ir_fs.h | 1 + .../compiler/brw_schedule_instructions.cpp | 4 ++-- 7 files changed, 17 insertions(+), 18 deletions(-) diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index f78c953f0f3..eaa57e4fe0a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -1164,16 +1164,16 @@ fs_inst::flags_written() const * Note that this is not the 0 or 1 implied writes in an actual gen * instruction -- the FS opcodes often generate MOVs in addition. */ -int -fs_visitor::implied_mrf_writes(const fs_inst *inst) const +unsigned +fs_inst::implied_mrf_writes() const { - if (inst->mlen == 0) + if (mlen == 0) return 0; - if (inst->base_mrf == -1) + if (base_mrf == -1) return 0; - switch (inst->opcode) { + switch (opcode) { case SHADER_OPCODE_RCP: case SHADER_OPCODE_RSQ: case SHADER_OPCODE_SQRT: @@ -1181,11 +1181,11 @@ fs_visitor::implied_mrf_writes(const fs_inst *inst) const case SHADER_OPCODE_LOG2: case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: - return 1 * dispatch_width / 8; + return 1 * exec_size / 8; case SHADER_OPCODE_POW: case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: - return 2 * dispatch_width / 8; + return 2 * exec_size / 8; case SHADER_OPCODE_TEX: case FS_OPCODE_TXB: case SHADER_OPCODE_TXD: @@ -1201,14 +1201,14 @@ fs_visitor::implied_mrf_writes(const fs_inst *inst) const return 1; case FS_OPCODE_FB_WRITE: case FS_OPCODE_REP_FB_WRITE: - return inst->src[0].file == BAD_FILE ? 0 : 2; + return src[0].file == BAD_FILE ? 0 : 2; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: case SHADER_OPCODE_GEN4_SCRATCH_READ: return 1; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: - return inst->mlen; + return mlen; case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - return inst->mlen; + return mlen; default: unreachable("not reached"); } @@ -3494,7 +3494,7 @@ fs_visitor::remove_duplicate_mrf_writes() /* Found a SEND instruction, which will include two or fewer * implied MRF writes. We could do better here. */ - for (int i = 0; i < implied_mrf_writes(inst); i++) { + for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { last_mrf_move[inst->base_mrf + i] = NULL; } } diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h index 5236fff4b6e..a86db3ca075 100644 --- a/src/intel/compiler/brw_fs.h +++ b/src/intel/compiler/brw_fs.h @@ -302,8 +302,6 @@ public: fs_reg interp_reg(int location, int channel); - int implied_mrf_writes(const fs_inst *inst) const; - virtual void dump_instructions(); virtual void dump_instructions(const char *name); void dump_instruction(backend_instruction *inst); diff --git a/src/intel/compiler/brw_fs_bank_conflicts.cpp b/src/intel/compiler/brw_fs_bank_conflicts.cpp index 938ebcceac4..e908d0e38fc 100644 --- a/src/intel/compiler/brw_fs_bank_conflicts.cpp +++ b/src/intel/compiler/brw_fs_bank_conflicts.cpp @@ -573,7 +573,7 @@ namespace { if (v->devinfo->gen >= 7) { assert(inst->dst.file != MRF); - for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { const unsigned reg = GEN7_MRF_HACK_START + inst->base_mrf + i; constrained[p.atom_of_reg(reg)] = true; } diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp index bf3f86b0d56..966080f6b6c 100644 --- a/src/intel/compiler/brw_fs_reg_allocate.cpp +++ b/src/intel/compiler/brw_fs_reg_allocate.cpp @@ -494,7 +494,7 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used) } if (inst->mlen > 0) { - for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { mrf_used[inst->base_mrf + i] = true; } } diff --git a/src/intel/compiler/brw_fs_scoreboard.cpp b/src/intel/compiler/brw_fs_scoreboard.cpp index 67e07e7f26b..35586f2a107 100644 --- a/src/intel/compiler/brw_fs_scoreboard.cpp +++ b/src/intel/compiler/brw_fs_scoreboard.cpp @@ -896,7 +896,7 @@ namespace { } if (is_send(inst) && inst->base_mrf != -1) { - for (int j = 0; j < shader->implied_mrf_writes(inst); j++) + for (unsigned j = 0; j < inst->implied_mrf_writes(); j++) add_dependency(ids, deps[ip], dependency_for_write(inst, sb.get(brw_uvec_mrf(8, inst->base_mrf + j, 0)))); } diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h index 099fb570d70..24e523c5270 100644 --- a/src/intel/compiler/brw_ir_fs.h +++ b/src/intel/compiler/brw_ir_fs.h @@ -357,6 +357,7 @@ public: bool can_do_cmod(); bool can_change_types() const; bool has_source_and_destination_hazard() const; + unsigned implied_mrf_writes() const; /** * Return whether \p arg is a control source of a virtual instruction which diff --git a/src/intel/compiler/brw_schedule_instructions.cpp b/src/intel/compiler/brw_schedule_instructions.cpp index 575693fc961..9c78944cf68 100644 --- a/src/intel/compiler/brw_schedule_instructions.cpp +++ b/src/intel/compiler/brw_schedule_instructions.cpp @@ -1190,7 +1190,7 @@ fs_instruction_scheduler::calculate_deps() } if (inst->mlen > 0 && inst->base_mrf != -1) { - for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { add_dep(last_mrf_write[inst->base_mrf + i], n); last_mrf_write[inst->base_mrf + i] = n; } @@ -1313,7 +1313,7 @@ fs_instruction_scheduler::calculate_deps() } if (inst->mlen > 0 && inst->base_mrf != -1) { - for (int i = 0; i < v->implied_mrf_writes(inst); i++) { + for (unsigned i = 0; i < inst->implied_mrf_writes(); i++) { last_mrf_write[inst->base_mrf + i] = n; } } -- 2.30.2