From: Jason Ekstrand Date: Thu, 14 Aug 2014 20:56:24 +0000 (-0700) Subject: i965/fs: Add an exec_size field to fs_inst X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=071ac3a467479ce1ada1b86e2f65d4cc7d07753e;p=mesa.git i965/fs: Add an exec_size field to fs_inst This will, eventually, allow us to manage execution sizes of instructions in a much more natural way from the fs_visitor level. i965/fs: Explicitly set instruction execute size in a couple of places i965/blorp: Explicitly set instruction execute sizes Since blorp is all 16-wide and isn't, in general, very careful about register width, we'll just set it all explicitly. Signed-off-by: Jason Ekstrand Reviewed-by: Matt Turner --- diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 82ece732d18..3afe0e71519 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@ -96,7 +96,7 @@ brw_blorp_eu_emitter::emit_render_target_write(const struct brw_reg &src0, unsigned msg_length, bool use_header) { - fs_inst *inst = new (mem_ctx) fs_inst(FS_OPCODE_BLORP_FB_WRITE); + fs_inst *inst = new (mem_ctx) fs_inst(FS_OPCODE_BLORP_FB_WRITE, 16); inst->src[0] = src0; inst->base_mrf = msg_reg_nr; diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h index 0459a7e1bd5..cd50da4883b 100644 --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.h @@ -65,7 +65,7 @@ protected: { emit_cmp(op, x, y); - fs_inst *mv = new (mem_ctx) fs_inst(BRW_OPCODE_MOV, dst, src); + fs_inst *mv = new (mem_ctx) fs_inst(BRW_OPCODE_MOV, 16, dst, src); mv->predicate = BRW_PREDICATE_NORMAL; insts.push_tail(mv); } @@ -82,17 +82,17 @@ protected: const struct brw_reg &src3) { insts.push_tail( - new (mem_ctx) fs_inst(BRW_OPCODE_LRP, dst, src1, src2, src3)); + new (mem_ctx) fs_inst(BRW_OPCODE_LRP, 16, dst, src1, src2, src3)); } 
inline void emit_mov(const struct brw_reg& dst, const struct brw_reg& src) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_MOV, dst, src)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_MOV, 16, dst, src)); } inline void emit_mov_8(const struct brw_reg& dst, const struct brw_reg& src) { - fs_inst *mv = new (mem_ctx) fs_inst(BRW_OPCODE_MOV, dst, src); + fs_inst *mv = new (mem_ctx) fs_inst(BRW_OPCODE_MOV, 8, dst, src); mv->force_uncompressed = true; insts.push_tail(mv); } @@ -101,21 +101,21 @@ protected: const struct brw_reg& src1, const struct brw_reg& src2) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_AND, dst, src1, src2)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_AND, 16, dst, src1, src2)); } inline void emit_add(const struct brw_reg& dst, const struct brw_reg& src1, const struct brw_reg& src2) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ADD, dst, src1, src2)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ADD, 16, dst, src1, src2)); } inline void emit_add_8(const struct brw_reg& dst, const struct brw_reg& src1, const struct brw_reg& src2) { - fs_inst *add = new (mem_ctx) fs_inst(BRW_OPCODE_ADD, dst, src1, src2); + fs_inst *add = new (mem_ctx) fs_inst(BRW_OPCODE_ADD, 8, dst, src1, src2); add->force_uncompressed = true; insts.push_tail(add); } @@ -124,40 +124,40 @@ protected: const struct brw_reg& src1, const struct brw_reg& src2) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_MUL, dst, src1, src2)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_MUL, 16, dst, src1, src2)); } inline void emit_shr(const struct brw_reg& dst, const struct brw_reg& src1, const struct brw_reg& src2) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_SHR, dst, src1, src2)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_SHR, 16, dst, src1, src2)); } inline void emit_shl(const struct brw_reg& dst, const struct brw_reg& src1, const struct brw_reg& src2) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_SHL, dst, src1, src2)); + 
insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_SHL, 16, dst, src1, src2)); } inline void emit_or(const struct brw_reg& dst, const struct brw_reg& src1, const struct brw_reg& src2) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_OR, dst, src1, src2)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_OR, 16, dst, src1, src2)); } inline void emit_frc(const struct brw_reg& dst, const struct brw_reg& src) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_FRC, dst, src)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_FRC, 16, dst, src)); } inline void emit_rndd(const struct brw_reg& dst, const struct brw_reg& src) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_RNDD, dst, src)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_RNDD, 16, dst, src)); } inline void emit_cmp_if(enum brw_conditional_mod op, @@ -165,17 +165,17 @@ protected: const struct brw_reg &y) { emit_cmp(op, x, y); - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_IF)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_IF, 16)); } inline void emit_else(void) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ELSE)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ELSE, 16)); } inline void emit_endif(void) { - insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ENDIF)); + insts.push_tail(new (mem_ctx) fs_inst(BRW_OPCODE_ENDIF, 16)); } private: diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 9f30b379e79..ad61b33b754 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -53,7 +53,8 @@ extern "C" { #include "glsl/glsl_types.h" void -fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources) +fs_inst::init(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, + fs_reg *src, int sources) { memset(this, 0, sizeof(*this)); @@ -61,6 +62,33 @@ fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources) this->dst = dst; this->src = src; this->sources = 
sources; + this->exec_size = exec_size; + + assert(dst.file != IMM && dst.file != UNIFORM); + + /* If exec_size == 0, try to guess it from the registers. Since all + * manner of things may use hardware registers, we first try to guess + * based on GRF registers. If this fails, we will go ahead and take the + * width from the destination register. + */ + if (this->exec_size == 0) { + if (dst.file == GRF) { + this->exec_size = dst.width; + } else { + for (int i = 0; i < sources; ++i) { + if (src[i].file != GRF) + continue; + + if (this->exec_size <= 1) + this->exec_size = src[i].width; + assert(src[i].width == 1 || src[i].width == this->exec_size); + } + } + + if (this->exec_size == 0 && dst.file != BAD_FILE) + this->exec_size = dst.width; + } + assert(this->exec_size != 0); this->conditional_mod = BRW_CONDITIONAL_NONE; @@ -84,17 +112,46 @@ fs_inst::init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources) this->writes_accumulator = false; } +fs_inst::fs_inst() +{ + fs_reg *src = ralloc_array(this, fs_reg, 3); + init(BRW_OPCODE_NOP, 8, dst, src, 0); +} + +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size) +{ + fs_reg *src = ralloc_array(this, fs_reg, 3); + init(opcode, exec_size, reg_undef, src, 0); +} + fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst) { fs_reg *src = ralloc_array(this, fs_reg, 3); - init(opcode, dst, src, 0); + init(opcode, 0, dst, src, 0); +} + +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, + const fs_reg &src0) +{ + fs_reg *src = ralloc_array(this, fs_reg, 3); + src[0] = src0; + init(opcode, exec_size, dst, src, 1); } fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0) { fs_reg *src = ralloc_array(this, fs_reg, 3); src[0] = src0; - init(opcode, dst, src, 1); + init(opcode, 0, dst, src, 1); +} + +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, + const fs_reg &src0, const fs_reg &src1) +{ + fs_reg *src = ralloc_array(this, fs_reg, 3); + src[0] = 
src0; + src[1] = src1; + init(opcode, exec_size, dst, src, 2); } fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, @@ -103,7 +160,17 @@ fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, fs_reg *src = ralloc_array(this, fs_reg, 3); src[0] = src0; src[1] = src1; - init(opcode, dst, src, 2); + init(opcode, 0, dst, src, 2); +} + +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, + const fs_reg &src0, const fs_reg &src1, const fs_reg &src2) +{ + fs_reg *src = ralloc_array(this, fs_reg, 3); + src[0] = src0; + src[1] = src1; + src[2] = src2; + init(opcode, exec_size, dst, src, 3); } fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, @@ -113,12 +180,18 @@ fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, src[0] = src0; src[1] = src1; src[2] = src2; - init(opcode, dst, src, 3); + init(opcode, 0, dst, src, 3); } fs_inst::fs_inst(enum opcode opcode, const fs_reg &dst, fs_reg src[], int sources) { - init(opcode, dst, src, sources); + init(opcode, 0, dst, src, sources); +} + +fs_inst::fs_inst(enum opcode opcode, uint8_t exec_width, const fs_reg &dst, + fs_reg src[], int sources) +{ + init(opcode, exec_width, dst, src, sources); } fs_inst::fs_inst(const fs_inst &that) @@ -206,7 +279,7 @@ ALU2(MAC) fs_inst * fs_visitor::IF(enum brw_predicate predicate) { - fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF); + fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width); inst->predicate = predicate; return inst; } @@ -217,7 +290,7 @@ fs_visitor::IF(const fs_reg &src0, const fs_reg &src1, enum brw_conditional_mod condition) { assert(brw->gen == 6); - fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, + fs_inst *inst = new(mem_ctx) fs_inst(BRW_OPCODE_IF, dispatch_width, reg_null_d, src0, src1); inst->conditional_mod = condition; return inst; @@ -358,6 +431,7 @@ fs_visitor::DEP_RESOLVE_MOV(int grf) /* The caller always wants uncompressed to emit the 
minimal extra * dependencies, and to avoid having to deal with aligning its regs to 2. */ + inst->exec_size = 8; inst->force_uncompressed = true; return inst; @@ -380,6 +454,7 @@ fs_inst::equals(fs_inst *inst) const eot == inst->eot && header_present == inst->header_present && shadow_compare == inst->shadow_compare && + exec_size == inst->exec_size && offset == inst->offset); } @@ -605,6 +680,7 @@ fs_visitor::get_timestamp() */ mov->force_writemask_all = true; mov->force_uncompressed = true; + mov->exec_size = 8; /* The caller wants the low 32 bits of the timestamp. Since it's running * at the GPU clock rate of ~1.2ghz, it will roll over every ~3 seconds, @@ -760,7 +836,7 @@ fs_visitor::no16(const char *format, ...) fs_inst * fs_visitor::emit(enum opcode opcode) { - return emit(new(mem_ctx) fs_inst(opcode)); + return emit(new(mem_ctx) fs_inst(opcode, dispatch_width)); } fs_inst * @@ -2129,7 +2205,7 @@ fs_visitor::demote_pull_constants() } else { fs_reg offset = fs_reg((unsigned)(pull_index * 4) & ~15); fs_inst *pull = - new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8, dst, surf_index, offset); inst->insert_before(block, pull); inst->src[i].set_smear(pull_index & 3); @@ -2840,7 +2916,7 @@ fs_visitor::lower_uniform_pull_constant_loads() * by live variable analysis, or register allocation will explode. 
*/ fs_inst *setup = new(mem_ctx) fs_inst(FS_OPCODE_SET_SIMD4X2_OFFSET, - payload, const_offset_reg); + 8, payload, const_offset_reg); setup->force_writemask_all = true; setup->ir = inst->ir; diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 05fb71d724c..0e398026977 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -193,18 +193,29 @@ public: class fs_inst : public backend_instruction { fs_inst &operator=(const fs_inst &); + void init(enum opcode opcode, uint8_t exec_width, const fs_reg &dst, + fs_reg *src, int sources); + public: DECLARE_RALLOC_CXX_OPERATORS(fs_inst) - void init(enum opcode opcode, const fs_reg &dst, fs_reg *src, int sources); - - fs_inst(enum opcode opcode = BRW_OPCODE_NOP, const fs_reg &dst = reg_undef); + fs_inst(); + fs_inst(enum opcode opcode, uint8_t exec_size); + fs_inst(enum opcode opcode, const fs_reg &dst); + fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, + const fs_reg &src0); fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0); + fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, + const fs_reg &src0, const fs_reg &src1); fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, const fs_reg &src1); + fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, + const fs_reg &src0, const fs_reg &src1, const fs_reg &src2); fs_inst(enum opcode opcode, const fs_reg &dst, const fs_reg &src0, const fs_reg &src1, const fs_reg &src2); fs_inst(enum opcode opcode, const fs_reg &dst, fs_reg src[], int sources); + fs_inst(enum opcode opcode, uint8_t exec_size, const fs_reg &dst, + fs_reg src[], int sources); fs_inst(const fs_inst &that); void resize_sources(uint8_t num_sources); @@ -224,6 +235,13 @@ public: uint8_t sources; /**< Number of fs_reg sources. */ + /** + * Execution size of the instruction. This is used by the generator to + * generate the correct binary for the given fs_inst. 
Current valid + * values are 1, 8, 16. + */ + uint8_t exec_size; + /* Chooses which flag subregister (f0.0 or f0.1) is used for conditional * mod and predication. */ diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index da6f00145be..640987ed333 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -944,7 +944,7 @@ fs_visitor::visit(ir_expression *ir) packed_consts.type = result.type; fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15); - emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, + emit(new(mem_ctx) fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, 8, packed_consts, surf_index, const_offset_reg)); for (int i = 0; i < ir->type->vector_elements; i++) {