From 830f67046ace3c0b95a7f093fe373eeb417a1aad Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Thu, 18 Jun 2015 12:44:35 -0700 Subject: [PATCH] i965/fs: Remove the width field from fs_reg As of now, the width field is no longer used for anything. The width field "seemed like a good idea at the time" but is actually entirely redundant with the instruction's execution size. Initially, it gave us the ability to easily set the instructions execution size based entirely on register widths. With the builder, we can easiliy set the sizes explicitly and the width field doesn't have as much purpose. At this point, it's just redundant information that can get out of sync so it really needs to go. Reviewed-by: Topi Pohjolainen Acked-by: Francisco Jerez --- src/mesa/drivers/dri/i965/brw_fs.cpp | 62 +++---------------- src/mesa/drivers/dri/i965/brw_fs_builder.h | 19 ++---- .../dri/i965/brw_fs_copy_propagation.cpp | 4 -- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 6 +- .../drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 +- .../dri/i965/brw_fs_register_coalesce.cpp | 1 - src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 26 ++++---- src/mesa/drivers/dri/i965/brw_ir_fs.h | 15 +---- 8 files changed, 30 insertions(+), 107 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index be772ae547b..dd955197bca 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -203,10 +203,8 @@ fs_visitor::VARYING_PULL_CONSTANT_LOAD(const fs_builder &bld, else op = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD; - assert(dst.width % 8 == 0); int regs_written = 4 * (bld.dispatch_width() / 8) * scale; - fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), - dst.type, bld.dispatch_width()); + fs_reg vec4_result = fs_reg(GRF, alloc.allocate(regs_written), dst.type); fs_inst *inst = bld.emit(op, vec4_result, surf_index, vec4_offset); inst->regs_written = regs_written; @@ -310,7 +308,6 @@ fs_inst::is_copy_payload(const brw::simple_allocator &grf_alloc) const for (int i = 0; i < this->sources; i++) { reg.type = this->src[i].type; - reg.width = this->src[i].width; if (!this->src[i].equals(reg)) return false; @@ -366,7 +363,6 @@ fs_reg::fs_reg(float f) this->file = IMM; this->type = BRW_REGISTER_TYPE_F; this->fixed_hw_reg.dw1.f = f; - this->width = 1; } /** Immediate value constructor. */ @@ -376,7 +372,6 @@ fs_reg::fs_reg(int32_t i) this->file = IMM; this->type = BRW_REGISTER_TYPE_D; this->fixed_hw_reg.dw1.d = i; - this->width = 1; } /** Immediate value constructor. */ @@ -386,7 +381,6 @@ fs_reg::fs_reg(uint32_t u) this->file = IMM; this->type = BRW_REGISTER_TYPE_UD; this->fixed_hw_reg.dw1.ud = u; - this->width = 1; } /** Vector float immediate value constructor. */ @@ -417,7 +411,6 @@ fs_reg::fs_reg(struct brw_reg fixed_hw_reg) this->file = HW_REG; this->fixed_hw_reg = fixed_hw_reg; this->type = fixed_hw_reg.type; - this->width = 1 << fixed_hw_reg.width; } bool @@ -432,7 +425,6 @@ fs_reg::equals(const fs_reg &r) const abs == r.abs && !reladdr && !r.reladdr && memcmp(&fixed_hw_reg, &r.fixed_hw_reg, sizeof(fixed_hw_reg)) == 0 && - width == r.width && stride == r.stride); } @@ -504,7 +496,7 @@ fs_visitor::get_timestamp(const fs_builder &bld) 0), BRW_REGISTER_TYPE_UD)); - fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 4); + fs_reg dst = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); /* We want to read the 3 fields we care about even if it's not enabled in * the dispatch. @@ -554,7 +546,7 @@ fs_visitor::emit_shader_time_end() fs_reg start = shader_start_time; start.negate = true; - fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD, 1); + fs_reg diff = fs_reg(GRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); diff.set_smear(0); const fs_builder cbld = ibld.group(1, 0); @@ -809,7 +801,7 @@ fs_visitor::vgrf(const glsl_type *const type) { int reg_width = dispatch_width / 8; return fs_reg(GRF, alloc.allocate(type_size(type) * reg_width), - brw_type_for_base_type(type), dispatch_width); + brw_type_for_base_type(type)); } /** Fixed HW reg constructor. */ @@ -819,14 +811,6 @@ fs_reg::fs_reg(enum register_file file, int reg) this->file = file; this->reg = reg; this->type = BRW_REGISTER_TYPE_F; - - switch (file) { - case UNIFORM: - this->width = 1; - break; - default: - this->width = 8; - } } /** Fixed HW reg constructor. */ @@ -836,25 +820,6 @@ fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type) this->file = file; this->reg = reg; this->type = type; - - switch (file) { - case UNIFORM: - this->width = 1; - break; - default: - this->width = 8; - } -} - -/** Fixed HW reg constructor. */ -fs_reg::fs_reg(enum register_file file, int reg, enum brw_reg_type type, - uint8_t width) -{ - init(); - this->file = file; - this->reg = reg; - this->type = type; - this->width = width; } /* For SIMD16, we need to follow from the uniform setup of SIMD8 dispatch. @@ -1863,7 +1828,6 @@ fs_visitor::demote_pull_constants() inst->src[i].file = GRF; inst->src[i].reg = dst.reg; inst->src[i].reg_offset = 0; - inst->src[i].width = dispatch_width; } } invalidate_live_intervals(); @@ -2951,20 +2915,17 @@ fs_visitor::lower_load_payload() if (dst.file == MRF) dst.reg = dst.reg & ~BRW_MRF_COMPR4; - dst.width = 8; const fs_builder hbld = bld.group(8, 0).exec_all().at(block, inst); for (uint8_t i = 0; i < inst->header_size; i++) { if (inst->src[i].file != BAD_FILE) { fs_reg mov_dst = retype(dst, BRW_REGISTER_TYPE_UD); fs_reg mov_src = retype(inst->src[i], BRW_REGISTER_TYPE_UD); - mov_src.width = 8; hbld.MOV(mov_dst, mov_src); } dst = offset(dst, hbld, 1); } - dst.width = inst->exec_size; const fs_builder ibld = bld.group(inst->exec_size, inst->force_sechalf) .exec_all(inst->force_writemask_all) .at(block, inst); @@ -2998,7 +2959,6 @@ fs_visitor::lower_load_payload() } else { /* Platform doesn't have COMPR4. We have to fake it */ fs_reg mov_dst = retype(dst, inst->src[i].type); - mov_dst.width = 8; ibld.half(0).MOV(mov_dst, half(inst->src[i], 0)); mov_dst.reg += 4; ibld.half(1).MOV(mov_dst, half(inst->src[i], 1)); @@ -3070,7 +3030,7 @@ fs_visitor::lower_integer_multiplication() inst->src[1].fixed_hw_reg.dw1.ud < (1 << 16)) { if (devinfo->gen < 7) { fs_reg imm(GRF, alloc.allocate(dispatch_width / 8), - inst->dst.type, dispatch_width); + inst->dst.type); ibld.MOV(imm, inst->src[1]); ibld.MUL(inst->dst, imm, inst->src[0]); } else { @@ -3124,11 +3084,11 @@ fs_visitor::lower_integer_multiplication() if (inst->conditional_mod && inst->dst.is_null()) { inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8), - inst->dst.type, dispatch_width); + inst->dst.type); } fs_reg low = inst->dst; fs_reg high(GRF, alloc.allocate(dispatch_width / 8), - inst->dst.type, dispatch_width); + inst->dst.type); if (devinfo->gen >= 7) { fs_reg src1_0_w = inst->src[1]; @@ -3282,9 +3242,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) switch (inst->dst.file) { case GRF: fprintf(file, "vgrf%d", inst->dst.reg); - if (inst->dst.width != dispatch_width) - fprintf(file, "@%d", inst->dst.width); - if (alloc.sizes[inst->dst.reg] != inst->dst.width / 8 || + if (alloc.sizes[inst->dst.reg] != inst->regs_written || inst->dst.subreg_offset) fprintf(file, "+%d.%d", inst->dst.reg_offset, inst->dst.subreg_offset); @@ -3342,9 +3300,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) switch (inst->src[i].file) { case GRF: fprintf(file, "vgrf%d", inst->src[i].reg); - if (inst->src[i].width != dispatch_width) - fprintf(file, "@%d", inst->src[i].width); - if (alloc.sizes[inst->src[i].reg] != inst->src[i].width / 8 || + if (alloc.sizes[inst->src[i].reg] != (unsigned)inst->regs_read(i) || inst->src[i].subreg_offset) fprintf(file, "+%d.%d", inst->src[i].reg_offset, inst->src[i].subreg_offset); diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index 8af16a0fd73..2c36e071f54 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -163,7 +163,7 @@ namespace brw { return dst_reg(GRF, shader->alloc.allocate( DIV_ROUND_UP(n * type_sz(type) * dispatch_width(), REG_SIZE)), - type, dispatch_width()); + type); } /** @@ -516,21 +516,12 @@ namespace brw { LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src, unsigned sources, unsigned header_size) const { - assert(dst.width % 8 == 0); instruction *inst = emit(instruction(SHADER_OPCODE_LOAD_PAYLOAD, dispatch_width(), dst, src, sources)); inst->header_size = header_size; - - for (unsigned i = 0; i < header_size; i++) - assert(src[i].file != GRF || - src[i].width * type_sz(src[i].type) == 32); - inst->regs_written = header_size; - - for (unsigned i = header_size; i < sources; ++i) - assert(src[i].file != GRF || - src[i].width == dst.width); - inst->regs_written += (sources - header_size) * (dispatch_width() / 8); + inst->regs_written = header_size + + (sources - header_size) * (dispatch_width() / 8); return inst; } @@ -628,8 +619,8 @@ namespace brw { inst->resize_sources(1); inst->src[0] = src0; - at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type, - dispatch_width()), src1); + at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type), + src1); } } diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index c92aae4b1d6..54e9114fe6b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -388,17 +388,14 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) switch (entry->src.file) { case UNIFORM: - assert(entry->src.width == 1); case BAD_FILE: case HW_REG: - inst->src[arg].width = entry->src.width; inst->src[arg].reg_offset = entry->src.reg_offset; inst->src[arg].subreg_offset = entry->src.subreg_offset; break; case ATTR: case GRF: { - assert(entry->src.width % inst->src[arg].width == 0); /* In this case, we'll just leave the width alone. The source * register could have different widths depending on how it is * being used. For instance, if only half of the register was @@ -715,7 +712,6 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, acp_entry *entry = ralloc(copy_prop_ctx, acp_entry); entry->dst = inst->dst; entry->dst.reg_offset = offset; - entry->dst.width = effective_width; entry->src = inst->src[i]; entry->regs_written = regs_written; entry->opcode = inst->opcode; diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 29b46b96b8a..291feb3ec0c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -200,7 +200,6 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) payload = ralloc_array(bld.shader->mem_ctx, fs_reg, sources); for (int i = 0; i < header_size; i++) { payload[i] = src; - payload[i].width = 8; src.reg_offset++; } for (int i = header_size; i < sources; i++) { @@ -260,11 +259,9 @@ fs_visitor::opt_cse_local(bblock_t *block) bool no_existing_temp = entry->tmp.file == BAD_FILE; if (no_existing_temp && !entry->generator->dst.is_null()) { int written = entry->generator->regs_written; - assert((written * 8) % entry->generator->dst.width == 0); entry->tmp = fs_reg(GRF, alloc.allocate(written), - entry->generator->dst.type, - entry->generator->dst.width); + entry->generator->dst.type); create_copy_instr(bld.at(block, entry->generator->next), entry->generator, entry->tmp, false); @@ -275,7 +272,6 @@ fs_visitor::opt_cse_local(bblock_t *block) /* dest <- temp */ if (!inst->dst.is_null()) { assert(inst->regs_written == entry->generator->regs_written); - assert(inst->dst.width == entry->generator->dst.width); assert(inst->dst.type == entry->tmp.type); create_copy_instr(bld.at(block, inst), inst, entry->tmp, negate); diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index 364fc4a5ad2..8e5621d3cb5 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -706,10 +706,8 @@ fs_visitor::emit_unspill(bblock_t *block, fs_inst *inst, fs_reg dst, uint32_t spill_offset, int count) { int reg_size = 1; - if (dispatch_width == 16 && count % 2 == 0) { + if (dispatch_width == 16 && count % 2 == 0) reg_size = 2; - dst.width = 16; - } const fs_builder ibld = bld.annotate(inst->annotation, inst->ir) .group(reg_size * 8, 0) diff --git a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp index 149c0f0e217..a253c81c845 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_register_coalesce.cpp @@ -167,7 +167,6 @@ fs_visitor::register_coalesce() src_size = alloc.sizes[inst->src[0].reg]; assert(src_size <= MAX_VGRF_SIZE); - assert(inst->src[0].width % 8 == 0); channels_remaining = src_size; memset(mov, 0, sizeof(mov)); diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index d5ff1be1414..79ebb2d3019 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -246,7 +246,7 @@ fs_visitor::emit_texture_gen4_simd16(ir_texture_opcode op, fs_reg dst, fs_reg shadow_c, fs_reg lod, uint32_t sampler) { - fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width); + fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F); bool has_lod = op == ir_txl || op == ir_txb || op == ir_txf || op == ir_txs; if (has_lod && shadow_c.file != BAD_FILE) @@ -323,7 +323,7 @@ fs_visitor::emit_texture_gen5(ir_texture_opcode op, fs_reg dst, int reg_width = dispatch_width / 8; unsigned header_size = 0; - fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F, dispatch_width); + fs_reg message(MRF, 2, BRW_REGISTER_TYPE_F); fs_reg msg_coords = message; if (has_offset) { @@ -646,7 +646,7 @@ fs_visitor::emit_texture_gen7(ir_texture_opcode op, fs_reg dst, mlen = length * reg_width; fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), - BRW_REGISTER_TYPE_F, dispatch_width); + BRW_REGISTER_TYPE_F); bld.LOAD_PAYLOAD(src_payload, sources, length, header_size); /* Generate the SEND */ @@ -800,7 +800,7 @@ fs_visitor::emit_mcs_fetch(fs_reg coordinate, int components, fs_reg sampler) { int reg_width = dispatch_width / 8; fs_reg payload = fs_reg(GRF, alloc.allocate(components * reg_width), - BRW_REGISTER_TYPE_F, dispatch_width); + BRW_REGISTER_TYPE_F); fs_reg dest = vgrf(glsl_type::uvec4_type); fs_reg *sources = ralloc_array(mem_ctx, fs_reg, components); @@ -1170,7 +1170,7 @@ fs_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, int mlen = 1 + (length - 1) * reg_width; fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), - BRW_REGISTER_TYPE_UD, dispatch_width); + BRW_REGISTER_TYPE_UD); bld.LOAD_PAYLOAD(src_payload, sources, length, 1); /* Emit the instruction. */ @@ -1218,7 +1218,7 @@ fs_visitor::emit_untyped_surface_read(unsigned surf_index, fs_reg dst, int mlen = 1 + reg_width; fs_reg src_payload = fs_reg(GRF, alloc.allocate(mlen), - BRW_REGISTER_TYPE_UD, dispatch_width); + BRW_REGISTER_TYPE_UD); fs_inst *inst = bld.LOAD_PAYLOAD(src_payload, sources, 2, 1); /* Emit the instruction. */ @@ -1236,8 +1236,8 @@ fs_visitor::emit_dummy_fs() /* Everyone's favorite color. */ const float color[4] = { 1.0, 0.0, 1.0, 0.0 }; for (int i = 0; i < 4; i++) { - bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F, - dispatch_width), fs_reg(color[i])); + bld.MOV(fs_reg(MRF, 2 + i * reg_width, BRW_REGISTER_TYPE_F), + fs_reg(color[i])); } fs_inst *write; @@ -1357,7 +1357,7 @@ fs_visitor::emit_interpolation_setup_gen6() * compute our pixel centers. */ fs_reg int_pixel_xy(GRF, alloc.allocate(dispatch_width / 8), - BRW_REGISTER_TYPE_UW, dispatch_width * 2); + BRW_REGISTER_TYPE_UW); fs_inst *add = new (mem_ctx) fs_inst(BRW_OPCODE_ADD, dispatch_width * 2, int_pixel_xy, @@ -1544,7 +1544,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, * it's unsinged single words, one vgrf is always 16-wide. */ sources[length] = fs_reg(GRF, alloc.allocate(1), - BRW_REGISTER_TYPE_UW, 16); + BRW_REGISTER_TYPE_UW); bld.exec_all().annotate("FB write oMask") .emit(FS_OPCODE_SET_OMASK, sources[length], this->sample_mask); length++; @@ -1613,7 +1613,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, fs_inst *write; if (devinfo->gen >= 7) { /* Send from the GRF */ - fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F, exec_size); + fs_reg payload = fs_reg(GRF, -1, BRW_REGISTER_TYPE_F); load = ubld.LOAD_PAYLOAD(payload, sources, length, payload_header_size); payload.reg = alloc.allocate(load->regs_written); load->dst = payload; @@ -1621,7 +1621,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, write->base_mrf = -1; } else { /* Send from the MRF */ - load = ubld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F, exec_size), + load = ubld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F), sources, length, payload_header_size); /* On pre-SNB, we have to interlace the color values. LOAD_PAYLOAD @@ -1928,7 +1928,7 @@ fs_visitor::emit_urb_writes() if (flush) { fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1); fs_reg payload = fs_reg(GRF, alloc.allocate(length + 1), - BRW_REGISTER_TYPE_F, dispatch_width); + BRW_REGISTER_TYPE_F); payload_sources[0] = fs_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index d6b617ab2bd..b48244ae4ca 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -44,7 +44,6 @@ public: fs_reg(struct brw_reg fixed_hw_reg); fs_reg(enum register_file file, int reg); fs_reg(enum register_file file, int reg, enum brw_reg_type type); - fs_reg(enum register_file file, int reg, enum brw_reg_type type, uint8_t width); bool equals(const fs_reg &r) const; bool is_contiguous() const; @@ -60,14 +59,6 @@ public: fs_reg *reladdr; - /** - * The register width. This indicates how many hardware values are - * represented by each virtual value. Valid values are 1, 8, or 16. - * For immediate values, this is 1. Most of the rest of the time, it - * will be equal to the dispatch width. - */ - uint8_t width; - /** Register region horizontal stride */ uint8_t stride; }; @@ -132,9 +123,7 @@ static inline fs_reg component(fs_reg reg, unsigned idx) { assert(reg.subreg_offset == 0); - assert(idx < reg.width); reg.subreg_offset = idx * type_sz(reg.type); - reg.width = 1; reg.stride = 0; return reg; } @@ -142,7 +131,7 @@ component(fs_reg reg, unsigned idx) static inline bool is_uniform(const fs_reg ®) { - return (reg.width == 1 || reg.stride == 0 || reg.is_null()) && + return (reg.stride == 0 || reg.is_null()) && (!reg.reladdr || is_uniform(*reg.reladdr)); } @@ -164,8 +153,6 @@ half(fs_reg reg, unsigned idx) case GRF: case MRF: - assert(reg.width == 16); - reg.width = 8; return horiz_offset(reg, 8 * idx); case ATTR: -- 2.30.2