From: Francisco Jerez Date: Fri, 20 May 2016 23:14:13 +0000 (-0700) Subject: i965/fs: Expose arbitrary channel execution groups to the IR. X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ece41df247af247fb573ae8ec208d50e895b7aef;p=mesa.git i965/fs: Expose arbitrary channel execution groups to the IR. This generalizes the current fs_inst::force_sechalf flag to allow specifying channel enable groups other than 0 or 8. At some point it will likely make sense to fix the vec4 generator to support arbitrary execution groups and then move the definition of fs_inst::group into backend_instruction (e.g. so we can do FP64 in the VEC4 back-end). Reviewed-by: Jason Ekstrand --- diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index 5c5354e34f4..98940dae8be 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -3621,7 +3621,7 @@ fs_visitor::lower_integer_multiplication() mul->src[1].stride *= 2; } else if (devinfo->gen == 7 && !devinfo->is_haswell && - inst->force_sechalf) { + inst->group > 0) { /* Among other things the quarter control bits influence which * accumulator register is used by the hardware for instructions * that access the accumulator implicitly (e.g. MACH). A @@ -3638,7 +3638,7 @@ fs_visitor::lower_integer_multiplication() * to get the result masked correctly according to the current * channel enables. */ - mach->force_sechalf = false; + mach->group = 0; mach->force_writemask_all = true; mach->dst = ibld.vgrf(inst->dst.type); ibld.MOV(inst->dst, mach->dst); @@ -3774,8 +3774,8 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, sample_mask.stride *= 2; bld.exec_all().annotate("FB write oMask") - .MOV(half(retype(sources[length], BRW_REGISTER_TYPE_UW), - inst->force_sechalf), + .MOV(horiz_offset(retype(sources[length], BRW_REGISTER_TYPE_UW), + inst->group), sample_mask); length++; } @@ -5008,10 +5008,10 @@ fs_visitor::lower_simd_width() * execution size of the builder to the highest of both for now so * we're sure that both cases can be handled. */ + const unsigned max_width = MAX2(inst->exec_size, lower_width); const fs_builder ibld = bld.at(block, inst) .exec_all(inst->force_writemask_all) - .group(MAX2(inst->exec_size, lower_width), - inst->force_sechalf); + .group(max_width, inst->group / max_width); /* Split the copies in chunks of the execution width of either the * original or the lowered instruction, whichever is lower. @@ -5343,12 +5343,8 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) if (inst->force_writemask_all) fprintf(file, "NoMask "); - if (dispatch_width == 16 && inst->exec_size == 8) { - if (inst->force_sechalf) - fprintf(file, "2ndhalf "); - else - fprintf(file, "1sthalf "); - } + if (inst->exec_size != dispatch_width) + fprintf(file, "group%d ", inst->group); fprintf(file, "\n"); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index b50dda421ae..c1d13a2b14a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -72,7 +72,7 @@ namespace brw { fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) : shader(shader), block(block), cursor(inst), _dispatch_width(inst->exec_size), - _group(inst->force_sechalf ? 8 : 0), + _group(inst->group), force_writemask_all(inst->force_writemask_all) { annotation.str = inst->annotation; @@ -167,6 +167,15 @@ namespace brw { return _dispatch_width; } + /** + * Get the channel group in use. + */ + unsigned + group() const + { + return _group; + } + /** * Allocate a virtual register of natural vector size (one for this IR) * and SIMD width. \p n gives the amount of space to allocate in @@ -353,9 +362,8 @@ namespace brw { assert(inst->exec_size <= 32); assert(inst->exec_size == dispatch_width() || force_writemask_all); - assert(_group == 0 || _group == 8); - inst->force_sechalf = (_group == 8); + inst->group = _group; inst->force_writemask_all = force_writemask_all; inst->annotation = annotation.str; inst->ir = annotation.ir; diff --git a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp index 9c39106bbc2..159bf5dd6e0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_cse.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_cse.cpp @@ -162,7 +162,7 @@ instructions_match(fs_inst *a, fs_inst *b, bool *negate) return a->opcode == b->opcode && a->force_writemask_all == b->force_writemask_all && a->exec_size == b->exec_size && - a->force_sechalf == b->force_sechalf && + a->group == b->group && a->saturate == b->saturate && a->predicate == b->predicate && a->predicate_inverse == b->predicate_inverse && @@ -215,7 +215,7 @@ create_copy_instr(const fs_builder &bld, fs_inst *inst, fs_reg src, bool negate) copy = bld.LOAD_PAYLOAD(inst->dst, payload, sources, header_size); } else { copy = bld.MOV(inst->dst, src); - copy->force_sechalf = inst->force_sechalf; + copy->group = inst->group; copy->force_writemask_all = inst->force_writemask_all; copy->src[0].negate = negate; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 3b0717e17d4..4ca3dad866c 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -212,7 +212,7 @@ fs_generator::fire_fb_write(fs_inst *inst, if (inst->opcode == FS_OPCODE_REP_FB_WRITE) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; else if (prog_data->dual_src_blend) { - if (!inst->force_sechalf) + if (!inst->group) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23; @@ -1076,7 +1076,7 @@ fs_generator::generate_scratch_write(fs_inst *inst, struct brw_reg src) brw_set_default_compression(p, lower_size > 8); for (unsigned i = 0; i < inst->exec_size / lower_size; i++) { - brw_set_default_group(p, (inst->force_sechalf ? 8 : 0) + lower_size * i); + brw_set_default_group(p, inst->group + lower_size * i); brw_MOV(p, brw_uvec_mrf(lower_size, inst->base_mrf + 1, 0), retype(offset(src, block_size * i), BRW_REGISTER_TYPE_UD)); @@ -1620,7 +1620,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) const bool compressed = inst->dst.component_size(inst->exec_size) > REG_SIZE; brw_set_default_compression(p, compressed); - brw_set_default_group(p, inst->force_sechalf ? 8 : 0); + brw_set_default_group(p, inst->group); for (unsigned int i = 0; i < inst->sources; i++) { src[i] = brw_reg_from_fs_reg(inst, &inst->src[i], devinfo->gen, @@ -1648,6 +1648,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width) brw_set_default_exec_size(p, cvt(inst->exec_size) - 1); assert(inst->force_writemask_all || inst->exec_size >= 8); + assert(inst->force_writemask_all || inst->group % inst->exec_size == 0); assert(inst->base_mrf + inst->mlen <= BRW_MAX_MRF(devinfo->gen)); assert(inst->mlen <= BRW_MAX_MSG_LENGTH); diff --git a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp index 8613725f6b9..8cd897f72e0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp @@ -163,7 +163,7 @@ fs_visitor::opt_peephole_sel() /* Check that the MOVs are the right form. */ if (!then_mov[i]->dst.equals(else_mov[i]->dst) || then_mov[i]->exec_size != else_mov[i]->exec_size || - then_mov[i]->force_sechalf != else_mov[i]->force_sechalf || + then_mov[i]->group != else_mov[i]->group || then_mov[i]->force_writemask_all != else_mov[i]->force_writemask_all || then_mov[i]->is_partial_write() || else_mov[i]->is_partial_write() || diff --git a/src/mesa/drivers/dri/i965/brw_ir_fs.h b/src/mesa/drivers/dri/i965/brw_ir_fs.h index 9c39a05f4ce..13f4e151433 100644 --- a/src/mesa/drivers/dri/i965/brw_ir_fs.h +++ b/src/mesa/drivers/dri/i965/brw_ir_fs.h @@ -291,21 +291,19 @@ public: */ uint8_t exec_size; + /** + * Channel group from the hardware execution and predication mask that + * should be applied to the instruction. The subset of channel enable + * signals (calculated from the EU control flow and predication state) + * given by [group, group + exec_size) will be used to mask GRF writes and + * any other side effects of the instruction. + */ + uint8_t group; + bool eot:1; - bool force_sechalf:1; bool pi_noperspective:1; /**< Pixel interpolator noperspective flag */ }; -/** - * Set second-half quarter control on \p inst. - */ -static inline fs_inst * -set_sechalf(fs_inst *inst) -{ - inst->force_sechalf = true; - return inst; -} - /** * Make the execution of \p inst dependent on the evaluation of a possibly * inverted predicate.