X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=sidebyside;f=src%2Fmesa%2Fdrivers%2Fdri%2Fi965%2Fbrw_fs_builder.h;h=2087e585b06bb3d8b76f7d82fc667f96caa3dbb3;hb=595224f714d4a6734700d4d22165cb7fa3990238;hp=c823190efbdc112fc43e06066d4128d655d72b9a;hpb=89bc4c78c394e50ddb16cc089bd3ec90681342d7;p=mesa.git diff --git a/src/mesa/drivers/dri/i965/brw_fs_builder.h b/src/mesa/drivers/dri/i965/brw_fs_builder.h index c823190efbd..2087e585b06 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_builder.h +++ b/src/mesa/drivers/dri/i965/brw_fs_builder.h @@ -63,6 +63,22 @@ namespace brw { { } + /** + * Construct an fs_builder that inserts instructions into \p shader + * before instruction \p inst in basic block \p block. The default + * execution controls and debug annotation are initialized from the + * instruction passed as argument. + */ + fs_builder(backend_shader *shader, bblock_t *block, fs_inst *inst) : + shader(shader), block(block), cursor(inst), + _dispatch_width(inst->exec_size), + _group(inst->force_sechalf ? 8 : 0), + force_writemask_all(inst->force_writemask_all) + { + annotation.str = inst->annotation; + annotation.ir = inst->ir; + } + /** * Construct an fs_builder that inserts instructions before \p cursor in * basic block \p block, inheriting other code generation parameters @@ -99,8 +115,8 @@ namespace brw { fs_builder group(unsigned n, unsigned i) const { - assert(n <= dispatch_width() && - i < dispatch_width() / n); + assert(force_writemask_all || + (n <= dispatch_width() && i < dispatch_width() / n)); fs_builder bld = *this; bld._dispatch_width = n; bld._group += i * n; @@ -160,10 +176,15 @@ namespace brw { dst_reg vgrf(enum brw_reg_type type, unsigned n = 1) const { - return dst_reg(GRF, shader->alloc.allocate( - DIV_ROUND_UP(n * type_sz(type) * dispatch_width(), - REG_SIZE)), - type, dispatch_width()); + assert(dispatch_width() <= 32); + + if (n > 0) + return dst_reg(VGRF, shader->alloc.allocate( + DIV_ROUND_UP(n * type_sz(type) * dispatch_width(), + REG_SIZE)), + type); + else + return retype(null_reg_ud(), type); } /** @@ -176,6 +197,13 @@ namespace brw { BRW_REGISTER_TYPE_F)); } + dst_reg + null_reg_df() const + { + return dst_reg(retype(brw_null_vec(dispatch_width()), + BRW_REGISTER_TYPE_DF)); + } + /** * Create a null register of signed integer type. */ @@ -203,12 +231,13 @@ namespace brw { src_reg sample_mask_reg() const { - const bool uses_kill = - (shader->stage == MESA_SHADER_FRAGMENT && - ((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill); - return (shader->stage != MESA_SHADER_FRAGMENT ? src_reg(0xffff) : - uses_kill ? brw_flag_reg(0, 1) : - retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD)); + if (shader->stage != MESA_SHADER_FRAGMENT) { + return brw_imm_d(0xffff); + } else if (((brw_wm_prog_data *)shader->stage_prog_data)->uses_kill) { + return brw_flag_reg(0, 1); + } else { + return retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UD); + } } /** @@ -235,7 +264,7 @@ namespace brw { instruction * emit(enum opcode opcode, const dst_reg &dst) const { - return emit(instruction(opcode, dst.width, dst)); + return emit(instruction(opcode, dispatch_width(), dst)); } /** @@ -253,11 +282,11 @@ namespace brw { case SHADER_OPCODE_SIN: case SHADER_OPCODE_COS: return fix_math_instruction( - emit(instruction(opcode, dst.width, dst, + emit(instruction(opcode, dispatch_width(), dst, fix_math_operand(src0)))); default: - return emit(instruction(opcode, dst.width, dst, src0)); + return emit(instruction(opcode, dispatch_width(), dst, src0)); } } @@ -273,12 +302,12 @@ namespace brw { case SHADER_OPCODE_INT_QUOTIENT: case SHADER_OPCODE_INT_REMAINDER: return fix_math_instruction( - emit(instruction(opcode, dst.width, dst, + emit(instruction(opcode, dispatch_width(), dst, fix_math_operand(src0), fix_math_operand(src1)))); default: - return emit(instruction(opcode, dst.width, dst, src0, src1)); + return emit(instruction(opcode, dispatch_width(), dst, src0, src1)); } } @@ -295,22 +324,35 @@ namespace brw { case BRW_OPCODE_BFI2: case BRW_OPCODE_MAD: case BRW_OPCODE_LRP: - return emit(instruction(opcode, dst.width, dst, + return emit(instruction(opcode, dispatch_width(), dst, fix_3src_operand(src0), fix_3src_operand(src1), fix_3src_operand(src2))); default: - return emit(instruction(opcode, dst.width, dst, src0, src1, src2)); + return emit(instruction(opcode, dispatch_width(), dst, + src0, src1, src2)); } } + /** + * Create and insert an instruction with a variable number of sources + * into the program. + */ + instruction * + emit(enum opcode opcode, const dst_reg &dst, const src_reg srcs[], + unsigned n) const + { + return emit(instruction(opcode, dispatch_width(), dst, srcs, n)); + } + /** * Insert a preallocated instruction into the program. */ instruction * emit(instruction *inst) const { + assert(inst->exec_size <= 32); assert(inst->exec_size == dispatch_width() || force_writemask_all); assert(_group == 0 || _group == 8); @@ -334,32 +376,37 @@ namespace brw { * * Generally useful to get the minimum or maximum of two values. */ - void + instruction * emit_minmax(const dst_reg &dst, const src_reg &src0, const src_reg &src1, brw_conditional_mod mod) const { - if (shader->devinfo->gen >= 6) { - set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), - fix_unsigned_negate(src1))); - } else { - CMP(null_reg_d(), src0, src1, mod); - set_predicate(BRW_PREDICATE_NORMAL, - SEL(dst, src0, src1)); - } + assert(mod == BRW_CONDITIONAL_GE || mod == BRW_CONDITIONAL_L); + + return set_condmod(mod, SEL(dst, fix_unsigned_negate(src0), + fix_unsigned_negate(src1))); } /** - * Copy any live channel from \p src to the first channel of \p dst. + * Copy any live channel from \p src to the first channel of the result. */ - void - emit_uniformize(const dst_reg &dst, const src_reg &src) const + src_reg + emit_uniformize(const src_reg &src) const { + /* FIXME: We use a vector chan_index and dst to allow constant and + * copy propagration to move result all the way into the consuming + * instruction (typically a surface index or sampler index for a + * send). This uses 1 or 3 extra hw registers in 16 or 32 wide + * dispatch. Once we teach const/copy propagation about scalars we + * should go back to scalar destinations here. + */ const fs_builder ubld = exec_all(); const dst_reg chan_index = vgrf(BRW_REGISTER_TYPE_UD); + const dst_reg dst = vgrf(src.type); - ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, component(chan_index, 0)); - ubld.emit(SHADER_OPCODE_BROADCAST, component(dst, 0), - src, component(chan_index, 0)); + ubld.emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, chan_index); + ubld.emit(SHADER_OPCODE_BROADCAST, dst, src, component(chan_index, 0)); + + return src_reg(component(dst, 0)); } /** @@ -502,7 +549,7 @@ namespace brw { const dst_reg x_times_one_minus_a = vgrf(dst.type); MUL(y_times_a, y, a); - ADD(one_minus_a, negate(a), src_reg(1.0f)); + ADD(one_minus_a, negate(a), brw_imm_f(1.0f)); MUL(x_times_one_minus_a, x, src_reg(one_minus_a)); return ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)); } @@ -515,20 +562,14 @@ namespace brw { LOAD_PAYLOAD(const dst_reg &dst, const src_reg *src, unsigned sources, unsigned header_size) const { - assert(dst.width % 8 == 0); - instruction *inst = emit(instruction(SHADER_OPCODE_LOAD_PAYLOAD, - dst.width, dst, src, sources)); + instruction *inst = emit(SHADER_OPCODE_LOAD_PAYLOAD, dst, src, sources); inst->header_size = header_size; - - for (unsigned i = 0; i < header_size; i++) - assert(src[i].file != GRF || - src[i].width * type_sz(src[i].type) == 32); inst->regs_written = header_size; - - for (unsigned i = header_size; i < sources; ++i) - assert(src[i].file != GRF || - src[i].width == dst.width); - inst->regs_written += (sources - header_size) * (dst.width / 8); + for (unsigned i = header_size; i < sources; i++) { + inst->regs_written += + DIV_ROUND_UP(dispatch_width() * type_sz(src[i].type) * + dst.stride, REG_SIZE); + } return inst; } @@ -560,7 +601,7 @@ namespace brw { src_reg fix_3src_operand(const src_reg &src) const { - if (src.file == GRF || src.file == UNIFORM || src.stride > 1) { + if (src.file == VGRF || src.file == UNIFORM || src.stride > 1) { return src; } else { dst_reg expanded = vgrf(src.type); @@ -626,8 +667,8 @@ namespace brw { inst->resize_sources(1); inst->src[0] = src0; - at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type, - dispatch_width()), src1); + at(block, inst).MOV(fs_reg(MRF, inst->base_mrf + 1, src1.type), + src1); } }