X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_fs.cpp;h=538745880c6fd911d8749c295dd7cf6a6996cd6f;hb=f729ecefef1542314e1f7660e8f00e9e67e33a84;hp=8bf11f9aa938884c75856a3f9cc5caf8068e2905;hpb=23f42f8dcfe7ca275ff3cbb5586b5a07ce8df778;p=mesa.git diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index 8bf11f9aa93..538745880c6 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -227,6 +227,9 @@ fs_inst::is_send_from_grf() const case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: + case SHADER_OPCODE_INTERLOCK: + case SHADER_OPCODE_MEMORY_FENCE: + case SHADER_OPCODE_BARRIER: return true; case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: return src[1].file == VGRF; @@ -287,6 +290,44 @@ fs_inst::is_control_source(unsigned arg) const } } +bool +fs_inst::is_payload(unsigned arg) const +{ + switch (opcode) { + case FS_OPCODE_FB_WRITE: + case FS_OPCODE_FB_READ: + case SHADER_OPCODE_URB_WRITE_SIMD8: + case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED: + case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT: + case SHADER_OPCODE_URB_READ_SIMD8: + case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: + case VEC4_OPCODE_UNTYPED_ATOMIC: + case VEC4_OPCODE_UNTYPED_SURFACE_READ: + case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: + case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET: + case SHADER_OPCODE_SHADER_TIME_ADD: + case FS_OPCODE_INTERPOLATE_AT_SAMPLE: + case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET: + case SHADER_OPCODE_INTERLOCK: + case SHADER_OPCODE_MEMORY_FENCE: + case SHADER_OPCODE_BARRIER: + return arg == 0; + + case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: + return arg == 1; + + case SHADER_OPCODE_SEND: + return arg == 2 || arg == 3; + + default: + if (is_tex()) + return arg == 0; + else + return false; + } +} + /** * Returns true if this instruction's sources and destinations cannot * safely be the same register. @@ -423,6 +464,24 @@ fs_inst::can_do_source_mods(const struct gen_device_info *devinfo) const if (is_send_from_grf()) return false; + /* From GEN:BUG:1604601757: + * + * "When multiplying a DW and any lower precision integer, source modifier + * is not supported." + */ + if (devinfo->gen >= 12 && (opcode == BRW_OPCODE_MUL || + opcode == BRW_OPCODE_MAD)) { + const brw_reg_type exec_type = get_exec_type(this); + const unsigned min_type_sz = opcode == BRW_OPCODE_MAD ? + MIN2(type_sz(src[1].type), type_sz(src[2].type)) : + MIN2(type_sz(src[0].type), type_sz(src[1].type)); + + if (brw_reg_type_is_integer(exec_type) && + type_sz(exec_type) >= 4 && + type_sz(exec_type) != min_type_sz) + return false; + } + if (!backend_instruction::can_do_source_mods()) return false; @@ -1011,15 +1070,37 @@ fs_inst::size_read(int arg) const } namespace { + unsigned + predicate_width(brw_predicate predicate) + { + switch (predicate) { + case BRW_PREDICATE_NONE: return 1; + case BRW_PREDICATE_NORMAL: return 1; + case BRW_PREDICATE_ALIGN1_ANY2H: return 2; + case BRW_PREDICATE_ALIGN1_ALL2H: return 2; + case BRW_PREDICATE_ALIGN1_ANY4H: return 4; + case BRW_PREDICATE_ALIGN1_ALL4H: return 4; + case BRW_PREDICATE_ALIGN1_ANY8H: return 8; + case BRW_PREDICATE_ALIGN1_ALL8H: return 8; + case BRW_PREDICATE_ALIGN1_ANY16H: return 16; + case BRW_PREDICATE_ALIGN1_ALL16H: return 16; + case BRW_PREDICATE_ALIGN1_ANY32H: return 32; + case BRW_PREDICATE_ALIGN1_ALL32H: return 32; + default: unreachable("Unsupported predicate"); + } + } + /* Return the subset of flag registers that an instruction could * potentially read or write based on the execution controls and flag * subregister number of the instruction. */ unsigned - flag_mask(const fs_inst *inst) + flag_mask(const fs_inst *inst, unsigned width) { - const unsigned start = inst->flag_subreg * 16 + inst->group; - const unsigned end = start + inst->exec_size; + assert(util_is_power_of_two_nonzero(width)); + const unsigned start = (inst->flag_subreg * 16 + inst->group) & + ~(width - 1); + const unsigned end = start + ALIGN(inst->exec_size, width); return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1); } @@ -1051,9 +1132,9 @@ fs_inst::flags_read(const gen_device_info *devinfo) const * f0.0 and f1.0 on Gen7+, and f0.0 and f0.1 on older hardware. */ const unsigned shift = devinfo->gen >= 7 ? 4 : 2; - return flag_mask(this) << shift | flag_mask(this); + return flag_mask(this, 1) << shift | flag_mask(this, 1); } else if (predicate) { - return flag_mask(this); + return flag_mask(this, predicate_width(predicate)); } else { unsigned mask = 0; for (int i = 0; i < sources; i++) { @@ -1072,7 +1153,7 @@ fs_inst::flags_written() const opcode != BRW_OPCODE_WHILE)) || opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL || opcode == FS_OPCODE_FB_WRITE) { - return flag_mask(this); + return flag_mask(this, 1); } else { return flag_mask(dst, size_written); } @@ -1085,7 +1166,7 @@ fs_inst::flags_written() const * instruction -- the FS opcodes often generate MOVs in addition. */ int -fs_visitor::implied_mrf_writes(fs_inst *inst) const +fs_visitor::implied_mrf_writes(const fs_inst *inst) const { if (inst->mlen == 0) return 0; @@ -1192,7 +1273,7 @@ fs_visitor::emit_fragcoord_interpolation(fs_reg wpos) } else { bld.emit(FS_OPCODE_LINTERP, wpos, this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL], - interp_reg(VARYING_SLOT_POS, 2)); + component(interp_reg(VARYING_SLOT_POS, 2), 0)); } wpos = offset(wpos, bld, 1); @@ -1245,7 +1326,13 @@ fs_visitor::emit_frontfacing_interpolation() { fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::bool_type)); - if (devinfo->gen >= 6) { + if (devinfo->gen >= 12) { + fs_reg g1 = fs_reg(retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_W)); + + fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_W); + bld.ASR(tmp, g1, brw_imm_d(15)); + bld.NOT(*reg, tmp); + } else if (devinfo->gen >= 6) { /* Bit 15 of g0.0 is 0 if the polygon is front facing. We want to create * a boolean result from this (~0/true or 0/false). * @@ -1910,6 +1997,17 @@ fs_visitor::split_virtual_grfs() } foreach_block_and_inst(block, fs_inst, inst, cfg) { + /* We fix up undef instructions later */ + if (inst->opcode == SHADER_OPCODE_UNDEF) { + /* UNDEF instructions are currently only used to undef entire + * registers. We need this invariant later when we split them. + */ + assert(inst->dst.file == VGRF); + assert(inst->dst.offset == 0); + assert(inst->size_written == alloc.sizes[inst->dst.nr] * REG_SIZE); + continue; + } + if (inst->dst.file == VGRF) { int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE; for (unsigned j = 1; j < regs_written(inst); j++) @@ -1962,7 +2060,20 @@ fs_visitor::split_virtual_grfs() } assert(reg == reg_count); - foreach_block_and_inst(block, fs_inst, inst, cfg) { + foreach_block_and_inst_safe(block, fs_inst, inst, cfg) { + if (inst->opcode == SHADER_OPCODE_UNDEF) { + const fs_builder ibld(this, block, inst); + assert(inst->size_written % REG_SIZE == 0); + unsigned reg_offset = 0; + while (reg_offset < inst->size_written / REG_SIZE) { + reg = vgrf_to_reg[inst->dst.nr] + reg_offset; + ibld.UNDEF(fs_reg(VGRF, new_virtual_grf[reg], inst->dst.type)); + reg_offset += alloc.sizes[new_virtual_grf[reg]]; + } + inst->remove(block); + continue; + } + if (inst->dst.file == VGRF) { reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE; inst->dst.nr = new_virtual_grf[reg]; @@ -2402,6 +2513,8 @@ fs_visitor::get_pull_locs(const fs_reg &src, *out_surf_index = prog_data->binding_table.ubo_start + range->block; *out_pull_index = (32 * range->start + src.offset) / 4; + + prog_data->has_ubo_pull = true; return true; } @@ -2411,6 +2524,8 @@ fs_visitor::get_pull_locs(const fs_reg &src, /* A regular uniform push constant */ *out_surf_index = stage_prog_data->binding_table.pull_constants_start; *out_pull_index = pull_constant_loc[location]; + + prog_data->has_ubo_pull = true; return true; } @@ -3418,6 +3533,8 @@ fs_visitor::emit_repclear_shader() assert(mov->src[0].file == FIXED_GRF); mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0); } + + lower_scoreboard(); } /** @@ -3507,9 +3624,22 @@ bool fs_visitor::remove_extra_rounding_modes() { bool progress = false; + unsigned execution_mode = this->nir->info.float_controls_execution_mode; + + brw_rnd_mode base_mode = BRW_RND_MODE_UNSPECIFIED; + if ((FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 | + FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 | + FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64) & + execution_mode) + base_mode = BRW_RND_MODE_RTNE; + if ((FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 | + FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32 | + FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64) & + execution_mode) + base_mode = BRW_RND_MODE_RTZ; foreach_block (block, cfg) { - brw_rnd_mode prev_mode = BRW_RND_MODE_UNSPECIFIED; + brw_rnd_mode prev_mode = base_mode; foreach_inst_in_block_safe (fs_inst, inst, block) { if (inst->opcode == SHADER_OPCODE_RND_MODE) { @@ -4447,7 +4577,7 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, uint32_t ex_desc = 0; inst->desc = - (inst->group / 16) ? (1 << 11) : 0 | /* rt slot group */ + (inst->group / 16) << 11 | /* rt slot group */ brw_dp_write_desc(devinfo, inst->target, msg_ctl, GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE, inst->last_rt, false); @@ -4457,6 +4587,9 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst, * in the extended message descriptor, in lieu of using a header. */ ex_desc = inst->target << 12 | src0_alpha_present << 15; + + if (key->nr_color_regions == 0) + ex_desc |= 1 << 20; /* Null Render Target */ } inst->opcode = SHADER_OPCODE_SEND; @@ -7450,6 +7583,8 @@ fs_visitor::allocate_registers(unsigned min_dispatch_width, bool allow_spilling) */ assert(prog_data->total_scratch < max_scratch_size); } + + lower_scoreboard(); } bool @@ -8106,6 +8241,19 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, if (devinfo->gen < 6) brw_setup_vue_interpolation(vue_map, shader, prog_data); + /* From the SKL PRM, Volume 7, "Alpha Coverage": + * "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in + * hardware, regardless of the state setting for this feature." + */ + if (devinfo->gen > 6 && key->alpha_to_coverage) { + /* Run constant fold optimization in order to get the correct source + * offset to determine render target 0 store instruction in + * emit_alpha_to_coverage pass. + */ + NIR_PASS_V(shader, nir_opt_constant_folding); + NIR_PASS_V(shader, brw_nir_lower_alpha_to_coverage); + } + if (!key->multisample_fbo) NIR_PASS_V(shader, demote_sample_qualifiers); NIR_PASS_V(shader, move_interpolation_to_top);