case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
+ case SHADER_OPCODE_INTERLOCK:
+ case SHADER_OPCODE_MEMORY_FENCE:
+ case SHADER_OPCODE_BARRIER:
return true;
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return src[1].file == VGRF;
}
}
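+/**
+ * Return true if the given source of this instruction is a message payload:
+ * a block of contiguous registers assembled separately (e.g. with
+ * LOAD_PAYLOAD) and read as a unit by a send-like instruction.
+ */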
+bool
+fs_inst::is_payload(unsigned arg) const
+{
+ switch (opcode) {
+ case FS_OPCODE_FB_WRITE:
+ case FS_OPCODE_FB_READ:
+ case SHADER_OPCODE_URB_WRITE_SIMD8:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
+ case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
+ case SHADER_OPCODE_URB_READ_SIMD8:
+ case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
+ case VEC4_OPCODE_UNTYPED_ATOMIC:
+ case VEC4_OPCODE_UNTYPED_SURFACE_READ:
+ case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
+ case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
+ case SHADER_OPCODE_SHADER_TIME_ADD:
+ case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
+ case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
+ case SHADER_OPCODE_INTERLOCK:
+ case SHADER_OPCODE_MEMORY_FENCE:
+ case SHADER_OPCODE_BARRIER:
+ return arg == 0;
+
+ case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
+ return arg == 1;
+
+ case SHADER_OPCODE_SEND:
+ return arg == 2 || arg == 3;
+
+ default:
+ if (is_tex())
+ return arg == 0;
+ else
+ return false;
+ }
+}
+
/**
* Returns true if this instruction's sources and destinations cannot
* safely be the same register.
if (is_send_from_grf())
return false;
+ /* From GEN:BUG:1604601757:
+ *
+ * "When multiplying a DW and any lower precision integer, source modifier
+ * is not supported."
+ */
+ if (devinfo->gen >= 12 && (opcode == BRW_OPCODE_MUL ||
+ opcode == BRW_OPCODE_MAD)) {
+ const brw_reg_type exec_type = get_exec_type(this);
+ const unsigned min_type_sz = opcode == BRW_OPCODE_MAD ?
+ MIN2(type_sz(src[1].type), type_sz(src[2].type)) :
+ MIN2(type_sz(src[0].type), type_sz(src[1].type));
+
+ if (brw_reg_type_is_integer(exec_type) &&
+ type_sz(exec_type) >= 4 &&
+ type_sz(exec_type) != min_type_sz)
+ return false;
+ }
+
if (!backend_instruction::can_do_source_mods())
return false;
}
namespace {
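+ /* Return the number of flag channels grouped together by the given
+ * predicate: ordinary per-channel predication has width 1, while the
+ * ANYnH/ALLnH group predicates combine n channels at a time.
+ */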
+ unsigned
+ predicate_width(brw_predicate predicate)
+ {
+ switch (predicate) {
+ case BRW_PREDICATE_NONE: return 1;
+ case BRW_PREDICATE_NORMAL: return 1;
+ case BRW_PREDICATE_ALIGN1_ANY2H: return 2;
+ case BRW_PREDICATE_ALIGN1_ALL2H: return 2;
+ case BRW_PREDICATE_ALIGN1_ANY4H: return 4;
+ case BRW_PREDICATE_ALIGN1_ALL4H: return 4;
+ case BRW_PREDICATE_ALIGN1_ANY8H: return 8;
+ case BRW_PREDICATE_ALIGN1_ALL8H: return 8;
+ case BRW_PREDICATE_ALIGN1_ANY16H: return 16;
+ case BRW_PREDICATE_ALIGN1_ALL16H: return 16;
+ case BRW_PREDICATE_ALIGN1_ANY32H: return 32;
+ case BRW_PREDICATE_ALIGN1_ALL32H: return 32;
+ default: unreachable("Unsupported predicate");
+ }
+ }
+
/* Return the subset of flag registers that an instruction could
* potentially read or write based on the execution controls and flag
* subregister number of the instruction.
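+ *
+ * The width is the predication group size: group predicates such as
+ * ANY4H consider whole groups of channels at once, so the range is
+ * rounded out to a multiple of the width.  Each bit of the result
+ * covers one byte (8 bits) of the flag registers; e.g. a SIMD16
+ * instruction with group 0 on f0.1 (flag_subreg 1) and width 1 covers
+ * flag bits 16..31 and yields the mask 0xc.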
*/
unsigned
- flag_mask(const fs_inst *inst)
+ flag_mask(const fs_inst *inst, unsigned width)
{
- const unsigned start = inst->flag_subreg * 16 + inst->group;
- const unsigned end = start + inst->exec_size;
+ assert(util_is_power_of_two_nonzero(width));
+ const unsigned start = (inst->flag_subreg * 16 + inst->group) &
+ ~(width - 1);
+ const unsigned end = start + ALIGN(inst->exec_size, width);
return ((1 << DIV_ROUND_UP(end, 8)) - 1) & ~((1 << (start / 8)) - 1);
}
* f0.0 and f1.0 on Gen7+, and f0.0 and f0.1 on older hardware.
*/
const unsigned shift = devinfo->gen >= 7 ? 4 : 2;
- return flag_mask(this) << shift | flag_mask(this);
+ return flag_mask(this, 1) << shift | flag_mask(this, 1);
} else if (predicate) {
- return flag_mask(this);
+ return flag_mask(this, predicate_width(predicate));
} else {
unsigned mask = 0;
for (int i = 0; i < sources; i++) {
opcode != BRW_OPCODE_WHILE)) ||
opcode == SHADER_OPCODE_FIND_LIVE_CHANNEL ||
opcode == FS_OPCODE_FB_WRITE) {
- return flag_mask(this);
+ return flag_mask(this, 1);
} else {
return flag_mask(dst, size_written);
}
* instruction -- the FS opcodes often generate MOVs in addition.
*/
int
-fs_visitor::implied_mrf_writes(fs_inst *inst) const
+fs_visitor::implied_mrf_writes(const fs_inst *inst) const
{
if (inst->mlen == 0)
return 0;
} else {
bld.emit(FS_OPCODE_LINTERP, wpos,
this->delta_xy[BRW_BARYCENTRIC_PERSPECTIVE_PIXEL],
- interp_reg(VARYING_SLOT_POS, 2));
+ component(interp_reg(VARYING_SLOT_POS, 2), 0));
}
wpos = offset(wpos, bld, 1);
{
fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::bool_type));
- if (devinfo->gen >= 6) {
+ if (devinfo->gen >= 12) {
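+ /* On Gen12 the front/back facing bit moved to bit 15 of g1.1, which is
+ * 0 when the polygon is front facing (mirroring the g0.0 layout
+ * described below for Gen6+).  The arithmetic shift replicates the bit
+ * across the word and the NOT turns it into the ~0/0 boolean we want.
+ */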
+ fs_reg g1 = fs_reg(retype(brw_vec1_grf(1, 1), BRW_REGISTER_TYPE_W));
+
+ fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_W);
+ bld.ASR(tmp, g1, brw_imm_d(15));
+ bld.NOT(*reg, tmp);
+ } else if (devinfo->gen >= 6) {
/* Bit 15 of g0.0 is 0 if the polygon is front facing. We want to create
* a boolean result from this (~0/true or 0/false).
*
}
foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ /* UNDEF instructions are fixed up by the second loop below. */
+ if (inst->opcode == SHADER_OPCODE_UNDEF) {
+ /* UNDEF instructions are currently only used to undef entire
+ * registers. We need this invariant later when we split them.
+ */
+ assert(inst->dst.file == VGRF);
+ assert(inst->dst.offset == 0);
+ assert(inst->size_written == alloc.sizes[inst->dst.nr] * REG_SIZE);
+ continue;
+ }
+
if (inst->dst.file == VGRF) {
int reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
for (unsigned j = 1; j < regs_written(inst); j++)
}
assert(reg == reg_count);
- foreach_block_and_inst(block, fs_inst, inst, cfg) {
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ if (inst->opcode == SHADER_OPCODE_UNDEF) {
+ const fs_builder ibld(this, block, inst);
+ assert(inst->size_written % REG_SIZE == 0);
+ unsigned reg_offset = 0;
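+ /* Emit an UNDEF for each of the smaller VGRFs the original register
+ * was split into, stepping by each new register's size.
+ */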
+ while (reg_offset < inst->size_written / REG_SIZE) {
+ reg = vgrf_to_reg[inst->dst.nr] + reg_offset;
+ ibld.UNDEF(fs_reg(VGRF, new_virtual_grf[reg], inst->dst.type));
+ reg_offset += alloc.sizes[new_virtual_grf[reg]];
+ }
+ inst->remove(block);
+ continue;
+ }
+
if (inst->dst.file == VGRF) {
reg = vgrf_to_reg[inst->dst.nr] + inst->dst.offset / REG_SIZE;
inst->dst.nr = new_virtual_grf[reg];
*out_surf_index = prog_data->binding_table.ubo_start + range->block;
*out_pull_index = (32 * range->start + src.offset) / 4;
+
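+ /* Note that the shader pulls constant data from a UBO surface. */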
+ prog_data->has_ubo_pull = true;
return true;
}
/* A regular uniform push constant */
*out_surf_index = stage_prog_data->binding_table.pull_constants_start;
*out_pull_index = pull_constant_loc[location];
+
+ prog_data->has_ubo_pull = true;
return true;
}
assert(mov->src[0].file == FIXED_GRF);
mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
}
+
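+ /* Insert the Gen12+ software scoreboard (SWSB) dependency information. */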
+ lower_scoreboard();
}
/**
fs_visitor::remove_extra_rounding_modes()
{
bool progress = false;
+ unsigned execution_mode = this->nir->info.float_controls_execution_mode;
+
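+ /* The float controls execution mode establishes the rounding mode in
+ * effect at the top of every block, so RND_MODE instructions that
+ * re-select that mode are redundant.
+ */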
+ brw_rnd_mode base_mode = BRW_RND_MODE_UNSPECIFIED;
+ if ((FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP16 |
+ FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP32 |
+ FLOAT_CONTROLS_ROUNDING_MODE_RTE_FP64) &
+ execution_mode)
+ base_mode = BRW_RND_MODE_RTNE;
+ if ((FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP16 |
+ FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP32 |
+ FLOAT_CONTROLS_ROUNDING_MODE_RTZ_FP64) &
+ execution_mode)
+ base_mode = BRW_RND_MODE_RTZ;
foreach_block (block, cfg) {
- brw_rnd_mode prev_mode = BRW_RND_MODE_UNSPECIFIED;
+ brw_rnd_mode prev_mode = base_mode;
foreach_inst_in_block_safe (fs_inst, inst, block) {
if (inst->opcode == SHADER_OPCODE_RND_MODE) {
length = 2;
} else if ((devinfo->gen <= 7 && !devinfo->is_haswell &&
prog_data->uses_kill) ||
- color1.file != BAD_FILE ||
- key->nr_color_regions > 1) {
+ (devinfo->gen < 11 &&
+ (color1.file != BAD_FILE || key->nr_color_regions > 1))) {
/* From the Sandy Bridge PRM, volume 4, page 198:
*
* "Dispatched Pixel Enables. One bit per pixel indicating
length++;
}
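+ /* Track whether the payload includes a src0 alpha value; Gen11+ wants
+ * it for the "Src0 Alpha Present" bit of the extended descriptor below.
+ */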
+ bool src0_alpha_present = false;
+
if (src0_alpha.file != BAD_FILE) {
for (unsigned i = 0; i < bld.dispatch_width() / 8; i++) {
const fs_builder &ubld = bld.exec_all().group(8, i)
setup_color_payload(ubld, key, &sources[length], tmp, 1);
length++;
}
+ src0_alpha_present = true;
} else if (prog_data->replicate_alpha && inst->target != 0) {
/* Handle the case when fragment shader doesn't write to draw buffer
* zero. No need to call setup_color_payload() for src0_alpha because
* alpha value will be undefined.
*/
length += bld.dispatch_width() / 8;
+ src0_alpha_present = true;
}
if (sample_mask.file != BAD_FILE) {
payload.nr = bld.shader->alloc.allocate(regs_written(load));
load->dst = payload;
- inst->src[0] = payload;
- inst->resize_sources(1);
+ uint32_t msg_ctl = brw_fb_write_msg_control(inst, prog_data);
+ uint32_t ex_desc = 0;
+
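+ /* Assemble the render target write message descriptor by hand and
+ * convert the instruction into a raw SEND from the GRF.
+ */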
+ inst->desc =
+ (inst->group / 16) << 11 | /* rt slot group */
+ brw_dp_write_desc(devinfo, inst->target, msg_ctl,
+ GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
+ inst->last_rt, false);
+
+ if (devinfo->gen >= 11) {
+ /* Set the "Render Target Index" and "Src0 Alpha Present" fields
+ * in the extended message descriptor, in lieu of using a header.
+ */
+ ex_desc = inst->target << 12 | src0_alpha_present << 15;
+
+ if (key->nr_color_regions == 0)
+ ex_desc |= 1 << 20; /* Null Render Target */
+ }
+
+ inst->opcode = SHADER_OPCODE_SEND;
+ inst->resize_sources(3);
+ inst->sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
+ inst->src[0] = brw_imm_ud(inst->desc);
+ inst->src[1] = brw_imm_ud(ex_desc);
+ inst->src[2] = payload;
+ inst->mlen = regs_written(load);
+ inst->ex_mlen = 0;
+ inst->header_size = header_size;
+ inst->check_tdr = true;
+ inst->send_has_side_effects = true;
} else {
/* Send from the MRF */
load = bld.LOAD_PAYLOAD(fs_reg(MRF, 1, BRW_REGISTER_TYPE_F),
inst->resize_sources(0);
}
inst->base_mrf = 1;
+ inst->opcode = FS_OPCODE_FB_WRITE;
+ inst->mlen = regs_written(load);
+ inst->header_size = header_size;
}
-
- inst->opcode = FS_OPCODE_FB_WRITE;
- inst->mlen = regs_written(load);
- inst->header_size = header_size;
}
static void
*/
assert(prog_data->total_scratch < max_scratch_size);
}
+
+ lower_scoreboard();
}
bool
if (devinfo->gen < 6)
brw_setup_vue_interpolation(vue_map, shader, prog_data);
+ /* From the SKL PRM, Volume 7, "Alpha Coverage":
+ * "If Pixel Shader outputs oMask, AlphaToCoverage is disabled in
+ * hardware, regardless of the state setting for this feature."
+ */
+ if (devinfo->gen > 6 && key->alpha_to_coverage) {
+ /* Run the constant folding optimization so that source offsets are
+ * known, letting brw_nir_lower_alpha_to_coverage identify the render
+ * target 0 store instruction.
+ */
+ NIR_PASS_V(shader, nir_opt_constant_folding);
+ NIR_PASS_V(shader, brw_nir_lower_alpha_to_coverage);
+ }
+
if (!key->multisample_fbo)
NIR_PASS_V(shader, demote_sample_qualifiers);
NIR_PASS_V(shader, move_interpolation_to_top);