void
fs_visitor::assign_constant_locations()
{
- /* Only the first compile (SIMD8 mode) gets to decide on locations. */
- if (dispatch_width != 8)
+ /* Only the first compile gets to decide on locations. */
+ if (dispatch_width != min_dispatch_width)
return;
bool is_live[uniforms];
progress = true;
}
break;
- case SHADER_OPCODE_RCP: {
- fs_inst *prev = (fs_inst *)inst->prev;
- if (prev->opcode == SHADER_OPCODE_SQRT) {
- if (inst->src[0].equals(prev->dst)) {
- inst->opcode = SHADER_OPCODE_RSQ;
- inst->src[0] = prev->src[0];
- progress = true;
- }
- }
- break;
- }
case SHADER_OPCODE_BROADCAST:
if (is_uniform(inst->src[0])) {
inst->opcode = BRW_OPCODE_MOV;
* we have enough space, but it will make sure the dead code eliminator kills
* the instruction that this will replace.
*/
- if (tex_inst->header_size != 0)
+ if (tex_inst->header_size != 0) {
+ invalidate_live_intervals();
return true;
+ }
fs_reg send_header = ibld.vgrf(BRW_REGISTER_TYPE_F,
load_payload->sources + 1);
tex_inst->insert_before(cfg->blocks[cfg->num_blocks - 1], new_load_payload);
tex_inst->src[0] = send_header;
+ invalidate_live_intervals();
return true;
}
int color_mrf = base_mrf + 2;
fs_inst *mov;
- if (uniforms == 1) {
+ if (uniforms > 0) {
mov = bld.exec_all().group(4, 0)
.MOV(brw_message_reg(color_mrf),
fs_reg(UNIFORM, 0, BRW_REGISTER_TYPE_F));
} else {
struct brw_reg reg =
- brw_reg(BRW_GENERAL_REGISTER_FILE,
- 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
- BRW_VERTICAL_STRIDE_8,
- BRW_WIDTH_2,
- BRW_HORIZONTAL_STRIDE_4, BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
+ brw_reg(BRW_GENERAL_REGISTER_FILE, 2, 3, 0, 0, BRW_REGISTER_TYPE_F,
+ BRW_VERTICAL_STRIDE_8, BRW_WIDTH_2, BRW_HORIZONTAL_STRIDE_4,
+ BRW_SWIZZLE_XYZW, WRITEMASK_XYZW);
mov = bld.exec_all().group(4, 0)
.MOV(vec4(brw_message_reg(color_mrf)), fs_reg(reg));
assign_curb_setup();
/* Now that we have the uniform assigned, go ahead and force it to a vec4. */
- if (uniforms == 1) {
+ if (uniforms > 0) {
assert(mov->src[0].file == FIXED_GRF);
mov->src[0] = brw_vec4_grf(mov->src[0].nr, 0);
}
return progress;
}
+bool
+fs_visitor::lower_minmax()
+{
+ assert(devinfo->gen < 6);
+
+ bool progress = false;
+
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ const fs_builder ibld(this, block, inst);
+
+ if (inst->opcode == BRW_OPCODE_SEL &&
+ inst->predicate == BRW_PREDICATE_NONE) {
+ /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of
+ * the original SEL.L/GE instruction
+ */
+ ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
+ inst->conditional_mod);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->conditional_mod = BRW_CONDITIONAL_NONE;
+
+ progress = true;
+ }
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
static void
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
fs_reg *dst, fs_reg color, unsigned components)
case IMM:
switch (inst->src[i].type) {
case BRW_REGISTER_TYPE_F:
- fprintf(file, "%ff", inst->src[i].f);
+ fprintf(file, "%-gf", inst->src[i].f);
break;
case BRW_REGISTER_TYPE_W:
case BRW_REGISTER_TYPE_D:
}
void
-fs_visitor::setup_payload_gen6()
+fs_visitor::setup_fs_payload_gen6()
{
- bool uses_depth =
- (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
+ assert(stage == MESA_SHADER_FRAGMENT);
+ brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
+ brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
+
unsigned barycentric_interp_modes =
(stage == MESA_SHADER_FRAGMENT) ?
((brw_wm_prog_data*) this->prog_data)->barycentric_interp_modes : 0;
}
/* R27: interpolated depth if uses source depth */
- if (uses_depth) {
+ prog_data->uses_src_depth =
+ (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
+ if (prog_data->uses_src_depth) {
payload.source_depth_reg = payload.num_regs;
payload.num_regs++;
if (dispatch_width == 16) {
payload.num_regs++;
}
}
+
/* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. */
- if (uses_depth) {
+ prog_data->uses_src_w =
+ (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
+ if (prog_data->uses_src_w) {
payload.source_w_reg = payload.num_regs;
payload.num_regs++;
if (dispatch_width == 16) {
}
}
- if (stage == MESA_SHADER_FRAGMENT) {
- brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- prog_data->uses_pos_offset = key->compute_pos_offset;
- /* R31: MSAA position offsets. */
- if (prog_data->uses_pos_offset) {
- payload.sample_pos_reg = payload.num_regs;
- payload.num_regs++;
- }
+ prog_data->uses_pos_offset = key->compute_pos_offset;
+ /* R31: MSAA position offsets. */
+ if (prog_data->uses_pos_offset) {
+ payload.sample_pos_reg = payload.num_regs;
+ payload.num_regs++;
}
/* R32: MSAA input coverage mask */
- if (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) {
+ prog_data->uses_sample_mask =
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) != 0;
+ if (prog_data->uses_sample_mask) {
assert(devinfo->gen >= 7);
payload.sample_mask_in_reg = payload.num_regs;
payload.num_regs++;
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
- snprintf(filename, 64, "%s%d-%s-00-start",
+ snprintf(filename, 64, "%s%d-%s-00-00-start",
stage_abbrev, dispatch_width, nir->info.name);
backend_shader::dump_instructions(filename);
OPT(opt_combine_constants);
OPT(lower_integer_multiplication);
+ if (devinfo->gen <= 5 && OPT(lower_minmax)) {
+ OPT(opt_cmod_propagation);
+ OPT(opt_cse);
+ OPT(opt_copy_propagate);
+ OPT(dead_code_eliminate);
+ }
+
lower_uniform_pull_constant_loads();
validate();
void
fs_visitor::fixup_3src_null_dest()
{
+ bool progress = false;
+
foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
if (inst->is_3src() && inst->dst.is_null()) {
inst->dst = fs_reg(VGRF, alloc.allocate(dispatch_width / 8),
inst->dst.type);
+ progress = true;
}
}
+
+ if (progress)
+ invalidate_live_intervals();
}
void
* SIMD8. There's probably actually some intermediate point where
* SIMD16 with a couple of spills is still better.
*/
- if (dispatch_width == 16) {
+ if (dispatch_width == 16 && min_dispatch_width <= 8) {
fail("Failure to register allocate. Reduce number of "
"live scalar values to avoid this.");
} else {
assert(stage == MESA_SHADER_FRAGMENT);
if (devinfo->gen >= 6)
- setup_payload_gen6();
+ setup_fs_payload_gen6();
else
- setup_payload_gen4();
+ setup_fs_payload_gen4();
if (0) {
emit_dummy_fs();
if (shader_time_index >= 0)
emit_shader_time_begin();
+ if (devinfo->is_haswell && prog_data->total_shared > 0) {
+ /* Move SLM index from g0.0[27:24] to sr0.1[11:8] */
+ const fs_builder abld = bld.exec_all().group(1, 0);
+ abld.MOV(retype(suboffset(brw_sr0_reg(), 1), BRW_REGISTER_TYPE_UW),
+ suboffset(retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW), 1));
+ }
+
emit_nir_code();
if (failed)
return barycentric_interp_modes;
}
+static void
+brw_compute_flat_inputs(struct brw_wm_prog_data *prog_data,
+ bool shade_model_flat, const nir_shader *shader)
+{
+ prog_data->flat_inputs = 0;
+
+ nir_foreach_variable(var, &shader->inputs) {
+ enum glsl_interp_qualifier interp_qualifier =
+ (enum glsl_interp_qualifier)var->data.interpolation;
+ bool is_gl_Color = (var->data.location == VARYING_SLOT_COL0) ||
+ (var->data.location == VARYING_SLOT_COL1);
+
+ int input_index = prog_data->urb_setup[var->data.location];
+
+ if (input_index < 0)
+ continue;
+
+ /* flat shading */
+ if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
+ (shade_model_flat && is_gl_Color &&
+ interp_qualifier == INTERP_QUALIFIER_NONE))
+ prog_data->flat_inputs |= (1 << input_index);
+ }
+}
+
static uint8_t
computed_depth_mode(const nir_shader *shader)
{
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
true);
+ brw_nir_lower_fs_inputs(shader);
+ brw_nir_lower_fs_outputs(shader);
shader = brw_postprocess_nir(shader, compiler->devinfo, true);
/* key->alpha_test_func means simulating alpha testing via discards,
}
}
+ /* We have to compute the flat inputs after the visitor is finished running
+ * because it relies on prog_data->urb_setup which is computed in
+ * fs_visitor::calculate_urb_setup().
+ */
+ brw_compute_flat_inputs(prog_data, key->flat_shade, shader);
+
cfg_t *simd8_cfg;
int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || use_rep_send;
if ((no_simd8 || compiler->devinfo->gen < 5) && simd16_cfg) {
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
true);
+ brw_nir_lower_cs_shared(shader);
+ prog_data->base.total_shared += shader->num_shared;
shader = brw_postprocess_nir(shader, compiler->devinfo, true);
prog_data->local_size[0] = shader->info.cs.local_size[0];
shader->info.cs.local_size[2];
unsigned max_cs_threads = compiler->devinfo->max_cs_threads;
+ unsigned simd_required = DIV_ROUND_UP(local_workgroup_size, max_cs_threads);
cfg_t *cfg = NULL;
const char *fail_msg = NULL;
fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base,
NULL, /* Never used in core profile */
shader, 8, shader_time_index);
- if (!v8.run_cs()) {
- fail_msg = v8.fail_msg;
- } else if (local_workgroup_size <= 8 * max_cs_threads) {
- cfg = v8.cfg;
- prog_data->simd_size = 8;
+ if (simd_required <= 8) {
+ if (!v8.run_cs()) {
+ fail_msg = v8.fail_msg;
+ } else {
+ cfg = v8.cfg;
+ prog_data->simd_size = 8;
+ }
}
fs_visitor v16(compiler, log_data, mem_ctx, key, &prog_data->base,
!fail_msg && !v8.simd16_unsupported &&
local_workgroup_size <= 16 * max_cs_threads) {
/* Try a SIMD16 compile */
- v16.import_uniforms(&v8);
+ if (simd_required <= 8)
+ v16.import_uniforms(&v8);
if (!v16.run_cs()) {
compiler->shader_perf_log(log_data,
"SIMD16 shader failed to compile: %s",