#include "brw_fs.h"
#include "brw_cfg.h"
#include "brw_vs.h"
+#include "brw_nir.h"
#include "brw_vec4_live_variables.h"
#include "brw_dead_control_flow.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
}
+#include "main/context.h"
#define MAX_INSTRUCTION (1 << 30)
switch (opcode) {
case SHADER_OPCODE_SHADER_TIME_ADD:
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
return true;
default:
return false;
switch (opcode) {
case SHADER_OPCODE_SHADER_TIME_ADD:
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
return arg == 0 ? mlen : 1;
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
}
bool
-vec4_instruction::can_do_source_mods(struct brw_context *brw)
+vec4_instruction::can_do_source_mods(const struct brw_device_info *devinfo)
{
- if (brw->gen == 6 && is_math())
+ if (devinfo->gen == 6 && is_math())
return false;
if (is_send_from_grf())
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
- return inst->header_present ? 1 : 0;
- case SHADER_OPCODE_UNTYPED_ATOMIC:
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- return 0;
+ return inst->header_size;
default:
unreachable("not reached");
}
}
break;
}
+ case SHADER_OPCODE_BROADCAST:
+ if (is_uniform(inst->src[0]) ||
+ inst->src[1].is_zero()) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->src[1] = src_reg();
+ inst->force_writemask_all = true;
+ progress = true;
+ }
+ break;
+
default:
break;
}
* multiply, DepCtrl must not be used."
* May apply to future SoCs as well.
*/
- if (brw->is_cherryview) {
+ if (devinfo->is_cherryview) {
if (inst->opcode == BRW_OPCODE_MUL &&
IS_DWORD(inst->src[0]) &&
IS_DWORD(inst->src[1]))
}
#undef IS_DWORD
- if (brw->gen >= 8) {
+ if (devinfo->gen >= 8) {
if (inst->opcode == BRW_OPCODE_F32TO16)
return true;
}
if (scan_inst->mlen)
break;
- if (brw->gen == 6) {
+ if (devinfo->gen == 6) {
/* gen6 math instructions must have the destination be
* GRF, so no compute-to-MRF for them.
*/
return progress;
}
+/**
+ * Eliminate FIND_LIVE_CHANNEL instructions occurring outside any control
+ * flow. We could probably do better here with some form of divergence
+ * analysis.
+ */
+bool
+vec4_visitor::eliminate_find_live_channel()
+{
+ bool progress = false;
+ unsigned depth = 0;
+
+ foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+ switch (inst->opcode) {
+ case BRW_OPCODE_IF:
+ case BRW_OPCODE_DO:
+ depth++;
+ break;
+
+ case BRW_OPCODE_ENDIF:
+ case BRW_OPCODE_WHILE:
+ depth--;
+ break;
+
+ case SHADER_OPCODE_FIND_LIVE_CHANNEL:
+ if (depth == 0) {
+ inst->opcode = BRW_OPCODE_MOV;
+ inst->src[0] = src_reg(0);
+ inst->force_writemask_all = true;
+ progress = true;
+ }
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return progress;
+}
+
/**
* Splits virtual GRFs requesting more than one contiguous physical register.
*
if (inst->conditional_mod) {
fprintf(file, "%s", conditional_modifier[inst->conditional_mod]);
if (!inst->predicate &&
- (brw->gen < 5 || (inst->opcode != BRW_OPCODE_SEL &&
- inst->opcode != BRW_OPCODE_IF &&
- inst->opcode != BRW_OPCODE_WHILE))) {
+ (devinfo->gen < 5 || (inst->opcode != BRW_OPCODE_SEL &&
+ inst->opcode != BRW_OPCODE_IF &&
+ inst->opcode != BRW_OPCODE_WHILE))) {
fprintf(file, ".f0.%d", inst->flag_subreg);
}
}
unsigned vue_entries =
MAX2(nr_attributes, prog_data->vue_map.num_slots);
- if (brw->gen == 6)
+ if (devinfo->gen == 6)
prog_data->urb_entry_size = ALIGN(vue_entries, 8) / 8;
else
prog_data->urb_entry_size = ALIGN(vue_entries, 4) / 4;
/* The pre-gen6 VS requires that some push constants get loaded no
* matter what, or the GPU would hang.
*/
- if (brw->gen < 6 && this->uniforms == 0) {
+ if (devinfo->gen < 6 && this->uniforms == 0) {
assert(this->uniforms < this->uniform_array_size);
this->uniform_vector_size[this->uniforms] = 1;
src_reg
vec4_visitor::get_timestamp()
{
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
src_reg ts = src_reg(brw_reg(BRW_ARCHITECTURE_REGISTER_FILE,
BRW_ARF_TIMESTAMP,
*/
emit(ADD(diff, src_reg(diff), src_reg(-2u)));
- emit_shader_time_write(st_base, src_reg(diff));
- emit_shader_time_write(st_written, src_reg(1u));
+ emit_shader_time_write(0, src_reg(diff));
+ emit_shader_time_write(1, src_reg(1u));
emit(BRW_OPCODE_ELSE);
- emit_shader_time_write(st_reset, src_reg(1u));
+ emit_shader_time_write(2, src_reg(1u));
emit(BRW_OPCODE_ENDIF);
}
void
-vec4_visitor::emit_shader_time_write(enum shader_time_shader_type type,
- src_reg value)
+vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
{
- int shader_time_index =
- brw_get_shader_time_index(brw, shader_prog, prog, type);
-
dst_reg dst =
dst_reg(this, glsl_type::get_array_instance(glsl_type::vec4_type, 2));
time.reg_offset++;
offset.type = BRW_REGISTER_TYPE_UD;
- emit(MOV(offset, src_reg(shader_time_index * SHADER_TIME_STRIDE)));
+ int index = shader_time_index * 3 + shader_time_subindex;
+ emit(MOV(offset, src_reg(index * SHADER_TIME_STRIDE)));
time.type = BRW_REGISTER_TYPE_UD;
emit(MOV(time, src_reg(value)));
}
bool
-vec4_visitor::run()
+vec4_visitor::run(gl_clip_plane *clip_planes)
{
sanity_param_count = prog->Parameters->NumParameters;
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ if (shader_time_index >= 0)
emit_shader_time_begin();
assign_binding_table_offsets();
base_ir = NULL;
if (key->userclip_active && !prog->UsesClipDistanceOut)
- setup_uniform_clipplane_values();
+ setup_uniform_clipplane_values(clip_planes);
emit_thread_end();
move_push_constants_to_pull_constants();
split_virtual_grfs();
- const char *stage_name = stage == MESA_SHADER_GEOMETRY ? "gs" : "vs";
-
#define OPT(pass, args...) ({ \
pass_num++; \
bool this_progress = pass(args); \
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \
char filename[64]; \
snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \
- stage_name, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
+ stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
\
- backend_visitor::dump_instructions(filename); \
+ backend_shader::dump_instructions(filename); \
} \
\
progress = progress || this_progress; \
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
snprintf(filename, 64, "%s-%04d-00-start",
- stage_name, shader_prog ? shader_prog->Name : 0);
+ stage_abbrev, shader_prog ? shader_prog->Name : 0);
- backend_visitor::dump_instructions(filename);
+ backend_shader::dump_instructions(filename);
}
bool progress;
OPT(opt_cse);
OPT(opt_algebraic);
OPT(opt_register_coalesce);
+ OPT(eliminate_find_live_channel);
} while (progress);
pass_num = 0;
}
}
- while (!reg_allocate()) {
- if (failed)
- return false;
+ bool allocated_without_spills = reg_allocate();
+
+ if (!allocated_without_spills) {
+ compiler->shader_perf_log(log_data,
+ "%s shader triggered register spilling. "
+ "Try reducing the number of live vec4 values "
+ "to improve performance.\n",
+ stage_name);
+
+ while (!reg_allocate()) {
+ if (failed)
+ return false;
+ }
}
opt_schedule_instructions();
opt_set_dependency_control();
+ if (last_scratch > 0) {
+ prog_data->base.total_scratch =
+ brw_get_scratch_size(last_scratch * REG_SIZE);
+ }
+
/* If any state parameters were appended, then ParameterValues could have
* been realloced, in which case the driver uniform storage set up by
* _mesa_associate_uniform_storage() would point to freed memory. Make
if (prog)
shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
+ int st_index = -1;
+ if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+ st_index = brw_get_shader_time_index(brw, prog, &c->vp->program.Base,
+ ST_VS);
+
if (unlikely(INTEL_DEBUG & DEBUG_VS))
brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
- if (brw->scalar_vs && (prog || brw_env_var_as_boolean("INTEL_USE_NIR", false))) {
- fs_visitor v(brw, mem_ctx, &c->key, prog_data, prog, &c->vp->program, 8);
- if (!v.run_vs()) {
+ if (brw->intelScreen->compiler->scalar_vs) {
+ if (!c->vp->program.Base.nir) {
+ /* Normally we generate NIR in LinkShader() or
+ * ProgramStringNotify(), but Mesa's fixed-function vertex program
+ * handling doesn't notify the driver at all. Just do it here, at
+ * the last minute, even though it's lame.
+ */
+ assert(c->vp->program.Base.Id == 0 && prog == NULL);
+ c->vp->program.Base.nir =
+ brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX);
+ }
+
+ prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
+
+ fs_visitor v(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_VERTEX, &c->key,
+ &prog_data->base.base, prog, &c->vp->program.Base,
+ 8, st_index);
+ if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
if (prog) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
return NULL;
}
- fs_generator g(brw, mem_ctx, (void *) &c->key, &prog_data->base.base,
+ fs_generator g(brw->intelScreen->compiler, brw,
+ mem_ctx, (void *) &c->key, &prog_data->base.base,
&c->vp->program.Base, v.promoted_constants,
v.runtime_check_aads_emit, "VS");
if (INTEL_DEBUG & DEBUG_VS) {
}
g.generate_code(v.cfg, 8);
assembly = g.get_assembly(final_assembly_size);
-
- if (assembly)
- prog_data->base.simd8 = true;
- c->base.last_scratch = v.last_scratch;
}
if (!assembly) {
- vec4_vs_visitor v(brw, c, prog_data, prog, mem_ctx);
- if (!v.run()) {
+ prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
+
+ vec4_vs_visitor v(brw->intelScreen->compiler, brw,
+ c, prog_data, prog, mem_ctx, st_index,
+ !_mesa_is_gles3(&brw->ctx));
+ if (!v.run(brw_select_clip_planes(&brw->ctx))) {
if (prog) {
prog->LinkStatus = false;
ralloc_strcat(&prog->InfoLog, v.fail_msg);
return NULL;
}
- vec4_generator g(brw, prog, &c->vp->program.Base, &prog_data->base,
+ vec4_generator g(brw->intelScreen->compiler, brw,
+ prog, &c->vp->program.Base, &prog_data->base,
mem_ctx, INTEL_DEBUG & DEBUG_VS, "vertex", "VS");
assembly = g.generate_assembly(v.cfg, final_assembly_size);
}
struct brw_context *brw = brw_context(ctx);
key->program_string_id = id;
- const bool has_shader_channel_select = brw->is_haswell || brw->gen >= 8;
- unsigned sampler_count = _mesa_fls(prog->SamplersUsed);
- for (unsigned i = 0; i < sampler_count; i++) {
- if (!has_shader_channel_select && (prog->ShadowSamplers & (1 << i))) {
- /* Assume DEPTH_TEXTURE_MODE is the default: X, X, X, 1 */
- key->tex.swizzles[i] =
- MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE);
- } else {
- /* Color sampler: assume no swizzling. */
- key->tex.swizzles[i] = SWIZZLE_XYZW;
- }
- }
+ brw_setup_tex_for_precompile(brw, &key->tex, prog);
}
} /* extern "C" */