#include "brw_cfg.h"
#include "brw_program.h"
#include "brw_dead_control_flow.h"
-#include "glsl/nir/glsl_types.h"
+#include "compiler/glsl_types.h"
using namespace brw;
* CSE can later notice that those loads are all the same and eliminate
* the redundant ones.
*/
- fs_reg vec4_offset = vgrf(glsl_type::int_type);
+ fs_reg vec4_offset = vgrf(glsl_type::uint_type);
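+ /* Masking off the low four bits keeps the constant offset 16-byte
+  * (vec4) aligned, so loads of neighbouring components compute
+  * identical offsets that CSE can merge.
+  */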
bld.ADD(vec4_offset, varying_offset, brw_imm_ud(const_offset & ~0xf));
int scale = 1;
{
this->reg_offset = 0;
this->subreg_offset = 0;
- this->reladdr = NULL;
this->stride = 1;
if (this->file == IMM &&
(this->type != BRW_REGISTER_TYPE_V &&
{
return (this->backend_reg::equals(r) &&
subreg_offset == r.subreg_offset &&
- !reladdr && !r.reladdr &&
stride == r.stride);
}
case SHADER_OPCODE_LOD_LOGICAL:
case SHADER_OPCODE_TG4_LOGICAL:
case SHADER_OPCODE_TG4_OFFSET_LOGICAL:
- assert(src[9].file == IMM && src[10].file == IMM);
+ assert(src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM &&
+ src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
/* Texture coordinates. */
- if (i == 0)
- return src[9].ud;
+ if (i == TEX_LOGICAL_SRC_COORDINATE)
+ return src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
/* Texture derivatives. */
- else if ((i == 2 || i == 3) && opcode == SHADER_OPCODE_TXD_LOGICAL)
- return src[10].ud;
+ else if ((i == TEX_LOGICAL_SRC_LOD || i == TEX_LOGICAL_SRC_LOD2) &&
+ opcode == SHADER_OPCODE_TXD_LOGICAL)
+ return src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
/* Texture offset. */
- else if (i == 8)
+ else if (i == TEX_LOGICAL_SRC_OFFSET_VALUE)
return 2;
/* MCS */
- else if (i == 5 && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL)
+ else if (i == TEX_LOGICAL_SRC_MCS && opcode == SHADER_OPCODE_TXF_CMS_W_LOGICAL)
return 2;
else
return 1;
assert(src[2].file == IMM);
unsigned region_length = src[2].ud;
- if (src[0].file == FIXED_GRF) {
+ if (src[0].file == UNIFORM) {
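+ /* Uniform regions are measured in 4-byte components rather than
+  * whole hardware registers.
+  */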
+ assert(region_length % 4 == 0);
+ return region_length / 4;
+ } else if (src[0].file == FIXED_GRF) {
/* If the start of the region is not register aligned, then
* there's some portion of the register that's technically
* unread at the beginning. The register allocator, however, works in
* terms of whole registers, so we extend the region length to cover
* this unread portion at the beginning.
*/
if (src[0].subnr)
- region_length += src[0].subnr * type_sz(src[0].type);
+ region_length += src[0].subnr;
return DIV_ROUND_UP(region_length, REG_SIZE);
} else {
this->push_constant_loc = v->push_constant_loc;
this->pull_constant_loc = v->pull_constant_loc;
this->uniforms = v->uniforms;
- this->param_size = v->param_size;
}
fs_reg *
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
assert(stage == MESA_SHADER_VERTEX);
- int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
- if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
- count++;
/* Each attribute is 4 regs. */
this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
* maximum number of fragment shader uniform components (64). If
* there are too many of these, they'd fill up all of register space.
* So, this will push some of them out to the pull constant buffer and
- * update the program to load them. We also use pull constants for all
- * indirect constant loads because we don't support indirect accesses in
- * registers yet.
+ * update the program to load them.
*/
void
fs_visitor::assign_constant_locations()
if (dispatch_width != 8)
return;
- unsigned int num_pull_constants = 0;
-
- pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
- memset(pull_constant_loc, -1, sizeof(pull_constant_loc[0]) * uniforms);
-
bool is_live[uniforms];
memset(is_live, 0, sizeof(is_live));
+ /* For each uniform slot, a value of true indicates that the given slot and
+ * the next slot must remain contiguous. This is used to keep us from
+ * splitting arrays apart.
+ */
+ bool contiguous[uniforms];
+ memset(contiguous, 0, sizeof(contiguous));
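+ /* For example, an indirect read covering slots 4..6 sets contiguous[4]
+  * and contiguous[5] (but not contiguous[6]), so the chunking loop below
+  * keeps all three slots in a single chunk.
+  */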
+
/* First, we walk through the instructions and do two things:
*
* 1) Figure out which uniforms are live.
*
- * 2) Find all indirect access of uniform arrays and flag them as needing
- * to go into the pull constant buffer.
+ * 2) Mark any indirectly used ranges of registers as contiguous.
*
* Note that we don't move constant-indexed accesses to arrays. No
* testing has been done of the performance impact of this choice.
if (inst->src[i].file != UNIFORM)
continue;
- if (inst->src[i].reladdr) {
- int uniform = inst->src[i].nr;
+ int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
- /* If this array isn't already present in the pull constant buffer,
- * add it.
- */
- if (pull_constant_loc[uniform] == -1) {
- assert(param_size[uniform]);
- for (int j = 0; j < param_size[uniform]; j++)
- pull_constant_loc[uniform + j] = num_pull_constants++;
+ if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0) {
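+ /* src[2] of MOV_INDIRECT holds the region length in bytes; each
+  * uniform slot is 4 bytes.
+  */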
+ assert(inst->src[2].ud % 4 == 0);
+ unsigned last = constant_nr + (inst->src[2].ud / 4) - 1;
+ assert(last < uniforms);
+
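+ /* Every slot covered by the indirect read is live, and all but the
+  * last must stay contiguous with their successor.
+  */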
+ for (unsigned j = constant_nr; j < last; j++) {
+ is_live[j] = true;
+ contiguous[j] = true;
}
+ is_live[last] = true;
} else {
- /* Mark the the one accessed uniform as live */
- int constant_nr = inst->src[i].nr + inst->src[i].reg_offset;
if (constant_nr >= 0 && constant_nr < (int) uniforms)
is_live[constant_nr] = true;
}
* If changing this value, note the limitation about total_regs in
* brw_curbe.c.
*/
- unsigned int max_push_components = 16 * 8;
+ const unsigned int max_push_components = 16 * 8;
+
+ /* For Vulkan we limit max_chunk_size to 32 floats = 128 bytes, which
+ * is the maximum Vulkan push constant size.
+ */
+ const unsigned int max_chunk_size = 32;
+
unsigned int num_push_constants = 0;
+ unsigned int num_pull_constants = 0;
push_constant_loc = ralloc_array(mem_ctx, int, uniforms);
+ pull_constant_loc = ralloc_array(mem_ctx, int, uniforms);
- for (unsigned int i = 0; i < uniforms; i++) {
- if (!is_live[i] || pull_constant_loc[i] != -1) {
- /* This UNIFORM register is either dead, or has already been demoted
- * to a pull const. Mark it as no longer living in the param[] array.
- */
- push_constant_loc[i] = -1;
+ int chunk_start = -1;
+ for (unsigned u = 0; u < uniforms; u++) {
+ push_constant_loc[u] = -1;
+ pull_constant_loc[u] = -1;
+
+ if (!is_live[u])
continue;
- }
- if (num_push_constants < max_push_components) {
- /* Retain as a push constant. Record the location in the params[]
- * array.
- */
- push_constant_loc[i] = num_push_constants++;
- } else {
- /* Demote to a pull constant. */
- push_constant_loc[i] = -1;
- pull_constant_loc[i] = num_pull_constants++;
+ /* This is the first live uniform in the chunk */
+ if (chunk_start < 0)
+ chunk_start = u;
+
+ /* If this element does not need to be contiguous with the next, we
+ * split at this point and everything between chunk_start and u forms a
+ * single chunk.
+ */
+ if (!contiguous[u]) {
+ unsigned chunk_size = u - chunk_start + 1;
+
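+ /* Decide the fate of the chunk as a whole: push it if it fits in the
+  * remaining push budget and is small enough, otherwise demote every
+  * slot in it to the pull constant buffer.
+  */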
+ if (num_push_constants + chunk_size <= max_push_components &&
+ chunk_size <= max_chunk_size) {
+ for (unsigned j = chunk_start; j <= u; j++)
+ push_constant_loc[j] = num_push_constants++;
+ } else {
+ for (unsigned j = chunk_start; j <= u; j++)
+ pull_constant_loc[j] = num_pull_constants++;
+ }
+
+ chunk_start = -1;
}
}
* or VARYING_PULL_CONSTANT_LOAD instructions which load values into VGRFs.
*/
void
-fs_visitor::demote_pull_constants()
+fs_visitor::lower_constant_loads()
{
- foreach_block_and_inst (block, fs_inst, inst, cfg) {
+ const unsigned index = stage_prog_data->binding_table.pull_constants_start;
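+ /* All demoted uniforms are read back from a single buffer surface;
+  * this is its slot in the binding table.
+  */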
+
+ foreach_block_and_inst_safe (block, fs_inst, inst, cfg) {
+ /* Set up the annotation tracking for newly generated instructions. */
+ const fs_builder ibld(this, block, inst);
+
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file != UNIFORM)
continue;
- int pull_index;
+ /* We'll handle this case later */
+ if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT && i == 0)
+ continue;
+
unsigned location = inst->src[i].nr + inst->src[i].reg_offset;
- if (location >= uniforms) /* Out of bounds access */
- pull_index = -1;
- else
- pull_index = pull_constant_loc[location];
+ if (location >= uniforms)
+ continue; /* Out of bounds access */
+
+ int pull_index = pull_constant_loc[location];
if (pull_index == -1)
continue;
- /* Set up the annotation tracking for new generated instructions. */
- const fs_builder ibld(this, block, inst);
- const unsigned index = stage_prog_data->binding_table.pull_constants_start;
- fs_reg dst = vgrf(glsl_type::float_type);
-
assert(inst->src[i].stride == 0);
- /* Generate a pull load into dst. */
- if (inst->src[i].reladdr) {
- VARYING_PULL_CONSTANT_LOAD(ibld, dst,
- brw_imm_ud(index),
- *inst->src[i].reladdr,
- pull_index * 4);
- inst->src[i].reladdr = NULL;
- inst->src[i].stride = 1;
- } else {
- const fs_builder ubld = ibld.exec_all().group(8, 0);
- struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
- ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
- dst, brw_imm_ud(index), offset);
- inst->src[i].set_smear(pull_index & 3);
- }
- brw_mark_surface_used(prog_data, index);
+ fs_reg dst = vgrf(glsl_type::float_type);
+ const fs_builder ubld = ibld.exec_all().group(8, 0);
+ struct brw_reg offset = brw_imm_ud((unsigned)(pull_index * 4) & ~15);
+ ubld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
+ dst, brw_imm_ud(index), offset);
/* Rewrite the instruction to use the temporary VGRF. */
inst->src[i].file = VGRF;
inst->src[i].nr = dst.nr;
inst->src[i].reg_offset = 0;
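+ /* The pull load fetched a whole 16-byte-aligned vec4; smear picks out
+  * the component this uniform actually occupies.
+  */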
+ inst->src[i].set_smear(pull_index & 3);
+
+ brw_mark_surface_used(prog_data, index);
+ }
+
+ if (inst->opcode == SHADER_OPCODE_MOV_INDIRECT &&
+ inst->src[0].file == UNIFORM) {
+
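+ /* An indirect uniform read that was demoted becomes a single
+  * varying-offset pull load; src[1] already holds the per-channel
+  * indirect byte offset, so the MOV_INDIRECT itself can be removed.
+  */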
+ unsigned location = inst->src[0].nr + inst->src[0].reg_offset;
+ if (location >= uniforms)
+ continue; /* Out of bounds access */
+
+ int pull_index = pull_constant_loc[location];
+
+ if (pull_index == -1)
+ continue;
+
+ VARYING_PULL_CONSTANT_LOAD(ibld, inst->dst,
+ brw_imm_ud(index),
+ inst->src[1],
+ pull_index * 4);
+ inst->remove(block);
+
+ brw_mark_surface_used(prog_data, index);
}
}
invalidate_live_intervals();
*/
assert(mul->src[1].type == BRW_REGISTER_TYPE_D ||
mul->src[1].type == BRW_REGISTER_TYPE_UD);
- mul->src[1].type = (type_is_signed(mul->src[1].type) ?
- BRW_REGISTER_TYPE_W : BRW_REGISTER_TYPE_UW);
+ mul->src[1].type = BRW_REGISTER_TYPE_UW;
mul->src[1].stride *= 2;
} else if (devinfo->gen == 7 && !devinfo->is_haswell &&
return progress;
}
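+/* Lower SEL instructions with a conditional modifier (the representation
+ * of MIN/MAX operations) into an explicit CMP that writes the flag
+ * followed by a predicated SEL, since Gen4-5 hardware has no conditional
+ * modifier on SEL.
+ */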
+bool
+fs_visitor::lower_minmax()
+{
+ assert(devinfo->gen < 6);
+
+ bool progress = false;
+
+ foreach_block_and_inst_safe(block, fs_inst, inst, cfg) {
+ const fs_builder ibld(this, block, inst);
+
+ if (inst->opcode == BRW_OPCODE_SEL &&
+ inst->predicate == BRW_PREDICATE_NONE) {
+ /* FIXME: Using CMP doesn't preserve the NaN propagation semantics of
+ * the original SEL.L/GE instruction.
+ */
+ ibld.CMP(ibld.null_reg_d(), inst->src[0], inst->src[1],
+ inst->conditional_mod);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->conditional_mod = BRW_CONDITIONAL_NONE;
+
+ progress = true;
+ }
+ }
+
+ if (progress)
+ invalidate_live_intervals();
+
+ return progress;
+}
+
static void
setup_color_payload(const fs_builder &bld, const brw_wm_prog_key *key,
fs_reg *dst, fs_reg color, unsigned components)
lower_sampler_logical_send(const fs_builder &bld, fs_inst *inst, opcode op)
{
const brw_device_info *devinfo = bld.shader->devinfo;
- const fs_reg &coordinate = inst->src[0];
- const fs_reg &shadow_c = inst->src[1];
- const fs_reg &lod = inst->src[2];
- const fs_reg &lod2 = inst->src[3];
- const fs_reg &sample_index = inst->src[4];
- const fs_reg &mcs = inst->src[5];
- const fs_reg &surface = inst->src[6];
- const fs_reg &sampler = inst->src[7];
- const fs_reg &offset_value = inst->src[8];
- assert(inst->src[9].file == IMM && inst->src[10].file == IMM);
- const unsigned coord_components = inst->src[9].ud;
- const unsigned grad_components = inst->src[10].ud;
+ const fs_reg &coordinate = inst->src[TEX_LOGICAL_SRC_COORDINATE];
+ const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
+ const fs_reg &lod = inst->src[TEX_LOGICAL_SRC_LOD];
+ const fs_reg &lod2 = inst->src[TEX_LOGICAL_SRC_LOD2];
+ const fs_reg &sample_index = inst->src[TEX_LOGICAL_SRC_SAMPLE_INDEX];
+ const fs_reg &mcs = inst->src[TEX_LOGICAL_SRC_MCS];
+ const fs_reg &surface = inst->src[TEX_LOGICAL_SRC_SURFACE];
+ const fs_reg &sampler = inst->src[TEX_LOGICAL_SRC_SAMPLER];
+ const fs_reg &offset_value = inst->src[TEX_LOGICAL_SRC_OFFSET_VALUE];
+ assert(inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].file == IMM);
+ const unsigned coord_components = inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
+ assert(inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].file == IMM);
+ const unsigned grad_components = inst->src[TEX_LOGICAL_SRC_GRAD_COMPONENTS].ud;
if (devinfo->gen >= 7) {
lower_sampler_logical_send_gen7(bld, inst, op, coordinate,
case SHADER_OPCODE_TG4_OFFSET_LOGICAL: {
/* gather4_po_c is unsupported in SIMD16 mode. */
- const fs_reg &shadow_c = inst->src[1];
+ const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
return (shadow_c.file != BAD_FILE ? 8 : inst->exec_size);
}
case SHADER_OPCODE_TXL_LOGICAL:
* Gen4-6 can't support TXL and TXB with shadow comparison in SIMD16
* mode because the message exceeds the maximum length of 11.
*/
- const fs_reg &shadow_c = inst->src[1];
+ const fs_reg &shadow_c = inst->src[TEX_LOGICAL_SRC_SHADOW_C];
if (devinfo->gen == 4 && shadow_c.file == BAD_FILE)
return 16;
else if (devinfo->gen < 7 && shadow_c.file != BAD_FILE)
* circumstances it can end up with a message that is too long in SIMD16
* mode.
*/
- const unsigned coord_components = inst->src[8].ud;
+ const unsigned coord_components =
+ inst->src[TEX_LOGICAL_SRC_COORD_COMPONENTS].ud;
/* First three arguments are the sample index and the two arguments for
* the MCS data.
*/
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
return 8;
+ case SHADER_OPCODE_MOV_INDIRECT:
+ /* Prior to Broadwell, we only have 8 address subregisters */
+ return devinfo->gen < 8 ? 8 : inst->exec_size;
+
default:
return inst->exec_size;
}
break;
case UNIFORM:
fprintf(file, "u%d", inst->src[i].nr + inst->src[i].reg_offset);
- if (inst->src[i].reladdr) {
- fprintf(file, "+reladdr");
- } else if (inst->src[i].subreg_offset) {
+ if (inst->src[i].subreg_offset) {
fprintf(file, "+%d.%d", inst->src[i].reg_offset,
inst->src[i].subreg_offset);
}
case IMM:
switch (inst->src[i].type) {
case BRW_REGISTER_TYPE_F:
- fprintf(file, "%ff", inst->src[i].f);
+ fprintf(file, "%-gf", inst->src[i].f);
break;
case BRW_REGISTER_TYPE_W:
case BRW_REGISTER_TYPE_D:
{
if (end == start ||
end->is_partial_write() ||
- reg.reladdr ||
!reg.equals(end->dst)) {
return NULL;
} else {
}
void
-fs_visitor::setup_payload_gen6()
+fs_visitor::setup_fs_payload_gen6()
{
- bool uses_depth =
- (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
+ assert(stage == MESA_SHADER_FRAGMENT);
+ brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
+ brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
+
unsigned barycentric_interp_modes =
(stage == MESA_SHADER_FRAGMENT) ?
((brw_wm_prog_data*) this->prog_data)->barycentric_interp_modes : 0;
}
/* R27: interpolated depth if uses source depth */
- if (uses_depth) {
+ prog_data->uses_src_depth =
+ (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
+ if (prog_data->uses_src_depth) {
payload.source_depth_reg = payload.num_regs;
payload.num_regs++;
if (dispatch_width == 16) {
payload.num_regs++;
}
}
+
/* R29: interpolated W set if GEN6_WM_USES_SOURCE_W. */
- if (uses_depth) {
+ prog_data->uses_src_w =
+ (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
+ if (prog_data->uses_src_w) {
payload.source_w_reg = payload.num_regs;
payload.num_regs++;
if (dispatch_width == 16) {
}
}
- if (stage == MESA_SHADER_FRAGMENT) {
- brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- prog_data->uses_pos_offset = key->compute_pos_offset;
- /* R31: MSAA position offsets. */
- if (prog_data->uses_pos_offset) {
- payload.sample_pos_reg = payload.num_regs;
- payload.num_regs++;
- }
+ prog_data->uses_pos_offset = key->compute_pos_offset;
+ /* R31: MSAA position offsets. */
+ if (prog_data->uses_pos_offset) {
+ payload.sample_pos_reg = payload.num_regs;
+ payload.num_regs++;
}
/* R32: MSAA input coverage mask */
- if (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) {
+ prog_data->uses_sample_mask =
+ (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) != 0;
+ if (prog_data->uses_sample_mask) {
assert(devinfo->gen >= 7);
payload.sample_mask_in_reg = payload.num_regs;
payload.num_regs++;
bld = fs_builder(this, 64);
assign_constant_locations();
- demote_pull_constants();
+ lower_constant_loads();
validate();
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
- snprintf(filename, 64, "%s%d-%s-00-start",
+ snprintf(filename, 64, "%s%d-%s-00-00-start",
stage_abbrev, dispatch_width, nir->info.name);
backend_shader::dump_instructions(filename);
OPT(opt_combine_constants);
OPT(lower_integer_multiplication);
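+ /* lower_minmax replaces SEL.cmod with CMP + predicated SEL pairs,
+  * which opens up new cmod propagation, CSE, copy propagation, and
+  * dead code opportunities, so re-run those passes when it makes
+  * progress.
+  */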
+ if (devinfo->gen <= 5 && OPT(lower_minmax)) {
+ OPT(opt_cmod_propagation);
+ OPT(opt_cse);
+ OPT(opt_copy_propagate);
+ OPT(dead_code_eliminate);
+ }
+
lower_uniform_pull_constant_loads();
validate();
assert(stage == MESA_SHADER_FRAGMENT);
if (devinfo->gen >= 6)
- setup_payload_gen6();
+ setup_fs_payload_gen6();
else
- setup_payload_gen4();
+ setup_fs_payload_gen4();
if (0) {
emit_dummy_fs();
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
true);
+ brw_nir_lower_fs_inputs(shader);
+ brw_nir_lower_fs_outputs(shader);
shader = brw_postprocess_nir(shader, compiler->devinfo, true);
/* key->alpha_test_func means simulating alpha testing via discards,
}
fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base,
- v.promoted_constants, v.runtime_check_aads_emit, "FS");
+ v.promoted_constants, v.runtime_check_aads_emit,
+ MESA_SHADER_FRAGMENT);
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s",
}
fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base,
- v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
+ v8.promoted_constants, v8.runtime_check_aads_emit,
+ MESA_SHADER_COMPUTE);
if (INTEL_DEBUG & DEBUG_CS) {
char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s",
shader->info.label ? shader->info.label :