#include "brw_dead_control_flow.h"
#include "main/uniforms.h"
#include "brw_fs_live_variables.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "program/sampler.h"
using namespace brw;
inst->regs_written = regs_written;
if (devinfo->gen < 7) {
- inst->base_mrf = 13;
+ inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen);
inst->header_size = 1;
if (devinfo->gen == 4)
inst->mlen = 3;
unsigned int array_elements;
if (type->is_array()) {
- array_elements = type->length;
+ array_elements = type->arrays_of_arrays_size();
if (array_elements == 0) {
fail("dereferenced array '%s' has length 0\n", name);
}
- type = type->fields.array;
+ type = type->without_array();
} else {
array_elements = 1;
}
int urb_next = 0;
/* Figure out where each of the incoming setup attributes lands. */
if (devinfo->gen >= 6) {
- if (_mesa_bitcount_64(prog->InputsRead &
+ if (_mesa_bitcount_64(nir->info.inputs_read &
BRW_FS_VARYING_INPUT_MASK) <= 16) {
/* The SF/SBE pipeline stage can do arbitrary rearrangement of the
* first 16 varying inputs, so we can put them wherever we want.
* a different vertex (or geometry) shader.
*/
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
- if (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
+ if (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
BITFIELD64_BIT(i)) {
prog_data->urb_setup[i] = urb_next++;
}
struct brw_vue_map prev_stage_vue_map;
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
key->input_slots_valid,
- shader_prog->SeparateShader);
+ nir->info.separate_shader);
int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
* unused.
*/
if (varying != BRW_VARYING_SLOT_COUNT &&
- (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
+ (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
BITFIELD64_BIT(varying))) {
prog_data->urb_setup[varying] = slot - first_slot;
}
*
* See compile_sf_prog() for more info.
*/
- if (prog->InputsRead & BITFIELD64_BIT(VARYING_SLOT_PNTC))
+ if (nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC))
prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++;
}
fs_visitor::assign_vs_urb_setup()
{
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
- int grf, count, slot, channel, attr;
assert(stage == MESA_SHADER_VERTEX);
- count = _mesa_bitcount_64(vs_prog_data->inputs_read);
+ int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
count++;
/* Each attribute is 4 regs. */
- this->first_non_payload_grf += count * 4;
-
- unsigned vue_entries =
- MAX2(count, vs_prog_data->base.vue_map.num_slots);
-
- vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
- vs_prog_data->base.urb_read_length = (count + 1) / 2;
+ this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
assert(vs_prog_data->base.urb_read_length <= 15);
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == ATTR) {
-
- if (inst->src[i].reg == VERT_ATTRIB_MAX) {
- slot = count - 1;
- } else {
- /* Attributes come in in a contiguous block, ordered by their
- * gl_vert_attrib value. That means we can compute the slot
- * number for an attribute by masking out the enabled
- * attributes before it and counting the bits.
- */
- attr = inst->src[i].reg + inst->src[i].reg_offset / 4;
- slot = _mesa_bitcount_64(vs_prog_data->inputs_read &
- BITFIELD64_MASK(attr));
- }
-
- channel = inst->src[i].reg_offset & 3;
-
- grf = payload.num_regs +
- prog_data->curb_read_length +
- slot * 4 + channel;
+ int grf = payload.num_regs +
+ prog_data->curb_read_length +
+ inst->src[i].reg +
+ inst->src[i].reg_offset;
inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg =
bool
fs_visitor::remove_duplicate_mrf_writes()
{
- fs_inst *last_mrf_move[16];
+ fs_inst *last_mrf_move[BRW_MAX_MRF(devinfo->gen)];
bool progress = false;
/* Need to update the MRF tracking for compressed instructions. */
* else does except for register spill/unspill, which generates and
* uses its MRF within a single IR instruction.
*/
- inst->base_mrf = 14;
+ inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1;
inst->mlen = 1;
}
}
fs_visitor::setup_payload_gen6()
{
bool uses_depth =
- (prog->InputsRead & (1 << VARYING_SLOT_POS)) != 0;
+ (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
unsigned barycentric_interp_modes =
(stage == MESA_SHADER_FRAGMENT) ?
((brw_wm_prog_data*) this->prog_data)->barycentric_interp_modes : 0;
}
/* R32: MSAA input coverage mask */
- if (prog->SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) {
+ if (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) {
assert(devinfo->gen >= 7);
payload.sample_mask_in_reg = payload.num_regs;
payload.num_regs++;
/* R34-: bary for 32-pixel. */
/* R58-59: interp W for 32-pixel. */
- if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
source_depth_to_render_target = true;
}
}
payload.num_regs = 2;
}
+/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
+ * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
+ * to coordinated.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ * no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ * easily use 16-bit words rather than 32-bit dwords in the push constant
+ * data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ * conveying the data, and thereby reduce push constant usage.
+ *
+ */
void
fs_visitor::setup_cs_payload()
{
assert(devinfo->gen >= 7);
+ brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
payload.num_regs = 1;
- if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- const unsigned local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
- assert((local_id_dwords & 0x7) == 0);
- const unsigned local_id_regs = local_id_dwords / 8;
+ if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+ prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
payload.local_invocation_id_reg = payload.num_regs;
- payload.num_regs += local_id_regs;
+ payload.num_regs += prog_data->local_invocation_id_regs;
}
}
-void
-fs_visitor::assign_fs_binding_table_offsets()
-{
- assert(stage == MESA_SHADER_FRAGMENT);
- brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- uint32_t next_binding_table_offset = 0;
-
- /* If there are no color regions, we still perform an FB write to a null
- * renderbuffer, which we place at surface index 0.
- */
- prog_data->binding_table.render_target_start = next_binding_table_offset;
- next_binding_table_offset += MAX2(key->nr_color_regions, 1);
-
- assign_common_binding_table_offsets(next_binding_table_offset);
-}
-
-void
-fs_visitor::assign_cs_binding_table_offsets()
-{
- assert(stage == MESA_SHADER_COMPUTE);
- brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
- uint32_t next_binding_table_offset = 0;
-
- /* May not be used if the gl_NumWorkGroups variable is not accessed. */
- prog_data->binding_table.work_groups_start = next_binding_table_offset;
- next_binding_table_offset++;
-
- assign_common_binding_table_offsets(next_binding_table_offset);
-}
-
void
fs_visitor::calculate_register_pressure()
{
\
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \
char filename[64]; \
- snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \
- stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
+ snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \
+ stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
\
backend_shader::dump_instructions(filename); \
} \
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
- snprintf(filename, 64, "%s%d-%04d-00-start",
- stage_abbrev, dispatch_width,
- shader_prog ? shader_prog->Name : 0);
+ snprintf(filename, 64, "%s%d-%s-00-start",
+ stage_abbrev, dispatch_width, nir->info.name);
backend_shader::dump_instructions(filename);
}
OPT(opt_algebraic);
OPT(opt_cse);
OPT(opt_copy_propagate);
- OPT(opt_peephole_predicated_break);
+ OPT(opt_predicated_break, this);
OPT(opt_cmod_propagation);
OPT(dead_code_eliminate);
OPT(opt_peephole_sel);
{
assert(stage == MESA_SHADER_VERTEX);
- assign_common_binding_table_offsets(0);
setup_vs_payload();
if (shader_time_index >= 0)
assert(stage == MESA_SHADER_FRAGMENT);
- sanity_param_count = prog->Parameters->NumParameters;
-
- assign_fs_binding_table_offsets();
-
if (devinfo->gen >= 6)
setup_payload_gen6();
else
emit_shader_time_begin();
calculate_urb_setup();
- if (prog->InputsRead > 0) {
+ if (nir->info.inputs_read > 0) {
if (devinfo->gen < 6)
emit_interpolation_setup_gen4();
else
else
wm_prog_data->reg_blocks_16 = brw_register_blocks(grf_used);
- /* If any state parameters were appended, then ParameterValues could have
- * been realloced, in which case the driver uniform storage set up by
- * _mesa_associate_uniform_storage() would point to freed memory. Make
- * sure that didn't happen.
- */
- assert(sanity_param_count == prog->Parameters->NumParameters);
-
return !failed;
}
fs_visitor::run_cs()
{
assert(stage == MESA_SHADER_COMPUTE);
- assert(shader);
-
- sanity_param_count = prog->Parameters->NumParameters;
-
- assign_cs_binding_table_offsets();
setup_cs_payload();
if (failed)
return false;
- /* If any state parameters were appended, then ParameterValues could have
- * been realloced, in which case the driver uniform storage set up by
- * _mesa_associate_uniform_storage() would point to freed memory. Make
- * sure that didn't happen.
- */
- assert(sanity_param_count == prog->Parameters->NumParameters);
-
return !failed;
}
struct brw_wm_prog_data *prog_data,
struct gl_fragment_program *fp,
struct gl_shader_program *prog,
+ int shader_time_index8, int shader_time_index16,
unsigned *final_assembly_size)
{
- struct brw_shader *shader = NULL;
- if (prog)
- shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM))
- brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
-
- int st_index8 = -1, st_index16 = -1;
- if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
- st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8);
- st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16);
- }
-
/* Now the main event: Visit the shader IR and generate our FS IR for it.
*/
- fs_visitor v(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
- prog, &fp->Base, 8, st_index8);
+ fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data->base, &fp->Base, fp->Base.nir, 8, shader_time_index8);
if (!v.run_fs(false /* do_rep_send */)) {
if (prog) {
prog->LinkStatus = false;
}
cfg_t *simd16_cfg = NULL;
- fs_visitor v2(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
- prog, &fp->Base, 16, st_index16);
+ fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data->base, &fp->Base, fp->Base.nir, 16, shader_time_index16);
if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
if (!v.simd16_unsupported) {
/* Try a SIMD16 compile */
return g.get_assembly(final_assembly_size);
}
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
+ void *buffer, uint32_t threads, uint32_t stride)
+{
+ if (prog_data->local_invocation_id_regs == 0)
+ return;
+
+ /* 'stride' should be an integer number of registers, that is, a multiple
+ * of 32 bytes.
+ */
+ assert(stride % 32 == 0);
+
+ unsigned x = 0, y = 0, z = 0;
+ for (unsigned t = 0; t < threads; t++) {
+ uint32_t *param = (uint32_t *) buffer + stride * t / 4;
+
+ for (unsigned i = 0; i < prog_data->simd_size; i++) {
+ param[0 * prog_data->simd_size + i] = x;
+ param[1 * prog_data->simd_size + i] = y;
+ param[2 * prog_data->simd_size + i] = z;
+
+ x++;
+ if (x == prog_data->local_size[0]) {
+ x = 0;
+ y++;
+ if (y == prog_data->local_size[1]) {
+ y = 0;
+ z++;
+ if (z == prog_data->local_size[2])
+ z = 0;
+ }
+ }
+ }
+ }
+}
+
fs_reg *
fs_visitor::emit_cs_local_invocation_id_setup()
{
struct brw_cs_prog_data *prog_data,
struct gl_compute_program *cp,
struct gl_shader_program *prog,
+ int shader_time_index,
unsigned *final_assembly_size)
{
- struct brw_shader *shader =
- (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
- if (unlikely(INTEL_DEBUG & DEBUG_CS))
- brw_dump_ir("compute", prog, &shader->base, &cp->Base);
-
prog_data->local_size[0] = cp->LocalSize[0];
prog_data->local_size[1] = cp->LocalSize[1];
prog_data->local_size[2] = cp->LocalSize[2];
unsigned local_workgroup_size =
cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
+ unsigned max_cs_threads = brw->intelScreen->compiler->devinfo->max_cs_threads;
cfg_t *cfg = NULL;
const char *fail_msg = NULL;
- int st_index = -1;
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
- st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
-
/* Now the main event: Visit the shader IR and generate our CS IR for it.
*/
- fs_visitor v8(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
- &cp->Base, 8, st_index);
+ fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data->base, &cp->Base, cp->Base.nir, 8, shader_time_index);
if (!v8.run_cs()) {
fail_msg = v8.fail_msg;
- } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+ } else if (local_workgroup_size <= 8 * max_cs_threads) {
cfg = v8.cfg;
prog_data->simd_size = 8;
}
- fs_visitor v16(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
- &cp->Base, 16, st_index);
+ fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data->base, &cp->Base, cp->Base.nir, 16, shader_time_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && !v8.simd16_unsupported &&
- local_workgroup_size <= 16 * brw->max_cs_threads) {
+ local_workgroup_size <= 16 * max_cs_threads) {
/* Try a SIMD16 compile */
v16.import_uniforms(&v8);
if (!v16.run_cs()) {