#include "brw_dead_control_flow.h"
#include "main/uniforms.h"
#include "brw_fs_live_variables.h"
-#include "glsl/glsl_types.h"
+#include "glsl/nir/glsl_types.h"
#include "program/sampler.h"
using namespace brw;
inst->regs_written = regs_written;
if (devinfo->gen < 7) {
- inst->base_mrf = 13;
+ inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen);
inst->header_size = 1;
if (devinfo->gen == 4)
inst->mlen = 3;
unsigned int array_elements;
if (type->is_array()) {
- array_elements = type->length;
+ array_elements = type->arrays_of_arrays_size();
if (array_elements == 0) {
fail("dereferenced array '%s' has length 0\n", name);
}
- type = type->fields.array;
+ type = type->without_array();
} else {
array_elements = 1;
}
int urb_next = 0;
/* Figure out where each of the incoming setup attributes lands. */
if (devinfo->gen >= 6) {
- if (_mesa_bitcount_64(prog->InputsRead &
+ if (_mesa_bitcount_64(nir->info.inputs_read &
BRW_FS_VARYING_INPUT_MASK) <= 16) {
/* The SF/SBE pipeline stage can do arbitrary rearrangement of the
* first 16 varying inputs, so we can put them wherever we want.
* a different vertex (or geometry) shader.
*/
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
- if (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
+ if (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
BITFIELD64_BIT(i)) {
prog_data->urb_setup[i] = urb_next++;
}
struct brw_vue_map prev_stage_vue_map;
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
key->input_slots_valid,
- shader_prog->SeparateShader);
+ nir->info.separate_shader);
int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
* unused.
*/
if (varying != BRW_VARYING_SLOT_COUNT &&
- (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
+ (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
BITFIELD64_BIT(varying))) {
prog_data->urb_setup[varying] = slot - first_slot;
}
*
* See compile_sf_prog() for more info.
*/
- if (prog->InputsRead & BITFIELD64_BIT(VARYING_SLOT_PNTC))
+ if (nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC))
prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++;
}
fs_visitor::assign_vs_urb_setup()
{
brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
- int grf, count, slot, channel, attr;
assert(stage == MESA_SHADER_VERTEX);
- count = _mesa_bitcount_64(vs_prog_data->inputs_read);
+ int count = _mesa_bitcount_64(vs_prog_data->inputs_read);
if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
count++;
/* Each attribute is 4 regs. */
- this->first_non_payload_grf += count * 4;
-
- unsigned vue_entries =
- MAX2(count, vs_prog_data->base.vue_map.num_slots);
-
- vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
- vs_prog_data->base.urb_read_length = (count + 1) / 2;
+ this->first_non_payload_grf += 4 * vs_prog_data->nr_attributes;
assert(vs_prog_data->base.urb_read_length <= 15);
foreach_block_and_inst(block, fs_inst, inst, cfg) {
for (int i = 0; i < inst->sources; i++) {
if (inst->src[i].file == ATTR) {
-
- if (inst->src[i].reg == VERT_ATTRIB_MAX) {
- slot = count - 1;
- } else {
- /* Attributes come in in a contiguous block, ordered by their
- * gl_vert_attrib value. That means we can compute the slot
- * number for an attribute by masking out the enabled
- * attributes before it and counting the bits.
- */
- attr = inst->src[i].reg + inst->src[i].reg_offset / 4;
- slot = _mesa_bitcount_64(vs_prog_data->inputs_read &
- BITFIELD64_MASK(attr));
- }
-
- channel = inst->src[i].reg_offset & 3;
-
- grf = payload.num_regs +
- prog_data->curb_read_length +
- slot * 4 + channel;
+ int grf = payload.num_regs +
+ prog_data->curb_read_length +
+ inst->src[i].reg +
+ inst->src[i].reg_offset;
inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg =
bool
fs_visitor::remove_duplicate_mrf_writes()
{
- fs_inst *last_mrf_move[16];
+ fs_inst *last_mrf_move[BRW_MAX_MRF(devinfo->gen)];
bool progress = false;
/* Need to update the MRF tracking for compressed instructions. */
* else does except for register spill/unspill, which generates and
* uses its MRF within a single IR instruction.
*/
- inst->base_mrf = 14;
+ inst->base_mrf = FIRST_PULL_LOAD_MRF(devinfo->gen) + 1;
inst->mlen = 1;
}
}
fs_visitor::setup_payload_gen6()
{
bool uses_depth =
- (prog->InputsRead & (1 << VARYING_SLOT_POS)) != 0;
+ (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
unsigned barycentric_interp_modes =
(stage == MESA_SHADER_FRAGMENT) ?
((brw_wm_prog_data*) this->prog_data)->barycentric_interp_modes : 0;
}
/* R32: MSAA input coverage mask */
- if (prog->SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) {
+ if (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) {
assert(devinfo->gen >= 7);
payload.sample_mask_in_reg = payload.num_regs;
payload.num_regs++;
/* R34-: bary for 32-pixel. */
/* R58-59: interp W for 32-pixel. */
- if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
source_depth_to_render_target = true;
}
}
payload.num_regs = 2;
}
+/**
+ * We are building the local ID push constant data using the simplest possible
+ * method. We simply push the local IDs directly as they should appear in the
+ * registers for the uvec3 gl_LocalInvocationID variable.
+ *
+ * Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
+ * registers worth of push constant space.
+ *
+ * Note: Any updates to brw_cs_prog_local_id_payload_dwords,
+ * fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
+ * to coordinated.
+ *
+ * FINISHME: There are a few easy optimizations to consider.
+ *
+ * 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
+ * no need for using push constant space for that dimension.
+ *
+ * 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
+ * easily use 16-bit words rather than 32-bit dwords in the push constant
+ * data.
+ *
+ * 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
+ * conveying the data, and thereby reduce push constant usage.
+ *
+ */
void
fs_visitor::setup_cs_payload()
{
assert(devinfo->gen >= 7);
+ brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
payload.num_regs = 1;
- if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
- const unsigned local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(dispatch_width);
- assert((local_id_dwords & 0x7) == 0);
- const unsigned local_id_regs = local_id_dwords / 8;
+ if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+ prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
payload.local_invocation_id_reg = payload.num_regs;
- payload.num_regs += local_id_regs;
+ payload.num_regs += prog_data->local_invocation_id_regs;
}
}
\
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \
char filename[64]; \
- snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \
- stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
+ snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \
+ stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
\
backend_shader::dump_instructions(filename); \
} \
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
- snprintf(filename, 64, "%s%d-%04d-00-start",
- stage_abbrev, dispatch_width,
- shader_prog ? shader_prog->Name : 0);
+ snprintf(filename, 64, "%s%d-%s-00-start",
+ stage_abbrev, dispatch_width, nir->info.name);
backend_shader::dump_instructions(filename);
}
OPT(opt_algebraic);
OPT(opt_cse);
OPT(opt_copy_propagate);
- OPT(opt_peephole_predicated_break);
+ OPT(opt_predicated_break, this);
OPT(opt_cmod_propagation);
OPT(dead_code_eliminate);
OPT(opt_peephole_sel);
assert(stage == MESA_SHADER_FRAGMENT);
- sanity_param_count = prog->Parameters->NumParameters;
-
if (devinfo->gen >= 6)
setup_payload_gen6();
else
emit_shader_time_begin();
calculate_urb_setup();
- if (prog->InputsRead > 0) {
+ if (nir->info.inputs_read > 0) {
if (devinfo->gen < 6)
emit_interpolation_setup_gen4();
else
else
wm_prog_data->reg_blocks_16 = brw_register_blocks(grf_used);
- /* If any state parameters were appended, then ParameterValues could have
- * been realloced, in which case the driver uniform storage set up by
- * _mesa_associate_uniform_storage() would point to freed memory. Make
- * sure that didn't happen.
- */
- assert(sanity_param_count == prog->Parameters->NumParameters);
-
return !failed;
}
fs_visitor::run_cs()
{
assert(stage == MESA_SHADER_COMPUTE);
- assert(shader);
-
- sanity_param_count = prog->Parameters->NumParameters;
setup_cs_payload();
if (failed)
return false;
- /* If any state parameters were appended, then ParameterValues could have
- * been realloced, in which case the driver uniform storage set up by
- * _mesa_associate_uniform_storage() would point to freed memory. Make
- * sure that didn't happen.
- */
- assert(sanity_param_count == prog->Parameters->NumParameters);
-
return !failed;
}
const unsigned *
-brw_wm_fs_emit(struct brw_context *brw,
+brw_wm_fs_emit(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const struct brw_wm_prog_key *key,
struct brw_wm_prog_data *prog_data,
- struct gl_fragment_program *fp,
- struct gl_shader_program *prog,
- unsigned *final_assembly_size)
+ const nir_shader *shader,
+ struct gl_program *prog,
+ int shader_time_index8, int shader_time_index16,
+ bool use_rep_send,
+ unsigned *final_assembly_size,
+ char **error_str)
{
- struct brw_shader *shader = NULL;
- if (prog)
- shader = (brw_shader *) prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-
- if (unlikely(INTEL_DEBUG & DEBUG_WM))
- brw_dump_ir("fragment", prog, &shader->base, &fp->Base);
-
- int st_index8 = -1, st_index16 = -1;
- if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
- st_index8 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS8);
- st_index16 = brw_get_shader_time_index(brw, prog, &fp->Base, ST_FS16);
- }
-
- /* Now the main event: Visit the shader IR and generate our FS IR for it.
- */
- fs_visitor v(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
- prog, &fp->Base, 8, st_index8);
+ fs_visitor v(compiler, log_data, mem_ctx, key,
+ &prog_data->base, prog, shader, 8,
+ shader_time_index8);
if (!v.run_fs(false /* do_rep_send */)) {
- if (prog) {
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, v.fail_msg);
- }
-
- _mesa_problem(NULL, "Failed to compile fragment shader: %s\n",
- v.fail_msg);
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, v.fail_msg);
return NULL;
}
cfg_t *simd16_cfg = NULL;
- fs_visitor v2(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
- prog, &fp->Base, 16, st_index16);
- if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
+ fs_visitor v2(compiler, log_data, mem_ctx, key,
+ &prog_data->base, prog, shader, 16,
+ shader_time_index16);
+ if (likely(!(INTEL_DEBUG & DEBUG_NO16) || use_rep_send)) {
if (!v.simd16_unsupported) {
/* Try a SIMD16 compile */
v2.import_uniforms(&v);
- if (!v2.run_fs(brw->use_rep_send)) {
- perf_debug("SIMD16 shader failed to compile: %s", v2.fail_msg);
+ if (!v2.run_fs(use_rep_send)) {
+ compiler->shader_perf_log(log_data,
+ "SIMD16 shader failed to compile: %s",
+ v2.fail_msg);
} else {
simd16_cfg = v2.cfg;
}
}
cfg_t *simd8_cfg;
- int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || brw->no_simd8;
- if ((no_simd8 || brw->gen < 5) && simd16_cfg) {
+ int no_simd8 = (INTEL_DEBUG & DEBUG_NO8) || use_rep_send;
+ if ((no_simd8 || compiler->devinfo->gen < 5) && simd16_cfg) {
simd8_cfg = NULL;
prog_data->no_8 = true;
} else {
prog_data->no_8 = false;
}
- fs_generator g(brw->intelScreen->compiler, brw,
- mem_ctx, (void *) key, &prog_data->base,
- &fp->Base, v.promoted_constants, v.runtime_check_aads_emit, "FS");
+ fs_generator g(compiler, log_data, mem_ctx, (void *) key, &prog_data->base,
+ v.promoted_constants, v.runtime_check_aads_emit, "FS");
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {
- char *name;
- if (prog)
- name = ralloc_asprintf(mem_ctx, "%s fragment shader %d",
- prog->Label ? prog->Label : "unnamed",
- prog->Name);
- else
- name = ralloc_asprintf(mem_ctx, "fragment program %d", fp->Base.Id);
-
- g.enable_debug(name);
+ g.enable_debug(ralloc_asprintf(mem_ctx, "%s fragment shader %s",
+ shader->info.label ? shader->info.label :
+ "unnamed",
+ shader->info.name));
}
if (simd8_cfg)
return g.get_assembly(final_assembly_size);
}
+void
+brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
+ void *buffer, uint32_t threads, uint32_t stride)
+{
+ if (prog_data->local_invocation_id_regs == 0)
+ return;
+
+ /* 'stride' should be an integer number of registers, that is, a multiple
+ * of 32 bytes.
+ */
+ assert(stride % 32 == 0);
+
+ unsigned x = 0, y = 0, z = 0;
+ for (unsigned t = 0; t < threads; t++) {
+ uint32_t *param = (uint32_t *) buffer + stride * t / 4;
+
+ for (unsigned i = 0; i < prog_data->simd_size; i++) {
+ param[0 * prog_data->simd_size + i] = x;
+ param[1 * prog_data->simd_size + i] = y;
+ param[2 * prog_data->simd_size + i] = z;
+
+ x++;
+ if (x == prog_data->local_size[0]) {
+ x = 0;
+ y++;
+ if (y == prog_data->local_size[1]) {
+ y = 0;
+ z++;
+ if (z == prog_data->local_size[2])
+ z = 0;
+ }
+ }
+ }
+ }
+}
+
fs_reg *
fs_visitor::emit_cs_local_invocation_id_setup()
{
}
const unsigned *
-brw_cs_emit(struct brw_context *brw,
+brw_cs_emit(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
const struct brw_cs_prog_key *key,
struct brw_cs_prog_data *prog_data,
- struct gl_compute_program *cp,
- struct gl_shader_program *prog,
- unsigned *final_assembly_size)
+ const nir_shader *shader,
+ int shader_time_index,
+ unsigned *final_assembly_size,
+ char **error_str)
{
- struct brw_shader *shader =
- (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_COMPUTE];
-
- if (unlikely(INTEL_DEBUG & DEBUG_CS))
- brw_dump_ir("compute", prog, &shader->base, &cp->Base);
-
- prog_data->local_size[0] = cp->LocalSize[0];
- prog_data->local_size[1] = cp->LocalSize[1];
- prog_data->local_size[2] = cp->LocalSize[2];
+ prog_data->local_size[0] = shader->info.cs.local_size[0];
+ prog_data->local_size[1] = shader->info.cs.local_size[1];
+ prog_data->local_size[2] = shader->info.cs.local_size[2];
unsigned local_workgroup_size =
- cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
+ shader->info.cs.local_size[0] * shader->info.cs.local_size[1] *
+ shader->info.cs.local_size[2];
+
+ unsigned max_cs_threads = compiler->devinfo->max_cs_threads;
cfg_t *cfg = NULL;
const char *fail_msg = NULL;
- int st_index = -1;
- if (INTEL_DEBUG & DEBUG_SHADER_TIME)
- st_index = brw_get_shader_time_index(brw, prog, &cp->Base, ST_CS);
-
/* Now the main event: Visit the shader IR and generate our CS IR for it.
*/
- fs_visitor v8(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
- &cp->Base, 8, st_index);
+ fs_visitor v8(compiler, log_data, mem_ctx, key, &prog_data->base,
+ NULL, /* Never used in core profile */
+ shader, 8, shader_time_index);
if (!v8.run_cs()) {
fail_msg = v8.fail_msg;
- } else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
+ } else if (local_workgroup_size <= 8 * max_cs_threads) {
cfg = v8.cfg;
prog_data->simd_size = 8;
}
- fs_visitor v16(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
- &cp->Base, 16, st_index);
+ fs_visitor v16(compiler, log_data, mem_ctx, key, &prog_data->base,
+ NULL, /* Never used in core profile */
+ shader, 16, shader_time_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && !v8.simd16_unsupported &&
- local_workgroup_size <= 16 * brw->max_cs_threads) {
+ local_workgroup_size <= 16 * max_cs_threads) {
/* Try a SIMD16 compile */
v16.import_uniforms(&v8);
if (!v16.run_cs()) {
- perf_debug("SIMD16 shader failed to compile: %s", v16.fail_msg);
+ compiler->shader_perf_log(log_data,
+ "SIMD16 shader failed to compile: %s",
+ v16.fail_msg);
if (!cfg) {
fail_msg =
"Couldn't generate SIMD16 program and not "
if (unlikely(cfg == NULL)) {
assert(fail_msg);
- prog->LinkStatus = false;
- ralloc_strcat(&prog->InfoLog, fail_msg);
- _mesa_problem(NULL, "Failed to compile compute shader: %s\n",
- fail_msg);
+ if (error_str)
+ *error_str = ralloc_strdup(mem_ctx, fail_msg);
+
return NULL;
}
- fs_generator g(brw->intelScreen->compiler, brw,
- mem_ctx, (void*) key, &prog_data->base, &cp->Base,
+ fs_generator g(compiler, log_data, mem_ctx, (void*) key, &prog_data->base,
v8.promoted_constants, v8.runtime_check_aads_emit, "CS");
if (INTEL_DEBUG & DEBUG_CS) {
- char *name = ralloc_asprintf(mem_ctx, "%s compute shader %d",
- prog->Label ? prog->Label : "unnamed",
- prog->Name);
+ char *name = ralloc_asprintf(mem_ctx, "%s compute shader %s",
+ shader->info.label ? shader->info.label :
+ "unnamed",
+ shader->info.name);
g.enable_debug(name);
}