}
static void
-st_nir_assign_var_locations(struct exec_list *var_list, unsigned *size)
+st_nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
+ gl_shader_stage stage)
{
unsigned location = 0;
- unsigned assigned_locations[VARYING_SLOT_MAX];
+ unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
uint64_t processed_locs = 0;
+ uint32_t processed_patch_locs = 0;
nir_foreach_variable(var, var_list) {
+
+ const struct glsl_type *type = var->type;
+ if (nir_is_per_vertex_io(var, stage)) {
+ assert(glsl_type_is_array(type));
+ type = glsl_get_array_element(type);
+ }
+
+ bool processed = false;
+ if (var->data.patch &&
+ var->data.location != VARYING_SLOT_TESS_LEVEL_INNER &&
+ var->data.location != VARYING_SLOT_TESS_LEVEL_OUTER &&
+ var->data.location != VARYING_SLOT_BOUNDING_BOX0 &&
+ var->data.location != VARYING_SLOT_BOUNDING_BOX1) {
+ unsigned patch_loc = var->data.location - VARYING_SLOT_PATCH0;
+ if (processed_patch_locs & (1 << patch_loc))
+ processed = true;
+
+ processed_patch_locs |= (1 << patch_loc);
+ } else {
+ if (processed_locs & ((uint64_t)1 << var->data.location))
+ processed = true;
+
+ processed_locs |= ((uint64_t)1 << var->data.location);
+ }
+
/* Because component packing allows varyings to share the same location
* we may have already processed this location.
*/
- if (var->data.location >= VARYING_SLOT_VAR0 &&
- processed_locs & ((uint64_t)1 << var->data.location)) {
+ if (processed && var->data.location >= VARYING_SLOT_VAR0) {
var->data.driver_location = assigned_locations[var->data.location];
- *size += type_size(var->type);
+ *size += type_size(type);
continue;
}
assigned_locations[var->data.location] = location;
var->data.driver_location = location;
- location += type_size(var->type);
-
- processed_locs |= ((uint64_t)1 << var->data.location);
+ location += type_size(type);
}
*size += location;
do {
progress = false;
+ NIR_PASS_V(nir, nir_lower_64bit_pack);
NIR_PASS(progress, nir, nir_copy_prop);
NIR_PASS(progress, nir, nir_opt_remove_phis);
NIR_PASS(progress, nir, nir_opt_dce);
stcp = (struct st_compute_program *)prog;
stcp->shader_program = shader_program;
stcp->tgsi.ir_type = PIPE_SHADER_IR_NIR;
- stcp->tgsi.prog = nir_shader_clone(NULL, nir);
+ stcp->tgsi.prog = nir;
break;
default:
unreachable("unknown shader stage");
set_st_program(prog, shader_program, nir);
prog->nir = nir;
- NIR_PASS_V(nir, nir_lower_io_to_temporaries,
- nir_shader_get_entrypoint(nir),
- true, true);
+ if (nir->info.stage != MESA_SHADER_TESS_CTRL &&
+ nir->info.stage != MESA_SHADER_TESS_EVAL) {
+ NIR_PASS_V(nir, nir_lower_io_to_temporaries,
+ nir_shader_get_entrypoint(nir),
+ true, true);
+ }
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_lower_var_copies);
}
+static void
+st_nir_link_shaders(nir_shader **producer, nir_shader **consumer)
+{ /* Runs the link-time NIR passes below on one producer/consumer shader
+ * pair. producer is the shader whose outputs (nir_var_shader_out) are
+ * processed, consumer the one whose inputs (nir_var_shader_in) are
+ * processed. Both are passed as pointers to the nir_shader pointer.
+ */
+ nir_lower_io_arrays_to_elements(*producer, *consumer);
+ /* Drop the producer's dead output variables and the consumer's dead
+ * input variables before looking for unused varyings.
+ */
+ NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
+ NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);
+
+ if (nir_remove_unused_varyings(*producer, *consumer)) {
+ NIR_PASS_V(*producer, nir_lower_global_vars_to_local);
+ NIR_PASS_V(*consumer, nir_lower_global_vars_to_local);
+
+ /* The backend might not be able to handle indirects on
+ * temporaries so we need to lower indirects on any of the
+ * varyings we have demoted here.
+ *
+ * NOTE(review): this branch only runs when
+ * nir_remove_unused_varyings() returns true, presumably meaning it
+ * demoted at least one varying -- confirm against its definition.
+ * The mask below holds only nir_var_local.
+ *
+ * TODO: radeonsi shouldn't need to do this, however LLVM isn't
+ * currently smart enough to handle indirects without causing excess
+ * spilling, which in turn causes the GPU to hang.
+ *
+ * See the following thread for more details of the problem:
+ * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
+ */
+ nir_variable_mode indirect_mask = nir_var_local;
+
+ NIR_PASS_V(*producer, nir_lower_indirect_derefs, indirect_mask);
+ NIR_PASS_V(*consumer, nir_lower_indirect_derefs, indirect_mask);
+
+ st_nir_opts(*producer);
+ st_nir_opts(*consumer);
+ }
+}
+
extern "C" {
bool
{
struct st_context *st = st_context(ctx);
+ /* Determine first and last stage. */
+ unsigned first = MESA_SHADER_STAGES;
+ unsigned last = 0;
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (!shader_program->_LinkedShaders[i])
+ continue;
+ if (first == MESA_SHADER_STAGES)
+ first = i;
+ last = i;
+ }
+
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
if (shader == NULL)
continue;
st_nir_get_mesa_program(ctx, shader_program, shader);
+
+ nir_variable_mode mask = (nir_variable_mode) 0;
+ if (i != first)
+ mask = (nir_variable_mode)(mask | nir_var_shader_in);
+
+ if (i != last)
+ mask = (nir_variable_mode)(mask | nir_var_shader_out);
+
+ nir_shader *nir = shader->Program->nir;
+ nir_lower_io_to_scalar_early(nir, mask);
+ st_nir_opts(nir);
+ }
+
+ /* Linking the stages in the opposite order (from fragment to vertex)
+ * ensures that inter-shader outputs written to in an earlier stage
+ * are eliminated if they are (transitively) not used in a later
+ * stage.
+ */
+ int next = last;
+ for (int i = next - 1; i >= 0; i--) {
+ struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
+ if (shader == NULL)
+ continue;
+
+ st_nir_link_shaders(&shader->Program->nir,
+ &shader_program->_LinkedShaders[next]->Program->nir);
+ next = i;
}
+ int prev = -1;
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
if (shader == NULL)
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
shader->Program->info = nir->info;
+ if (prev != -1) {
+ nir_compact_varyings(shader_program->_LinkedShaders[prev]->Program->nir,
+ nir, ctx->API != API_OPENGL_COMPAT);
+ }
+ prev = i;
+ }
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct gl_linked_shader *shader = shader_program->_LinkedShaders[i];
+ if (shader == NULL)
+ continue;
+
st_glsl_to_nir_post_opts(st, shader->Program, shader_program);
assert(shader->Program);
NIR_PASS_V(nir, nir_split_var_copies);
NIR_PASS_V(nir, nir_lower_var_copies);
- NIR_PASS_V(nir, nir_lower_io_types);
+ if (nir->info.stage != MESA_SHADER_TESS_CTRL &&
+ nir->info.stage != MESA_SHADER_TESS_EVAL)
+ NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects);
if (nir->info.stage == MESA_SHADER_VERTEX) {
/* Needs special handling so drvloc matches the vbo state: */
sort_varyings(&nir->outputs);
st_nir_assign_var_locations(&nir->outputs,
- &nir->num_outputs);
+ &nir->num_outputs,
+ nir->info.stage);
+ st_nir_fixup_varying_slots(st, &nir->outputs);
+ } else if (nir->info.stage == MESA_SHADER_GEOMETRY ||
+ nir->info.stage == MESA_SHADER_TESS_CTRL ||
+ nir->info.stage == MESA_SHADER_TESS_EVAL) {
+ sort_varyings(&nir->inputs);
+ st_nir_assign_var_locations(&nir->inputs,
+ &nir->num_inputs,
+ nir->info.stage);
+ st_nir_fixup_varying_slots(st, &nir->inputs);
+
+ sort_varyings(&nir->outputs);
+ st_nir_assign_var_locations(&nir->outputs,
+ &nir->num_outputs,
+ nir->info.stage);
st_nir_fixup_varying_slots(st, &nir->outputs);
} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
sort_varyings(&nir->inputs);
st_nir_assign_var_locations(&nir->inputs,
- &nir->num_inputs);
+ &nir->num_inputs,
+ nir->info.stage);
st_nir_fixup_varying_slots(st, &nir->inputs);
st_nir_assign_var_locations(&nir->outputs,
- &nir->num_outputs);
+ &nir->num_outputs,
+ nir->info.stage);
} else if (nir->info.stage == MESA_SHADER_COMPUTE) {
/* TODO? */
} else {