return nir_deref_instr_get_variable(nir_src_as_deref(instr->src[0]));
}
-static void gather_intrinsic_load_deref_info(const nir_shader *nir,
- const nir_intrinsic_instr *instr,
- nir_variable *var,
- struct tgsi_shader_info *info)
+static void gather_intrinsic_load_deref_input_info(const nir_shader *nir,
+ const nir_intrinsic_instr *instr,
+ nir_variable *var,
+ struct tgsi_shader_info *info)
{
assert(var && var->data.mode == nir_var_shader_in);
case MESA_SHADER_VERTEX: {
unsigned i = var->data.driver_location;
unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
+ uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
for (unsigned j = 0; j < attrib_count; j++, i++) {
if (glsl_type_is_64bit(glsl_without_array(var->type))) {
- /* TODO: set usage mask more accurately for doubles */
- info->input_usage_mask[i] = TGSI_WRITEMASK_XYZW;
+ unsigned dmask = mask;
+
+ if (glsl_type_is_dual_slot(glsl_without_array(var->type)) && j % 2)
+ dmask >>= 2;
+
+ dmask <<= var->data.location_frac / 2;
+
+ if (dmask & 0x1)
+ info->input_usage_mask[i] |= TGSI_WRITEMASK_XY;
+ if (dmask & 0x2)
+ info->input_usage_mask[i] |= TGSI_WRITEMASK_ZW;
} else {
- uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
- info->input_usage_mask[i] |= mask << var->data.location_frac;
+ info->input_usage_mask[i] |=
+ (mask << var->data.location_frac) & 0xf;
}
}
break;
}
- default: {
- unsigned semantic_name, semantic_index;
- tgsi_get_gl_varying_semantic(var->data.location, true,
- &semantic_name, &semantic_index);
-
- if (semantic_name == TGSI_SEMANTIC_COLOR) {
+ case MESA_SHADER_FRAGMENT:
+ if (var->data.location == VARYING_SLOT_COL0 ||
+ var->data.location == VARYING_SLOT_COL1) {
+ unsigned index = var->data.location == VARYING_SLOT_COL1;
uint8_t mask = nir_ssa_def_components_read(&instr->dest.ssa);
- info->colors_read |= mask << (semantic_index * 4);
+ info->colors_read |= mask << (index * 4);
}
break;
+ default:;
}
+}
+
+/* Gather shader-info flags for a load_deref intrinsic that reads a
+ * shader *output* (nir_var_shader_out). Only TCS and FS care about
+ * output reads here; all other stages fall through to the empty default.
+ */
+static void gather_intrinsic_load_deref_output_info(const nir_shader *nir,
+ const nir_intrinsic_instr *instr,
+ nir_variable *var,
+ struct tgsi_shader_info *info)
+{
+ assert(var && var->data.mode == nir_var_shader_out);
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_TESS_CTRL:
+ /* Classify which class of TCS output is read back: tess factors,
+ * per-patch outputs, or per-vertex outputs. */
+ if (var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
+ var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
+ info->reads_tessfactor_outputs = true;
+ else if (var->data.patch)
+ info->reads_perpatch_outputs = true;
+ else
+ info->reads_pervertex_outputs = true;
+ break;
+
+ case MESA_SHADER_FRAGMENT:
+ /* Reading an FS output marked fb_fetch_output means framebuffer
+ * fetch (e.g. KHR_blend_equation_advanced) is in use. */
+ if (var->data.fb_fetch_output)
+ info->uses_fbfetch = true;
+ break;
+ default:;
+ }
+}
+
+/* Gather per-slot output usage masks for a store_deref intrinsic that
+ * writes a shader output (nir_var_shader_out). Only stages whose
+ * outputs feed a following stage (VS as LS/ES, TES as ES, GS) need
+ * output_usagemask; other stages hit the empty default.
+ */
+static void gather_intrinsic_store_deref_output_info(const nir_shader *nir,
+ const nir_intrinsic_instr *instr,
+ nir_variable *var,
+ struct tgsi_shader_info *info)
+{
+ assert(var && var->data.mode == nir_var_shader_out);
+
+ switch (nir->info.stage) {
+ case MESA_SHADER_VERTEX: /* needed by LS, ES */
+ case MESA_SHADER_TESS_EVAL: /* needed by ES */
+ case MESA_SHADER_GEOMETRY: {
+ unsigned i = var->data.driver_location;
+ unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
+ unsigned mask = nir_intrinsic_write_mask(instr);
+
+ /* Compact (e.g. clip-distance-style) variables are not expected
+ * here; their slot/component accounting would differ. */
+ assert(!var->data.compact);
+
+ for (unsigned j = 0; j < attrib_count; j++, i++) {
+ if (glsl_type_is_64bit(glsl_without_array(var->type))) {
+ /* A 64-bit component occupies two 32-bit components (XY or
+ * ZW), so translate the NIR write mask accordingly. */
+ unsigned dmask = mask;
+
+ /* Dual-slot (dvec3/dvec4) types: odd slots hold the upper
+ * components, so drop the low two mask bits for them. */
+ if (glsl_type_is_dual_slot(glsl_without_array(var->type)) && j % 2)
+ dmask >>= 2;
+
+ /* location_frac counts 32-bit components; one 64-bit mask bit
+ * covers two of them. */
+ dmask <<= var->data.location_frac / 2;
+
+ if (dmask & 0x1)
+ info->output_usagemask[i] |= TGSI_WRITEMASK_XY;
+ if (dmask & 0x2)
+ info->output_usagemask[i] |= TGSI_WRITEMASK_ZW;
+ } else {
+ /* 32-bit case: shift by the start component and clamp to the
+ * 4-component XYZW mask. */
+ info->output_usagemask[i] |=
+ (mask << var->data.location_frac) & 0xf;
+ }
+
+ }
+ break;
+ }
+ default:;
}
}
info->uses_bindless_image_store = true;
info->writes_memory = true;
+ info->num_memory_instructions++; /* we only care about stores */
break;
case nir_intrinsic_image_deref_store:
info->writes_memory = true;
+ info->num_memory_instructions++; /* we only care about stores */
break;
case nir_intrinsic_bindless_image_atomic_add:
case nir_intrinsic_bindless_image_atomic_min:
info->uses_bindless_image_atomic = true;
info->writes_memory = true;
+ info->num_memory_instructions++; /* we only care about stores */
break;
case nir_intrinsic_image_deref_atomic_add:
case nir_intrinsic_image_deref_atomic_min:
case nir_intrinsic_image_deref_atomic_exchange:
case nir_intrinsic_image_deref_atomic_comp_swap:
info->writes_memory = true;
+ info->num_memory_instructions++; /* we only care about stores */
break;
case nir_intrinsic_store_ssbo:
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_ssbo_atomic_comp_swap:
info->writes_memory = true;
+ info->num_memory_instructions++; /* we only care about stores */
break;
case nir_intrinsic_load_deref: {
nir_variable *var = intrinsic_get_var(intr);
glsl_get_base_type(glsl_without_array(var->type));
if (mode == nir_var_shader_in) {
- gather_intrinsic_load_deref_info(nir, intr, var, info);
+ gather_intrinsic_load_deref_input_info(nir, intr, var, info);
switch (var->data.interpolation) {
case INTERP_MODE_NONE:
info->uses_linear_center = true;
break;
}
+ } else if (mode == nir_var_shader_out) {
+ gather_intrinsic_load_deref_output_info(nir, intr, var, info);
}
break;
}
+ case nir_intrinsic_store_deref: {
+ nir_variable *var = intrinsic_get_var(intr);
+
+ if (var->data.mode == nir_var_shader_out)
+ gather_intrinsic_store_deref_output_info(nir, intr, var, info);
+ break;
+ }
case nir_intrinsic_interp_deref_at_centroid:
case nir_intrinsic_interp_deref_at_sample:
case nir_intrinsic_interp_deref_at_offset: {
if (semantic_name == TGSI_SEMANTIC_PRIMID)
info->uses_primid = true;
+ if (semantic_name == TGSI_SEMANTIC_COLOR) {
+ /* We only need this for color inputs. */
+ if (variable->data.sample)
+ info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_SAMPLE;
+ else if (variable->data.centroid)
+ info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTROID;
+ else
+ info->input_interpolate_loc[i] = TGSI_INTERPOLATE_LOC_CENTER;
+ }
+
enum glsl_base_type base_type =
glsl_get_base_type(glsl_without_array(variable->type));
unsigned streamw = (gs_out_streams >> 6) & 3;
if (usagemask & TGSI_WRITEMASK_X) {
- info->output_usagemask[i] |= TGSI_WRITEMASK_X;
info->output_streams[i] |= streamx;
info->num_stream_output_components[streamx]++;
}
if (usagemask & TGSI_WRITEMASK_Y) {
- info->output_usagemask[i] |= TGSI_WRITEMASK_Y;
info->output_streams[i] |= streamy << 2;
info->num_stream_output_components[streamy]++;
}
if (usagemask & TGSI_WRITEMASK_Z) {
- info->output_usagemask[i] |= TGSI_WRITEMASK_Z;
info->output_streams[i] |= streamz << 4;
info->num_stream_output_components[streamz]++;
}
if (usagemask & TGSI_WRITEMASK_W) {
- info->output_usagemask[i] |= TGSI_WRITEMASK_W;
info->output_streams[i] |= streamw << 6;
info->num_stream_output_components[streamw]++;
}
info->writes_position = true;
break;
}
-
- if (nir->info.stage == MESA_SHADER_TESS_CTRL) {
- switch (semantic_name) {
- case TGSI_SEMANTIC_PATCH:
- info->reads_perpatch_outputs = true;
- break;
- case TGSI_SEMANTIC_TESSINNER:
- case TGSI_SEMANTIC_TESSOUTER:
- info->reads_tessfactor_outputs = true;
- break;
- default:
- info->reads_pervertex_outputs = true;
- }
- }
}
unsigned loc = variable->data.location;
/* We rely on the fact that nir_lower_samplers_as_deref has
* eliminated struct dereferences.
*/
- if (base_type == GLSL_TYPE_SAMPLER) {
- if (variable->data.bindless) {
- info->const_buffers_declared |= 1;
- info->const_file_max[0] = max_slot;
- } else {
- info->samplers_declared |=
- u_bit_consecutive(variable->data.binding, aoa_size);
- }
- } else if (base_type == GLSL_TYPE_IMAGE) {
- if (variable->data.bindless) {
- info->const_buffers_declared |= 1;
- info->const_file_max[0] = max_slot;
- } else {
- info->images_declared |=
- u_bit_consecutive(variable->data.binding, aoa_size);
- }
+ if (base_type == GLSL_TYPE_SAMPLER && !variable->data.bindless) {
+ info->samplers_declared |=
+ u_bit_consecutive(variable->data.binding, aoa_size);
+ } else if (base_type == GLSL_TYPE_IMAGE && !variable->data.bindless) {
+ info->images_declared |=
+ u_bit_consecutive(variable->data.binding, aoa_size);
} else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
info->const_buffers_declared |= 1;
info->const_file_max[0] = max_slot;
* selector is created.
*/
void
-si_lower_nir(struct si_shader_selector* sel)
+si_lower_nir(struct si_shader_selector* sel, unsigned wave_size)
{
/* Adjust the driver location of inputs and outputs. The state tracker
* interprets them as slots, while the ac/nir backend interprets them
NIR_PASS_V(sel->nir, nir_lower_tex, &lower_tex_options);
const nir_lower_subgroups_options subgroups_options = {
- .subgroup_size = 64,
- .ballot_bit_size = 64,
+ .subgroup_size = wave_size,
+ .ballot_bit_size = wave_size,
.lower_to_scalar = true,
.lower_subgroup_masks = true,
.lower_vote_trivial = false,
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
LLVMBuilderRef builder = ctx->ac.builder;
unsigned const_index = base_index + constant_index;
- bool dcc_off = write;
assert(!descriptor_set);
assert(!image || desc_type == AC_DESC_IMAGE || desc_type == AC_DESC_BUFFER);
LLVMConstInt(ctx->i64, 2, 0), "");
return si_load_image_desc(ctx, list, dynamic_index, desc_type,
- dcc_off, true);
+ write, true);
}
/* Since bindless handle arithmetic can contain an unsigned integer
index = LLVMBuildSub(ctx->ac.builder,
LLVMConstInt(ctx->i32, SI_NUM_IMAGES - 1, 0),
index, "");
- return si_load_image_desc(ctx, list, index, desc_type, dcc_off, false);
+ return si_load_image_desc(ctx, list, index, desc_type, write, false);
}
index = LLVMBuildAdd(ctx->ac.builder, index,