info->num_outputs = num_outputs;
+ struct set *ubo_set = _mesa_set_create(NULL, _mesa_hash_pointer,
+ _mesa_key_pointer_equal);
+
+ /* Intialise const_file_max[0] */
+ info->const_file_max[0] = -1;
+
+ unsigned ubo_idx = 1;
nir_foreach_variable(variable, &nir->uniforms) {
const struct glsl_type *type = variable->type;
enum glsl_base_type base_type =
glsl_get_base_type(glsl_without_array(type));
unsigned aoa_size = MAX2(1, glsl_get_aoa_size(type));
+ /* Gather buffers declared bitmasks. Note: radeonsi doesn't
+ * really use the mask (other than ubo_idx == 1 for regular
+ * uniforms) its really only used for getting the buffer count
+ * so we don't need to worry about the ordering.
+ */
+ if (variable->interface_type != NULL) {
+ if (variable->data.mode == nir_var_uniform) {
+
+ unsigned block_count;
+ if (base_type != GLSL_TYPE_INTERFACE) {
+ struct set_entry *entry =
+ _mesa_set_search(ubo_set, variable->interface_type);
+
+ /* Check if we have already processed
+ * a member from this ubo.
+ */
+ if (entry)
+ continue;
+
+ block_count = 1;
+ } else {
+ block_count = aoa_size;
+ }
+
+ info->const_buffers_declared |= u_bit_consecutive(ubo_idx, block_count);
+ ubo_idx += block_count;
+
+ _mesa_set_add(ubo_set, variable->interface_type);
+ }
+
+ if (variable->data.mode == nir_var_shader_storage) {
+ /* TODO: make this more accurate */
+ info->shader_buffers_declared =
+ u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
+ }
+
+ continue;
+ }
+
/* We rely on the fact that nir_lower_samplers_as_deref has
* eliminated struct dereferences.
*/
- if (base_type == GLSL_TYPE_SAMPLER)
+ if (base_type == GLSL_TYPE_SAMPLER) {
info->samplers_declared |=
u_bit_consecutive(variable->data.binding, aoa_size);
- else if (base_type == GLSL_TYPE_IMAGE)
+
+ if (variable->data.bindless) {
+ info->const_buffers_declared |= 1;
+ info->const_file_max[0] +=
+ glsl_count_attribute_slots(type, false);
+ }
+ } else if (base_type == GLSL_TYPE_IMAGE) {
info->images_declared |=
u_bit_consecutive(variable->data.binding, aoa_size);
+
+ if (variable->data.bindless) {
+ info->const_buffers_declared |= 1;
+ info->const_file_max[0] +=
+ glsl_count_attribute_slots(type, false);
+ }
+ } else if (base_type != GLSL_TYPE_ATOMIC_UINT) {
+ if (strncmp(variable->name, "state.", 6) == 0 ||
+ strncmp(variable->name, "gl_", 3) == 0) {
+ /* FIXME: figure out why piglit tests with builtin
+ * uniforms are failing without this.
+ */
+ info->const_buffers_declared =
+ u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
+ } else {
+ info->const_buffers_declared |= 1;
+ info->const_file_max[0] +=
+ glsl_count_attribute_slots(type, false);
+ }
+ }
}
+ _mesa_set_destroy(ubo_set, NULL);
+
info->num_written_clipdistance = nir->info.clip_distance_array_size;
info->num_written_culldistance = nir->info.cull_distance_array_size;
info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
if (info->processor == PIPE_SHADER_FRAGMENT)
info->uses_kill = nir->info.fs.uses_discard;
- /* TODO make this more accurate */
- info->const_buffers_declared = u_bit_consecutive(0, SI_NUM_CONST_BUFFERS);
- info->shader_buffers_declared = u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
-
func = (struct nir_function *)exec_list_get_head_const(&nir->functions);
nir_foreach_block(block, func->impl) {
nir_foreach_instr(instr, block)
void
si_lower_nir(struct si_shader_selector* sel)
{
+ /* Disable const buffer fast path for old LLVM versions */
+ if (sel->screen->info.chip_class == SI && HAVE_LLVM < 0x0600 &&
+ sel->info.const_buffers_declared == 1 &&
+ sel->info.shader_buffers_declared == 0) {
+ sel->info.const_buffers_declared |= 0x2;
+ }
+
/* Adjust the driver location of inputs and outputs. The state tracker
* interprets them as slots, while the ac/nir backend interprets them
* as individual components.