vk/0.210.0: Use VkSampleCountFlagBits for sample counts
[mesa.git] / src / vulkan / anv_nir_apply_dynamic_offsets.c
index 1f6c64a9e02507ef115c177f56f7af8d4167be19..dd4f5dfe5453cbdd6aaac83d0e543ccadd9f9aaf 100644
@@ -39,7 +39,8 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state)
 {
    struct apply_dynamic_offsets_state *state = void_state;
    struct anv_descriptor_set_layout *set_layout;
-   const struct anv_descriptor_slot *slot;
+
+   nir_builder *b = &state->builder;
 
    nir_foreach_instr_safe(block, instr) {
       if (instr->type != nir_instr_type_intrinsic)
@@ -47,68 +48,160 @@ apply_dynamic_offsets_block(nir_block *block, void *void_state)
 
       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
 
-      bool has_indirect = false;
-      uint32_t set, binding;
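+      /* Find the source that holds the block index: src[0] for loads,
+       * src[1] for SSBO stores (src[0] there is the value being stored).
+       */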
+      unsigned block_idx_src;
       switch (intrin->intrinsic) {
+      case nir_intrinsic_load_ubo:
       case nir_intrinsic_load_ubo_indirect:
-         has_indirect = true;
-         /* fallthrough */
-      case nir_intrinsic_load_ubo: {
-         set = intrin->const_index[0];
-
-         nir_const_value *const_binding = nir_src_as_const_value(intrin->src[0]);
-         if (const_binding) {
-            binding = const_binding->u[0];
-         } else {
-            assert(0 && "need more info from the ir for this.");
-         }
+      case nir_intrinsic_load_ssbo:
+      case nir_intrinsic_load_ssbo_indirect:
+         block_idx_src = 0;
+         break;
+      case nir_intrinsic_store_ssbo:
+      case nir_intrinsic_store_ssbo_indirect:
+         block_idx_src = 1;
          break;
-      }
       default:
          continue; /* the loop */
       }
 
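+      /* The block index must come from a vulkan_resource_index intrinsic;
+       * chase it to recover the descriptor set and binding.
+       */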
+      nir_instr *res_instr = intrin->src[block_idx_src].ssa->parent_instr;
+      assert(res_instr->type == nir_instr_type_intrinsic);
+      nir_intrinsic_instr *res_intrin = nir_instr_as_intrinsic(res_instr);
+      assert(res_intrin->intrinsic == nir_intrinsic_vulkan_resource_index);
+
+      unsigned set = res_intrin->const_index[0];
+      unsigned binding = res_intrin->const_index[1];
+
       set_layout = state->layout->set[set].layout;
-      slot = &set_layout->stage[state->stage].surface_start[binding];
-      if (slot->dynamic_slot < 0)
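+      /* A dynamic_offset_index < 0 means the binding has no dynamic
+       * offset, so leave those accesses alone.
+       */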
+      if (set_layout->binding[binding].dynamic_offset_index < 0)
          continue;
 
-      uint32_t dynamic_index = state->layout->set[set].dynamic_offset_start +
-                               slot->dynamic_slot;
+      b->cursor = nir_before_instr(&intrin->instr);
+
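+      /* Find the source holding the indirect offset, if any: src[1] for
+       * indirect loads, src[2] for indirect stores, or -1 if the access
+       * is direct.
+       */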
+      int indirect_src;
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_ubo_indirect:
+      case nir_intrinsic_load_ssbo_indirect:
+         indirect_src = 1;
+         break;
+      case nir_intrinsic_store_ssbo_indirect:
+         indirect_src = 2;
+         break;
+      default:
+         indirect_src = -1;
+         break;
+      }
+
+      /* First, generate the uniform load for the buffer offset and range */
+      uint32_t index = state->layout->set[set].dynamic_offset_start +
+                       set_layout->binding[binding].dynamic_offset_index;
 
-      state->builder.cursor = nir_before_instr(&intrin->instr);
+      nir_const_value *const_arr_idx =
+         nir_src_as_const_value(res_intrin->src[0]);
+
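+      /* A constant array index lets us use a direct uniform load;
+       * otherwise we need the indirect variant with a computed offset.
+       */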
+      nir_intrinsic_op offset_load_op;
+      if (const_arr_idx)
+         offset_load_op = nir_intrinsic_load_uniform;
+      else
+         offset_load_op = nir_intrinsic_load_uniform_indirect;
 
       nir_intrinsic_instr *offset_load =
-         nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform);
-      offset_load->num_components = 1;
-      offset_load->const_index[0] = state->indices_start + dynamic_index;
-      offset_load->const_index[1] = 0;
-      nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL);
-      nir_builder_instr_insert(&state->builder, &offset_load->instr);
-
-      nir_ssa_def *offset = &offset_load->dest.ssa;
-      if (has_indirect) {
-         assert(intrin->src[1].is_ssa);
-         offset = nir_iadd(&state->builder, intrin->src[1].ssa, offset);
+         nir_intrinsic_instr_create(state->shader, offset_load_op);
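+      /* Each dynamic buffer occupies two uniform slots: the offset in
+       * component 0 and the range in component 1, hence the factors of
+       * two below.
+       */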
+      offset_load->num_components = 2;
+      offset_load->const_index[0] = state->indices_start + index * 2;
+
+      if (const_arr_idx) {
+         offset_load->const_index[1] = const_arr_idx->u[0] * 2;
+      } else {
+         offset_load->const_index[1] = 0;
+         offset_load->src[0] = nir_src_for_ssa(
+            nir_imul(b, nir_ssa_for_src(b, res_intrin->src[0], 1),
+                     nir_imm_int(b, 2)));
       }
 
-      assert(intrin->dest.is_ssa);
-
-      nir_intrinsic_instr *new_load =
-         nir_intrinsic_instr_create(state->shader,
-                                    nir_intrinsic_load_ubo_indirect);
-      new_load->num_components = intrin->num_components;
-      new_load->const_index[0] = intrin->const_index[0];
-      new_load->const_index[1] = intrin->const_index[1];
-      nir_src_copy(&new_load->src[0], &intrin->src[0], &new_load->instr);
-      new_load->src[1] = nir_src_for_ssa(offset);
-      nir_ssa_dest_init(&new_load->instr, &new_load->dest,
+      nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, NULL);
+      nir_builder_instr_insert(b, &offset_load->instr);
+
+      /* We compute the full offset (constant base plus any indirect part)
+       * up front rather than keeping a separate base offset; the predicate
+       * below needs the full offset anyway.
+       */
+      nir_ssa_def *rel_offset = nir_imm_int(b, intrin->const_index[0]);
+      if (indirect_src >= 0) {
+         assert(intrin->src[indirect_src].is_ssa);
+         rel_offset = nir_iadd(b, intrin->src[indirect_src].ssa, rel_offset);
+      }
+      nir_ssa_def *global_offset =
+         nir_iadd(b, rel_offset, nir_channel(b, &offset_load->dest.ssa, 0));
+
+      /* Now we replace the load/store intrinsic */
+
+      nir_intrinsic_op indirect_op;
+      switch (intrin->intrinsic) {
+      case nir_intrinsic_load_ubo:
+      case nir_intrinsic_load_ubo_indirect:
+         indirect_op = nir_intrinsic_load_ubo_indirect;
+         break;
+      case nir_intrinsic_load_ssbo:
+      case nir_intrinsic_load_ssbo_indirect:
+         indirect_op = nir_intrinsic_load_ssbo_indirect;
+         break;
+      case nir_intrinsic_store_ssbo:
+      case nir_intrinsic_store_ssbo_indirect:
+         indirect_op = nir_intrinsic_store_ssbo_indirect;
+         break;
+      default:
+         unreachable("Invalid load/store intrinsic");
+      }
+
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(state->shader, indirect_op);
+      copy->num_components = intrin->num_components;
+
+      /* The indirect is always the last source */
+      indirect_src = nir_intrinsic_infos[indirect_op].num_srcs - 1;
+
+      for (unsigned i = 0; i < (unsigned)indirect_src; i++)
+         nir_src_copy(&copy->src[i], &intrin->src[i], &copy->instr);
+
+      copy->src[indirect_src] = nir_src_for_ssa(global_offset);
+      nir_ssa_dest_init(&copy->instr, &copy->dest,
                         intrin->dest.ssa.num_components,
                         intrin->dest.ssa.name);
-      nir_builder_instr_insert(&state->builder, &new_load->instr);
 
-      nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                               nir_src_for_ssa(&new_load->dest.ssa));
+      /* To avoid out-of-bounds access, predicate the load/store on the
+       * relative offset lying within the buffer's dynamic range.  The
+       * offsets are unsigned, so use an unsigned comparison.
+       */
+      nir_ssa_def *pred = nir_uge(b, nir_channel(b, &offset_load->dest.ssa, 1),
+                                  rel_offset);
+      nir_if *if_stmt = nir_if_create(b->shader);
+      if_stmt->condition = nir_src_for_ssa(pred);
+      nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
+
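+      /* The guarded load/store goes in the then-block of the new if */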
+      nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr);
+
+      if (indirect_op != nir_intrinsic_store_ssbo_indirect) {
+         /* It's a load; we need a phi so the destination is defined on
+          * both paths, with out-of-bounds loads yielding zero.
+          */
+         nir_phi_instr *phi = nir_phi_instr_create(b->shader);
+         nir_ssa_dest_init(&phi->instr, &phi->dest,
+                           intrin->num_components, NULL);
+
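+         /* First phi source: the loaded value from the then-block. */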
+         nir_phi_src *src1 = ralloc(phi, nir_phi_src);
+         struct exec_node *tnode = exec_list_get_tail(&if_stmt->then_list);
+         src1->pred = exec_node_data(nir_block, tnode, cf_node.node);
+         src1->src = nir_src_for_ssa(&copy->dest.ssa);
+         exec_list_push_tail(&phi->srcs, &src1->node);
+
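+         /* Second phi source: zero, materialized in the else-block. */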
+         b->cursor = nir_after_cf_list(&if_stmt->else_list);
+         nir_ssa_def *zero = nir_build_imm(b, intrin->num_components,
+            (nir_const_value) { .u = { 0, 0, 0, 0 } });
+
+         nir_phi_src *src2 = ralloc(phi, nir_phi_src);
+         struct exec_node *enode = exec_list_get_tail(&if_stmt->else_list);
+         src2->pred = exec_node_data(nir_block, enode, cf_node.node);
+         src2->src = nir_src_for_ssa(zero);
+         exec_list_push_tail(&phi->srcs, &src2->node);
+
+         nir_instr_insert_after_cf(&if_stmt->cf_node, &phi->instr);
+
+         assert(intrin->dest.is_ssa);
+         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                  nir_src_for_ssa(&phi->dest.ssa));
+      }
 
       nir_instr_remove(&intrin->instr);
    }
@@ -141,9 +234,12 @@ anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline,
    }
 
    struct anv_push_constants *null_data = NULL;
-   for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++)
-      prog_data->param[i + shader->num_uniforms] =
-         (const gl_constant_value *)&null_data->dynamic_offsets[i];
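+   /* null_data is a NULL pointer used only to compute the offsets of the
+    * push-constant fields (the offsetof idiom).  Each dynamic buffer
+    * contributes two params: its offset followed by its range.
+    */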
+   for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) {
+      prog_data->param[i * 2 + shader->num_uniforms] =
+         (const gl_constant_value *)&null_data->dynamic[i].offset;
+      prog_data->param[i * 2 + 1 + shader->num_uniforms] =
+         (const gl_constant_value *)&null_data->dynamic[i].range;
+   }
 
-   shader->num_uniforms += MAX_DYNAMIC_BUFFERS;
+   shader->num_uniforms += MAX_DYNAMIC_BUFFERS * 2;
 }