iris: Patch constant data pointers into shaders
author: Jason Ekstrand <jason@jlekstrand.net>
Wed, 12 Aug 2020 00:43:17 +0000 (19:43 -0500)
committer: Marge Bot <eric+marge@anholt.net>
Wed, 2 Sep 2020 19:48:44 +0000 (19:48 +0000)
Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6244>

src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_disk_cache.c
src/gallium/drivers/iris/iris_program.c
src/gallium/drivers/iris/iris_program_cache.c
src/gallium/drivers/iris/iris_state.c

index 8dc64f5d4be732a267f2486b90e8bc2b10c59f8f..d8b7bd90e047482917baf572e1c7ac7d0acdd2f9 100644 (file)
@@ -56,6 +56,11 @@ enum iris_param_domain {
    BRW_PARAM_DOMAIN_IMAGE,
 };
 
+enum iris_shader_reloc {
+   IRIS_SHADER_RELOC_CONST_DATA_ADDR_LOW,
+   IRIS_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
+};
+
 enum {
    DRI_CONF_BO_REUSE_DISABLED,
    DRI_CONF_BO_REUSE_ALL
@@ -380,12 +385,6 @@ struct iris_uncompiled_shader {
 
    /** Size (in bytes) of the kernel input data */
    unsigned kernel_input_size;
-
-   /** Constant data scraped from the shader by nir_opt_large_constants */
-   struct pipe_resource *const_data;
-
-   /** Surface state for const_data */
-   struct iris_state_ref const_data_state;
 };
 
 enum iris_surface_group {
index 0383512b2953d76a9b8ee0a9b71bca0092f6b06c..6e8abe20ce357551ada16b8501b1d7338c2b4c65 100644 (file)
@@ -107,8 +107,9 @@ iris_disk_cache_store(struct disk_cache *cache,
     * 3. Number of entries in the system value array
     * 4. System value array
     * 5. Size (in bytes) of kernel inputs
-    * 6. Legacy param array (only used for compute workgroup ID)
-    * 7. Binding table
+    * 6. Shader relocations
+    * 7. Legacy param array (only used for compute workgroup ID)
+    * 8. Binding table
     */
    blob_write_bytes(&blob, shader->prog_data, brw_prog_data_size(stage));
    blob_write_bytes(&blob, shader->map, shader->prog_data->program_size);
@@ -116,6 +117,8 @@ iris_disk_cache_store(struct disk_cache *cache,
    blob_write_bytes(&blob, shader->system_values,
                     shader->num_system_values * sizeof(enum brw_param_builtin));
    blob_write_uint32(&blob, shader->kernel_input_size);
+   blob_write_bytes(&blob, prog_data->relocs,
+                    prog_data->num_relocs * sizeof(struct brw_shader_reloc));
    blob_write_bytes(&blob, prog_data->param,
                     prog_data->nr_params * sizeof(uint32_t));
    blob_write_bytes(&blob, &shader->bt, sizeof(shader->bt));
@@ -193,6 +196,15 @@ iris_disk_cache_retrieve(struct iris_context *ice,
 
    kernel_input_size = blob_read_uint32(&blob);
 
+   prog_data->relocs = NULL;
+   if (prog_data->num_relocs) {
+      struct brw_shader_reloc *relocs =
+         ralloc_array(NULL, struct brw_shader_reloc, prog_data->num_relocs);
+      blob_copy_bytes(&blob, relocs,
+                      prog_data->num_relocs * sizeof(struct brw_shader_reloc));
+      prog_data->relocs = relocs;
+   }
+
    prog_data->param = NULL;
    prog_data->pull_param = NULL;
    assert(prog_data->nr_pull_params == 0);
index bdad2abcf708775bfd44aa97f3083ff51e38091d..5d934f1688b9ac09e62f7c1303c3edbba3e2c9c6 100644 (file)
@@ -406,7 +406,6 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
 
    b.cursor = nir_before_block(nir_start_block(impl));
    nir_ssa_def *temp_ubo_name = nir_ssa_undef(&b, 1, 32);
-   nir_ssa_def *temp_const_ubo_name = NULL;
 
    /* Turn system value intrinsics into uniforms */
    nir_foreach_block(block, impl) {
@@ -419,34 +418,36 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
 
          switch (intrin->intrinsic) {
          case nir_intrinsic_load_constant: {
+            unsigned load_size = intrin->dest.ssa.num_components *
+                                 intrin->dest.ssa.bit_size / 8;
+            unsigned load_align = intrin->dest.ssa.bit_size / 8;
+
             /* This one is special because it reads from the shader constant
              * data and not cbuf0 which gallium uploads for us.
              */
-            b.cursor = nir_before_instr(instr);
+            b.cursor = nir_instr_remove(&intrin->instr);
+
             nir_ssa_def *offset =
                nir_iadd_imm(&b, nir_ssa_for_src(&b, intrin->src[0], 1),
                                 nir_intrinsic_base(intrin));
 
-            if (temp_const_ubo_name == NULL)
-               temp_const_ubo_name = nir_imm_int(&b, 0);
-
-            nir_intrinsic_instr *load_ubo =
-               nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ubo);
-            load_ubo->num_components = intrin->num_components;
-            load_ubo->src[0] = nir_src_for_ssa(temp_const_ubo_name);
-            load_ubo->src[1] = nir_src_for_ssa(offset);
-            nir_intrinsic_set_align(load_ubo,
-                                    nir_intrinsic_align_mul(intrin),
-                                    nir_intrinsic_align_offset(intrin));
-            nir_ssa_dest_init(&load_ubo->instr, &load_ubo->dest,
-                              intrin->dest.ssa.num_components,
-                              intrin->dest.ssa.bit_size,
-                              intrin->dest.ssa.name);
-            nir_builder_instr_insert(&b, &load_ubo->instr);
+            assert(load_size < b.shader->constant_data_size);
+            unsigned max_offset = b.shader->constant_data_size - load_size;
+            offset = nir_umin(&b, offset, nir_imm_int(&b, max_offset));
+
+            nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(&b,
+               nir_load_reloc_const_intel(&b, IRIS_SHADER_RELOC_CONST_DATA_ADDR_LOW),
+               nir_load_reloc_const_intel(&b, IRIS_SHADER_RELOC_CONST_DATA_ADDR_HIGH));
+
+            nir_ssa_def *data =
+               nir_load_global(&b, nir_iadd(&b, const_data_base_addr,
+                                                nir_u2u64(&b, offset)),
+                               load_align,
+                               intrin->dest.ssa.num_components,
+                               intrin->dest.ssa.bit_size);
 
             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
-                                     nir_src_for_ssa(&load_ubo->dest.ssa));
-            nir_instr_remove(&intrin->instr);
+                                     nir_src_for_ssa(data));
             continue;
          }
          case nir_intrinsic_load_user_clip_plane: {
@@ -623,16 +624,6 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
     */
    nir->num_uniforms = 0;
 
-   /* Constant loads (if any) need to go at the end of the constant buffers so
-    * we need to know num_cbufs before we can lower to them.
-    */
-   if (temp_const_ubo_name != NULL) {
-      nir_load_const_instr *const_ubo_index =
-         nir_instr_as_load_const(temp_const_ubo_name->parent_instr);
-      assert(const_ubo_index->def.bit_size == 32);
-      const_ubo_index->value[0].u32 = num_cbufs;
-   }
-
    *out_system_values = system_values;
    *out_num_system_values = num_system_values;
    *out_num_cbufs = num_cbufs;
@@ -2161,7 +2152,6 @@ iris_create_uncompiled_shader(struct pipe_context *ctx,
                               nir_shader *nir,
                               const struct pipe_stream_output_info *so_info)
 {
-   struct iris_context *ice = (void *)ctx;
    struct iris_screen *screen = (struct iris_screen *)ctx->screen;
    const struct gen_device_info *devinfo = &screen->devinfo;
 
@@ -2180,19 +2170,6 @@ iris_create_uncompiled_shader(struct pipe_context *ctx,
 
    nir_sweep(nir);
 
-   if (nir->constant_data_size > 0) {
-      unsigned data_offset;
-      u_upload_data(ice->shaders.uploader, 0, nir->constant_data_size,
-                    32, nir->constant_data, &data_offset, &ish->const_data);
-
-      struct pipe_shader_buffer psb = {
-         .buffer = ish->const_data,
-         .buffer_offset = data_offset,
-         .buffer_size = nir->constant_data_size,
-      };
-      iris_upload_ubo_ssbo_surf_state(ice, &psb, &ish->const_data_state, false);
-   }
-
    ish->program_id = get_new_program_id(screen);
    ish->nir = nir;
    if (so_info) {
@@ -2455,11 +2432,6 @@ iris_delete_shader_state(struct pipe_context *ctx, void *state, gl_shader_stage
       ice->state.stage_dirty |= IRIS_STAGE_DIRTY_UNCOMPILED_VS << stage;
    }
 
-   if (ish->const_data) {
-      pipe_resource_reference(&ish->const_data, NULL);
-      pipe_resource_reference(&ish->const_data_state.res, NULL);
-   }
-
    iris_delete_shader_variants(ice, ish);
 
    ralloc_free(ish->nir);
index 5bee13f2d429b46b50d6e1de9a00dfc05025164d..82feab3f3d7ca5134df89c6763532353ba812c40 100644 (file)
@@ -228,6 +228,23 @@ iris_upload_shader(struct iris_context *ice,
                      &shader->assembly.offset, &shader->assembly.res,
                      &shader->map);
       memcpy(shader->map, assembly, prog_data->program_size);
+
+      uint64_t shader_data_addr = IRIS_MEMZONE_SHADER_START +
+                                  shader->assembly.offset +
+                                  prog_data->const_data_offset;
+
+      struct brw_shader_reloc_value reloc_values[] = {
+         {
+            .id = IRIS_SHADER_RELOC_CONST_DATA_ADDR_LOW,
+            .value = shader_data_addr,
+         },
+         {
+            .id = IRIS_SHADER_RELOC_CONST_DATA_ADDR_HIGH,
+            .value = shader_data_addr >> 32,
+         },
+      };
+      brw_write_shader_relocs(&screen->devinfo, shader->map, prog_data,
+                              reloc_values, ARRAY_SIZE(reloc_values));
    }
 
    list_inithead(&shader->link);
@@ -241,6 +258,7 @@ iris_upload_shader(struct iris_context *ice,
    shader->bt = *bt;
 
    ralloc_steal(shader, shader->prog_data);
+   ralloc_steal(shader->prog_data, (void *)prog_data->relocs);
    ralloc_steal(shader->prog_data, prog_data->param);
    ralloc_steal(shader->prog_data, prog_data->pull_param);
    ralloc_steal(shader, shader->streamout);
index 04053e2daad9f59e64fd26faf4462c43894e0617..341cdbdb20879f84c4925fc6bc724d667b3f54f3 100644 (file)
@@ -4826,7 +4826,6 @@ iris_populate_binding_table(struct iris_context *ice,
                             bool pin_only)
 {
    const struct iris_binder *binder = &ice->state.binder;
-   struct iris_uncompiled_shader *ish = ice->shaders.uncompiled[stage];
    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
    if (!shader)
       return;
@@ -4909,25 +4908,9 @@ iris_populate_binding_table(struct iris_context *ice,
    }
 
    foreach_surface_used(i, IRIS_SURFACE_GROUP_UBO) {
-      uint32_t addr;
-
-      if (i == bt->sizes[IRIS_SURFACE_GROUP_UBO] - 1) {
-         if (ish->const_data) {
-            iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data), false,
-                               IRIS_DOMAIN_OTHER_READ);
-            iris_use_pinned_bo(batch, iris_resource_bo(ish->const_data_state.res),
-                               false, IRIS_DOMAIN_NONE);
-            addr = ish->const_data_state.offset;
-         } else {
-            /* This can only happen with INTEL_DISABLE_COMPACT_BINDING_TABLE=1. */
-            addr = use_null_surface(batch, ice);
-         }
-      } else {
-         addr = use_ubo_ssbo(batch, ice, &shs->constbuf[i],
-                             &shs->constbuf_surf_state[i], false,
-                             IRIS_DOMAIN_OTHER_READ);
-      }
-
+      uint32_t addr = use_ubo_ssbo(batch, ice, &shs->constbuf[i],
+                                   &shs->constbuf_surf_state[i], false,
+                                   IRIS_DOMAIN_OTHER_READ);
       push_bt_entry(addr);
    }