iris: Upload kernel inputs with system values
author Jason Ekstrand <jason@jlekstrand.net>
Tue, 11 Aug 2020 15:30:42 +0000 (10:30 -0500)
committer Marge Bot <eric+marge@anholt.net>
Wed, 12 Aug 2020 10:11:06 +0000 (10:11 +0000)
Clover doesn't upload a cbuf0 but instead provides the kernel inputs as
part of the pipe_grid_info.  The most obvious thing to do is to upload them
along with system values.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6280>

src/gallium/drivers/iris/iris_context.h
src/gallium/drivers/iris/iris_disk_cache.c
src/gallium/drivers/iris/iris_program.c
src/gallium/drivers/iris/iris_state.c

index 1cdb035cfe52b44a334bd6c89126bd19f75c1b59..8dc64f5d4be732a267f2486b90e8bc2b10c59f8f 100644 (file)
@@ -378,6 +378,9 @@ struct iris_uncompiled_shader {
    /* Whether shader uses atomic operations. */
    bool uses_atomic_load_store;
 
    /* Whether shader uses atomic operations. */
    bool uses_atomic_load_store;
 
+   /** Size (in bytes) of the kernel input data */
+   unsigned kernel_input_size;
+
    /** Constant data scraped from the shader by nir_opt_large_constants */
    struct pipe_resource *const_data;
 
    /** Constant data scraped from the shader by nir_opt_large_constants */
    struct pipe_resource *const_data;
 
index 4913f309d0c3026faaa5af12ccf3ee9d16c73718..0383512b2953d76a9b8ee0a9b71bca0092f6b06c 100644 (file)
@@ -106,8 +106,9 @@ iris_disk_cache_store(struct disk_cache *cache,
     * 2. Assembly code
     * 3. Number of entries in the system value array
     * 4. System value array
     * 2. Assembly code
     * 3. Number of entries in the system value array
     * 4. System value array
-    * 5. Legacy param array (only used for compute workgroup ID)
-    * 6. Binding table
+    * 5. Size (in bytes) of kernel inputs
+    * 6. Legacy param array (only used for compute workgroup ID)
+    * 7. Binding table
     */
    blob_write_bytes(&blob, shader->prog_data, brw_prog_data_size(stage));
    blob_write_bytes(&blob, shader->map, shader->prog_data->program_size);
     */
    blob_write_bytes(&blob, shader->prog_data, brw_prog_data_size(stage));
    blob_write_bytes(&blob, shader->map, shader->prog_data->program_size);
@@ -222,7 +223,7 @@ iris_disk_cache_retrieve(struct iris_context *ice,
    if (num_cbufs || ish->nir->num_uniforms)
       num_cbufs++;
 
    if (num_cbufs || ish->nir->num_uniforms)
       num_cbufs++;
 
-   if (num_system_values)
+   if (num_system_values || kernel_input_size)
       num_cbufs++;
 
    assert(stage < ARRAY_SIZE(cache_id_for_stage));
       num_cbufs++;
 
    assert(stage < ARRAY_SIZE(cache_id_for_stage));
index 8470022eb9d26d4e429ddc2dd8956baa905e4c17..7d63bc185e025e415de5eea808a8cc65dab0fb86 100644 (file)
@@ -377,12 +377,15 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
                     void *mem_ctx,
                     nir_shader *nir,
                     struct brw_stage_prog_data *prog_data,
                     void *mem_ctx,
                     nir_shader *nir,
                     struct brw_stage_prog_data *prog_data,
+                    unsigned kernel_input_size,
                     enum brw_param_builtin **out_system_values,
                     unsigned *out_num_system_values,
                     unsigned *out_num_cbufs)
 {
    UNUSED const struct gen_device_info *devinfo = compiler->devinfo;
 
                     enum brw_param_builtin **out_system_values,
                     unsigned *out_num_system_values,
                     unsigned *out_num_cbufs)
 {
    UNUSED const struct gen_device_info *devinfo = compiler->devinfo;
 
+   unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t));
+
    const unsigned IRIS_MAX_SYSTEM_VALUES =
       PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
    enum brw_param_builtin *system_values =
    const unsigned IRIS_MAX_SYSTEM_VALUES =
       PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE;
    enum brw_param_builtin *system_values =
@@ -460,7 +463,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
             }
 
             b.cursor = nir_before_instr(instr);
             }
 
             b.cursor = nir_before_instr(instr);
-            offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t));
+            offset = nir_imm_int(&b, system_values_start +
+                                     ucp_idx[ucp] * sizeof(uint32_t));
             break;
          }
          case nir_intrinsic_load_patch_vertices_in:
             break;
          }
          case nir_intrinsic_load_patch_vertices_in:
@@ -471,7 +475,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
                BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
 
             b.cursor = nir_before_instr(instr);
                BRW_PARAM_BUILTIN_PATCH_VERTICES_IN;
 
             b.cursor = nir_before_instr(instr);
-            offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t));
+            offset = nir_imm_int(&b, system_values_start +
+                                     patch_vert_idx * sizeof(uint32_t));
             break;
          case nir_intrinsic_image_deref_load_param_intel: {
             assert(devinfo->gen < 9);
             break;
          case nir_intrinsic_image_deref_load_param_intel: {
             assert(devinfo->gen < 9);
@@ -512,7 +517,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
             b.cursor = nir_before_instr(instr);
             offset = nir_iadd(&b,
                get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
             b.cursor = nir_before_instr(instr);
             offset = nir_iadd(&b,
                get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4),
-               nir_imm_int(&b, img_idx[var->data.binding] * 4 +
+               nir_imm_int(&b, system_values_start +
+                               img_idx[var->data.binding] * 4 +
                                nir_intrinsic_base(intrin) * 16));
             break;
          }
                                nir_intrinsic_base(intrin) * 16));
             break;
          }
@@ -528,7 +534,16 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
             }
 
             b.cursor = nir_before_instr(instr);
             }
 
             b.cursor = nir_before_instr(instr);
-            offset = nir_imm_int(&b, variable_group_size_idx * sizeof(uint32_t));
+            offset = nir_imm_int(&b, system_values_start +
+                                     variable_group_size_idx * sizeof(uint32_t));
+            break;
+         }
+         case nir_intrinsic_load_kernel_input: {
+            assert(nir_intrinsic_base(intrin) +
+                   nir_intrinsic_range(intrin) <= kernel_input_size);
+            b.cursor = nir_before_instr(instr);
+            offset = nir_iadd_imm(&b, intrin->src[0].ssa,
+                                      nir_intrinsic_base(intrin));
             break;
          }
          default:
             break;
          }
          default:
@@ -562,7 +577,7 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
       num_cbufs++;
 
    /* Place the new params in a new cbuf. */
       num_cbufs++;
 
    /* Place the new params in a new cbuf. */
-   if (num_system_values > 0) {
+   if (num_system_values > 0 || kernel_input_size > 0) {
       unsigned sysval_cbuf_index = num_cbufs;
       num_cbufs++;
 
       unsigned sysval_cbuf_index = num_cbufs;
       num_cbufs++;
 
@@ -1101,7 +1116,7 @@ iris_compile_vs(struct iris_context *ice,
 
    prog_data->use_alt_mode = ish->use_alt_mode;
 
 
    prog_data->use_alt_mode = ish->use_alt_mode;
 
-   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                        &num_system_values, &num_cbufs);
 
    struct iris_binding_table bt;
                        &num_system_values, &num_cbufs);
 
    struct iris_binding_table bt;
@@ -1281,7 +1296,7 @@ iris_compile_tcs(struct iris_context *ice,
    if (ish) {
       nir = nir_shader_clone(mem_ctx, ish->nir);
 
    if (ish) {
       nir = nir_shader_clone(mem_ctx, ish->nir);
 
-      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+      iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                           &num_system_values, &num_cbufs);
       iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                                num_system_values, num_cbufs);
                           &num_system_values, &num_cbufs);
       iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                                num_system_values, num_cbufs);
@@ -1435,7 +1450,7 @@ iris_compile_tes(struct iris_context *ice,
       nir_shader_gather_info(nir, impl);
    }
 
       nir_shader_gather_info(nir, impl);
    }
 
-   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                        &num_system_values, &num_cbufs);
 
    struct iris_binding_table bt;
                        &num_system_values, &num_cbufs);
 
    struct iris_binding_table bt;
@@ -1557,7 +1572,7 @@ iris_compile_gs(struct iris_context *ice,
       nir_shader_gather_info(nir, impl);
    }
 
       nir_shader_gather_info(nir, impl);
    }
 
-   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                        &num_system_values, &num_cbufs);
 
    struct iris_binding_table bt;
                        &num_system_values, &num_cbufs);
 
    struct iris_binding_table bt;
@@ -1665,7 +1680,7 @@ iris_compile_fs(struct iris_context *ice,
 
    prog_data->use_alt_mode = ish->use_alt_mode;
 
 
    prog_data->use_alt_mode = ish->use_alt_mode;
 
-   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
+   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values,
                        &num_system_values, &num_cbufs);
 
    /* Lower output variables to load_output intrinsics before setting up
                        &num_system_values, &num_cbufs);
 
    /* Lower output variables to load_output intrinsics before setting up
@@ -1964,8 +1979,9 @@ iris_compile_cs(struct iris_context *ice,
 
    NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
 
 
    NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics);
 
-   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values,
-                       &num_system_values, &num_cbufs);
+   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data,
+                       ish->kernel_input_size,
+                       &system_values, &num_system_values, &num_cbufs);
 
    struct iris_binding_table bt;
    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
 
    struct iris_binding_table bt;
    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
@@ -1992,7 +2008,7 @@ iris_compile_cs(struct iris_context *ice,
    struct iris_compiled_shader *shader =
       iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
                          prog_data, NULL, system_values, num_system_values,
    struct iris_compiled_shader *shader =
       iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program,
                          prog_data, NULL, system_values, num_system_values,
-                         0, num_cbufs, &bt);
+                         ish->kernel_input_size, num_cbufs, &bt);
 
    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
 
 
    iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key));
 
@@ -2401,6 +2417,7 @@ iris_create_compute_state(struct pipe_context *ctx,
 
    struct iris_uncompiled_shader *ish =
       iris_create_uncompiled_shader(ctx, nir, NULL);
 
    struct iris_uncompiled_shader *ish =
       iris_create_uncompiled_shader(ctx, nir, NULL);
+   ish->kernel_input_size = state->req_input_mem;
 
    // XXX: disallow more than 64KB of shared variables
 
 
    // XXX: disallow more than 64KB of shared variables
 
index 5837d2193567566ed5b59f7c31c1723e17807436..e9f391d5a5c0ec91445728721a9958288faf9be4 100644 (file)
@@ -3221,26 +3221,35 @@ iris_set_constant_buffer(struct pipe_context *ctx,
 
 static void
 upload_sysvals(struct iris_context *ice,
 
 static void
 upload_sysvals(struct iris_context *ice,
-                gl_shader_stage stage)
+               gl_shader_stage stage,
+               const struct pipe_grid_info *grid)
 {
    UNUSED struct iris_genx_state *genx = ice->state.genx;
    struct iris_shader_state *shs = &ice->state.shaders[stage];
 
    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
 {
    UNUSED struct iris_genx_state *genx = ice->state.genx;
    struct iris_shader_state *shs = &ice->state.shaders[stage];
 
    struct iris_compiled_shader *shader = ice->shaders.prog[stage];
-   if (!shader || shader->num_system_values == 0)
+   if (!shader || (shader->num_system_values == 0 &&
+                   shader->kernel_input_size == 0))
       return;
 
    assert(shader->num_cbufs > 0);
 
    unsigned sysval_cbuf_index = shader->num_cbufs - 1;
    struct pipe_shader_buffer *cbuf = &shs->constbuf[sysval_cbuf_index];
       return;
 
    assert(shader->num_cbufs > 0);
 
    unsigned sysval_cbuf_index = shader->num_cbufs - 1;
    struct pipe_shader_buffer *cbuf = &shs->constbuf[sysval_cbuf_index];
-   unsigned upload_size = shader->num_system_values * sizeof(uint32_t);
-   uint32_t *map = NULL;
+   unsigned system_values_start =
+      ALIGN(shader->kernel_input_size, sizeof(uint32_t));
+   unsigned upload_size = system_values_start +
+                          shader->num_system_values * sizeof(uint32_t);
+   void *map = NULL;
 
    assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
    u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
 
    assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS);
    u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64,
-                  &cbuf->buffer_offset, &cbuf->buffer, (void **) &map);
+                  &cbuf->buffer_offset, &cbuf->buffer, &map);
 
 
+   if (shader->kernel_input_size > 0)
+      memcpy(map, grid->input, shader->kernel_input_size);
+
+   uint32_t *sysval_map = map + system_values_start;
    for (int i = 0; i < shader->num_system_values; i++) {
       uint32_t sysval = shader->system_values[i];
       uint32_t value = 0;
    for (int i = 0; i < shader->num_system_values; i++) {
       uint32_t sysval = shader->system_values[i];
       uint32_t value = 0;
@@ -3289,7 +3298,7 @@ upload_sysvals(struct iris_context *ice,
          assert(!"unhandled system value");
       }
 
          assert(!"unhandled system value");
       }
 
-      *map++ = value;
+      *sysval_map++ = value;
    }
 
    cbuf->buffer_size = upload_size;
    }
 
    cbuf->buffer_size = upload_size;
@@ -5641,7 +5650,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
          continue;
 
       if (shs->sysvals_need_upload)
          continue;
 
       if (shs->sysvals_need_upload)
-         upload_sysvals(ice, stage);
+         upload_sysvals(ice, stage, NULL);
 
       struct push_bos push_bos = {};
       setup_constant_buffers(ice, batch, stage, &push_bos);
 
       struct push_bos push_bos = {};
       setup_constant_buffers(ice, batch, stage, &push_bos);
@@ -6790,7 +6799,7 @@ iris_upload_compute_state(struct iris_context *ice,
 
    if ((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
        shs->sysvals_need_upload)
 
    if ((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) &&
        shs->sysvals_need_upload)
-      upload_sysvals(ice, MESA_SHADER_COMPUTE);
+      upload_sysvals(ice, MESA_SHADER_COMPUTE, grid);
 
    if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS)
       iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);
 
    if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS)
       iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);