From 65eeb06a7f7afd1fbf48490f06051dfad9de3214 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Tue, 11 Aug 2020 10:30:42 -0500 Subject: [PATCH] iris: Upload kernel inputs with system values Clover doesn't upload a cbuf0 but instead provides the kernel inputs as part of the pipe_grid. The most obvious thing to do is to upload them along with system values. Reviewed-by: Kenneth Graunke Part-of: --- src/gallium/drivers/iris/iris_context.h | 3 ++ src/gallium/drivers/iris/iris_disk_cache.c | 7 ++-- src/gallium/drivers/iris/iris_program.c | 43 +++++++++++++++------- src/gallium/drivers/iris/iris_state.c | 25 +++++++++---- 4 files changed, 54 insertions(+), 24 deletions(-) diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 1cdb035cfe5..8dc64f5d4be 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -378,6 +378,9 @@ struct iris_uncompiled_shader { /* Whether shader uses atomic operations. */ bool uses_atomic_load_store; + /** Size (in bytes) of the kernel input data */ + unsigned kernel_input_size; + /** Constant data scraped from the shader by nir_opt_large_constants */ struct pipe_resource *const_data; diff --git a/src/gallium/drivers/iris/iris_disk_cache.c b/src/gallium/drivers/iris/iris_disk_cache.c index 4913f309d0c..0383512b295 100644 --- a/src/gallium/drivers/iris/iris_disk_cache.c +++ b/src/gallium/drivers/iris/iris_disk_cache.c @@ -106,8 +106,9 @@ iris_disk_cache_store(struct disk_cache *cache, * 2. Assembly code * 3. Number of entries in the system value array * 4. System value array - * 5. Legacy param array (only used for compute workgroup ID) - * 6. Binding table + * 5. Size (in bytes) of kernel inputs + * 6. Legacy param array (only used for compute workgroup ID) + * 7. Binding table */ blob_write_bytes(&blob, shader->prog_data, brw_prog_data_size(stage)); blob_write_bytes(&blob, shader->map, shader->prog_data->program_size); @@ -222,7 +223,7 @@ iris_disk_cache_retrieve(struct iris_context *ice, if (num_cbufs || ish->nir->num_uniforms) num_cbufs++; - if (num_system_values) + if (num_system_values || kernel_input_size) num_cbufs++; assert(stage < ARRAY_SIZE(cache_id_for_stage)); diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index 8470022eb9d..7d63bc185e0 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -377,12 +377,15 @@ iris_setup_uniforms(const struct brw_compiler *compiler, void *mem_ctx, nir_shader *nir, struct brw_stage_prog_data *prog_data, + unsigned kernel_input_size, enum brw_param_builtin **out_system_values, unsigned *out_num_system_values, unsigned *out_num_cbufs) { UNUSED const struct gen_device_info *devinfo = compiler->devinfo; + unsigned system_values_start = ALIGN(kernel_input_size, sizeof(uint32_t)); + const unsigned IRIS_MAX_SYSTEM_VALUES = PIPE_MAX_SHADER_IMAGES * BRW_IMAGE_PARAM_SIZE; enum brw_param_builtin *system_values = @@ -460,7 +463,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler, } b.cursor = nir_before_instr(instr); - offset = nir_imm_int(&b, ucp_idx[ucp] * sizeof(uint32_t)); + offset = nir_imm_int(&b, system_values_start + + ucp_idx[ucp] * sizeof(uint32_t)); break; } case nir_intrinsic_load_patch_vertices_in: @@ -471,7 +475,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler, BRW_PARAM_BUILTIN_PATCH_VERTICES_IN; b.cursor = nir_before_instr(instr); - offset = nir_imm_int(&b, patch_vert_idx * sizeof(uint32_t)); + offset = nir_imm_int(&b, system_values_start + + patch_vert_idx * sizeof(uint32_t)); break; case nir_intrinsic_image_deref_load_param_intel: { assert(devinfo->gen < 9); @@ -512,7 +517,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler, b.cursor = nir_before_instr(instr); offset = nir_iadd(&b, get_aoa_deref_offset(&b, deref, BRW_IMAGE_PARAM_SIZE * 4), - nir_imm_int(&b, img_idx[var->data.binding] * 4 + + nir_imm_int(&b, system_values_start + + img_idx[var->data.binding] * 4 + nir_intrinsic_base(intrin) * 16)); break; } @@ -528,7 +534,16 @@ iris_setup_uniforms(const struct brw_compiler *compiler, } b.cursor = nir_before_instr(instr); - offset = nir_imm_int(&b, variable_group_size_idx * sizeof(uint32_t)); + offset = nir_imm_int(&b, system_values_start + + variable_group_size_idx * sizeof(uint32_t)); + break; + } + case nir_intrinsic_load_kernel_input: { + assert(nir_intrinsic_base(intrin) + + nir_intrinsic_range(intrin) <= kernel_input_size); + b.cursor = nir_before_instr(instr); + offset = nir_iadd_imm(&b, intrin->src[0].ssa, + nir_intrinsic_base(intrin)); break; } default: @@ -562,7 +577,7 @@ iris_setup_uniforms(const struct brw_compiler *compiler, num_cbufs++; /* Place the new params in a new cbuf. */ - if (num_system_values > 0) { + if (num_system_values > 0 || kernel_input_size > 0) { unsigned sysval_cbuf_index = num_cbufs; num_cbufs++; @@ -1101,7 +1116,7 @@ iris_compile_vs(struct iris_context *ice, prog_data->use_alt_mode = ish->use_alt_mode; - iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values, &num_system_values, &num_cbufs); struct iris_binding_table bt; @@ -1281,7 +1296,7 @@ iris_compile_tcs(struct iris_context *ice, if (ish) { nir = nir_shader_clone(mem_ctx, ish->nir); - iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values, &num_system_values, &num_cbufs); iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0, num_system_values, num_cbufs); @@ -1435,7 +1450,7 @@ iris_compile_tes(struct iris_context *ice, nir_shader_gather_info(nir, impl); } - iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values, &num_system_values, &num_cbufs); struct iris_binding_table bt; @@ -1557,7 +1572,7 @@ iris_compile_gs(struct iris_context *ice, nir_shader_gather_info(nir, impl); } - iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values, &num_system_values, &num_cbufs); struct iris_binding_table bt; @@ -1665,7 +1680,7 @@ iris_compile_fs(struct iris_context *ice, prog_data->use_alt_mode = ish->use_alt_mode; - iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, 0, &system_values, &num_system_values, &num_cbufs); /* Lower output variables to load_output intrinsics before setting up @@ -1964,8 +1979,9 @@ iris_compile_cs(struct iris_context *ice, NIR_PASS_V(nir, brw_nir_lower_cs_intrinsics); - iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, &system_values, - &num_system_values, &num_cbufs); + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data, + ish->kernel_input_size, + &system_values, &num_system_values, &num_cbufs); struct iris_binding_table bt; iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0, @@ -1992,7 +2008,7 @@ iris_compile_cs(struct iris_context *ice, struct iris_compiled_shader *shader = iris_upload_shader(ice, IRIS_CACHE_CS, sizeof(*key), key, program, prog_data, NULL, system_values, num_system_values, - 0, num_cbufs, &bt); + ish->kernel_input_size, num_cbufs, &bt); iris_disk_cache_store(screen->disk_cache, ish, shader, key, sizeof(*key)); @@ -2401,6 +2417,7 @@ iris_create_compute_state(struct pipe_context *ctx, struct iris_uncompiled_shader *ish = iris_create_uncompiled_shader(ctx, nir, NULL); + ish->kernel_input_size = state->req_input_mem; // XXX: disallow more than 64KB of shared variables diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 5837d219356..e9f391d5a5c 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -3221,26 +3221,35 @@ iris_set_constant_buffer(struct pipe_context *ctx, static void upload_sysvals(struct iris_context *ice, - gl_shader_stage stage) + gl_shader_stage stage, + const struct pipe_grid_info *grid) { UNUSED struct iris_genx_state *genx = ice->state.genx; struct iris_shader_state *shs = &ice->state.shaders[stage]; struct iris_compiled_shader *shader = ice->shaders.prog[stage]; - if (!shader || shader->num_system_values == 0) + if (!shader || (shader->num_system_values == 0 && + shader->kernel_input_size == 0)) return; assert(shader->num_cbufs > 0); unsigned sysval_cbuf_index = shader->num_cbufs - 1; struct pipe_shader_buffer *cbuf = &shs->constbuf[sysval_cbuf_index]; - unsigned upload_size = shader->num_system_values * sizeof(uint32_t); - uint32_t *map = NULL; + unsigned system_values_start = + ALIGN(shader->kernel_input_size, sizeof(uint32_t)); + unsigned upload_size = system_values_start + + shader->num_system_values * sizeof(uint32_t); + void *map = NULL; assert(sysval_cbuf_index < PIPE_MAX_CONSTANT_BUFFERS); u_upload_alloc(ice->ctx.const_uploader, 0, upload_size, 64, - &cbuf->buffer_offset, &cbuf->buffer, (void **) &map); + &cbuf->buffer_offset, &cbuf->buffer, &map); + if (shader->kernel_input_size > 0) + memcpy(map, grid->input, shader->kernel_input_size); + + uint32_t *sysval_map = map + system_values_start; for (int i = 0; i < shader->num_system_values; i++) { uint32_t sysval = shader->system_values[i]; uint32_t value = 0; @@ -3289,7 +3298,7 @@ upload_sysvals(struct iris_context *ice, assert(!"unhandled system value"); } - *map++ = value; + *sysval_map++ = value; } cbuf->buffer_size = upload_size; @@ -5641,7 +5650,7 @@ iris_upload_dirty_render_state(struct iris_context *ice, continue; if (shs->sysvals_need_upload) - upload_sysvals(ice, stage); + upload_sysvals(ice, stage, NULL); struct push_bos push_bos = {}; setup_constant_buffers(ice, batch, stage, &push_bos); @@ -6790,7 +6799,7 @@ iris_upload_compute_state(struct iris_context *ice, if ((stage_dirty & IRIS_STAGE_DIRTY_CONSTANTS_CS) && shs->sysvals_need_upload) - upload_sysvals(ice, MESA_SHADER_COMPUTE); + upload_sysvals(ice, MESA_SHADER_COMPUTE, grid); if (stage_dirty & IRIS_STAGE_DIRTY_BINDINGS_CS) iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false); -- 2.30.2