From: Kenneth Graunke Date: Wed, 6 Jun 2018 09:16:52 +0000 (-0700) Subject: iris: better ubo handling X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=26cc609927825aa72a1faa77701ac2359484084a;p=mesa.git iris: better ubo handling --- diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 50942198f5f..f23ba7c73e8 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -87,29 +87,6 @@ struct blorp_params; #define IRIS_DIRTY_CONSTANTS_FS (1ull << 39) #define IRIS_DIRTY_DEPTH_BUFFER (1ull << 40) -enum brw_param_domain { - BRW_PARAM_DOMAIN_BUILTIN = 0, - BRW_PARAM_DOMAIN_PARAMETER, - BRW_PARAM_DOMAIN_UNIFORM, - BRW_PARAM_DOMAIN_IMAGE, -}; - -#define BRW_PARAM(domain, val) (BRW_PARAM_DOMAIN_##domain << 24 | (val)) -#define BRW_PARAM_DOMAIN(param) ((uint32_t)(param) >> 24) -#define BRW_PARAM_VALUE(param) ((uint32_t)(param) & 0x00ffffff) - -#define BRW_PARAM_PARAMETER(idx, comp) \ - BRW_PARAM(PARAMETER, ((idx) << 2) | (comp)) -#define BRW_PARAM_PARAMETER_IDX(param) (BRW_PARAM_VALUE(param) >> 2) -#define BRW_PARAM_PARAMETER_COMP(param) (BRW_PARAM_VALUE(param) & 0x3) - -#define BRW_PARAM_UNIFORM(idx) BRW_PARAM(UNIFORM, (idx)) -#define BRW_PARAM_UNIFORM_IDX(param) BRW_PARAM_VALUE(param) - -#define BRW_PARAM_IMAGE(idx, offset) BRW_PARAM(IMAGE, ((idx) << 8) | (offset)) -#define BRW_PARAM_IMAGE_IDX(value) (BRW_PARAM_VALUE(value) >> 8) -#define BRW_PARAM_IMAGE_OFFSET(value) (BRW_PARAM_VALUE(value) & 0xf) - struct iris_depth_stencil_alpha_state; enum iris_program_cache_id { @@ -195,9 +172,8 @@ struct iris_compiled_shader { struct iris_shader_state { struct pipe_constant_buffer constbuf[PIPE_MAX_CONSTANT_BUFFERS]; - struct pipe_resource *push_resource; + struct pipe_resource *const_resources[PIPE_MAX_CONSTANT_BUFFERS]; unsigned const_offset; - unsigned const_size; }; struct iris_vtable { diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c index d1c4b88ca97..59fcbed6302 100644 --- a/src/gallium/drivers/iris/iris_program.c +++ b/src/gallium/drivers/iris/iris_program.c @@ -64,19 +64,6 @@ iris_create_shader_state(struct pipe_context *ctx, nir = brw_preprocess_nir(screen->compiler, nir); -#if 0 - /* Reassign uniform locations using type_size_scalar_bytes instead of - * the slot based calculation that st_nir uses. - */ - nir_assign_var_locations(&nir->uniforms, &nir->num_uniforms, - type_size_scalar_bytes); - nir_lower_io(nir, nir_var_uniform, type_size_scalar_bytes, 0); -#endif - nir_foreach_variable(var, &nir->uniforms) { - var->data.driver_location *= 4; - } - nir_lower_io(nir, nir_var_uniform, type_size_vec4_bytes, 0); - ish->program_id = get_new_program_id(screen); ish->base.type = PIPE_SHADER_IR_NIR; ish->base.ir.nir = nir; @@ -154,10 +141,12 @@ iris_bind_fs_state(struct pipe_context *ctx, void *hwcso) */ static uint32_t assign_common_binding_table_offsets(const struct gen_device_info *devinfo, - const struct shader_info *info, + const struct nir_shader *nir, struct brw_stage_prog_data *prog_data, uint32_t next_binding_table_offset) { + const struct shader_info *info = &nir->info; + if (info->num_textures) { prog_data->binding_table.texture_start = next_binding_table_offset; prog_data->binding_table.gather_texture_start = next_binding_table_offset; @@ -167,10 +156,12 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo, prog_data->binding_table.gather_texture_start = 0xd0d0d0d0; } - if (info->num_ubos) { + int num_ubos = info->num_ubos + (nir->num_uniforms > 0 ? 1 : 0); + + if (num_ubos) { //assert(info->num_ubos <= BRW_MAX_UBO); prog_data->binding_table.ubo_start = next_binding_table_offset; - next_binding_table_offset += info->num_ubos; + next_binding_table_offset += num_ubos; } else { prog_data->binding_table.ubo_start = 0xd0d0d0d0; } @@ -213,28 +204,41 @@ assign_common_binding_table_offsets(const struct gen_device_info *devinfo, } static void -iris_setup_uniforms(void *mem_ctx, +iris_setup_uniforms(const struct brw_compiler *compiler, + void *mem_ctx, nir_shader *nir, struct brw_stage_prog_data *prog_data) { - prog_data->nr_params = nir->num_uniforms * 4; + prog_data->nr_params = nir->num_uniforms; prog_data->param = rzalloc_array(mem_ctx, uint32_t, prog_data->nr_params); - nir->num_uniforms *= 16; - nir_foreach_variable(var, &nir->uniforms) { - /* UBO's, atomics and samplers don't take up space */ - //if (var->interface_type != NULL || var->type->contains_atomic()) - //continue; - const unsigned components = glsl_get_components(var->type); - for (unsigned i = 0; i < 4; i++) { + for (unsigned i = 0; i < components; i++) { prog_data->param[var->data.driver_location] = - i < components ? BRW_PARAM_PARAMETER(var->data.driver_location, i) - : BRW_PARAM_BUILTIN_ZERO; + var->data.driver_location; } } + + // XXX: vs clip planes? + brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges); +} + +static void +iris_setup_push_uniform_range(const struct brw_compiler *compiler, + struct brw_stage_prog_data *prog_data) +{ + if (prog_data->nr_params) { + for (int i = 3; i > 0; i--) + prog_data->ubo_ranges[i] = prog_data->ubo_ranges[i - 1]; + + prog_data->ubo_ranges[0] = (struct brw_ubo_range) { + .block = 0, + .start = 0, + .length = DIV_ROUND_UP(prog_data->nr_params, 8), + }; + } } static bool @@ -256,9 +260,9 @@ iris_compile_vs(struct iris_context *ice, nir_shader *nir = ish->base.ir.nir; // XXX: alt mode - assign_common_binding_table_offsets(devinfo, &nir->info, prog_data, 0); + assign_common_binding_table_offsets(devinfo, nir, prog_data, 0); - iris_setup_uniforms(mem_ctx, nir, prog_data); + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data); brw_compute_vue_map(devinfo, &vue_prog_data->vue_map, nir->info.outputs_written, @@ -274,6 +278,8 @@ iris_compile_vs(struct iris_context *ice, return false; } + iris_setup_push_uniform_range(compiler, prog_data); + iris_upload_and_bind_shader(ice, IRIS_CACHE_VS, key, program, prog_data); ralloc_free(mem_ctx); @@ -317,7 +323,7 @@ iris_compile_tes(struct iris_context *ice, nir_shader *nir = ish->base.ir.nir; - assign_common_binding_table_offsets(devinfo, &nir->info, prog_data, 0); + assign_common_binding_table_offsets(devinfo, nir, prog_data, 0); struct brw_vue_map input_vue_map; brw_compute_tess_vue_map(&input_vue_map, key->inputs_read, @@ -333,6 +339,8 @@ iris_compile_tes(struct iris_context *ice, return false; } + iris_setup_push_uniform_range(compiler, prog_data); + iris_upload_and_bind_shader(ice, IRIS_CACHE_TES, key, program, prog_data); ralloc_free(mem_ctx); @@ -383,10 +391,10 @@ iris_compile_fs(struct iris_context *ice, nir_shader *nir = ish->base.ir.nir; // XXX: alt mode - assign_common_binding_table_offsets(devinfo, &nir->info, prog_data, + assign_common_binding_table_offsets(devinfo, nir, prog_data, MAX2(key->nr_color_regions, 1)); - iris_setup_uniforms(mem_ctx, nir, prog_data); + iris_setup_uniforms(compiler, mem_ctx, nir, prog_data); char *error_str = NULL; const unsigned *program = @@ -400,6 +408,8 @@ iris_compile_fs(struct iris_context *ice, //brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch); + iris_setup_push_uniform_range(compiler, prog_data); + iris_upload_and_bind_shader(ice, IRIS_CACHE_FS, key, program, prog_data); ralloc_free(mem_ctx); diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c index 342defd8e93..2b1a89fa3b4 100644 --- a/src/gallium/drivers/iris/iris_screen.c +++ b/src/gallium/drivers/iris/iris_screen.c @@ -139,6 +139,7 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_COPY_BETWEEN_COMPRESSED_AND_PLAIN_FORMATS: case PIPE_CAP_FRAMEBUFFER_NO_ATTACHMENT: case PIPE_CAP_CULL_DISTANCE: + case PIPE_CAP_PACKED_UNIFORMS: return true; case PIPE_CAP_FRAGMENT_COLOR_CLAMPED: @@ -154,7 +155,6 @@ iris_get_param(struct pipe_screen *pscreen, enum pipe_cap param) case PIPE_CAP_VERTEXID_NOBASE: case PIPE_CAP_FENCE_SIGNAL: case PIPE_CAP_CONSTBUF0_FLAGS: - case PIPE_CAP_PACKED_UNIFORMS: case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_TRIANGLES: case PIPE_CAP_CONSERVATIVE_RASTER_POST_SNAP_POINTS_LINES: case PIPE_CAP_CONSERVATIVE_RASTER_PRE_SNAP_TRIANGLES: diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index bcbab9e2b07..f8772315d1c 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -1340,12 +1340,23 @@ iris_set_framebuffer_state(struct pipe_context *ctx, static void iris_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type p_stage, unsigned index, - const struct pipe_constant_buffer *cb) + const struct pipe_constant_buffer *input) { struct iris_context *ice = (struct iris_context *) ctx; gl_shader_stage stage = stage_from_pipe(p_stage); + struct iris_shader_state *shs = &ice->shaders.state[stage]; - util_copy_constant_buffer(&ice->shaders.state[stage].constbuf[index], cb); + if (input && (input->buffer || input->user_buffer)) { + if (input->user_buffer) { + u_upload_data(ctx->const_uploader, 0, input->buffer_size, 32, + input->user_buffer, &shs->const_offset, + &shs->const_resources[index]); + } else { + pipe_resource_reference(&shs->const_resources[index], input->buffer); + } + } else { + pipe_resource_reference(&shs->const_resources[index], NULL); + } } static void @@ -2170,36 +2181,47 @@ iris_upload_render_state(struct iris_context *ice, if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage))) continue; - struct pipe_constant_buffer *cbuf0 = - &ice->shaders.state[stage].constbuf[0]; - - if (!ice->shaders.prog[stage] || cbuf0->buffer || !cbuf0->buffer_size) - continue; - struct iris_shader_state *shs = &ice->shaders.state[stage]; struct iris_compiled_shader *shader = ice->shaders.prog[stage]; - struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; - // XXX: DIV_ROUND_UP(prog_data->nr_params, 8)? - //shs->const_size = DIV_ROUND_UP(cbuf0->buffer_size, 32); - shs->const_size = DIV_ROUND_UP(prog_data->nr_params, 8); - u_upload_data(ice->ctx.const_uploader, 0, 32 * shs->const_size, 32, - cbuf0->user_buffer, &shs->const_offset, - &shs->push_resource); - } - for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { - // XXX: wrong dirty tracking... - if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage))) + if (!shader) continue; - struct iris_shader_state *shs = &ice->shaders.state[stage]; - struct iris_resource *res = (void *) shs->push_resource; + struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) { pkt._3DCommandSubOpcode = push_constant_opcodes[stage]; - if (res) { - pkt.ConstantBody.ReadLength[3] = shs->const_size; - pkt.ConstantBody.Buffer[3] = ro_bo(res->bo, shs->const_offset); + if (prog_data) { + /* The Skylake PRM contains the following restriction: + * + * "The driver must ensure The following case does not occur + * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with + * buffer 3 read length equal to zero committed followed by a + * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to + * zero committed." + * + * To avoid this, we program the buffers in the highest slots. + * This way, slot 0 is only used if slot 3 is also used. + */ + int n = 3; + + for (int i = 3; i >= 0; i--) { + const struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; + + if (range->length == 0) + continue; + + // XXX: is range->block a constbuf index? it would be nice + struct iris_resource *res = + (void *) shs->const_resources[range->block]; + + assert(shs->const_offset % 32 == 0); + + pkt.ConstantBody.ReadLength[n] = range->length; + pkt.ConstantBody.Buffer[n] = + ro_bo(res->bo, range->start * 32 + shs->const_offset); + n--; + } } } }