X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Firis%2Firis_state.c;h=4d2321a8092ef7007f267be372b8ed1e27d3dbc4;hb=594374dd8d83a32fa9149b2b799d8fc1c51ceb87;hp=eab934fb73895ba0c4b43d458579a6929df1ede8;hpb=0ea3ca3eca4c0c7ff3b41ff09e6cb30b532c8bc5;p=mesa.git

diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index eab934fb738..4d2321a8092 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -95,6 +95,7 @@
 #include "util/u_transfer.h"
 #include "util/u_upload_mgr.h"
 #include "util/u_viewport.h"
+#include "util/u_memory.h"
 #include "drm-uapi/i915_drm.h"
 #include "nir.h"
 #include "intel/compiler/brw_compiler.h"
@@ -1333,7 +1334,7 @@ iris_create_zsa_state(struct pipe_context *ctx,
       state->stencil[0].writemask != 0 ||
       (two_sided_stencil && state->stencil[1].writemask != 0);
 
-   /* The state tracker needs to optimize away EQUAL writes for us. */
+   /* gallium frontends need to optimize away EQUAL writes for us. */
    assert(!(state->depth.func == PIPE_FUNC_EQUAL && state->depth.writemask));
 
    iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), cso->wmds, wmds) {
@@ -1360,6 +1361,9 @@ iris_create_zsa_state(struct pipe_context *ctx,
       wmds.BackfaceStencilTestMask = state->stencil[1].valuemask;
       wmds.BackfaceStencilWriteMask = state->stencil[1].writemask;
       /* wmds.[Backface]StencilReferenceValue are merged later */
+#if GEN_GEN >= 12
+      wmds.StencilReferenceValueModifyDisable = true;
+#endif
    }
 
 #if GEN_GEN >= 12
@@ -1986,7 +1990,7 @@ iris_upload_sampler_states(struct iris_context *ice, gl_shader_stage stage)
    struct iris_shader_state *shs = &ice->state.shaders[stage];
    const struct shader_info *info = iris_get_shader_info(ice, stage);
 
-   /* We assume the state tracker will call pipe->bind_sampler_states()
+   /* We assume gallium frontends will call pipe->bind_sampler_states()
     * if the program's number of textures changes.
     */
    unsigned count = info ? util_last_bit(info->textures_used) : 0;
@@ -2585,7 +2589,7 @@ iris_create_surface(struct pipe_context *ctx,
        * texture, the tile offsets may be anything and we can't rely on
        * X/Y Offset.
        *
-       * Return NULL to force the state tracker to take fallback paths.
+       * Return NULL to force gallium frontends to take fallback paths.
        */
       if (view->array_len > 1 || GEN_GEN == 8)
          return NULL;
@@ -2680,7 +2684,6 @@ iris_set_shader_images(struct pipe_context *ctx,
 {
    struct iris_context *ice = (struct iris_context *) ctx;
    struct iris_screen *screen = (struct iris_screen *)ctx->screen;
-   const struct gen_device_info *devinfo = &screen->devinfo;
    gl_shader_stage stage = stage_from_pipe(p_stage);
    struct iris_shader_state *shs = &ice->state.shaders[stage];
 #if GEN_GEN == 8
@@ -2704,27 +2707,13 @@ iris_set_shader_images(struct pipe_context *ctx,
          res->bind_history |= PIPE_BIND_SHADER_IMAGE;
          res->bind_stages |= 1 << stage;
 
-         isl_surf_usage_flags_t usage = ISL_SURF_USAGE_STORAGE_BIT;
-         enum isl_format isl_fmt =
-            iris_format_for_usage(devinfo, img->format, usage).fmt;
+         enum isl_format isl_fmt = iris_image_view_get_format(ice, img);
 
-         bool untyped_fallback = false;
-
-         if (img->shader_access & PIPE_IMAGE_ACCESS_READ) {
-            /* On Gen8, try to use typed surfaces reads (which support a
-             * limited number of formats), and if not possible, fall back
-             * to untyped reads.
-             */
-            untyped_fallback = GEN_GEN == 8 &&
-               !isl_has_matching_typed_storage_image_format(devinfo, isl_fmt);
+         /* Render compression with images supported on gen12+ only. */
+         unsigned aux_usages = GEN_GEN >= 12 ? res->aux.possible_usages :
+            1 << ISL_AUX_USAGE_NONE;
 
-            if (untyped_fallback)
-               isl_fmt = ISL_FORMAT_RAW;
-            else
-               isl_fmt = isl_lower_storage_image_format(devinfo, isl_fmt);
-         }
-
-         alloc_surface_states(&iv->surface_state, 1 << ISL_AUX_USAGE_NONE);
+         alloc_surface_states(&iv->surface_state, aux_usages);
          iv->surface_state.bo_address = res->bo->gtt_offset;
 
          void *map = iv->surface_state.cpu;
@@ -2737,16 +2726,16 @@ iris_set_shader_images(struct pipe_context *ctx,
                .base_array_layer = img->u.tex.first_layer,
                .array_len = img->u.tex.last_layer - img->u.tex.first_layer + 1,
                .swizzle = ISL_SWIZZLE_IDENTITY,
-               .usage = usage,
+               .usage = ISL_SURF_USAGE_STORAGE_BIT,
             };
 
-            if (untyped_fallback) {
+            /* If using untyped fallback. */
+            if (isl_fmt == ISL_FORMAT_RAW) {
                fill_buffer_surface_state(&screen->isl_dev, res, map,
                                          isl_fmt, ISL_SWIZZLE_IDENTITY,
                                          0, res->bo->size);
             } else {
-               /* Images don't support compression */
-               unsigned aux_modes = 1 << ISL_AUX_USAGE_NONE;
+               unsigned aux_modes = aux_usages;
                while (aux_modes) {
                   enum isl_aux_usage usage = u_bit_scan(&aux_modes);
 
@@ -2950,10 +2939,12 @@ iris_set_stencil_ref(struct pipe_context *ctx,
 {
    struct iris_context *ice = (struct iris_context *) ctx;
    memcpy(&ice->state.stencil_ref, state, sizeof(*state));
-   if (GEN_GEN == 8)
-      ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
-   else
+   if (GEN_GEN >= 12)
+      ice->state.dirty |= IRIS_DIRTY_STENCIL_REF;
+   else if (GEN_GEN >= 9)
       ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL;
+   else
+      ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE;
 }
 
 static float
@@ -3228,6 +3219,10 @@ upload_sysvals(struct iris_context *ice,
          value = fui(ice->state.default_inner_level[0]);
       } else if (sysval == BRW_PARAM_BUILTIN_TESS_LEVEL_INNER_Y) {
          value = fui(ice->state.default_inner_level[1]);
+      } else if (sysval >= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X &&
+                 sysval <= BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_Z) {
+         unsigned i = sysval - BRW_PARAM_BUILTIN_WORK_GROUP_SIZE_X;
+         value = ice->state.last_block[i];
       } else {
          assert(!"unhandled system value");
       }
@@ -4192,12 +4187,21 @@ iris_store_tcs_state(struct iris_context *ice,
        * more than 2 times the number of instance count.
        */
       assert((devinfo->max_tcs_threads / 2) > tcs_prog_data->instances);
+      hs.DispatchGRFStartRegisterForURBData = prog_data->dispatch_grf_start_reg & 0x1f;
+      hs.DispatchGRFStartRegisterForURBData5 = prog_data->dispatch_grf_start_reg >> 5;
 #endif
 
       hs.InstanceCount = tcs_prog_data->instances - 1;
       hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
       hs.IncludeVertexHandles = true;
 
+#if GEN_GEN == 12
+      /* Patch Count threshold specifies the maximum number of patches that
+       * will be accumulated before a thread dispatch is forced.
+       */
+      hs.PatchCountThreshold = tcs_prog_data->patch_count_threshold;
+#endif
+
 #if GEN_GEN >= 9
       hs.DispatchMode = vue_prog_data->dispatch_mode;
       hs.IncludePrimitiveID = tcs_prog_data->include_primitive_id;
@@ -4371,9 +4375,7 @@ iris_store_cs_state(struct iris_context *ice,
    void *map = shader->derived_data;
 
    iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), map, desc) {
-      desc.KernelStartPointer = KSP(shader);
       desc.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
-      desc.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads;
       desc.SharedLocalMemorySize =
          encode_slm_size(GEN_GEN, prog_data->total_shared);
       desc.BarrierEnable = cs_prog_data->uses_barrier;
@@ -4676,7 +4678,8 @@ use_ubo_ssbo(struct iris_batch *batch,
 
 static uint32_t
 use_image(struct iris_batch *batch, struct iris_context *ice,
-          struct iris_shader_state *shs, int i)
+          struct iris_shader_state *shs, const struct shader_info *info,
+          int i)
 {
    struct iris_image_view *iv = &shs->image[i];
    struct iris_resource *res = (void *) iv->base.resource;
@@ -4692,7 +4695,11 @@ use_image(struct iris_batch *batch, struct iris_context *ice,
    if (res->aux.bo)
       iris_use_pinned_bo(batch, res->aux.bo, write);
 
-   return iv->surface_state.ref.offset;
+   enum isl_aux_usage aux_usage =
+      iris_image_view_aux_usage(ice, &iv->base, info);
+
+   return iv->surface_state.ref.offset +
+      surf_state_offset_for_aux(res, res->aux.possible_usages, aux_usage);
 }
 
 #define push_bt_entry(addr) \
@@ -4792,7 +4799,7 @@ iris_populate_binding_table(struct iris_context *ice,
    }
 
    foreach_surface_used(i, IRIS_SURFACE_GROUP_IMAGE) {
-      uint32_t addr = use_image(batch, ice, shs, i);
+      uint32_t addr = use_image(batch, ice, shs, info, i);
       push_bt_entry(addr);
    }
 
@@ -5216,7 +5223,7 @@ setup_constant_buffers(struct iris_context *ice,
       push_bos->buffers[n].length = range->length;
       push_bos->buffers[n].addr =
          res ? ro_bo(res->bo, range->start * 32 + cbuf->buffer_offset)
-         : ro_bo(batch->screen->workaround_bo, 0);
+         : batch->screen->workaround_address;
       n++;
    }
 
@@ -5877,7 +5884,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
    if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) {
       struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa;
 
-#if GEN_GEN >= 9
+#if GEN_GEN >= 9 && GEN_GEN < 12
      struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
      uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
      iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) {
@@ -5886,6 +5893,9 @@ iris_upload_dirty_render_state(struct iris_context *ice,
      }
      iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds));
 #else
+      /* Use modify disable fields which allow us to emit packets
+       * directly instead of merging them later.
+       */
      iris_batch_emit(batch, cso->wmds, sizeof(cso->wmds));
 #endif
 
@@ -5894,6 +5904,25 @@ iris_upload_dirty_render_state(struct iris_context *ice,
 #endif
    }
 
+   if (dirty & IRIS_DIRTY_STENCIL_REF) {
+#if GEN_GEN >= 12
+      /* Use modify disable fields which allow us to emit packets
+       * directly instead of merging them later.
+       */
+      struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref;
+      uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
+      iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) {
+         wmds.StencilReferenceValue = p_stencil_refs->ref_value[0];
+         wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1];
+         wmds.StencilTestMaskModifyDisable = true;
+         wmds.StencilWriteMaskModifyDisable = true;
+         wmds.StencilStateModifyDisable = true;
+         wmds.DepthStateModifyDisable = true;
+      }
+      iris_batch_emit(batch, stencil_refs, sizeof(stencil_refs));
+#endif
+   }
+
    if (dirty & IRIS_DIRTY_SCISSOR_RECT) {
       uint32_t scissor_offset =
          emit_state(batch, ice->state.dynamic_uploader,
@@ -5941,7 +5970,8 @@ iris_upload_dirty_render_state(struct iris_context *ice,
        */
       iris_emit_pipe_control_write(batch, "WA for stencil state",
                                    PIPE_CONTROL_WRITE_IMMEDIATE,
-                                   batch->screen->workaround_bo, 0, 0);
+                                   batch->screen->workaround_address.bo,
+                                   batch->screen->workaround_address.offset, 0);
    }
 
    union isl_color_value clear_value = { .f32 = { 0, } };
@@ -5991,7 +6021,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
 
    if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) {
       int count = util_bitcount64(ice->state.bound_vertex_buffers);
-      int dynamic_bound = ice->state.bound_vertex_buffers;
+      uint64_t dynamic_bound = ice->state.bound_vertex_buffers;
 
      if (ice->state.vs_uses_draw_params) {
         assert(ice->draw.draw_params.res);
@@ -6474,6 +6504,11 @@ iris_upload_compute_state(struct iris_context *ice,
    struct brw_stage_prog_data *prog_data = shader->prog_data;
    struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
 
+   const uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
+   const unsigned simd_size =
+      brw_cs_simd_size_for_group_size(devinfo, cs_prog_data, group_size);
+   const unsigned threads = DIV_ROUND_UP(group_size, simd_size);
+
    /* Always pin the binder. If we're emitting new binding table pointers,
     * we need it. If not, we're probably inheriting old tables via the
    * context, and need it anyway. Since true zero-bindings cases are
@@ -6535,7 +6570,7 @@ iris_upload_compute_state(struct iris_context *ice,
 
       vfe.URBEntryAllocationSize = 2;
       vfe.CURBEAllocationSize =
-         ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
+         ALIGN(cs_prog_data->push.per_thread.regs * threads +
               cs_prog_data->push.cross_thread.regs, 2);
    }
 }
@@ -6546,18 +6581,19 @@ iris_upload_compute_state(struct iris_context *ice,
      assert(cs_prog_data->push.cross_thread.dwords == 0 &&
             cs_prog_data->push.per_thread.dwords == 1 &&
             cs_prog_data->base.param[0] == BRW_PARAM_BUILTIN_SUBGROUP_ID);
+      const unsigned push_const_size =
+         brw_cs_push_const_total_size(cs_prog_data, threads);
      uint32_t *curbe_data_map =
         stream_state(batch, ice->state.dynamic_uploader,
                      &ice->state.last_res.cs_thread_ids,
-                      ALIGN(cs_prog_data->push.total.size, 64), 64,
+                      ALIGN(push_const_size, 64), 64,
                      &curbe_data_offset);
      assert(curbe_data_map);
-      memset(curbe_data_map, 0x5a, ALIGN(cs_prog_data->push.total.size, 64));
-      iris_fill_cs_push_const_buffer(cs_prog_data, curbe_data_map);
+      memset(curbe_data_map, 0x5a, ALIGN(push_const_size, 64));
+      iris_fill_cs_push_const_buffer(cs_prog_data, threads, curbe_data_map);
 
      iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
-         curbe.CURBETotalDataLength =
-            ALIGN(cs_prog_data->push.total.size, 64);
+         curbe.CURBETotalDataLength = ALIGN(push_const_size, 64);
        curbe.CURBEDataStartAddress = curbe_data_offset;
      }
   }
@@ -6569,8 +6605,11 @@ iris_upload_compute_state(struct iris_context *ice,
      uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
 
      iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
+         idd.KernelStartPointer =
+            KSP(shader) + brw_cs_prog_data_prog_offset(cs_prog_data, simd_size);
        idd.SamplerStatePointer = shs->sampler_table.offset;
        idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
+         idd.NumberofThreadsinGPGPUThreadGroup = threads;
      }
 
      for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++)
@@ -6585,14 +6624,13 @@ iris_upload_compute_state(struct iris_context *ice,
      }
   }
 
-   uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
-   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
+   uint32_t remainder = group_size & (simd_size - 1);
    uint32_t right_mask;
 
    if (remainder > 0)
      right_mask = ~0u >> (32 - remainder);
    else
-      right_mask = ~0u >> (32 - cs_prog_data->simd_size);
+      right_mask = ~0u >> (32 - simd_size);
 
 #define GPGPU_DISPATCHDIMX 0x2500
 #define GPGPU_DISPATCHDIMY 0x2504
@@ -6617,10 +6655,10 @@ iris_upload_compute_state(struct iris_context *ice,
 
    iris_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) {
      ggw.IndirectParameterEnable = grid->indirect != NULL;
-      ggw.SIMDSize = cs_prog_data->simd_size / 16;
+      ggw.SIMDSize = simd_size / 16;
      ggw.ThreadDepthCounterMaximum = 0;
      ggw.ThreadHeightCounterMaximum = 0;
-      ggw.ThreadWidthCounterMaximum = cs_prog_data->threads - 1;
+      ggw.ThreadWidthCounterMaximum = threads - 1;
      ggw.ThreadGroupIDXDimension = grid->grid[0];
      ggw.ThreadGroupIDYDimension = grid->grid[1];
      ggw.ThreadGroupIDZDimension = grid->grid[2];
@@ -6955,7 +6993,8 @@ iris_emit_raw_pipe_control(struct iris_batch *batch,
         flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
         non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE;
-         bo = batch->screen->workaround_bo;
+         bo = batch->screen->workaround_address.bo;
+         offset = batch->screen->workaround_address.offset;
      }
   }
 
@@ -7533,36 +7572,36 @@ genX(init_state)(struct iris_context *ice)
    ctx->set_stream_output_targets = iris_set_stream_output_targets;
    ctx->set_frontend_noop = iris_set_frontend_noop;
 
-   ice->vtbl.destroy_state = iris_destroy_state;
-   ice->vtbl.init_render_context = iris_init_render_context;
-   ice->vtbl.init_compute_context = iris_init_compute_context;
-   ice->vtbl.upload_render_state = iris_upload_render_state;
-   ice->vtbl.update_surface_base_address = iris_update_surface_base_address;
-   ice->vtbl.upload_compute_state = iris_upload_compute_state;
-   ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
-   ice->vtbl.emit_mi_report_perf_count = iris_emit_mi_report_perf_count;
-   ice->vtbl.rebind_buffer = iris_rebind_buffer;
-   ice->vtbl.load_register_reg32 = iris_load_register_reg32;
-   ice->vtbl.load_register_reg64 = iris_load_register_reg64;
-   ice->vtbl.load_register_imm32 = iris_load_register_imm32;
-   ice->vtbl.load_register_imm64 = iris_load_register_imm64;
-   ice->vtbl.load_register_mem32 = iris_load_register_mem32;
-   ice->vtbl.load_register_mem64 = iris_load_register_mem64;
-   ice->vtbl.store_register_mem32 = iris_store_register_mem32;
-   ice->vtbl.store_register_mem64 = iris_store_register_mem64;
-   ice->vtbl.store_data_imm32 = iris_store_data_imm32;
-   ice->vtbl.store_data_imm64 = iris_store_data_imm64;
-   ice->vtbl.copy_mem_mem = iris_copy_mem_mem;
-   ice->vtbl.derived_program_state_size = iris_derived_program_state_size;
-   ice->vtbl.store_derived_program_state = iris_store_derived_program_state;
-   ice->vtbl.create_so_decl_list = iris_create_so_decl_list;
-   ice->vtbl.populate_vs_key = iris_populate_vs_key;
-   ice->vtbl.populate_tcs_key = iris_populate_tcs_key;
-   ice->vtbl.populate_tes_key = iris_populate_tes_key;
-   ice->vtbl.populate_gs_key = iris_populate_gs_key;
-   ice->vtbl.populate_fs_key = iris_populate_fs_key;
-   ice->vtbl.populate_cs_key = iris_populate_cs_key;
-   ice->vtbl.lost_genx_state = iris_lost_genx_state;
+   screen->vtbl.destroy_state = iris_destroy_state;
+   screen->vtbl.init_render_context = iris_init_render_context;
+   screen->vtbl.init_compute_context = iris_init_compute_context;
+   screen->vtbl.upload_render_state = iris_upload_render_state;
+   screen->vtbl.update_surface_base_address = iris_update_surface_base_address;
+   screen->vtbl.upload_compute_state = iris_upload_compute_state;
+   screen->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
+   screen->vtbl.emit_mi_report_perf_count = iris_emit_mi_report_perf_count;
+   screen->vtbl.rebind_buffer = iris_rebind_buffer;
+   screen->vtbl.load_register_reg32 = iris_load_register_reg32;
+   screen->vtbl.load_register_reg64 = iris_load_register_reg64;
+   screen->vtbl.load_register_imm32 = iris_load_register_imm32;
+   screen->vtbl.load_register_imm64 = iris_load_register_imm64;
+   screen->vtbl.load_register_mem32 = iris_load_register_mem32;
+   screen->vtbl.load_register_mem64 = iris_load_register_mem64;
+   screen->vtbl.store_register_mem32 = iris_store_register_mem32;
+   screen->vtbl.store_register_mem64 = iris_store_register_mem64;
+   screen->vtbl.store_data_imm32 = iris_store_data_imm32;
+   screen->vtbl.store_data_imm64 = iris_store_data_imm64;
+   screen->vtbl.copy_mem_mem = iris_copy_mem_mem;
+   screen->vtbl.derived_program_state_size = iris_derived_program_state_size;
+   screen->vtbl.store_derived_program_state = iris_store_derived_program_state;
+   screen->vtbl.create_so_decl_list = iris_create_so_decl_list;
+   screen->vtbl.populate_vs_key = iris_populate_vs_key;
+   screen->vtbl.populate_tcs_key = iris_populate_tcs_key;
+   screen->vtbl.populate_tes_key = iris_populate_tes_key;
+   screen->vtbl.populate_gs_key = iris_populate_gs_key;
+   screen->vtbl.populate_fs_key = iris_populate_fs_key;
+   screen->vtbl.populate_cs_key = iris_populate_cs_key;
+   screen->vtbl.lost_genx_state = iris_lost_genx_state;
 
    ice->state.dirty = ~0ull;
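
The image hunks above allocate one SURFACE_STATE per bit set in aux_usages, and
use_image() now picks the one matching the view's aux usage through
surf_state_offset_for_aux().  A minimal sketch of that indexing, assuming the
states are packed in bit order at a fixed stride (surf_state_offset() and
SURF_STATE_STRIDE are hypothetical stand-ins for the driver's helper and its
real aligned surface-state size):

#include <stdint.h>

#define SURF_STATE_STRIDE 64  /* hypothetical stride, not the real constant */

static uint32_t
surf_state_offset(uint32_t aux_usages, unsigned aux_usage)
{
   /* The state for 'aux_usage' sits after the states for every enabled
    * usage below it, so its offset is the popcount of the lower bits
    * times the per-state stride.
    */
   return SURF_STATE_STRIDE *
          __builtin_popcount(aux_usages & ((1u << aux_usage) - 1));
}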
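
The stencil-reference hunks split one path into two: before Gen12, iris packs
a second 3DSTATE_WM_DEPTH_STENCIL holding only the reference values and merges
it into the cached cso->wmds DWords; on Gen12, the *ModifyDisable bits make the
reference-only packet valid on its own, so it can be emitted directly.  A
sketch of the DWord-wise OR the pre-Gen12 merge relies on (emit_merge() is a
hypothetical stand-in for iris_emit_merge(); it works because the reference
fields are zero in one packing and every other field is zero in the other):

#include <stdint.h>

static void
emit_merge(uint32_t *out, const uint32_t *dws0, const uint32_t *dws1,
           uint32_t num_dwords)
{
   /* Combine two packings of the same command; disjoint fields OR cleanly. */
   for (uint32_t i = 0; i < num_dwords; i++)
      out[i] = dws0[i] | dws1[i];
}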
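
The compute hunks move thread bookkeeping from compile time
(cs_prog_data->threads, cs_prog_data->simd_size) to dispatch time: a SIMD
width is chosen per grid by brw_cs_simd_size_for_group_size(), and the thread
count and execution mask follow from it.  A standalone sketch of that
arithmetic, assuming a fixed SIMD16 width in place of the real helper:

#include <stdint.h>
#include <stdio.h>

#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
   const uint32_t block[3] = { 10, 10, 1 };  /* hypothetical local size */
   const unsigned simd_size = 16;            /* assumed; really from
                                                brw_cs_simd_size_for_group_size() */

   const uint32_t group_size = block[0] * block[1] * block[2];
   const unsigned threads = DIV_ROUND_UP(group_size, simd_size);

   /* Execution mask for the last, possibly partial, thread: 'remainder'
    * channels when group_size is not a multiple of the SIMD width,
    * otherwise all 'simd_size' channels.
    */
   uint32_t remainder = group_size & (simd_size - 1);
   uint32_t right_mask = remainder ? ~0u >> (32 - remainder)
                                   : ~0u >> (32 - simd_size);

   /* 100 work items at SIMD16 -> 7 threads, the last with 4 lanes. */
   printf("group_size=%u threads=%u right_mask=0x%08x\n",
          group_size, threads, right_mask);
   return 0;
}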