From: Kenneth Graunke Date: Thu, 25 Jan 2018 09:36:49 +0000 (-0800) Subject: iris: use vtbl to avoid multiple symbols, fix state base address X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=5ae278da18b63a03a1f38ecdb935a28484917dca;p=mesa.git iris: use vtbl to avoid multiple symbols, fix state base address --- diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index 23cff418960..5d3d6698a37 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -201,6 +201,9 @@ iris_batch_reset(struct iris_batch *batch) if (batch->state_sizes) _mesa_hash_table_clear(batch->state_sizes, NULL); + + if (batch->ring == I915_EXEC_RENDER) + batch->emit_state_base_address(batch); } static void diff --git a/src/gallium/drivers/iris/iris_batch.h b/src/gallium/drivers/iris/iris_batch.h index 47da23baeb1..6b67737f614 100644 --- a/src/gallium/drivers/iris/iris_batch.h +++ b/src/gallium/drivers/iris/iris_batch.h @@ -89,6 +89,8 @@ struct iris_batch { /** Map from batch offset to iris_alloc_state data (with DEBUG_BATCH) */ struct hash_table *state_sizes; + + void (*emit_state_base_address)(struct iris_batch *batch); }; void iris_init_batch(struct iris_batch *batch, diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c index ffebaf3c0a0..ca2aeeb4942 100644 --- a/src/gallium/drivers/iris/iris_context.c +++ b/src/gallium/drivers/iris/iris_context.c @@ -84,10 +84,23 @@ iris_destroy_context(struct pipe_context *ctx) ralloc_free(ice); } +#define genX_call(devinfo, func, ...) \ + switch (devinfo->gen) { \ + case 10: \ + gen10_##func(__VA_ARGS__); \ + break; \ + case 9: \ + gen9_##func(__VA_ARGS__); \ + break; \ + default: \ + unreachable("Unknown hardware generation"); \ + } + struct pipe_context * iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags) { struct iris_screen *screen = (struct iris_screen*)pscreen; + const struct gen_device_info *devinfo = &screen->devinfo; struct iris_context *ice = rzalloc(NULL, struct iris_context); if (!ice) @@ -115,11 +128,10 @@ iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags) iris_init_resource_functions(ctx); iris_init_query_functions(ctx); - iris_init_state(ice); iris_init_program_cache(ice); iris_init_batch(&ice->render_batch, screen, &ice->dbg, I915_EXEC_RENDER); - iris_upload_initial_gpu_state(&ice->render_batch); + genX_call(devinfo, init_state, ice); return ctx; } diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h index 46ef2d17269..b85c63729d2 100644 --- a/src/gallium/drivers/iris/iris_context.h +++ b/src/gallium/drivers/iris/iris_context.h @@ -75,6 +75,16 @@ struct iris_batch; struct iris_depth_stencil_alpha_state; +enum iris_program_cache_id { + IRIS_CACHE_VS = MESA_SHADER_VERTEX, + IRIS_CACHE_TCS = MESA_SHADER_TESS_CTRL, + IRIS_CACHE_TES = MESA_SHADER_TESS_EVAL, + IRIS_CACHE_GS = MESA_SHADER_GEOMETRY, + IRIS_CACHE_FS = MESA_SHADER_FRAGMENT, + IRIS_CACHE_CS = MESA_SHADER_COMPUTE, + IRIS_CACHE_BLORP_BLIT, +}; + struct iris_program_cache { struct hash_table *table; struct iris_bo *bo; @@ -131,6 +141,15 @@ struct iris_context { struct pipe_framebuffer_state framebuffer; struct iris_sampler_state *samplers[MESA_SHADER_STAGES][IRIS_MAX_TEXTURE_SAMPLERS]; + + void (*upload_render_state)(struct iris_context *ice, + struct iris_batch *batch, + const struct pipe_draw_info *draw); + unsigned (*derived_program_state_size)(enum iris_program_cache_id id); + void (*set_derived_program_state)(const struct gen_device_info *devinfo, + enum iris_program_cache_id cache_id, + struct iris_compiled_shader *shader); + void (*destroy_state)(struct iris_context *ice); } state; }; @@ -151,38 +170,15 @@ void iris_init_clear_functions(struct pipe_context *ctx); void iris_init_program_functions(struct pipe_context *ctx); void iris_init_resource_functions(struct pipe_context *ctx); void iris_init_query_functions(struct pipe_context *ctx); - -void iris_setup_state_base_address(struct iris_context *ice, - struct iris_batch *batch, - struct iris_bo *instruction_bo); -void iris_upload_initial_gpu_state(struct iris_batch *batch); -void iris_upload_render_state(struct iris_context *ice, - struct iris_batch *batch, - const struct pipe_draw_info *draw); -void iris_destroy_state(struct iris_context *ice); - void iris_update_compiled_shaders(struct iris_context *ice); void iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info); -enum iris_program_cache_id { - IRIS_CACHE_VS = MESA_SHADER_VERTEX, - IRIS_CACHE_TCS = MESA_SHADER_TESS_CTRL, - IRIS_CACHE_TES = MESA_SHADER_TESS_EVAL, - IRIS_CACHE_GS = MESA_SHADER_GEOMETRY, - IRIS_CACHE_FS = MESA_SHADER_FRAGMENT, - IRIS_CACHE_CS = MESA_SHADER_COMPUTE, - IRIS_CACHE_BLORP_BLIT, -}; - -void iris_init_state(struct iris_context *ice); +void gen9_init_state(struct iris_context *ice); +void gen10_init_state(struct iris_context *ice); void iris_init_program_cache(struct iris_context *ice); void iris_destroy_program_cache(struct iris_context *ice); void iris_print_program_cache(struct iris_context *ice); -unsigned iris_derived_program_state_size(enum iris_program_cache_id cache_id); -void iris_set_derived_program_state(const struct gen_device_info *devinfo, - enum iris_program_cache_id cache_id, - struct iris_compiled_shader *shader); bool iris_bind_cached_shader(struct iris_context *ice, enum iris_program_cache_id cache_id, const void *key); diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c index 72537ccb262..96e05fa5f24 100644 --- a/src/gallium/drivers/iris/iris_draw.c +++ b/src/gallium/drivers/iris/iris_draw.c @@ -37,5 +37,5 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info) struct iris_context *ice = (struct iris_context *) ctx; iris_update_compiled_shaders(ice); - iris_upload_render_state(ice, &ice->render_batch, info); + ice->state.upload_render_state(ice, &ice->render_batch, info); } diff --git a/src/gallium/drivers/iris/iris_program_cache.c b/src/gallium/drivers/iris/iris_program_cache.c index acbfba0681a..c1d7f5fbe05 100644 --- a/src/gallium/drivers/iris/iris_program_cache.c +++ b/src/gallium/drivers/iris/iris_program_cache.c @@ -259,7 +259,7 @@ iris_upload_and_bind_shader(struct iris_context *ice, struct iris_program_cache *cache = &ice->shaders.cache; struct iris_compiled_shader *shader = ralloc_size(cache->table, sizeof(struct iris_compiled_shader) + - iris_derived_program_state_size(cache_id)); + ice->state.derived_program_state_size(cache_id)); const struct iris_compiled_shader *existing = find_existing_assembly(cache, assembly, prog_data->program_size); @@ -283,7 +283,7 @@ iris_upload_and_bind_shader(struct iris_context *ice, ralloc_steal(shader->prog_data, prog_data->pull_param); /* Store the 3DSTATE shader packets and other derived state. */ - iris_set_derived_program_state(devinfo, cache_id, shader); + ice->state.set_derived_program_state(devinfo, cache_id, shader); struct keybox *keybox = make_keybox(cache, cache_id, key); _mesa_hash_table_insert(cache->table, keybox, shader); diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 058bcea07eb..0a05b55fdf3 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -282,7 +282,7 @@ ro_bo(struct iris_bo *bo, uint32_t offset) return (struct iris_address) { .bo = bo, .offset = offset }; } -void +static void iris_upload_initial_gpu_state(struct iris_batch *batch) { iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { @@ -1312,18 +1312,9 @@ iris_set_stream_output_targets(struct pipe_context *ctx, { } -void -iris_setup_state_base_address(struct iris_context *ice, - struct iris_batch *batch, - struct iris_bo *instruction_bo) +static void +iris_emit_state_base_address(struct iris_batch *batch) { - if (!(ice->state.dirty & IRIS_DIRTY_STATE_BASE_ADDRESS)) - return; - - //iris_batchbuffer_flush(...) - - ice->state.dirty &= ~IRIS_DIRTY_STATE_BASE_ADDRESS; - /* XXX: PIPE_CONTROLs */ iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) { @@ -1351,611 +1342,611 @@ iris_setup_state_base_address(struct iris_context *ice, sba.SurfaceStateBaseAddress = ro_bo(batch->statebuf.bo, 0); sba.DynamicStateBaseAddress = ro_bo(batch->statebuf.bo, 0); - sba.InstructionBaseAddress = ro_bo(instruction_bo, 0); sba.GeneralStateBufferSize = 0xfffff000; - sba.DynamicStateBufferSize = ALIGN(MAX_STATE_SIZE, 4096); sba.IndirectObjectBufferSize = 0xfffff000; - sba.InstructionBufferSize = ALIGN(ice->shaders.cache.bo->size, 4096); - sba.BindlessSurfaceStateSize = 0; + sba.InstructionBufferSize = 0xfffff000; + sba.DynamicStateBufferSize = ALIGN(MAX_STATE_SIZE, 4096); } } -void -iris_upload_render_state(struct iris_context *ice, - struct iris_batch *batch, - const struct pipe_draw_info *draw) +static void +iris_bind_compute_state(struct pipe_context *ctx, void *state) { - const uint64_t dirty = ice->state.dirty; - - struct brw_wm_prog_data *wm_prog_data = (void *) - ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; +} - if (dirty & IRIS_DIRTY_CC_VIEWPORT) { - struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; - iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { - ptr.CCViewportPointer = - iris_emit_state(batch, cso->cc_vp, sizeof(cso->cc_vp), 32); - } - } + //pkt.SamplerCount = \ + //DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ + //pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \ + //ffs(stage_state->per_thread_scratch) - 11; \ - if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) { - struct iris_viewport_state *cso = ice->state.cso_vp; - iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { - ptr.SFClipViewportPointer = - iris_emit_state(batch, cso->sf_cl_vp, sizeof(cso->sf_cl_vp), 64); - } - } +#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ + pkt.KernelStartPointer = shader->prog_offset; \ + pkt.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4; \ + pkt.FloatingPointMode = prog_data->use_alt_mode; \ + \ + pkt.DispatchGRFStartRegisterForURBData = \ + prog_data->dispatch_grf_start_reg; \ + pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \ + pkt.prefix##URBEntryReadOffset = 0; \ + \ + pkt.StatisticsEnable = true; \ + pkt.Enable = true; - /* XXX: L3 State */ +static void +iris_set_vs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) +{ + struct brw_stage_prog_data *prog_data = shader->prog_data; + struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; - if (dirty & IRIS_DIRTY_URB) { - /* XXX: URB */ + iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) { + INIT_THREAD_DISPATCH_FIELDS(vs, Vertex); + vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; + vs.SIMD8DispatchEnable = true; + vs.UserClipDistanceCullTestEnableBitmask = + vue_prog_data->cull_distance_mask; } +} - if (dirty & IRIS_DIRTY_BLEND_STATE) { - struct iris_blend_state *cso = ice->state.cso_blend; - // XXX: 3DSTATE_BLEND_STATE_POINTERS - BLEND_STATE - // -> from iris_blend_state (most) + iris_depth_stencil_alpha_state - // (alpha test function/enable) + has writeable RT from ??????? - } +static void +iris_set_tcs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) +{ + struct brw_stage_prog_data *prog_data = shader->prog_data; + struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; + struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data; - if (dirty & IRIS_DIRTY_COLOR_CALC_STATE) { - struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; - uint32_t cc_offset; - void *cc_map = - iris_alloc_state(batch, - sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length), - 64, &cc_offset); - iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) { - cc.AlphaTestFormat = ALPHATEST_FLOAT32; - cc.AlphaReferenceValueAsFLOAT32 = cso->alpha.ref_value; - cc.BlendConstantColorRed = ice->state.blend_color.color[0]; - cc.BlendConstantColorGreen = ice->state.blend_color.color[1]; - cc.BlendConstantColorBlue = ice->state.blend_color.color[2]; - cc.BlendConstantColorAlpha = ice->state.blend_color.color[3]; - } - iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { - ptr.ColorCalcStatePointer = cc_offset; - ptr.ColorCalcStatePointerValid = true; - } + iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) { + INIT_THREAD_DISPATCH_FIELDS(hs, Vertex); + + hs.InstanceCount = tcs_prog_data->instances - 1; + hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; + hs.IncludeVertexHandles = true; } +} - // XXX: 3DSTATE_CONSTANT_XS - // XXX: 3DSTATE_BINDING_TABLE_POINTERS_XS +static void +iris_set_tes_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) +{ + struct brw_stage_prog_data *prog_data = shader->prog_data; + struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; + struct brw_tes_prog_data *tes_prog_data = (void *) prog_data; - for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { - if (!(dirty & (IRIS_DIRTY_SAMPLER_STATES_VS << stage))) - continue; + uint32_t *te_state = (void *) shader->derived_data; + uint32_t *ds_state = te_state + GENX(3DSTATE_TE_length); - // XXX: get sampler count from shader; don't emit them all... - const int count = IRIS_MAX_TEXTURE_SAMPLERS; + iris_pack_command(GENX(3DSTATE_TE), te_state, te) { + te.Partitioning = tes_prog_data->partitioning; + te.OutputTopology = tes_prog_data->output_topology; + te.TEDomain = tes_prog_data->domain; + te.TEEnable = true; + te.MaximumTessellationFactorOdd = 63.0; + te.MaximumTessellationFactorNotOdd = 64.0; + } - uint32_t offset; - uint32_t *map = iris_alloc_state(batch, - count * 4 * GENX(SAMPLER_STATE_length), - 32, &offset); + iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) { + INIT_THREAD_DISPATCH_FIELDS(ds, Patch); - for (int i = 0; i < count; i++) { - // XXX: when we have a correct count, these better be bound - if (!ice->state.samplers[stage][i]) - continue; - memcpy(map, ice->state.samplers[stage][i]->sampler_state, - 4 * GENX(SAMPLER_STATE_length)); - map += GENX(SAMPLER_STATE_length); - } + ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; + ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; + ds.ComputeWCoordinateEnable = + tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; - iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) { - ptr._3DCommandSubOpcode = 43 + stage; - ptr.PointertoVSSamplerState = offset; - } + ds.UserClipDistanceCullTestEnableBitmask = + vue_prog_data->cull_distance_mask; } - if (dirty & IRIS_DIRTY_MULTISAMPLE) { - iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) { - ms.PixelLocation = - ice->state.cso_rast->half_pixel_center ? CENTER : UL_CORNER; - if (ice->state.framebuffer.samples > 0) - ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1; - } - } +} - if (dirty & IRIS_DIRTY_SAMPLE_MASK) { - iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) { - ms.SampleMask = ice->state.sample_mask; - } - } +static void +iris_set_gs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) +{ + struct brw_stage_prog_data *prog_data = shader->prog_data; + struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; + struct brw_gs_prog_data *gs_prog_data = (void *) prog_data; - for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { - if (!(dirty & (IRIS_DIRTY_VS << stage))) - continue; + iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) { + INIT_THREAD_DISPATCH_FIELDS(gs, Vertex); - if (ice->shaders.prog[stage]) { - iris_batch_emit(batch, ice->shaders.prog[stage]->derived_data, - iris_derived_program_state_size(stage)); - } else { - if (stage == MESA_SHADER_TESS_EVAL) { - iris_emit_cmd(batch, GENX(3DSTATE_HS), hs); - iris_emit_cmd(batch, GENX(3DSTATE_TE), te); - iris_emit_cmd(batch, GENX(3DSTATE_DS), ds); - } else if (stage == MESA_SHADER_GEOMETRY) { - iris_emit_cmd(batch, GENX(3DSTATE_GS), gs); - } - } - } + gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1; + gs.OutputTopology = gs_prog_data->output_topology; + gs.ControlDataHeaderSize = + gs_prog_data->control_data_header_size_hwords; + gs.InstanceControl = gs_prog_data->invocations - 1; + gs.DispatchMode = SIMD8; + gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; + gs.ControlDataFormat = gs_prog_data->control_data_format; + gs.ReorderMode = TRAILING; + gs.ExpectedVertexCount = gs_prog_data->vertices_in; + gs.MaximumNumberofThreads = + GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1) + : (devinfo->max_gs_threads - 1); - // XXX: SOL and so on + if (gs_prog_data->static_vertex_count != -1) { + gs.StaticOutput = true; + gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count; + } + gs.IncludeVertexHandles = vue_prog_data->include_vue_handles; - if (dirty & IRIS_DIRTY_CLIP) { - struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; - struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; + gs.UserClipDistanceCullTestEnableBitmask = + vue_prog_data->cull_distance_mask; - uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)]; - iris_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) { - if (wm_prog_data->barycentric_interp_modes & - BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) - cl.NonPerspectiveBarycentricEnable = true; + const int urb_entry_write_offset = 1; + const uint32_t urb_entry_output_length = + DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) - + urb_entry_write_offset; - cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0; - } - iris_emit_merge(batch, cso_rast->clip, dynamic_clip, - ARRAY_SIZE(cso_rast->clip)); + gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset; + gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1); } +} - if (dirty & IRIS_DIRTY_RASTER) { - struct iris_rasterizer_state *cso = ice->state.cso_rast; - iris_batch_emit(batch, cso->raster, sizeof(cso->raster)); - iris_batch_emit(batch, cso->sf, sizeof(cso->sf)); +static void +iris_set_fs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) +{ + struct brw_stage_prog_data *prog_data = shader->prog_data; + struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; - } + uint32_t *ps_state = (void *) shader->derived_data; + uint32_t *psx_state = ps_state + GENX(3DSTATE_PS_length); - if (dirty & (IRIS_DIRTY_RASTER | IRIS_DIRTY_FS)) { - struct iris_rasterizer_state *cso = ice->state.cso_rast; - uint32_t dynamic_wm[GENX(3DSTATE_WM_length)]; + iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) { + ps.VectorMaskEnable = true; + //ps.SamplerCount = ... + ps.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4; + ps.FloatingPointMode = prog_data->use_alt_mode; + ps.MaximumNumberofThreadsPerPSD = 64 - (GEN_GEN == 8 ? 2 : 1); - iris_pack_command(GENX(3DSTATE_WM), &dynamic_wm, wm) { - wm.BarycentricInterpolationMode = - wm_prog_data->barycentric_interp_modes; + ps.PushConstantEnable = prog_data->nr_params > 0 || + prog_data->ubo_ranges[0].length > 0; - if (wm_prog_data->early_fragment_tests) - wm.EarlyDepthStencilControl = EDSC_PREPS; - else if (wm_prog_data->has_side_effects) - wm.EarlyDepthStencilControl = EDSC_PSEXEC; - } - iris_emit_merge(batch, cso->wm, dynamic_wm, ARRAY_SIZE(cso->wm)); - } + /* From the documentation for this packet: + * "If the PS kernel does not need the Position XY Offsets to + * compute a Position Value, then this field should be programmed + * to POSOFFSET_NONE." + * + * "SW Recommendation: If the PS kernel needs the Position Offsets + * to compute a Position XY value, this field should match Position + * ZW Interpolation Mode to ensure a consistent position.xyzw + * computation." + * + * We only require XY sample offsets. So, this recommendation doesn't + * look useful at the moment. We might need this in future. + */ + ps.PositionXYOffsetSelect = + wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; + ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; + ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; + ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; - // XXX: SBE, SBE_SWIZ + // XXX: Disable SIMD32 with 16x MSAA - if (dirty & IRIS_DIRTY_PS_BLEND) { - struct iris_blend_state *cso = ice->state.cso_blend; - iris_batch_emit(batch, cso->ps_blend, sizeof(cso->ps_blend)); + ps.DispatchGRFStartRegisterForConstantSetupData0 = + brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); + ps.DispatchGRFStartRegisterForConstantSetupData1 = + brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1); + ps.DispatchGRFStartRegisterForConstantSetupData2 = + brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2); + + ps.KernelStartPointer0 = + shader->prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); + ps.KernelStartPointer1 = + shader->prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); + ps.KernelStartPointer2 = + shader->prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); } - if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) { - struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; - struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref; + iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) { + psx.PixelShaderValid = true; + psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; + psx.PixelShaderKillsPixel = wm_prog_data->uses_kill; + psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0; + psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; + psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; + psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch; - uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; - iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) { - wmds.StencilReferenceValue = p_stencil_refs->ref_value[0]; - wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1]; + if (wm_prog_data->uses_sample_mask) { + /* TODO: conservative rasterization */ + if (wm_prog_data->post_depth_coverage) + psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE; + else + psx.InputCoverageMaskState = ICMS_NORMAL; } - iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds)); - } - if (dirty & IRIS_DIRTY_SCISSOR) { - uint32_t scissor_offset = - iris_emit_state(batch, ice->state.scissors, - sizeof(struct pipe_scissor_state) * - ice->state.num_scissors, 32); + psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; + psx.PixelShaderPullsBary = wm_prog_data->pulls_bary; + psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil; - iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { - ptr.ScissorRectPointer = scissor_offset; - } + // XXX: UAV bit } +} - // XXX: 3DSTATE_DEPTH_BUFFER and friends +static unsigned +iris_derived_program_state_size(enum iris_program_cache_id cache_id) +{ + assert(cache_id <= IRIS_CACHE_CS); - if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) { - iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) { - for (int i = 0; i < 32; i++) { - poly.PatternRow[i] = ice->state.poly_stipple.stipple[i]; - } - } + static const unsigned dwords[] = { + [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length), + [IRIS_CACHE_TCS] = GENX(3DSTATE_HS_length), + [IRIS_CACHE_TES] = GENX(3DSTATE_TE_length) + GENX(3DSTATE_DS_length), + [IRIS_CACHE_GS] = GENX(3DSTATE_GS_length), + [IRIS_CACHE_FS] = + GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length), + [IRIS_CACHE_CS] = 0, + [IRIS_CACHE_BLORP_BLIT] = 0, + }; + + return sizeof(uint32_t) * dwords[cache_id]; +} + +static void +iris_set_derived_program_state(const struct gen_device_info *devinfo, + enum iris_program_cache_id cache_id, + struct iris_compiled_shader *shader) +{ + switch (cache_id) { + case IRIS_CACHE_VS: + iris_set_vs_state(devinfo, shader); + break; + case IRIS_CACHE_TCS: + iris_set_tcs_state(devinfo, shader); + break; + case IRIS_CACHE_TES: + iris_set_tes_state(devinfo, shader); + break; + case IRIS_CACHE_GS: + iris_set_gs_state(devinfo, shader); + break; + case IRIS_CACHE_FS: + iris_set_fs_state(devinfo, shader); + break; + case IRIS_CACHE_CS: + break; + default: + break; } +} - if (dirty & IRIS_DIRTY_LINE_STIPPLE) { - struct iris_rasterizer_state *cso = ice->state.cso_rast; - iris_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple)); +static void +iris_upload_render_state(struct iris_context *ice, + struct iris_batch *batch, + const struct pipe_draw_info *draw) +{ + const uint64_t dirty = ice->state.dirty; + + struct brw_wm_prog_data *wm_prog_data = (void *) + ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; + + if (dirty & IRIS_DIRTY_CC_VIEWPORT) { + struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; + iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { + ptr.CCViewportPointer = + iris_emit_state(batch, cso->cc_vp, sizeof(cso->cc_vp), 32); + } } - if (1) { - iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { - topo.PrimitiveTopologyType = - translate_prim_type(draw->mode, draw->vertices_per_patch); + if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) { + struct iris_viewport_state *cso = ice->state.cso_vp; + iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { + ptr.SFClipViewportPointer = + iris_emit_state(batch, cso->sf_cl_vp, sizeof(cso->sf_cl_vp), 64); } } - if (draw->index_size > 0) { - struct iris_resource *res = (struct iris_resource *)draw->index.resource; + /* XXX: L3 State */ - assert(!draw->has_user_indices); + if (dirty & IRIS_DIRTY_URB) { + /* XXX: URB */ + } - iris_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) { - ib.IndexFormat = draw->index_size; - ib.MOCS = MOCS_WB; - ib.BufferSize = res->bo->size; - ib.BufferStartingAddress = ro_bo(res->bo, 0); + if (dirty & IRIS_DIRTY_BLEND_STATE) { + struct iris_blend_state *cso = ice->state.cso_blend; + // XXX: 3DSTATE_BLEND_STATE_POINTERS - BLEND_STATE + // -> from iris_blend_state (most) + iris_depth_stencil_alpha_state + // (alpha test function/enable) + has writeable RT from ??????? + } + + if (dirty & IRIS_DIRTY_COLOR_CALC_STATE) { + struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; + uint32_t cc_offset; + void *cc_map = + iris_alloc_state(batch, + sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length), + 64, &cc_offset); + iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) { + cc.AlphaTestFormat = ALPHATEST_FLOAT32; + cc.AlphaReferenceValueAsFLOAT32 = cso->alpha.ref_value; + cc.BlendConstantColorRed = ice->state.blend_color.color[0]; + cc.BlendConstantColorGreen = ice->state.blend_color.color[1]; + cc.BlendConstantColorBlue = ice->state.blend_color.color[2]; + cc.BlendConstantColorAlpha = ice->state.blend_color.color[3]; + } + iris_emit_cmd(batch, GENX(3DSTATE_CC_STATE_POINTERS), ptr) { + ptr.ColorCalcStatePointer = cc_offset; + ptr.ColorCalcStatePointerValid = true; } } - if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) { - struct iris_vertex_buffer_state *cso = ice->state.cso_vertex_buffers; + // XXX: 3DSTATE_CONSTANT_XS + // XXX: 3DSTATE_BINDING_TABLE_POINTERS_XS - STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_length) == 4); - STATIC_ASSERT((GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) % 32) == 0); + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + if (!(dirty & (IRIS_DIRTY_SAMPLER_STATES_VS << stage))) + continue; - uint64_t *addr = batch->cmdbuf.map_next + sizeof(uint32_t) * - (GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) / 32); - uint32_t *delta = cso->vertex_buffers + - (1 + GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) / 32); + // XXX: get sampler count from shader; don't emit them all... + const int count = IRIS_MAX_TEXTURE_SAMPLERS; - iris_batch_emit(batch, cso->vertex_buffers, - sizeof(uint32_t) * (1 + 4 * cso->num_buffers)); + uint32_t offset; + uint32_t *map = iris_alloc_state(batch, + count * 4 * GENX(SAMPLER_STATE_length), + 32, &offset); - for (unsigned i = 0; i < cso->num_buffers; i++) { - *addr = iris_batch_reloc(batch, (void *) addr - batch->cmdbuf.map, - cso->bos[i].bo, cso->bos[i].offset + - *delta, cso->bos[i].reloc_flags); - addr = (void *) addr + 16; - delta = (void *) delta + 16; + for (int i = 0; i < count; i++) { + // XXX: when we have a correct count, these better be bound + if (!ice->state.samplers[stage][i]) + continue; + memcpy(map, ice->state.samplers[stage][i]->sampler_state, + 4 * GENX(SAMPLER_STATE_length)); + map += GENX(SAMPLER_STATE_length); + } + + iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) { + ptr._3DCommandSubOpcode = 43 + stage; + ptr.PointertoVSSamplerState = offset; } } - if (dirty & IRIS_DIRTY_VERTEX_ELEMENTS) { - struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements; - iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) * - (1 + cso->count * GENX(VERTEX_ELEMENT_STATE_length))); - for (int i = 0; i < cso->count; i++) { - iris_batch_emit(batch, cso->vf_instancing[i], sizeof(uint32_t) * - (cso->count * GENX(3DSTATE_VF_INSTANCING_length))); + if (dirty & IRIS_DIRTY_MULTISAMPLE) { + iris_emit_cmd(batch, GENX(3DSTATE_MULTISAMPLE), ms) { + ms.PixelLocation = + ice->state.cso_rast->half_pixel_center ? CENTER : UL_CORNER; + if (ice->state.framebuffer.samples > 0) + ms.NumberofMultisamples = ffs(ice->state.framebuffer.samples) - 1; } - for (int i = 0; i < cso->count; i++) { - /* TODO: vertexid, instanceid support */ - iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs); + } + + if (dirty & IRIS_DIRTY_SAMPLE_MASK) { + iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) { + ms.SampleMask = ice->state.sample_mask; } } - if (1) { - iris_emit_cmd(batch, GENX(3DSTATE_VF), vf) { - if (draw->primitive_restart) { - vf.IndexedDrawCutIndexEnable = true; - vf.CutIndex = draw->restart_index; + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + if (!(dirty & (IRIS_DIRTY_VS << stage))) + continue; + + if (ice->shaders.prog[stage]) { + iris_batch_emit(batch, ice->shaders.prog[stage]->derived_data, + iris_derived_program_state_size(stage)); + } else { + if (stage == MESA_SHADER_TESS_EVAL) { + iris_emit_cmd(batch, GENX(3DSTATE_HS), hs); + iris_emit_cmd(batch, GENX(3DSTATE_TE), te); + iris_emit_cmd(batch, GENX(3DSTATE_DS), ds); + } else if (stage == MESA_SHADER_GEOMETRY) { + iris_emit_cmd(batch, GENX(3DSTATE_GS), gs); } } } - // XXX: Gen8 - PMA fix - - assert(!draw->indirect); // XXX: indirect support + // XXX: SOL and so on - iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) { - prim.StartInstanceLocation = draw->start_instance; - prim.InstanceCount = draw->instance_count; - prim.VertexCountPerInstance = draw->count; - prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL; + if (dirty & IRIS_DIRTY_CLIP) { + struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; + struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; - // XXX: this is probably bonkers. - prim.StartVertexLocation = draw->start; + uint32_t dynamic_clip[GENX(3DSTATE_CLIP_length)]; + iris_pack_command(GENX(3DSTATE_CLIP), &dynamic_clip, cl) { + if (wm_prog_data->barycentric_interp_modes & + BRW_BARYCENTRIC_NONPERSPECTIVE_BITS) + cl.NonPerspectiveBarycentricEnable = true; - if (draw->index_size) { - prim.BaseVertexLocation += draw->index_bias; - } else { - prim.StartVertexLocation += draw->index_bias; + cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0; } - - //prim.BaseVertexLocation = ...; + iris_emit_merge(batch, cso_rast->clip, dynamic_clip, + ARRAY_SIZE(cso_rast->clip)); } -#if 0 - l3 configuration - 3DSTATE_URB_* - -> TODO + if (dirty & IRIS_DIRTY_RASTER) { + struct iris_rasterizer_state *cso = ice->state.cso_rast; + iris_batch_emit(batch, cso->raster, sizeof(cso->raster)); + iris_batch_emit(batch, cso->sf, sizeof(cso->sf)); - 3DSTATE_CONSTANT_* - push constants - -> TODO + } - Surfaces: - - pull constants - - ubos/ssbos/abos - - images - - textures - - render targets - write and read - 3DSTATE_BINDING_TABLE_POINTERS_* - -> TODO + if (dirty & (IRIS_DIRTY_RASTER | IRIS_DIRTY_FS)) { + struct iris_rasterizer_state *cso = ice->state.cso_rast; + uint32_t dynamic_wm[GENX(3DSTATE_WM_length)]; - 3DSTATE_STREAMOUT - 3DSTATE_SO_BUFFER - 3DSTATE_SO_DECL_LIST + iris_pack_command(GENX(3DSTATE_WM), &dynamic_wm, wm) { + wm.BarycentricInterpolationMode = + wm_prog_data->barycentric_interp_modes; - 3DSTATE_SBE - -> iris_raster_state (point sprite texture coordinate origin) - -> bunch of shader state... - 3DSTATE_SBE_SWIZ - -> FS state + if (wm_prog_data->early_fragment_tests) + wm.EarlyDepthStencilControl = EDSC_PREPS; + else if (wm_prog_data->has_side_effects) + wm.EarlyDepthStencilControl = EDSC_PSEXEC; + } + iris_emit_merge(batch, cso->wm, dynamic_wm, ARRAY_SIZE(cso->wm)); + } - 3DSTATE_DEPTH_BUFFER - 3DSTATE_HIER_DEPTH_BUFFER - 3DSTATE_STENCIL_BUFFER - 3DSTATE_CLEAR_PARAMS - -> iris_framebuffer_state? -#endif -} + // XXX: SBE, SBE_SWIZ -static void -iris_bind_compute_state(struct pipe_context *ctx, void *state) -{ -} + if (dirty & IRIS_DIRTY_PS_BLEND) { + struct iris_blend_state *cso = ice->state.cso_blend; + iris_batch_emit(batch, cso->ps_blend, sizeof(cso->ps_blend)); + } - //pkt.SamplerCount = \ - //DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ - //pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \ - //ffs(stage_state->per_thread_scratch) - 11; \ + if (dirty & IRIS_DIRTY_WM_DEPTH_STENCIL) { + struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; + struct pipe_stencil_ref *p_stencil_refs = &ice->state.stencil_ref; -#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ - pkt.KernelStartPointer = shader->prog_offset; \ - pkt.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4; \ - pkt.FloatingPointMode = prog_data->use_alt_mode; \ - \ - pkt.DispatchGRFStartRegisterForURBData = \ - prog_data->dispatch_grf_start_reg; \ - pkt.prefix##URBEntryReadLength = vue_prog_data->urb_read_length; \ - pkt.prefix##URBEntryReadOffset = 0; \ - \ - pkt.StatisticsEnable = true; \ - pkt.Enable = true; + uint32_t stencil_refs[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; + iris_pack_command(GENX(3DSTATE_WM_DEPTH_STENCIL), &stencil_refs, wmds) { + wmds.StencilReferenceValue = p_stencil_refs->ref_value[0]; + wmds.BackfaceStencilReferenceValue = p_stencil_refs->ref_value[1]; + } + iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds)); + } -static void -iris_set_vs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) -{ - struct brw_stage_prog_data *prog_data = shader->prog_data; - struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; + if (dirty & IRIS_DIRTY_SCISSOR) { + uint32_t scissor_offset = + iris_emit_state(batch, ice->state.scissors, + sizeof(struct pipe_scissor_state) * + ice->state.num_scissors, 32); - iris_pack_command(GENX(3DSTATE_VS), shader->derived_data, vs) { - INIT_THREAD_DISPATCH_FIELDS(vs, Vertex); - vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1; - vs.SIMD8DispatchEnable = true; - vs.UserClipDistanceCullTestEnableBitmask = - vue_prog_data->cull_distance_mask; + iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { + ptr.ScissorRectPointer = scissor_offset; + } } -} - -static void -iris_set_tcs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) -{ - struct brw_stage_prog_data *prog_data = shader->prog_data; - struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; - struct brw_tcs_prog_data *tcs_prog_data = (void *) prog_data; - iris_pack_command(GENX(3DSTATE_HS), shader->derived_data, hs) { - INIT_THREAD_DISPATCH_FIELDS(hs, Vertex); + // XXX: 3DSTATE_DEPTH_BUFFER and friends - hs.InstanceCount = tcs_prog_data->instances - 1; - hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1; - hs.IncludeVertexHandles = true; + if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) { + iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) { + for (int i = 0; i < 32; i++) { + poly.PatternRow[i] = ice->state.poly_stipple.stipple[i]; + } + } } -} - -static void -iris_set_tes_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) -{ - struct brw_stage_prog_data *prog_data = shader->prog_data; - struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; - struct brw_tes_prog_data *tes_prog_data = (void *) prog_data; - uint32_t *te_state = (void *) shader->derived_data; - uint32_t *ds_state = te_state + GENX(3DSTATE_TE_length); + if (dirty & IRIS_DIRTY_LINE_STIPPLE) { + struct iris_rasterizer_state *cso = ice->state.cso_rast; + iris_batch_emit(batch, cso->line_stipple, sizeof(cso->line_stipple)); + } - iris_pack_command(GENX(3DSTATE_TE), te_state, te) { - te.Partitioning = tes_prog_data->partitioning; - te.OutputTopology = tes_prog_data->output_topology; - te.TEDomain = tes_prog_data->domain; - te.TEEnable = true; - te.MaximumTessellationFactorOdd = 63.0; - te.MaximumTessellationFactorNotOdd = 64.0; + if (1) { + iris_emit_cmd(batch, GENX(3DSTATE_VF_TOPOLOGY), topo) { + topo.PrimitiveTopologyType = + translate_prim_type(draw->mode, draw->vertices_per_patch); + } } - iris_pack_command(GENX(3DSTATE_DS), ds_state, ds) { - INIT_THREAD_DISPATCH_FIELDS(ds, Patch); + if (draw->index_size > 0) { + struct iris_resource *res = (struct iris_resource *)draw->index.resource; - ds.DispatchMode = DISPATCH_MODE_SIMD8_SINGLE_PATCH; - ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1; - ds.ComputeWCoordinateEnable = - tes_prog_data->domain == BRW_TESS_DOMAIN_TRI; + assert(!draw->has_user_indices); - ds.UserClipDistanceCullTestEnableBitmask = - vue_prog_data->cull_distance_mask; + iris_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) { + ib.IndexFormat = draw->index_size; + ib.MOCS = MOCS_WB; + ib.BufferSize = res->bo->size; + ib.BufferStartingAddress = ro_bo(res->bo, 0); + } } -} + if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) { + struct iris_vertex_buffer_state *cso = ice->state.cso_vertex_buffers; -static void -iris_set_gs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) -{ - struct brw_stage_prog_data *prog_data = shader->prog_data; - struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; - struct brw_gs_prog_data *gs_prog_data = (void *) prog_data; + STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_length) == 4); + STATIC_ASSERT((GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) % 32) == 0); - iris_pack_command(GENX(3DSTATE_GS), shader->derived_data, gs) { - INIT_THREAD_DISPATCH_FIELDS(gs, Vertex); + uint64_t *addr = batch->cmdbuf.map_next + sizeof(uint32_t) * + (GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) / 32); + uint32_t *delta = cso->vertex_buffers + + (1 + GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) / 32); - gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1; - gs.OutputTopology = gs_prog_data->output_topology; - gs.ControlDataHeaderSize = - gs_prog_data->control_data_header_size_hwords; - gs.InstanceControl = gs_prog_data->invocations - 1; - gs.DispatchMode = SIMD8; - gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; - gs.ControlDataFormat = gs_prog_data->control_data_format; - gs.ReorderMode = TRAILING; - gs.ExpectedVertexCount = gs_prog_data->vertices_in; - gs.MaximumNumberofThreads = - GEN_GEN == 8 ? (devinfo->max_gs_threads / 2 - 1) - : (devinfo->max_gs_threads - 1); + iris_batch_emit(batch, cso->vertex_buffers, + sizeof(uint32_t) * (1 + 4 * cso->num_buffers)); - if (gs_prog_data->static_vertex_count != -1) { - gs.StaticOutput = true; - gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count; + for (unsigned i = 0; i < cso->num_buffers; i++) { + *addr = iris_batch_reloc(batch, (void *) addr - batch->cmdbuf.map, + cso->bos[i].bo, cso->bos[i].offset + + *delta, cso->bos[i].reloc_flags); + addr = (void *) addr + 16; + delta = (void *) delta + 16; } - gs.IncludeVertexHandles = vue_prog_data->include_vue_handles; - - gs.UserClipDistanceCullTestEnableBitmask = - vue_prog_data->cull_distance_mask; - - const int urb_entry_write_offset = 1; - const uint32_t urb_entry_output_length = - DIV_ROUND_UP(vue_prog_data->vue_map.num_slots, 2) - - urb_entry_write_offset; - - gs.VertexURBEntryOutputReadOffset = urb_entry_write_offset; - gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1); } -} -static void -iris_set_fs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) -{ - struct brw_stage_prog_data *prog_data = shader->prog_data; - struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; + if (dirty & IRIS_DIRTY_VERTEX_ELEMENTS) { + struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements; + iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) * + (1 + cso->count * GENX(VERTEX_ELEMENT_STATE_length))); + for (int i = 0; i < cso->count; i++) { + iris_batch_emit(batch, cso->vf_instancing[i], sizeof(uint32_t) * + (cso->count * GENX(3DSTATE_VF_INSTANCING_length))); + } + for (int i = 0; i < cso->count; i++) { + /* TODO: vertexid, instanceid support */ + iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs); + } + } - uint32_t *ps_state = (void *) shader->derived_data; - uint32_t *psx_state = ps_state + GENX(3DSTATE_PS_length); + if (1) { + iris_emit_cmd(batch, GENX(3DSTATE_VF), vf) { + if (draw->primitive_restart) { + vf.IndexedDrawCutIndexEnable = true; + vf.CutIndex = draw->restart_index; + } + } + } - iris_pack_command(GENX(3DSTATE_PS), ps_state, ps) { - ps.VectorMaskEnable = true; - //ps.SamplerCount = ... - ps.BindingTableEntryCount = prog_data->binding_table.size_bytes / 4; - ps.FloatingPointMode = prog_data->use_alt_mode; - ps.MaximumNumberofThreadsPerPSD = 64 - (GEN_GEN == 8 ? 2 : 1); + // XXX: Gen8 - PMA fix - ps.PushConstantEnable = prog_data->nr_params > 0 || - prog_data->ubo_ranges[0].length > 0; + assert(!draw->indirect); // XXX: indirect support - /* From the documentation for this packet: - * "If the PS kernel does not need the Position XY Offsets to - * compute a Position Value, then this field should be programmed - * to POSOFFSET_NONE." - * - * "SW Recommendation: If the PS kernel needs the Position Offsets - * to compute a Position XY value, this field should match Position - * ZW Interpolation Mode to ensure a consistent position.xyzw - * computation." - * - * We only require XY sample offsets. So, this recommendation doesn't - * look useful at the moment. We might need this in future. - */ - ps.PositionXYOffsetSelect = - wm_prog_data->uses_pos_offset ? POSOFFSET_SAMPLE : POSOFFSET_NONE; - ps._8PixelDispatchEnable = wm_prog_data->dispatch_8; - ps._16PixelDispatchEnable = wm_prog_data->dispatch_16; - ps._32PixelDispatchEnable = wm_prog_data->dispatch_32; + iris_emit_cmd(batch, GENX(3DPRIMITIVE), prim) { + prim.StartInstanceLocation = draw->start_instance; + prim.InstanceCount = draw->instance_count; + prim.VertexCountPerInstance = draw->count; + prim.VertexAccessType = draw->index_size > 0 ? RANDOM : SEQUENTIAL; - // XXX: Disable SIMD32 with 16x MSAA + // XXX: this is probably bonkers. + prim.StartVertexLocation = draw->start; - ps.DispatchGRFStartRegisterForConstantSetupData0 = - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0); - ps.DispatchGRFStartRegisterForConstantSetupData1 = - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1); - ps.DispatchGRFStartRegisterForConstantSetupData2 = - brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2); + if (draw->index_size) { + prim.BaseVertexLocation += draw->index_bias; + } else { + prim.StartVertexLocation += draw->index_bias; + } - ps.KernelStartPointer0 = - shader->prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0); - ps.KernelStartPointer1 = - shader->prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1); - ps.KernelStartPointer2 = - shader->prog_offset + brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2); + //prim.BaseVertexLocation = ...; } +#if 0 + l3 configuration - iris_pack_command(GENX(3DSTATE_PS_EXTRA), psx_state, psx) { - psx.PixelShaderValid = true; - psx.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode; - psx.PixelShaderKillsPixel = wm_prog_data->uses_kill; - psx.AttributeEnable = wm_prog_data->num_varying_inputs != 0; - psx.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth; - psx.PixelShaderUsesSourceW = wm_prog_data->uses_src_w; - psx.PixelShaderIsPerSample = wm_prog_data->persample_dispatch; - - if (wm_prog_data->uses_sample_mask) { - /* TODO: conservative rasterization */ - if (wm_prog_data->post_depth_coverage) - psx.InputCoverageMaskState = ICMS_DEPTH_COVERAGE; - else - psx.InputCoverageMaskState = ICMS_NORMAL; - } + 3DSTATE_URB_* + -> TODO - psx.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask; - psx.PixelShaderPullsBary = wm_prog_data->pulls_bary; - psx.PixelShaderComputesStencil = wm_prog_data->computed_stencil; + 3DSTATE_CONSTANT_* - push constants + -> TODO - // XXX: UAV bit - } -} + Surfaces: + - pull constants + - ubos/ssbos/abos + - images + - textures + - render targets - write and read + 3DSTATE_BINDING_TABLE_POINTERS_* + -> TODO -unsigned -iris_derived_program_state_size(enum iris_program_cache_id cache_id) -{ - assert(cache_id <= IRIS_CACHE_CS); + 3DSTATE_STREAMOUT + 3DSTATE_SO_BUFFER + 3DSTATE_SO_DECL_LIST - static const unsigned dwords[] = { - [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length), - [IRIS_CACHE_TCS] = GENX(3DSTATE_HS_length), - [IRIS_CACHE_TES] = GENX(3DSTATE_TE_length) + GENX(3DSTATE_DS_length), - [IRIS_CACHE_GS] = GENX(3DSTATE_GS_length), - [IRIS_CACHE_FS] = - GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length), - [IRIS_CACHE_CS] = 0, - [IRIS_CACHE_BLORP_BLIT] = 0, - }; + 3DSTATE_SBE + -> iris_raster_state (point sprite texture coordinate origin) + -> bunch of shader state... + 3DSTATE_SBE_SWIZ + -> FS state - return sizeof(uint32_t) * dwords[cache_id]; + 3DSTATE_DEPTH_BUFFER + 3DSTATE_HIER_DEPTH_BUFFER + 3DSTATE_STENCIL_BUFFER + 3DSTATE_CLEAR_PARAMS + -> iris_framebuffer_state? +#endif } -void -iris_set_derived_program_state(const struct gen_device_info *devinfo, - enum iris_program_cache_id cache_id, - struct iris_compiled_shader *shader) -{ - switch (cache_id) { - case IRIS_CACHE_VS: - iris_set_vs_state(devinfo, shader); - break; - case IRIS_CACHE_TCS: - iris_set_tcs_state(devinfo, shader); - break; - case IRIS_CACHE_TES: - iris_set_tes_state(devinfo, shader); - break; - case IRIS_CACHE_GS: - iris_set_gs_state(devinfo, shader); - break; - case IRIS_CACHE_FS: - iris_set_fs_state(devinfo, shader); - break; - case IRIS_CACHE_CS: - break; - default: - break; - } -} -void + +static void iris_destroy_state(struct iris_context *ice) { // XXX: unreference resources/surfaces. @@ -1966,12 +1957,10 @@ iris_destroy_state(struct iris_context *ice) } void -iris_init_state(struct iris_context *ice) +genX(init_state)(struct iris_context *ice) { struct pipe_context *ctx = &ice->ctx; - ice->state.dirty = ~0ull; - ctx->create_blend_state = iris_create_blend_state; ctx->create_depth_stencil_alpha_state = iris_create_zsa_state; ctx->create_rasterizer_state = iris_create_rasterizer_state; @@ -2015,4 +2004,14 @@ iris_init_state(struct iris_context *ice) ctx->create_stream_output_target = iris_create_stream_output_target; ctx->stream_output_target_destroy = iris_stream_output_target_destroy; ctx->set_stream_output_targets = iris_set_stream_output_targets; + + ice->render_batch.emit_state_base_address = iris_emit_state_base_address; + ice->state.upload_render_state = iris_upload_render_state; + ice->state.derived_program_state_size = iris_derived_program_state_size; + ice->state.set_derived_program_state = iris_set_derived_program_state; + ice->state.destroy_state = iris_destroy_state; + + ice->state.dirty = ~0ull; + + iris_upload_initial_gpu_state(&ice->render_batch); }