X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Firis%2Firis_state.c;h=fa80c5d4db9c1aa33158b5f9ed555fea96f15d85;hb=c0ab9c9890a34720b4580971c0ace16cc0cf52f9;hp=38310cfd6c83dec4b8ab11964b7a6efe8865e5a4;hpb=5d2673ba7e3d1fecdc959ce5a2eb8c89b9592ab8;p=mesa.git

diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index 38310cfd6c8..fa80c5d4db9 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -23,11 +23,13 @@
 #include <stdio.h>
 #include <errno.h>
 
-#ifdef HAVE_VALGRIND
+#if HAVE_VALGRIND
 #include <valgrind.h>
 #include <memcheck.h>
 #define VG(x) x
+#ifndef NDEBUG
 #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
+#endif
 #else
 #define VG(x)
 #endif
@@ -37,9 +39,12 @@
 #include "pipe/p_context.h"
 #include "pipe/p_screen.h"
 #include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_framebuffer.h"
 #include "util/u_transfer.h"
 #include "util/u_upload_mgr.h"
 #include "i915_drm.h"
+#include "nir.h"
 #include "intel/compiler/brw_compiler.h"
 #include "intel/common/gen_l3_config.h"
 #include "intel/common/gen_sample_positions.h"
@@ -73,15 +78,6 @@ __gen_combine_address(struct iris_batch *batch, void *location,
 #define __genxml_cmd_header(cmd) cmd ## _header
 #define __genxml_cmd_pack(cmd) cmd ## _pack
 
-static void *
-get_command_space(struct iris_batch *batch, unsigned bytes)
-{
-   iris_require_command_space(batch, bytes);
-   void *map = batch->cmdbuf.map_next;
-   batch->cmdbuf.map_next += bytes;
-   return map;
-}
-
 #define _iris_pack_command(batch, cmd, dst, name)                \
   for (struct cmd name = { __genxml_cmd_header(cmd) },           \
        *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \
@@ -99,11 +95,11 @@ get_command_space(struct iris_batch *batch, unsigned bytes)
          _dst = NULL)
 
 #define iris_emit_cmd(batch, cmd, name) \
-   _iris_pack_command(batch, cmd, get_command_space(batch, 4 * __genxml_cmd_length(cmd)), name)
+   _iris_pack_command(batch, cmd, iris_get_command_space(batch, 4 * __genxml_cmd_length(cmd)), name)
 
 #define iris_emit_merge(batch, dwords0, dwords1, num_dwords)   \
    do {                                                        \
-      uint32_t *dw = get_command_space(batch, 4 * num_dwords); \
+      uint32_t *dw = iris_get_command_space(batch, 4 * num_dwords); \
       for (uint32_t i = 0; i < num_dwords; i++)                \
          dw[i] = (dwords0)[i] | (dwords1)[i];                  \
       VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dwords));       \
@@ -280,43 +276,33 @@ ro_bo(struct iris_bo *bo, uint64_t offset)
    return (struct iris_address) { .bo = bo, .offset = offset };
 }
 
-/**
- * Returns the BO's address relative to the appropriate base address.
- *
- * All of our base addresses are programmed to the start of a 4GB region,
- * so simply returning the bottom 32 bits of the BO address will give us
- * the offset from whatever base address corresponds to that memory region.
- */
-static uint32_t
-bo_offset_from_base_address(struct pipe_resource *res)
+static void *
+upload_state(struct u_upload_mgr *uploader,
+             struct iris_state_ref *ref,
+             unsigned size,
+             unsigned alignment)
 {
-   struct iris_bo *bo = ((struct iris_resource *) res)->bo;
-
-   /* This only works for buffers in the memory zones corresponding to a
-    * base address - the top, unbounded memory zone doesn't have a base.
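A note on the macros reworked in the hunks above: `_iris_pack_command` abuses a `for` loop that runs exactly once, so the caller gets a named, scoped command struct to fill in, and `iris_emit_merge` ORs a CSO's pre-packed dwords with dynamically packed ones. A minimal stand-alone sketch of both tricks follows; all `toy_*` names are illustrative, not the real genxml API, and like the real macro it needs GCC/Clang statement expressions:

```c
#include <stdint.h>
#include <stdio.h>

struct toy_cmd { uint32_t opcode, value; };

static void toy_pack(uint32_t *dst, const struct toy_cmd *cmd)
{
   dst[0] = cmd->opcode;
   dst[1] = cmd->value;
}

/* The "loop" body executes exactly once; the increment expression then
 * packs the struct into the destination and terminates the loop. */
#define toy_emit(dst, name)                                   \
   for (struct toy_cmd name = { .opcode = 0x7800 },           \
        *_d = (void *)(dst);                                  \
        __builtin_expect(_d != NULL, 1);                      \
        ({ toy_pack((void *)_d, &name); _d = NULL; }))

int main(void)
{
   uint32_t dwords[2];
   toy_emit(dwords, cmd) {
      cmd.value = 42;            /* fields filled "inside" the command */
   }

   /* iris_emit_merge's job, in miniature: OR pre-packed CSO dwords
    * with dynamically packed ones to form the final packet. */
   uint32_t cso[2] = { 0x7800, 0 }, dyn[2] = { 0, 42 }, merged[2];
   for (int i = 0; i < 2; i++)
      merged[i] = cso[i] | dyn[i];

   printf("%x %u\n", dwords[0], merged[1]);
   return 0;
}
```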
- */ - assert(bo->gtt_offset < 3 * (1ull << 32)); - return bo->gtt_offset; + void *p = NULL; + u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p); + return p; } static uint32_t * stream_state(struct iris_batch *batch, struct u_upload_mgr *uploader, + struct pipe_resource **out_res, unsigned size, unsigned alignment, uint32_t *out_offset) { - struct pipe_resource *res = NULL; void *ptr = NULL; - u_upload_alloc(uploader, 0, size, alignment, out_offset, &res, &ptr); + u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr); - struct iris_bo *bo = ((struct iris_resource *) res)->bo; + struct iris_bo *bo = iris_resource_bo(*out_res); iris_use_pinned_bo(batch, bo, false); - *out_offset += bo_offset_from_base_address(res); - - pipe_resource_reference(&res, NULL); + *out_offset += iris_bo_offset_from_base_address(bo); return ptr; } @@ -324,12 +310,14 @@ stream_state(struct iris_batch *batch, static uint32_t emit_state(struct iris_batch *batch, struct u_upload_mgr *uploader, + struct pipe_resource **out_res, const void *data, unsigned size, unsigned alignment) { unsigned offset = 0; - uint32_t *map = stream_state(batch, uploader, size, alignment, &offset); + uint32_t *map = + stream_state(batch, uploader, out_res, size, alignment, &offset); if (map) memcpy(map, data, size); @@ -337,12 +325,17 @@ emit_state(struct iris_batch *batch, return offset; } +#define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x)) +#define cso_changed_memcmp(x) \ + (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0) + static void iris_init_render_context(struct iris_screen *screen, struct iris_batch *batch, + struct iris_vtable *vtbl, struct pipe_debug_callback *dbg) { - iris_init_batch(batch, screen, dbg, I915_EXEC_RENDER); + iris_init_batch(batch, screen, vtbl, dbg, I915_EXEC_RENDER); /* XXX: PIPE_CONTROLs */ @@ -369,8 +362,9 @@ iris_init_render_context(struct iris_screen *screen, sba.IndirectObjectBufferSizeModifyEnable = true; sba.InstructionBuffersizeModifyEnable = true; - sba.SurfaceStateBaseAddress = ro_bo(NULL, 1ull << 32); - sba.DynamicStateBaseAddress = ro_bo(NULL, 2 * (1ull << 32)); + sba.InstructionBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SHADER_START); + sba.SurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SURFACE_START); + sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START); sba.GeneralStateBufferSize = 0xfffff; sba.IndirectObjectBufferSize = 0xfffff; @@ -405,6 +399,33 @@ iris_init_render_context(struct iris_screen *screen, } } +struct iris_viewport_state { + uint32_t sf_cl_vp[GENX(SF_CLIP_VIEWPORT_length) * IRIS_MAX_VIEWPORTS]; +}; + +struct iris_vertex_buffer_state { + uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)]; + struct pipe_resource *resources[33]; + unsigned num_buffers; +}; + +struct iris_depth_buffer_state { + uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) + + GENX(3DSTATE_STENCIL_BUFFER_length) + + GENX(3DSTATE_HIER_DEPTH_BUFFER_length) + + GENX(3DSTATE_CLEAR_PARAMS_length)]; +}; + +/** + * State that can't be stored directly in iris_context because the data + * layout varies per generation. 
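The `IRIS_MEMZONE_*_START` constants substituted into `STATE_BASE_ADDRESS` above encode the scheme the deleted `bo_offset_from_base_address()` comment described: each base address is programmed to the start of its own 4GB region of the GPU virtual address space, so the 32-bit offset relative to the right base is just the low bits of the BO's address. A minimal sketch; the zone bound mirrors the removed assert, everything else is illustrative:

```c
#include <assert.h>
#include <stdint.h>

#define TOY_ZONE_SIZE (1ull << 32)   /* each base address fronts 4GB */

static uint32_t toy_offset_from_base_address(uint64_t gtt_offset)
{
   /* Only buffers in a bounded zone have a base address to be
    * relative to; the top, unbounded zone does not. */
   assert(gtt_offset < 3 * TOY_ZONE_SIZE);
   return (uint32_t) (gtt_offset & (TOY_ZONE_SIZE - 1));
}
```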
+ */ +struct iris_genx_state { + struct iris_viewport_state viewport; + struct iris_vertex_buffer_state vertex_buffers; + struct iris_depth_buffer_state depth_buffer; +}; + static void iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info) { @@ -421,7 +442,10 @@ iris_set_blend_color(struct pipe_context *ctx, } struct iris_blend_state { + /** Partial 3DSTATE_PS_BLEND */ uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)]; + + /** Partial BLEND_STATE */ uint32_t blend_state[GENX(BLEND_STATE_length) + BRW_MAX_DRAW_BUFFERS * GENX(BLEND_STATE_ENTRY_length)]; @@ -497,15 +521,19 @@ iris_bind_blend_state(struct pipe_context *ctx, void *state) { struct iris_context *ice = (struct iris_context *) ctx; ice->state.cso_blend = state; - ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT; - ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL; + ice->state.dirty |= IRIS_DIRTY_PS_BLEND; + ice->state.dirty |= IRIS_DIRTY_BLEND_STATE; } struct iris_depth_stencil_alpha_state { + /** Partial 3DSTATE_WM_DEPTH_STENCIL */ uint32_t wmds[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; + + /** Complete CC_VIEWPORT */ uint32_t cc_vp[GENX(CC_VIEWPORT_length)]; - struct pipe_alpha_state alpha; /* to BLEND_STATE, 3DSTATE_PS_BLEND */ + /** Outbound to BLEND_STATE, 3DSTATE_PS_BLEND, COLOR_CALC_STATE */ + struct pipe_alpha_state alpha; }; static void * @@ -564,9 +592,11 @@ iris_bind_zsa_state(struct pipe_context *ctx, void *state) struct iris_depth_stencil_alpha_state *new_cso = state; if (new_cso) { - if (!old_cso || old_cso->alpha.ref_value != new_cso->alpha.ref_value) { + if (cso_changed(alpha.ref_value)) ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE; - } + + if (cso_changed(alpha.enabled)) + ice->state.dirty |= IRIS_DIRTY_PS_BLEND | IRIS_DIRTY_BLEND_STATE; } ice->state.cso_zsa = new_cso; @@ -586,6 +616,8 @@ struct iris_rasterizer_state { bool light_twoside; /* for shader state */ bool rasterizer_discard; /* for 3DSTATE_STREAMOUT */ bool half_pixel_center; /* for 3DSTATE_MULTISAMPLE */ + bool line_stipple_enable; + bool poly_stipple_enable; enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */ uint16_t sprite_coord_enable; }; @@ -617,6 +649,8 @@ iris_create_rasterizer_state(struct pipe_context *ctx, cso->half_pixel_center = state->half_pixel_center; cso->sprite_coord_mode = state->sprite_coord_mode; cso->sprite_coord_enable = state->sprite_coord_enable; + cso->line_stipple_enable = state->line_stipple_enable; + cso->poly_stipple_enable = state->poly_stipple_enable; iris_pack_command(GENX(3DSTATE_SF), cso->sf, sf) { sf.StatisticsEnable = true; @@ -631,15 +665,14 @@ iris_create_rasterizer_state(struct pipe_context *ctx, sf.PointWidth = state->point_size; if (state->flatshade_first) { + sf.TriangleFanProvokingVertexSelect = 1; + } else { sf.TriangleStripListProvokingVertexSelect = 2; sf.TriangleFanProvokingVertexSelect = 2; sf.LineStripListProvokingVertexSelect = 1; - } else { - sf.TriangleFanProvokingVertexSelect = 1; } } - /* COMPLETE! */ iris_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) { rr.FrontWinding = state->front_ccw ? 
CounterClockwise : Clockwise; rr.CullMode = translate_cull_mode(state->cull_face); @@ -649,7 +682,7 @@ iris_create_rasterizer_state(struct pipe_context *ctx, rr.GlobalDepthOffsetEnableSolid = state->offset_tri; rr.GlobalDepthOffsetEnableWireframe = state->offset_line; rr.GlobalDepthOffsetEnablePoint = state->offset_point; - rr.GlobalDepthOffsetConstant = state->offset_units; + rr.GlobalDepthOffsetConstant = state->offset_units * 2; rr.GlobalDepthOffsetScale = state->offset_scale; rr.GlobalDepthOffsetClamp = state->offset_clamp; rr.SmoothPointEnable = state->point_smooth; @@ -677,11 +710,11 @@ iris_create_rasterizer_state(struct pipe_context *ctx, cl.MaximumPointWidth = 255.875; if (state->flatshade_first) { + cl.TriangleFanProvokingVertexSelect = 1; + } else { cl.TriangleStripListProvokingVertexSelect = 2; cl.TriangleFanProvokingVertexSelect = 2; cl.LineStripListProvokingVertexSelect = 1; - } else { - cl.TriangleFanProvokingVertexSelect = 1; } } @@ -718,19 +751,19 @@ iris_bind_rasterizer_state(struct pipe_context *ctx, void *state) if (new_cso) { /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */ - if (!old_cso || memcmp(old_cso->line_stipple, new_cso->line_stipple, - sizeof(old_cso->line_stipple)) != 0) { + if (cso_changed_memcmp(line_stipple)) ice->state.dirty |= IRIS_DIRTY_LINE_STIPPLE; - } - if (!old_cso || - old_cso->half_pixel_center != new_cso->half_pixel_center) { + if (cso_changed(half_pixel_center)) ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE; - } + + if (cso_changed(line_stipple_enable) || cso_changed(poly_stipple_enable)) + ice->state.dirty |= IRIS_DIRTY_WM; } ice->state.cso_rast = new_cso; ice->state.dirty |= IRIS_DIRTY_RASTER; + ice->state.dirty |= IRIS_DIRTY_CLIP; } static uint32_t @@ -743,8 +776,10 @@ translate_wrap(unsigned pipe_wrap) [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR, [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, - [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, // XXX: ??? - [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, // XXX: ??? + + /* These are unsupported. */ + [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, + [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, }; return map[pipe_wrap]; } @@ -778,7 +813,7 @@ struct iris_sampler_state { }; static void * -iris_create_sampler_state(struct pipe_context *pctx, +iris_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { struct iris_sampler_state *cso = CALLOC_STRUCT(iris_sampler_state); @@ -786,6 +821,8 @@ iris_create_sampler_state(struct pipe_context *pctx, if (!cso) return NULL; + memcpy(&cso->base, state, sizeof(*state)); + STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST); STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR); @@ -844,7 +881,7 @@ iris_create_sampler_state(struct pipe_context *pctx, samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod); samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15); - //samp.BorderColorPointer = <> + /* .BorderColorPointer is filled in by iris_bind_sampler_states. */ } return cso; @@ -860,38 +897,61 @@ iris_bind_sampler_states(struct pipe_context *ctx, gl_shader_stage stage = stage_from_pipe(p_stage); assert(start + count <= IRIS_MAX_TEXTURE_SAMPLERS); + ice->state.num_samplers[stage] = + MAX2(ice->state.num_samplers[stage], start + count); + + for (int i = 0; i < count; i++) { + ice->state.samplers[stage][start + i] = states[i]; + } - /* Assemble the SAMPLER_STATEs into a contiguous chunk of memory - * relative to Dynamic State Base Address. 
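The `cso_changed()`/`cso_changed_memcmp()` helpers introduced earlier in this patch, and used in `iris_bind_rasterizer_state` above, exist to keep expensive or non-pipelined packets from being re-emitted when nothing relevant changed. A compilable sketch of the pattern; the two macros are copied from the patch, the CSO type and dirty flags are toys:

```c
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

struct toy_raster_cso { bool half_pixel_center; uint8_t line_stipple[4]; };

#define TOY_DIRTY_MULTISAMPLE  (1u << 0)
#define TOY_DIRTY_LINE_STIPPLE (1u << 1)

#define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x))
#define cso_changed_memcmp(x) \
   (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0)

static uint32_t toy_bind_raster(const struct toy_raster_cso *old_cso,
                                const struct toy_raster_cso *new_cso)
{
   uint32_t dirty = 0;
   if (cso_changed(half_pixel_center))
      dirty |= TOY_DIRTY_MULTISAMPLE;
   if (cso_changed_memcmp(line_stipple))
      dirty |= TOY_DIRTY_LINE_STIPPLE;  /* non-pipelined, worth avoiding */
   return dirty;
}
```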
+ /* Assemble the SAMPLER_STATEs into a contiguous table that lives + * in the dynamic state memory zone, so we can point to it via the + * 3DSTATE_SAMPLER_STATE_POINTERS_* commands. */ - void *map = NULL; - u_upload_alloc(ice->state.dynamic_uploader, 0, - count * 4 * GENX(SAMPLER_STATE_length), 32, - &ice->state.sampler_table_offset[stage], - &ice->state.sampler_table_resource[stage], - &map); - if (!unlikely(map)) - return NULL; + void *map = upload_state(ice->state.dynamic_uploader, + &ice->state.sampler_table[stage], + count * 4 * GENX(SAMPLER_STATE_length), 32); + if (unlikely(!map)) + return; + + struct pipe_resource *res = ice->state.sampler_table[stage].res; + ice->state.sampler_table[stage].offset += + iris_bo_offset_from_base_address(iris_resource_bo(res)); - ice->state.sampler_table_offset[stage] += - bo_offset_from_base_address(ice->state.sampler_table_resource[stage]); + /* Make sure all land in the same BO */ + iris_border_color_pool_reserve(ice, IRIS_MAX_TEXTURE_SAMPLERS); for (int i = 0; i < count; i++) { - struct iris_sampler_state *state = states[i]; + struct iris_sampler_state *state = ice->state.samplers[stage][i]; /* Save a pointer to the iris_sampler_state, a few fields need * to inform draw-time decisions. */ ice->state.samplers[stage][start + i] = state; - if (state) + if (!state) { + memset(map, 0, 4 * GENX(SAMPLER_STATE_length)); + } else if (!state->needs_border_color) { memcpy(map, state->sampler_state, 4 * GENX(SAMPLER_STATE_length)); + } else { + ice->state.need_border_colors = true; + + /* Stream out the border color and merge the pointer. */ + uint32_t offset = + iris_upload_border_color(ice, &state->base.border_color); + + uint32_t dynamic[GENX(SAMPLER_STATE_length)]; + iris_pack_state(GENX(SAMPLER_STATE), dynamic, dyns) { + dyns.BorderColorPointer = offset; + } + + for (uint32_t j = 0; j < GENX(SAMPLER_STATE_length); j++) + ((uint32_t *) map)[j] = state->sampler_state[j] | dynamic[j]; + } map += GENX(SAMPLER_STATE_length); } - ice->state.num_samplers[stage] = count; - ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage; } @@ -900,10 +960,7 @@ struct iris_sampler_view { struct isl_view view; /** The resource (BO) holding our SURFACE_STATE. 
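The border-color path above packs a SAMPLER_STATE with only `BorderColorPointer` set and ORs it into the dwords packed at create time. This works because genxml packing writes zero bits for unset fields, so two packed copies with disjoint fields combine cleanly. A sketch with an illustrative dword count:

```c
#include <stdint.h>

#define TOY_SAMPLER_DWORDS 4

static void toy_merge_border_color(const uint32_t packed[TOY_SAMPLER_DWORDS],
                                   const uint32_t dynamic[TOY_SAMPLER_DWORDS],
                                   uint32_t out[TOY_SAMPLER_DWORDS])
{
   /* "packed" holds the create-time fields, "dynamic" holds only the
    * border color pointer; their set bits never overlap. */
   for (int i = 0; i < TOY_SAMPLER_DWORDS; i++)
      out[i] = packed[i] | dynamic[i];
}
```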
*/ - struct pipe_resource *surface_state_resource; - unsigned surface_state_offset; - - //uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)]; + struct iris_state_ref surface_state; }; /** @@ -959,20 +1016,17 @@ iris_create_sampler_view(struct pipe_context *ctx, .b = pipe_swizzle_to_isl_channel(tmpl->swizzle_b), .a = pipe_swizzle_to_isl_channel(tmpl->swizzle_a), }, - .usage = ISL_SURF_USAGE_TEXTURE_BIT, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + (itex->surf.usage & ISL_SURF_USAGE_CUBE_BIT), }; - void *map = NULL; - u_upload_alloc(ice->state.surface_uploader, 0, - 4 * GENX(RENDER_SURFACE_STATE_length), 64, - &isv->surface_state_offset, - &isv->surface_state_resource, - &map); + void *map = upload_state(ice->state.surface_uploader, &isv->surface_state, + 4 * GENX(RENDER_SURFACE_STATE_length), 64); if (!unlikely(map)) return NULL; - isv->surface_state_offset += - bo_offset_from_base_address(isv->surface_state_resource); + struct iris_bo *state_bo = iris_resource_bo(isv->surface_state.res); + isv->surface_state.offset += iris_bo_offset_from_base_address(state_bo); isl_surf_fill_state(&screen->isl_dev, map, .surf = &itex->surf, .view = &isv->view, @@ -984,17 +1038,6 @@ iris_create_sampler_view(struct pipe_context *ctx, return &isv->pipe; } -struct iris_surface { - struct pipe_surface pipe; - struct isl_view view; - - /** The resource (BO) holding our SURFACE_STATE. */ - struct pipe_resource *surface_state_resource; - unsigned surface_state_offset; - - // uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)]; -}; - static struct pipe_surface * iris_create_surface(struct pipe_context *ctx, struct pipe_resource *tex, @@ -1004,7 +1047,7 @@ iris_create_surface(struct pipe_context *ctx, struct iris_screen *screen = (struct iris_screen *)ctx->screen; struct iris_surface *surf = calloc(1, sizeof(struct iris_surface)); struct pipe_surface *psurf = &surf->pipe; - struct iris_resource *itex = (struct iris_resource *) tex; + struct iris_resource *res = (struct iris_resource *) tex; if (!surf) return NULL; @@ -1020,6 +1063,14 @@ iris_create_surface(struct pipe_context *ctx, psurf->u.tex.last_layer = tmpl->u.tex.last_layer; psurf->u.tex.level = tmpl->u.tex.level; + unsigned usage = 0; + if (tmpl->writable) + usage = ISL_SURF_USAGE_STORAGE_BIT; + else if (util_format_is_depth_or_stencil(tmpl->format)) + usage = ISL_SURF_USAGE_DEPTH_BIT; + else + usage = ISL_SURF_USAGE_RENDER_TARGET_BIT; + surf->view = (struct isl_view) { .format = iris_isl_format_for_pipe_format(tmpl->format), .base_level = tmpl->u.tex.level, @@ -1027,26 +1078,27 @@ iris_create_surface(struct pipe_context *ctx, .base_array_layer = tmpl->u.tex.first_layer, .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1, .swizzle = ISL_SWIZZLE_IDENTITY, - // XXX: DEPTH_BIt, STENCIL_BIT...CUBE_BIT? Other bits?! 
- .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT, + .usage = usage, }; - void *map = NULL; - u_upload_alloc(ice->state.surface_uploader, 0, - 4 * GENX(RENDER_SURFACE_STATE_length), 64, - &surf->surface_state_offset, - &surf->surface_state_resource, - &map); + /* Bail early for depth/stencil */ + if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT | + ISL_SURF_USAGE_STENCIL_BIT)) + return psurf; + + + void *map = upload_state(ice->state.surface_uploader, &surf->surface_state, + 4 * GENX(RENDER_SURFACE_STATE_length), 64); if (!unlikely(map)) return NULL; - surf->surface_state_offset += - bo_offset_from_base_address(surf->surface_state_resource); + struct iris_bo *state_bo = iris_resource_bo(surf->surface_state.res); + surf->surface_state.offset += iris_bo_offset_from_base_address(state_bo); isl_surf_fill_state(&screen->isl_dev, map, - .surf = &itex->surf, .view = &surf->view, + .surf = &res->surf, .view = &surf->view, .mocs = MOCS_WB, - .address = itex->bo->gtt_offset); + .address = res->bo->gtt_offset); // .aux_surf = // .clear_color = clear_color, @@ -1063,14 +1115,18 @@ iris_set_sampler_views(struct pipe_context *ctx, gl_shader_stage stage = stage_from_pipe(p_stage); unsigned i; - for (i = 0; i < count; i++) - pipe_sampler_view_reference(&ice->state.textures[stage][i], views[i]); - for (; i < ice->state.num_textures[stage]; i++) - pipe_sampler_view_reference(&ice->state.textures[stage][i], NULL); + for (i = 0; i < count; i++) { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ice->state.textures[stage][i], views[i]); + } + for (; i < ice->state.num_textures[stage]; i++) { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ice->state.textures[stage][i], NULL); + } ice->state.num_textures[stage] = count; - // XXX: ice->state.dirty |= (IRIS_DIRTY_BINDING_TABLE_VS << stage); + ice->state.dirty |= (IRIS_DIRTY_BINDINGS_VS << stage); } static void @@ -1105,8 +1161,6 @@ iris_set_scissor_states(struct pipe_context *ctx, { struct iris_context *ice = (struct iris_context *) ctx; - ice->state.num_scissors = num_scissors; - for (unsigned i = 0; i < num_scissors; i++) { ice->state.scissors[start_slot + i] = states[i]; } @@ -1123,11 +1177,6 @@ iris_set_stencil_ref(struct pipe_context *ctx, ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL; } - -struct iris_viewport_state { - uint32_t sf_cl_vp[GENX(SF_CLIP_VIEWPORT_length) * IRIS_MAX_VIEWPORTS]; -}; - static float viewport_extent(const struct pipe_viewport_state *state, int axis, float sign) { @@ -1219,23 +1268,23 @@ calculate_guardband_size(uint32_t fb_width, uint32_t fb_height, static void iris_set_viewport_states(struct pipe_context *ctx, unsigned start_slot, - unsigned num_viewports, - const struct pipe_viewport_state *state) + unsigned count, + const struct pipe_viewport_state *states) { struct iris_context *ice = (struct iris_context *) ctx; - struct iris_viewport_state *cso = - malloc(sizeof(struct iris_viewport_state)); + struct iris_viewport_state *cso = &ice->state.genx->viewport; uint32_t *vp_map = &cso->sf_cl_vp[start_slot]; // XXX: sf_cl_vp is only big enough for one slot, we don't iterate right - for (unsigned i = 0; i < num_viewports; i++) { + for (unsigned i = 0; i < count; i++) { + const struct pipe_viewport_state *state = &states[start_slot + i]; iris_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) { - vp.ViewportMatrixElementm00 = state[i].scale[0]; - vp.ViewportMatrixElementm11 = state[i].scale[1]; - vp.ViewportMatrixElementm22 = state[i].scale[2]; - vp.ViewportMatrixElementm30 = state[i].translate[0]; - 
vp.ViewportMatrixElementm31 = state[i].translate[1]; - vp.ViewportMatrixElementm32 = state[i].translate[2]; + vp.ViewportMatrixElementm00 = state->scale[0]; + vp.ViewportMatrixElementm11 = state->scale[1]; + vp.ViewportMatrixElementm22 = state->scale[2]; + vp.ViewportMatrixElementm30 = state->translate[0]; + vp.ViewportMatrixElementm31 = state->translate[1]; + vp.ViewportMatrixElementm32 = state->translate[2]; /* XXX: in i965 this is computed based on the drawbuffer size, * but we don't have that here... */ @@ -1243,32 +1292,25 @@ iris_set_viewport_states(struct pipe_context *ctx, vp.XMaxClipGuardband = 1.0; vp.YMinClipGuardband = -1.0; vp.YMaxClipGuardband = 1.0; - vp.XMinViewPort = viewport_extent(&state[i], 0, -1.0f); - vp.XMaxViewPort = viewport_extent(&state[i], 0, 1.0f) - 1; - vp.YMinViewPort = viewport_extent(&state[i], 1, -1.0f); - vp.YMaxViewPort = viewport_extent(&state[i], 1, 1.0f) - 1; + vp.XMinViewPort = viewport_extent(state, 0, -1.0f); + vp.XMaxViewPort = viewport_extent(state, 0, 1.0f) - 1; + vp.YMinViewPort = viewport_extent(state, 1, -1.0f); + vp.YMaxViewPort = viewport_extent(state, 1, 1.0f) - 1; } vp_map += GENX(SF_CLIP_VIEWPORT_length); } - ice->state.cso_vp = cso; - ice->state.num_viewports = num_viewports; ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT; } -struct iris_depth_state -{ - uint32_t depth_buffer[GENX(3DSTATE_DEPTH_BUFFER_length)]; - uint32_t hier_depth_buffer[GENX(3DSTATE_HIER_DEPTH_BUFFER_length)]; - uint32_t stencil_buffer[GENX(3DSTATE_STENCIL_BUFFER_length)]; -}; - static void iris_set_framebuffer_state(struct pipe_context *ctx, const struct pipe_framebuffer_state *state) { struct iris_context *ice = (struct iris_context *) ctx; + struct iris_screen *screen = (struct iris_screen *)ctx->screen; + struct isl_device *isl_dev = &screen->isl_dev; struct pipe_framebuffer_state *cso = &ice->state.framebuffer; if (cso->samples != state->samples) { @@ -1279,46 +1321,127 @@ iris_set_framebuffer_state(struct pipe_context *ctx, ice->state.dirty |= IRIS_DIRTY_BLEND_STATE; } - cso->width = state->width; - cso->height = state->height; - cso->layers = state->layers; - cso->samples = state->samples; + if ((cso->layers == 0) == (state->layers == 0)) { + ice->state.dirty |= IRIS_DIRTY_CLIP; + } - unsigned i; - for (i = 0; i < state->nr_cbufs; i++) - pipe_surface_reference(&cso->cbufs[i], state->cbufs[i]); - for (; i < cso->nr_cbufs; i++) - pipe_surface_reference(&cso->cbufs[i], NULL); + util_copy_framebuffer_state(cso, state); - cso->nr_cbufs = state->nr_cbufs; + struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer; - pipe_surface_reference(&cso->zsbuf, state->zsbuf); + struct isl_view view = { + .base_level = 0, + .levels = 1, + .base_array_layer = 0, + .array_len = 1, + .swizzle = ISL_SWIZZLE_IDENTITY, + }; struct isl_depth_stencil_hiz_emit_info info = { + .view = &view, .mocs = MOCS_WB, }; - // XXX: depth buffers + struct iris_resource *zres = + (void *) (cso->zsbuf ? 
cso->zsbuf->texture : NULL);
+
+   if (zres) {
+      view.usage |= ISL_SURF_USAGE_DEPTH_BIT;
+
+      info.depth_surf = &zres->surf;
+      info.depth_address = zres->bo->gtt_offset;
+
+      view.format = zres->surf.format;
+
+      view.base_level = cso->zsbuf->u.tex.level;
+      view.base_array_layer = cso->zsbuf->u.tex.first_layer;
+      view.array_len =
+         cso->zsbuf->u.tex.last_layer - cso->zsbuf->u.tex.first_layer + 1;
+
+      info.hiz_usage = ISL_AUX_USAGE_NONE;
+   }
+
+#if 0
+   if (stencil_mt) {
+      view.usage |= ISL_SURF_USAGE_STENCIL_BIT;
+      info.stencil_surf = &stencil_mt->surf;
+
+      if (!depth_mt) {
+         view.base_level = stencil_irb->mt_level - stencil_irb->mt->first_level;
+         view.base_array_layer = stencil_irb->mt_layer;
+         view.array_len = MAX2(stencil_irb->layer_count, 1);
+         view.format = stencil_mt->surf.format;
+      }
+
+      uint32_t stencil_offset = 0;
+      info.stencil_address = stencil_mt->bo->gtt_offset + stencil_mt->offset;
+   }
+#endif
+
+   isl_emit_depth_stencil_hiz_s(isl_dev, cso_z->packets, &info);
+
+   ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER;
+
+   /* Render target change */
+   ice->state.dirty |= IRIS_DIRTY_BINDINGS_FS;
 }
 
 static void
 iris_set_constant_buffer(struct pipe_context *ctx,
                          enum pipe_shader_type p_stage, unsigned index,
-                         const struct pipe_constant_buffer *cb)
+                         const struct pipe_constant_buffer *input)
 {
    struct iris_context *ice = (struct iris_context *) ctx;
+   struct iris_screen *screen = (struct iris_screen *)ctx->screen;
    gl_shader_stage stage = stage_from_pipe(p_stage);
+   struct iris_shader_state *shs = &ice->shaders.state[stage];
+   struct iris_const_buffer *cbuf = &shs->constbuf[index];
+
+   if (input && (input->buffer || input->user_buffer)) {
+      if (input->user_buffer) {
+         u_upload_data(ctx->const_uploader, 0, input->buffer_size, 32,
+                       input->user_buffer, &cbuf->data.offset,
+                       &cbuf->data.res);
+      } else {
+         pipe_resource_reference(&cbuf->data.res, input->buffer);
+      }
+
+      // XXX: these are not retained forever, use a separate uploader?
+      void *map =
+         upload_state(ice->state.surface_uploader, &cbuf->surface_state,
+                      4 * GENX(RENDER_SURFACE_STATE_length), 64);
+      if (!unlikely(map)) {
+         pipe_resource_reference(&cbuf->data.res, NULL);
+         return;
+      }
+
+      struct iris_resource *res = (void *) cbuf->data.res;
+      struct iris_bo *surf_bo = iris_resource_bo(cbuf->surface_state.res);
+      cbuf->surface_state.offset += iris_bo_offset_from_base_address(surf_bo);
+
+      isl_buffer_fill_state(&screen->isl_dev, map,
+                            .address = res->bo->gtt_offset + cbuf->data.offset,
+                            .size_B = input->buffer_size,
+                            .format = ISL_FORMAT_R32G32B32A32_FLOAT,
+                            .stride_B = 1,
+                            .mocs = MOCS_WB);
+   } else {
+      pipe_resource_reference(&cbuf->data.res, NULL);
+      pipe_resource_reference(&cbuf->surface_state.res, NULL);
+   }
 
-   util_copy_constant_buffer(&ice->shaders.state[stage].constbuf[index], cb);
+   ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << stage;
+   // XXX: maybe not necessary all the time...?
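For context on the `u_upload_data()`/`upload_state()` calls above: user constant buffers are small CPU-side arrays, so they are copied into a long-lived, GPU-visible upload buffer and referred to by offset from then on. A toy arena showing the idea; this stands in for Gallium's `u_upload_mgr`, it is not its implementation:

```c
#include <assert.h>
#include <stdint.h>
#include <string.h>

struct toy_arena { uint8_t *base; size_t used, size; };

static uint32_t toy_arena_upload(struct toy_arena *a, const void *data,
                                 size_t size, size_t align /* power of 2 */)
{
   a->used = (a->used + align - 1) & ~(align - 1);
   assert(a->used + size <= a->size);
   uint32_t offset = (uint32_t) a->used;
   memcpy(a->base + offset, data, size);
   a->used += size;
   return offset;  /* becomes part of the SURFACE_STATE address */
}
```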
+ ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage; } static void iris_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state) { - struct iris_surface *isv = (void *) state; + struct iris_sampler_view *isv = (void *) state; pipe_resource_reference(&state->texture, NULL); - pipe_resource_reference(&isv->surface_state_resource, NULL); + pipe_resource_reference(&isv->surface_state.res, NULL); free(isv); } @@ -1328,7 +1451,7 @@ iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf) { struct iris_surface *surf = (void *) p_surf; pipe_resource_reference(&p_surf->texture, NULL); - pipe_resource_reference(&surf->surface_state_resource, NULL); + pipe_resource_reference(&surf->surface_state.res, NULL); free(surf); } @@ -1338,20 +1461,11 @@ iris_delete_state(struct pipe_context *ctx, void *state) free(state); } -struct iris_vertex_buffer_state { - uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)]; - struct iris_bo *bos[33]; - unsigned num_buffers; -}; - static void iris_free_vertex_buffers(struct iris_vertex_buffer_state *cso) { - if (cso) { - for (unsigned i = 0; i < cso->num_buffers; i++) - iris_bo_unreference(cso->bos[i]); - free(cso); - } + for (unsigned i = 0; i < cso->num_buffers; i++) + pipe_resource_reference(&cso->resources[i], NULL); } static void @@ -1360,32 +1474,34 @@ iris_set_vertex_buffers(struct pipe_context *ctx, const struct pipe_vertex_buffer *buffers) { struct iris_context *ice = (struct iris_context *) ctx; - struct iris_vertex_buffer_state *cso = - malloc(sizeof(struct iris_vertex_buffer_state)); + struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers; - /* If there are no buffers, do nothing. We can leave the stale - * 3DSTATE_VERTEX_BUFFERS in place - as long as there are no vertex - * elements that point to them, it should be fine. 
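`pipe_resource_reference(&ptr, NULL)`, used throughout the destroy hooks above, is Gallium's combined reference/unreference helper. A simplified model of its semantics; the real implementation uses atomic reference counts and a screen destroy callback:

```c
#include <stdlib.h>

struct toy_res { int refcount; };

static void toy_reference(struct toy_res **dst, struct toy_res *src)
{
   if (src)
      src->refcount++;                     /* take the new reference */
   if (*dst && --(*dst)->refcount == 0)    /* drop the old one */
      free(*dst);
   *dst = src;
}
/* toy_reference(&ptr, NULL) is therefore "unreference and clear". */
```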
- */ - if (!buffers) - return; + iris_free_vertex_buffers(&ice->state.genx->vertex_buffers); - iris_free_vertex_buffers(ice->state.cso_vertex_buffers); + if (!buffers) + count = 0; cso->num_buffers = count; iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), cso->vertex_buffers, vb) { - vb.DWordLength = 4 * cso->num_buffers - 1; + vb.DWordLength = 4 * MAX2(cso->num_buffers, 1) - 1; } uint32_t *vb_pack_dest = &cso->vertex_buffers[1]; + if (count == 0) { + iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) { + vb.VertexBufferIndex = start_slot; + vb.NullVertexBuffer = true; + vb.AddressModifyEnable = true; + } + } + for (unsigned i = 0; i < count; i++) { assert(!buffers[i].is_user_buffer); - struct iris_resource *res = (void *) buffers[i].buffer.resource; - iris_bo_reference(res->bo); - cso->bos[i] = res->bo; + pipe_resource_reference(&cso->resources[i], buffers[i].buffer.resource); + struct iris_resource *res = (void *) cso->resources[i]; iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) { vb.VertexBufferIndex = start_slot + i; @@ -1400,13 +1516,12 @@ iris_set_vertex_buffers(struct pipe_context *ctx, vb_pack_dest += GENX(VERTEX_BUFFER_STATE_length); } - ice->state.cso_vertex_buffers = cso; ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS; } struct iris_vertex_element_state { uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)]; - uint32_t vf_instancing[GENX(3DSTATE_VF_INSTANCING_length)][33]; + uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)]; unsigned count; }; @@ -1418,20 +1533,37 @@ iris_create_vertex_elements(struct pipe_context *ctx, struct iris_vertex_element_state *cso = malloc(sizeof(struct iris_vertex_element_state)); - cso->count = count; + cso->count = MAX2(count, 1); /* TODO: * - create edge flag one * - create SGV ones * - if those are necessary, use count + 1/2/3... 
OR in the length */ - iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve); + iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) { + ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * cso->count - 2; + } uint32_t *ve_pack_dest = &cso->vertex_elements[1]; + uint32_t *vfi_pack_dest = cso->vf_instancing; + + if (count == 0) { + iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { + ve.Valid = true; + ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; + ve.Component0Control = VFCOMP_STORE_0; + ve.Component1Control = VFCOMP_STORE_0; + ve.Component2Control = VFCOMP_STORE_0; + ve.Component3Control = VFCOMP_STORE_1_FP; + } + + iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { + } + } for (int i = 0; i < count; i++) { enum isl_format isl_format = - iris_isl_format_for_pipe_format(state[i].src_format); + iris_isl_format_for_pipe_format(state[i].src_format); unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC, VFCOMP_STORE_SRC, VFCOMP_STORE_SRC }; @@ -1455,13 +1587,14 @@ iris_create_vertex_elements(struct pipe_context *ctx, ve.Component3Control = comp[3]; } - iris_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->vf_instancing[i], vi) { + iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { vi.VertexElementIndex = i; vi.InstancingEnable = state[i].instance_divisor > 0; vi.InstanceDataStepRate = state[i].instance_divisor; } ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); + vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length); } return cso; @@ -1517,20 +1650,190 @@ iris_set_stream_output_targets(struct pipe_context *ctx, { } -#if 0 static void -iris_compute_sbe(const struct iris_context *ice, - const struct brw_wm_prog_data *wm_prog_data) +iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots, + const struct brw_vue_map *last_vue_map, + bool two_sided_color, + unsigned *out_offset, + unsigned *out_length) +{ + /* The compiler computes the first URB slot without considering COL/BFC + * swizzling (because it doesn't know whether it's enabled), so we need + * to do that here too. This may result in a smaller offset, which + * should be safe. + */ + const unsigned first_slot = + brw_compute_first_urb_slot_required(fs_input_slots, last_vue_map); + + /* This becomes the URB read offset (counted in pairs of slots). */ + assert(first_slot % 2 == 0); + *out_offset = first_slot / 2; + + /* We need to adjust the inputs read to account for front/back color + * swizzling, as it can make the URB length longer. + */ + for (int c = 0; c <= 1; c++) { + if (fs_input_slots & (VARYING_BIT_COL0 << c)) { + /* If two sided color is enabled, the fragment shader's gl_Color + * (COL0) input comes from either the gl_FrontColor (COL0) or + * gl_BackColor (BFC0) input varyings. Mark BFC as used, too. + */ + if (two_sided_color) + fs_input_slots |= (VARYING_BIT_BFC0 << c); + + /* If front color isn't written, we opt to give them back color + * instead of an undefined value. Switch from COL to BFC. + */ + if (last_vue_map->varying_to_slot[VARYING_SLOT_COL0 + c] == -1) { + fs_input_slots &= ~(VARYING_BIT_COL0 << c); + fs_input_slots |= (VARYING_BIT_BFC0 << c); + } + } + } + + /* Compute the minimum URB Read Length necessary for the FS inputs. + * + * From the Sandy Bridge PRM, Volume 2, Part 1, documentation for + * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length": + * + * "This field should be set to the minimum length required to read the + * maximum source attribute. 
The maximum source attribute is indicated + * by the maximum value of the enabled Attribute # Source Attribute if + * Attribute Swizzle Enable is set, Number of Output Attributes-1 if + * enable is not set. + * read_length = ceiling((max_source_attr + 1) / 2) + * + * [errata] Corruption/Hang possible if length programmed larger than + * recommended" + * + * Similar text exists for Ivy Bridge. + * + * We find the last URB slot that's actually read by the FS. + */ + unsigned last_read_slot = last_vue_map->num_slots - 1; + while (last_read_slot > first_slot && !(fs_input_slots & + (1ull << last_vue_map->slot_to_varying[last_read_slot]))) + --last_read_slot; + + /* The URB read length is the difference of the two, counted in pairs. */ + *out_length = DIV_ROUND_UP(last_read_slot - first_slot + 1, 2); +} + +static void +iris_emit_sbe_swiz(struct iris_batch *batch, + const struct iris_context *ice, + unsigned urb_read_offset) +{ + struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = {}; + const struct brw_wm_prog_data *wm_prog_data = (void *) + ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; + const struct brw_vue_map *vue_map = ice->shaders.last_vue_map; + const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; + + /* XXX: this should be generated when putting programs in place */ + + // XXX: raster->sprite_coord_enable + + for (int fs_attr = 0; fs_attr < VARYING_SLOT_MAX; fs_attr++) { + const int input_index = wm_prog_data->urb_setup[fs_attr]; + if (input_index < 0 || input_index >= 16) + continue; + + struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr = + &attr_overrides[input_index]; + + /* Viewport and Layer are stored in the VUE header. We need to override + * them to zero if earlier stages didn't write them, as GL requires that + * they read back as zero when not explicitly set. + */ + switch (fs_attr) { + case VARYING_SLOT_VIEWPORT: + case VARYING_SLOT_LAYER: + attr->ComponentOverrideX = true; + attr->ComponentOverrideW = true; + attr->ConstantSource = CONST_0000; + + if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) + attr->ComponentOverrideY = true; + if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) + attr->ComponentOverrideZ = true; + continue; + + case VARYING_SLOT_PRIMITIVE_ID: + attr->ComponentOverrideX = true; + attr->ComponentOverrideY = true; + attr->ComponentOverrideZ = true; + attr->ComponentOverrideW = true; + attr->ConstantSource = PRIM_ID; + continue; + + default: + break; + } + + int slot = vue_map->varying_to_slot[fs_attr]; + + /* If there was only a back color written but not front, use back + * as the color instead of undefined. + */ + if (slot == -1 && fs_attr == VARYING_SLOT_COL0) + slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0]; + if (slot == -1 && fs_attr == VARYING_SLOT_COL1) + slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1]; + + /* Not written by the previous stage - undefined. */ + if (slot == -1) { + attr->ComponentOverrideX = true; + attr->ComponentOverrideY = true; + attr->ComponentOverrideZ = true; + attr->ComponentOverrideW = true; + attr->ConstantSource = CONST_0001_FLOAT; + continue; + } + + /* Compute the location of the attribute relative to the read offset, + * which is counted in 256-bit increments (two 128-bit VUE slots). 
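A worked example of the read-interval arithmetic above, under the same assumption the assert enforces (the first slot is even). Slot numbers here are illustrative:

```c
#include <assert.h>

static void toy_read_interval(unsigned first_slot, unsigned last_read_slot,
                              unsigned *out_offset, unsigned *out_length)
{
   assert(first_slot % 2 == 0);
   *out_offset = first_slot / 2;                         /* slot pairs */
   *out_length = (last_read_slot - first_slot + 2) / 2;  /* DIV_ROUND_UP */
}
/* If the FS first reads slot 4 and last reads slot 9,
 * toy_read_interval(4, 9, ...) yields offset 2 and length 3 pairs. */
```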
+ */ + const int source_attr = slot - 2 * urb_read_offset; + assert(source_attr >= 0 && source_attr <= 32); + attr->SourceAttribute = source_attr; + + /* If we are doing two-sided color, and the VUE slot following this one + * represents a back-facing color, then we need to instruct the SF unit + * to do back-facing swizzling. + */ + if (cso_rast->light_twoside && + ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 && + vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) || + (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 && + vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1))) + attr->SwizzleSelect = INPUTATTR_FACING; + } + + iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) { + for (int i = 0; i < 16; i++) + sbes.Attribute[i] = attr_overrides[i]; + } +} + +static void +iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice) { - uint32_t sbe_map[GENX(3DSTATE_SBE_length)]; - struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; + const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; + const struct brw_wm_prog_data *wm_prog_data = (void *) + ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; + struct pipe_shader_state *p_fs = + (void *) ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; + assert(p_fs->type == PIPE_SHADER_IR_NIR); + nir_shader *fs_nir = p_fs->ir.nir; unsigned urb_read_offset, urb_read_length; - brw_compute_sbe_urb_slot_interval(fp->info.inputs_read, - ice->shaders.last_vue_map, - &urb_read_offset, &urb_read_length); + iris_compute_sbe_urb_read_interval(fs_nir->info.inputs_read, + ice->shaders.last_vue_map, + cso_rast->light_twoside, + &urb_read_offset, &urb_read_length); - iris_pack_command(GENX(3DSTATE_SBE), sbe_map, sbe) { + iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) { sbe.AttributeSwizzleEnable = true; sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; sbe.PointSpriteTextureCoordinateOrigin = cso_rast->sprite_coord_mode; @@ -1540,23 +1843,34 @@ iris_compute_sbe(const struct iris_context *ice, sbe.ForceVertexURBEntryReadLength = true; sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs; - for (int i = 0; i < urb_read_length * 2; i++) { + for (int i = 0; i < 32; i++) { sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW; } } + + iris_emit_sbe_swiz(batch, ice, urb_read_offset); } -#endif static void iris_bind_compute_state(struct pipe_context *ctx, void *state) { } +static void +iris_populate_sampler_key(const struct iris_context *ice, + struct brw_sampler_prog_key_data *key) +{ + for (int i = 0; i < MAX_SAMPLERS; i++) { + key->swizzles[i] = 0x688; /* XYZW */ + } +} + static void iris_populate_vs_key(const struct iris_context *ice, struct brw_vs_prog_key *key) { memset(key, 0, sizeof(*key)); + iris_populate_sampler_key(ice, &key->tex); } static void @@ -1564,6 +1878,7 @@ iris_populate_tcs_key(const struct iris_context *ice, struct brw_tcs_prog_key *key) { memset(key, 0, sizeof(*key)); + iris_populate_sampler_key(ice, &key->tex); } static void @@ -1571,6 +1886,7 @@ iris_populate_tes_key(const struct iris_context *ice, struct brw_tes_prog_key *key) { memset(key, 0, sizeof(*key)); + iris_populate_sampler_key(ice, &key->tex); } static void @@ -1578,6 +1894,7 @@ iris_populate_gs_key(const struct iris_context *ice, struct brw_gs_prog_key *key) { memset(key, 0, sizeof(*key)); + iris_populate_sampler_key(ice, &key->tex); } static void @@ -1585,6 +1902,7 @@ iris_populate_fs_key(const struct iris_context *ice, struct brw_wm_prog_key *key) { memset(key, 0, sizeof(*key)); + 
iris_populate_sampler_key(ice, &key->tex); /* XXX: dirty flags? */ const struct pipe_framebuffer_state *fb = &ice->state.framebuffer; @@ -1599,6 +1917,9 @@ iris_populate_fs_key(const struct iris_context *ice, key->replicate_alpha = fb->nr_cbufs > 1 && (zsa->alpha.enabled || blend->alpha_to_coverage); + /* XXX: only bother if COL0/1 are read */ + key->flat_shade = rast->flatshade; + // key->force_dual_color_blend for unigine #if 0 if (cso_rast->multisample) { @@ -1614,16 +1935,20 @@ iris_populate_fs_key(const struct iris_context *ice, key->coherent_fb_fetch = true; } - //pkt.SamplerCount = \ - //DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ - //pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \ - //ffs(stage_state->per_thread_scratch) - 11; \ +#if 0 + // XXX: these need to go in INIT_THREAD_DISPATCH_FIELDS + pkt.SamplerCount = \ + DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ + pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \ + ffs(stage_state->per_thread_scratch) - 11; \ + +#endif static uint64_t KSP(const struct iris_compiled_shader *shader) { - struct iris_resource *res = (void *) shader->buffer; - return res->bo->gtt_offset + shader->offset; + struct iris_resource *res = (void *) shader->assembly.res; + return iris_bo_offset_from_base_address(res->bo) + shader->assembly.offset; } #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ @@ -1640,8 +1965,8 @@ KSP(const struct iris_compiled_shader *shader) pkt.Enable = true; static void -iris_set_vs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) +iris_store_vs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; @@ -1656,8 +1981,8 @@ iris_set_vs_state(const struct gen_device_info *devinfo, } static void -iris_set_tcs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) +iris_store_tcs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; @@ -1673,8 +1998,8 @@ iris_set_tcs_state(const struct gen_device_info *devinfo, } static void -iris_set_tes_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) +iris_store_tes_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; @@ -1707,8 +2032,8 @@ iris_set_tes_state(const struct gen_device_info *devinfo, } static void -iris_set_gs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) +iris_store_gs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; @@ -1722,7 +2047,7 @@ iris_set_gs_state(const struct gen_device_info *devinfo, gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords; gs.InstanceControl = gs_prog_data->invocations - 1; - gs.DispatchMode = SIMD8; + gs.DispatchMode = DISPATCH_MODE_SIMD8; gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; gs.ControlDataFormat = gs_prog_data->control_data_format; gs.ReorderMode = TRAILING; @@ -1751,8 +2076,8 @@ iris_set_gs_state(const struct 
gen_device_info *devinfo, } static void -iris_set_fs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) +iris_store_fs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; @@ -1834,7 +2159,7 @@ iris_set_fs_state(const struct gen_device_info *devinfo, static unsigned iris_derived_program_state_size(enum iris_program_cache_id cache_id) { - assert(cache_id <= IRIS_CACHE_CS); + assert(cache_id <= IRIS_CACHE_BLORP); static const unsigned dwords[] = { [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length), @@ -1844,34 +2169,35 @@ iris_derived_program_state_size(enum iris_program_cache_id cache_id) [IRIS_CACHE_FS] = GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length), [IRIS_CACHE_CS] = 0, - [IRIS_CACHE_BLORP_BLIT] = 0, + [IRIS_CACHE_BLORP] = 0, }; return sizeof(uint32_t) * dwords[cache_id]; } static void -iris_set_derived_program_state(const struct gen_device_info *devinfo, - enum iris_program_cache_id cache_id, - struct iris_compiled_shader *shader) +iris_store_derived_program_state(const struct gen_device_info *devinfo, + enum iris_program_cache_id cache_id, + struct iris_compiled_shader *shader) { switch (cache_id) { case IRIS_CACHE_VS: - iris_set_vs_state(devinfo, shader); + iris_store_vs_state(devinfo, shader); break; case IRIS_CACHE_TCS: - iris_set_tcs_state(devinfo, shader); + iris_store_tcs_state(devinfo, shader); break; case IRIS_CACHE_TES: - iris_set_tes_state(devinfo, shader); + iris_store_tes_state(devinfo, shader); break; case IRIS_CACHE_GS: - iris_set_gs_state(devinfo, shader); + iris_store_gs_state(devinfo, shader); break; case IRIS_CACHE_FS: - iris_set_fs_state(devinfo, shader); + iris_store_fs_state(devinfo, shader); break; case IRIS_CACHE_CS: + case IRIS_CACHE_BLORP: break; default: break; @@ -1935,73 +2261,276 @@ use_surface(struct iris_batch *batch, bool writeable) { struct iris_surface *surf = (void *) p_surf; - struct iris_resource *res = (void *) p_surf->texture; - struct iris_resource *state_res = (void *) surf->surface_state_resource; - iris_use_pinned_bo(batch, res->bo, writeable); - iris_use_pinned_bo(batch, state_res->bo, false); - return surf->surface_state_offset; + iris_use_pinned_bo(batch, iris_resource_bo(p_surf->texture), writeable); + iris_use_pinned_bo(batch, iris_resource_bo(surf->surface_state.res), false); + + return surf->surface_state.offset; } static uint32_t use_sampler_view(struct iris_batch *batch, struct iris_sampler_view *isv) { - struct iris_resource *res = (void *) isv->pipe.texture; - struct iris_resource *state_res = (void *) isv->surface_state_resource; - iris_use_pinned_bo(batch, res->bo, false); - iris_use_pinned_bo(batch, state_res->bo, false); + iris_use_pinned_bo(batch, iris_resource_bo(isv->pipe.texture), false); + iris_use_pinned_bo(batch, iris_resource_bo(isv->surface_state.res), false); + + return isv->surface_state.offset; +} + +static uint32_t +use_const_buffer(struct iris_batch *batch, struct iris_const_buffer *cbuf) +{ + iris_use_pinned_bo(batch, iris_resource_bo(cbuf->data.res), false); + iris_use_pinned_bo(batch, iris_resource_bo(cbuf->surface_state.res), false); + + return cbuf->surface_state.offset; +} + +static uint32_t +use_null_surface(struct iris_batch *batch, struct iris_context *ice) +{ + struct iris_bo *state_bo = iris_resource_bo(ice->state.unbound_tex.res); + + iris_use_pinned_bo(batch, state_bo, false); - return isv->surface_state_offset; 
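The `iris_set_*_state` to `iris_store_*_state` renames above reflect what these functions do: pack each shader variant's gen-specific 3DSTATE packets once, at compile time, into the shader's derived data, sized per cache id, so draw time can copy them straight into the batch. A toy model of that store/emit split; sizes and ids are illustrative, the real table sums `GENX(3DSTATE_*_length)` values:

```c
#include <stdint.h>
#include <string.h>

enum toy_cache_id { TOY_CACHE_VS, TOY_CACHE_FS, TOY_CACHE_COUNT };

static unsigned toy_derived_size(enum toy_cache_id id)
{
   static const unsigned dwords[TOY_CACHE_COUNT] = {
      [TOY_CACHE_VS] = 9,    /* e.g. 3DSTATE_VS */
      [TOY_CACHE_FS] = 14,   /* e.g. 3DSTATE_PS + 3DSTATE_PS_EXTRA */
   };
   return sizeof(uint32_t) * dwords[id];
}

static void toy_emit_derived(uint32_t *batch_out, const uint32_t *derived,
                             enum toy_cache_id id)
{
   /* Packets were packed at compile time; draw time just copies. */
   memcpy(batch_out, derived, toy_derived_size(id));
}
```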
+ return ice->state.unbound_tex.offset; } static void -iris_upload_render_state(struct iris_context *ice, - struct iris_batch *batch, - const struct pipe_draw_info *draw) +iris_populate_binding_table(struct iris_context *ice, + struct iris_batch *batch, + gl_shader_stage stage) { - const uint64_t dirty = ice->state.dirty; + const struct iris_binder *binder = &batch->binder; + struct iris_compiled_shader *shader = ice->shaders.prog[stage]; + if (!shader) + return; - struct brw_wm_prog_data *wm_prog_data = (void *) - ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; + // Surfaces: + // - pull constants + // - ubos/ssbos/abos + // - images + // - textures + // - render targets - write and read - if (dirty & IRIS_DIRTY_CC_VIEWPORT) { - struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; - iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { - ptr.CCViewportPointer = - emit_state(batch, ice->state.dynamic_uploader, - cso->cc_vp, sizeof(cso->cc_vp), 32); - } - } + //struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; + uint32_t *bt_map = binder->map + binder->bt_offset[stage]; + int s = 0; - if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) { - struct iris_viewport_state *cso = ice->state.cso_vp; - iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { - ptr.SFClipViewportPointer = - emit_state(batch, ice->state.dynamic_uploader, cso->sf_cl_vp, - 4 * GENX(SF_CLIP_VIEWPORT_length) * - ice->state.num_viewports, 64); + if (stage == MESA_SHADER_FRAGMENT) { + struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; + for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { + bt_map[s++] = use_surface(batch, cso_fb->cbufs[i], true); } } - /* XXX: L3 State */ + //assert(prog_data->binding_table.texture_start == + //(ice->state.num_textures[stage] ? s : 0xd0d0d0d0)); - if (dirty & IRIS_DIRTY_URB) { - iris_upload_urb_config(ice, batch); + for (int i = 0; i < ice->state.num_textures[stage]; i++) { + struct iris_sampler_view *view = ice->state.textures[stage][i]; + bt_map[s++] = view ? 
use_sampler_view(batch, view) + : use_null_surface(batch, ice); } - if (dirty & IRIS_DIRTY_BLEND_STATE) { - struct iris_blend_state *cso_blend = ice->state.cso_blend; - struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; - struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; - const int num_dwords = 4 * (GENX(BLEND_STATE_length) + - cso_fb->nr_cbufs * GENX(BLEND_STATE_ENTRY_length)); - uint32_t blend_offset; - uint32_t *blend_map = - stream_state(batch, ice->state.dynamic_uploader, 4 * num_dwords, 64, - &blend_offset); + // XXX: want the number of BTE's to shorten this loop + struct iris_shader_state *shs = &ice->shaders.state[stage]; + for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + struct iris_const_buffer *cbuf = &shs->constbuf[i]; + if (!cbuf->surface_state.res) + break; - uint32_t blend_state_header; - iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) { + bt_map[s++] = use_const_buffer(batch, cbuf); + } +#if 0 + // XXX: not implemented yet + assert(prog_data->binding_table.pull_constants_start == 0xd0d0d0d0); + assert(prog_data->binding_table.ubo_start == 0xd0d0d0d0); + assert(prog_data->binding_table.ssbo_start == 0xd0d0d0d0); + assert(prog_data->binding_table.image_start == 0xd0d0d0d0); + assert(prog_data->binding_table.shader_time_start == 0xd0d0d0d0); + //assert(prog_data->binding_table.plane_start[1] == 0xd0d0d0d0); + //assert(prog_data->binding_table.plane_start[2] == 0xd0d0d0d0); +#endif +} + +static void +iris_use_optional_res(struct iris_batch *batch, + struct pipe_resource *res, + bool writeable) +{ + if (res) { + struct iris_bo *bo = iris_resource_bo(res); + iris_use_pinned_bo(batch, bo, writeable); + } +} + + +/** + * Pin any BOs which were installed by a previous batch, and restored + * via the hardware logical context mechanism. + * + * We don't need to re-emit all state every batch - the hardware context + * mechanism will save and restore it for us. This includes pointers to + * various BOs...which won't exist unless we ask the kernel to pin them + * by adding them to the validation list. + * + * We can skip buffers if we've re-emitted those packets, as we're + * overwriting those stale pointers with new ones, and don't actually + * refer to the old BOs. 
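`iris_populate_binding_table()` above consumes table slots linearly. A compact restatement of the resulting layout as a hypothetical helper; the real loops also handle null-surface fallbacks and BO residency:

```c
#include <stdint.h>

static int toy_fill_binding_table(uint32_t *bt,
                                  const uint32_t *rts, int num_rts,
                                  const uint32_t *texs, int num_texs,
                                  const uint32_t *cbufs, int num_cbufs)
{
   int s = 0;
   for (int i = 0; i < num_rts; i++)   bt[s++] = rts[i];   /* render targets (FS) */
   for (int i = 0; i < num_texs; i++)  bt[s++] = texs[i];  /* sampler views */
   for (int i = 0; i < num_cbufs; i++) bt[s++] = cbufs[i]; /* constant buffers */
   return s;  /* 3DSTATE_BINDING_TABLE_POINTERS_* points at this table */
}
```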
+ */ +static void +iris_restore_context_saved_bos(struct iris_context *ice, + struct iris_batch *batch, + const struct pipe_draw_info *draw) +{ + // XXX: whack IRIS_SHADER_DIRTY_BINDING_TABLE on new batch + + const uint64_t clean = ~ice->state.dirty; + + if (clean & IRIS_DIRTY_CC_VIEWPORT) { + iris_use_optional_res(batch, ice->state.last_res.cc_vp, false); + } + + if (clean & IRIS_DIRTY_SF_CL_VIEWPORT) { + iris_use_optional_res(batch, ice->state.last_res.sf_cl_vp, false); + } + + if (clean & IRIS_DIRTY_BLEND_STATE) { + iris_use_optional_res(batch, ice->state.last_res.blend, false); + } + + if (clean & IRIS_DIRTY_COLOR_CALC_STATE) { + iris_use_optional_res(batch, ice->state.last_res.color_calc, false); + } + + if (clean & IRIS_DIRTY_SCISSOR_RECT) { + iris_use_optional_res(batch, ice->state.last_res.scissor, false); + } + + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + if (clean & (IRIS_DIRTY_CONSTANTS_VS << stage)) + continue; + + struct iris_shader_state *shs = &ice->shaders.state[stage]; + struct iris_compiled_shader *shader = ice->shaders.prog[stage]; + + if (!shader) + continue; + + struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; + + for (int i = 0; i < 4; i++) { + const struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; + + if (range->length == 0) + continue; + + struct iris_const_buffer *cbuf = &shs->constbuf[range->block]; + struct iris_resource *res = (void *) cbuf->data.res; + + if (res) + iris_use_pinned_bo(batch, res->bo, false); + else + iris_use_pinned_bo(batch, batch->screen->workaround_bo, false); + } + } + + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + struct pipe_resource *res = ice->state.sampler_table[stage].res; + if (res) + iris_use_pinned_bo(batch, iris_resource_bo(res), false); + } + + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + if (clean & (IRIS_DIRTY_VS << stage)) { + struct iris_compiled_shader *shader = ice->shaders.prog[stage]; + if (shader) { + struct iris_bo *bo = iris_resource_bo(shader->assembly.res); + iris_use_pinned_bo(batch, bo, false); + } + + // XXX: scratch buffer + } + } + + // XXX: 3DSTATE_SO_BUFFER + + if (clean & IRIS_DIRTY_DEPTH_BUFFER) { + struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; + + if (cso_fb->zsbuf) { + struct iris_resource *zres = (void *) cso_fb->zsbuf->texture; + // XXX: depth might not be writable... 
+ iris_use_pinned_bo(batch, zres->bo, true); + } + } + + if (draw->index_size > 0) { + // XXX: index buffer + } + + if (clean & IRIS_DIRTY_VERTEX_BUFFERS) { + struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers; + for (unsigned i = 0; i < cso->num_buffers; i++) { + struct iris_resource *res = (void *) cso->resources[i]; + iris_use_pinned_bo(batch, res->bo, false); + } + } +} + +static void +iris_upload_render_state(struct iris_context *ice, + struct iris_batch *batch, + const struct pipe_draw_info *draw) +{ + const uint64_t dirty = ice->state.dirty; + + struct brw_wm_prog_data *wm_prog_data = (void *) + ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; + + if (dirty & IRIS_DIRTY_CC_VIEWPORT) { + struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; + iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { + ptr.CCViewportPointer = + emit_state(batch, ice->state.dynamic_uploader, + &ice->state.last_res.cc_vp, + cso->cc_vp, sizeof(cso->cc_vp), 32); + } + } + + if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) { + struct iris_viewport_state *cso = &ice->state.genx->viewport; + iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { + ptr.SFClipViewportPointer = + emit_state(batch, ice->state.dynamic_uploader, + &ice->state.last_res.sf_cl_vp, + cso->sf_cl_vp, 4 * GENX(SF_CLIP_VIEWPORT_length) * + ice->state.num_viewports, 64); + } + } + + /* XXX: L3 State */ + + // XXX: this is only flagged at setup, we assume a static configuration + if (dirty & IRIS_DIRTY_URB) { + iris_upload_urb_config(ice, batch); + } + + if (dirty & IRIS_DIRTY_BLEND_STATE) { + struct iris_blend_state *cso_blend = ice->state.cso_blend; + struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; + struct iris_depth_stencil_alpha_state *cso_zsa = ice->state.cso_zsa; + const int num_dwords = 4 * (GENX(BLEND_STATE_length) + + cso_fb->nr_cbufs * GENX(BLEND_STATE_ENTRY_length)); + uint32_t blend_offset; + uint32_t *blend_map = + stream_state(batch, ice->state.dynamic_uploader, + &ice->state.last_res.blend, + 4 * num_dwords, 64, &blend_offset); + + uint32_t blend_state_header; + iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) { bs.AlphaTestEnable = cso_zsa->alpha.enabled; bs.AlphaTestFunction = translate_compare_func(cso_zsa->alpha.func); } @@ -2021,6 +2550,7 @@ iris_upload_render_state(struct iris_context *ice, uint32_t cc_offset; void *cc_map = stream_state(batch, ice->state.dynamic_uploader, + &ice->state.last_res.color_calc, sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length), 64, &cc_offset); iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) { @@ -2042,98 +2572,84 @@ iris_upload_render_state(struct iris_context *ice, if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage))) continue; - struct pipe_constant_buffer *cbuf0 = - &ice->shaders.state[stage].constbuf[0]; - - if (!ice->shaders.prog[stage] || cbuf0->buffer || !cbuf0->buffer_size) - continue; - struct iris_shader_state *shs = &ice->shaders.state[stage]; - shs->const_size = cbuf0->buffer_size; - u_upload_data(ice->ctx.const_uploader, 0, shs->const_size, 32, - cbuf0->user_buffer, &shs->const_offset, - &shs->push_resource); - } + struct iris_compiled_shader *shader = ice->shaders.prog[stage]; - for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { - // XXX: wrong dirty tracking... 
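The `IRIS_DIRTY_*_VS << stage` idiom used throughout `iris_upload_render_state()` relies on the five graphics stages owning consecutive bits within each per-stage flag group. A sketch with illustrative bit positions, not the real `IRIS_DIRTY_*` values:

```c
#include <stdint.h>

enum { TOY_STAGE_VS, TOY_STAGE_TCS, TOY_STAGE_TES, TOY_STAGE_GS, TOY_STAGE_FS };

#define TOY_DIRTY_CONSTANTS_VS (1ull << 20)  /* TCS=21, TES=22, GS=23, FS=24 */

static int toy_constants_dirty(uint64_t dirty, int stage)
{
   return (dirty & (TOY_DIRTY_CONSTANTS_VS << stage)) != 0;
}
```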
- if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage))) + if (!shader) continue; - struct iris_shader_state *shs = &ice->shaders.state[stage]; - struct iris_resource *res = (void *) shs->push_resource; + struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) { pkt._3DCommandSubOpcode = push_constant_opcodes[stage]; - if (res) { - pkt.ConstantBody.ReadLength[3] = shs->const_size; - pkt.ConstantBody.Buffer[3] = ro_bo(res->bo, shs->const_offset); + if (prog_data) { + /* The Skylake PRM contains the following restriction: + * + * "The driver must ensure The following case does not occur + * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with + * buffer 3 read length equal to zero committed followed by a + * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to + * zero committed." + * + * To avoid this, we program the buffers in the highest slots. + * This way, slot 0 is only used if slot 3 is also used. + */ + int n = 3; + + for (int i = 3; i >= 0; i--) { + const struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; + + if (range->length == 0) + continue; + + // XXX: is range->block a constbuf index? it would be nice + struct iris_const_buffer *cbuf = &shs->constbuf[range->block]; + struct iris_resource *res = (void *) cbuf->data.res; + + assert(cbuf->data.offset % 32 == 0); + + pkt.ConstantBody.ReadLength[n] = range->length; + pkt.ConstantBody.Buffer[n] = + res ? ro_bo(res->bo, range->start * 32 + cbuf->data.offset) + : ro_bo(batch->screen->workaround_bo, 0); + n--; + } } } } - // Surfaces: - // - pull constants - // - ubos/ssbos/abos - // - images - // - textures - // - render targets - write and read - // XXX: 3DSTATE_BINDING_TABLE_POINTERS_XS + struct iris_binder *binder = &batch->binder; for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { - struct iris_compiled_shader *shader = ice->shaders.prog[stage]; - if (!shader) // XXX: dirty bits...also, emit a disable maybe? 
- continue; - - struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; - uint32_t bt_offset = 0; - uint32_t *bt_map = NULL; - - if (prog_data->binding_table.size_bytes != 0) { - iris_use_pinned_bo(batch, ice->state.binder.bo, false); - bt_map = iris_binder_reserve(&ice->state.binder, - prog_data->binding_table.size_bytes, - &bt_offset); - } - - iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) { - ptr._3DCommandSubOpcode = 38 + stage; - ptr.PointertoVSBindingTable = bt_offset; - } - - if (stage == MESA_SHADER_FRAGMENT) { - struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; - for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { - *bt_map++ = use_surface(batch, cso_fb->cbufs[i], true); + if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) { + iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) { + ptr._3DCommandSubOpcode = 38 + stage; + ptr.PointertoVSBindingTable = binder->bt_offset[stage]; } } + } - for (int i = 0; i < ice->state.num_textures[stage]; i++) { - struct iris_sampler_view *view = ice->state.textures[stage][i]; - struct iris_resource *res = (void *) view->pipe.texture; - *bt_map++ = use_sampler_view(batch, view); + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) { + iris_populate_binding_table(ice, batch, stage); } -#if 0 - - // XXX: not implemented yet - assert(prog_data->binding_table.pull_constants_start == 0xd0d0d0d0); - assert(prog_data->binding_table.ubo_start == 0xd0d0d0d0); - assert(prog_data->binding_table.ssbo_start == 0xd0d0d0d0); - assert(prog_data->binding_table.image_start == 0xd0d0d0d0); - assert(prog_data->binding_table.shader_time_start == 0xd0d0d0d0); - //assert(prog_data->binding_table.plane_start[1] == 0xd0d0d0d0); - //assert(prog_data->binding_table.plane_start[2] == 0xd0d0d0d0); -#endif } + if (ice->state.need_border_colors) + iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false); + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { if (!(dirty & (IRIS_DIRTY_SAMPLER_STATES_VS << stage)) || !ice->shaders.prog[stage]) continue; + struct pipe_resource *res = ice->state.sampler_table[stage].res; + if (res) + iris_use_pinned_bo(batch, iris_resource_bo(res), false); + iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) { ptr._3DCommandSubOpcode = 43 + stage; - ptr.PointertoVSSamplerState = ice->state.sampler_table_offset[stage]; + ptr.PointertoVSSamplerState = ice->state.sampler_table[stage].offset; } } @@ -2159,7 +2675,7 @@ iris_upload_render_state(struct iris_context *ice, struct iris_compiled_shader *shader = ice->shaders.prog[stage]; if (shader) { - struct iris_resource *cache = (void *) shader->buffer; + struct iris_resource *cache = (void *) shader->assembly.res; iris_use_pinned_bo(batch, cache->bo, false); iris_batch_emit(batch, shader->derived_data, iris_derived_program_state_size(stage)); @@ -2190,6 +2706,7 @@ iris_upload_render_state(struct iris_context *ice, cl.NonPerspectiveBarycentricEnable = true; cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0; + cl.MaximumVPIndex = ice->state.num_viewports - 1; } iris_emit_merge(batch, cso_rast->clip, dynamic_clip, ARRAY_SIZE(cso_rast->clip)); @@ -2202,7 +2719,8 @@ iris_upload_render_state(struct iris_context *ice, } - if (dirty & (IRIS_DIRTY_RASTER | IRIS_DIRTY_FS)) { + /* XXX: FS program updates needs to flag IRIS_DIRTY_WM */ + if (dirty & IRIS_DIRTY_WM) { struct iris_rasterizer_state *cso = ice->state.cso_rast; uint32_t dynamic_wm[GENX(3DSTATE_WM_length)]; @@ 
-2222,23 +2740,7 @@ iris_upload_render_state(struct iris_context *ice, // XXX: 3DSTATE_SBE, 3DSTATE_SBE_SWIZ // -> iris_raster_state (point sprite texture coordinate origin) // -> bunch of shader state... - - iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) { - sbe.AttributeSwizzleEnable = true; - sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; - sbe.VertexURBEntryReadOffset = 1; - sbe.VertexURBEntryReadLength = 1; - sbe.ForceVertexURBEntryReadOffset = true; - sbe.ForceVertexURBEntryReadLength = true; - sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs; - - for (int i = 0; i < 2; i++) { - sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW; - } - } - - iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbe) { - } + iris_emit_sbe(batch, ice); } if (dirty & IRIS_DIRTY_PS_BLEND) { @@ -2266,22 +2768,31 @@ iris_upload_render_state(struct iris_context *ice, iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds)); } - if (dirty & IRIS_DIRTY_SCISSOR) { - // XXX: allocate at set_scissor time? - uint32_t scissor_offset = ice->state.num_scissors == 0 ? 0 : - emit_state(batch, ice->state.dynamic_uploader, ice->state.scissors, + if (dirty & IRIS_DIRTY_SCISSOR_RECT) { + uint32_t scissor_offset = + emit_state(batch, ice->state.dynamic_uploader, + &ice->state.last_res.scissor, + ice->state.scissors, sizeof(struct pipe_scissor_state) * - ice->state.num_scissors, 32); + ice->state.num_viewports, 32); iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { ptr.ScissorRectPointer = scissor_offset; } } - // XXX: 3DSTATE_DEPTH_BUFFER - // XXX: 3DSTATE_HIER_DEPTH_BUFFER - // XXX: 3DSTATE_STENCIL_BUFFER - // XXX: 3DSTATE_CLEAR_PARAMS + if (dirty & IRIS_DIRTY_DEPTH_BUFFER) { + struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; + struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer; + + iris_batch_emit(batch, cso_z->packets, sizeof(cso_z->packets)); + + if (cso_fb->zsbuf) { + struct iris_resource *zres = (void *) cso_fb->zsbuf->texture; + // XXX: depth might not be writable... 
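+         /* Only the depth resource is pinned here; a format with a
+          * separate stencil BO would need that buffer pinned as well.
+          * A sketch of the idea (sres is hypothetical, standing in for
+          * however a separate stencil resource would be looked up):
+          *
+          *    if (sres)
+          *       iris_use_pinned_bo(batch, sres->bo, true);
+          */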
+ iris_use_pinned_bo(batch, zres->bo, true); + } + } if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) { iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) { @@ -2304,29 +2815,38 @@ iris_upload_render_state(struct iris_context *ice, } if (draw->index_size > 0) { - struct iris_resource *res = (struct iris_resource *)draw->index.resource; + struct iris_resource *res = NULL; + unsigned offset; - assert(!draw->has_user_indices); + if (draw->has_user_indices) { + u_upload_data(ice->ctx.stream_uploader, 0, + draw->count * draw->index_size, 4, draw->index.user, + &offset, (struct pipe_resource **) &res); + } else { + res = (struct iris_resource *) draw->index.resource; + offset = 0; + } iris_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) { - ib.IndexFormat = draw->index_size; + ib.IndexFormat = draw->index_size >> 1; ib.MOCS = MOCS_WB; ib.BufferSize = res->bo->size; - ib.BufferStartingAddress = ro_bo(res->bo, 0); + ib.BufferStartingAddress = ro_bo(res->bo, offset); } } if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) { - struct iris_vertex_buffer_state *cso = ice->state.cso_vertex_buffers; + struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers; + const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length); - STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_length) == 4); - STATIC_ASSERT((GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) % 32) == 0); + if (cso->num_buffers > 0) { + iris_batch_emit(batch, cso->vertex_buffers, sizeof(uint32_t) * + (1 + vb_dwords * cso->num_buffers)); - iris_batch_emit(batch, cso->vertex_buffers, - sizeof(uint32_t) * (1 + 4 * cso->num_buffers)); - - for (unsigned i = 0; i < cso->num_buffers; i++) { - iris_use_pinned_bo(batch, cso->bos[i], false); + for (unsigned i = 0; i < cso->num_buffers; i++) { + struct iris_resource *res = (void *) cso->resources[i]; + iris_use_pinned_bo(batch, res->bo, false); + } } } @@ -2334,10 +2854,8 @@ iris_upload_render_state(struct iris_context *ice, struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements; iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) * (1 + cso->count * GENX(VERTEX_ELEMENT_STATE_length))); - for (int i = 0; i < cso->count; i++) { - iris_batch_emit(batch, cso->vf_instancing[i], sizeof(uint32_t) * - (cso->count * GENX(3DSTATE_VF_INSTANCING_length))); - } + iris_batch_emit(batch, cso->vf_instancing, sizeof(uint32_t) * + cso->count * GENX(3DSTATE_VF_INSTANCING_length)); for (int i = 0; i < cso->count; i++) { /* TODO: vertexid, instanceid support */ iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs); @@ -2374,22 +2892,449 @@ iris_upload_render_state(struct iris_context *ice, //prim.BaseVertexLocation = ...; } + + if (!batch->contains_draw) { + iris_restore_context_saved_bos(ice, batch, draw); + batch->contains_draw = true; + } } +/** + * State module teardown. + */ static void iris_destroy_state(struct iris_context *ice) { + iris_free_vertex_buffers(&ice->state.genx->vertex_buffers); + // XXX: unreference resources/surfaces. 
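+   /* A fuller teardown would also drop the per-stage constant buffer
+    * references; a sketch of the idea (loop bounds assumed from the
+    * constbuf usage earlier in this file, not verified against the
+    * actual array size):
+    *
+    *    for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+    *       struct iris_shader_state *shs = &ice->shaders.state[stage];
+    *       for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++)
+    *          pipe_resource_reference(&shs->constbuf[i].data.res, NULL);
+    *    }
+    */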
for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) { pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL); } pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL); + + for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) { + pipe_resource_reference(&ice->state.sampler_table[stage].res, NULL); + } + free(ice->state.genx); + + pipe_resource_reference(&ice->state.last_res.cc_vp, NULL); + pipe_resource_reference(&ice->state.last_res.sf_cl_vp, NULL); + pipe_resource_reference(&ice->state.last_res.color_calc, NULL); + pipe_resource_reference(&ice->state.last_res.scissor, NULL); + pipe_resource_reference(&ice->state.last_res.blend, NULL); +} + +static unsigned +flags_to_post_sync_op(uint32_t flags) +{ + if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) + return WriteImmediateData; + + if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) + return WritePSDepthCount; + + if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) + return WriteTimestamp; + + return 0; +} + +/** + * Do the given flags have a Post Sync or LRI Post Sync operation? + */ +static enum pipe_control_flags +get_post_sync_flags(enum pipe_control_flags flags) +{ + flags &= PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP | + PIPE_CONTROL_LRI_POST_SYNC_OP; + + /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with + * "LRI Post Sync Operation". So more than one bit set would be illegal. + */ + assert(util_bitcount(flags) <= 1); + + return flags; +} + +// XXX: compute support +#define IS_COMPUTE_PIPELINE(batch) (batch->ring != I915_EXEC_RENDER) + +/** + * Emit a series of PIPE_CONTROL commands, taking into account any + * workarounds necessary to actually accomplish the caller's request. + * + * Unless otherwise noted, spec quotations in this function come from: + * + * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming + * Restrictions for PIPE_CONTROL. + */ +static void +iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags, + struct iris_bo *bo, uint32_t offset, uint64_t imm) +{ + UNUSED const struct gen_device_info *devinfo = &batch->screen->devinfo; + enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags); + enum pipe_control_flags non_lri_post_sync_flags = + post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP; + + /* Recursive PIPE_CONTROL workarounds -------------------------------- + * (http://knowyourmeme.com/memes/xzibit-yo-dawg) + * + * We do these first because we want to look at the original operation, + * rather than any workarounds we set. + */ + if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { + /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description + * lists several workarounds: + * + * "Project: SKL, KBL, BXT + * + * If the VF Cache Invalidation Enable is set to a 1 in a + * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields + * sets to 0, with the VF Cache Invalidation Enable set to 0 + * needs to be sent prior to the PIPE_CONTROL with VF Cache + * Invalidation Enable set to a 1." + */ + iris_emit_raw_pipe_control(batch, 0, NULL, 0, 0); + } + + if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(batch) && post_sync_flags) { + /* Project: SKL / Argument: LRI Post Sync Operation [23] + * + * "PIPECONTROL command with “Command Streamer Stall Enable” must be + * programmed prior to programming a PIPECONTROL command with "LRI + * Post Sync Operation" in GPGPU mode of operation (i.e when + * PIPELINE_SELECT command is set to GPGPU mode of operation)." 
+ * + * The same text exists a few rows below for Post Sync Op. + */ + iris_emit_raw_pipe_control(batch, PIPE_CONTROL_CS_STALL, bo, offset, imm); + } + + if (GEN_GEN == 10 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { + /* Cannonlake: + * "Before sending a PIPE_CONTROL command with bit 12 set, SW must issue + * another PIPE_CONTROL with Render Target Cache Flush Enable (bit 12) + * = 0 and Pipe Control Flush Enable (bit 7) = 1" + */ + iris_emit_raw_pipe_control(batch, PIPE_CONTROL_FLUSH_ENABLE, bo, + offset, imm); + } + + /* "Flush Types" workarounds --------------------------------------------- + * We do these now because they may add post-sync operations or CS stalls. + */ + + if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) { + /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate + * + * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or + * 'Write PS Depth Count' or 'Write Timestamp'." + */ + if (!bo) { + flags |= PIPE_CONTROL_WRITE_IMMEDIATE; + post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; + non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; + bo = batch->screen->workaround_bo; + } + } + + /* #1130 from Gen10 workarounds page: + * + * "Enable Depth Stall on every Post Sync Op if Render target Cache + * Flush is not enabled in same PIPE CONTROL and Enable Pixel score + * board stall if Render target cache flush is enabled." + * + * Applicable to CNL B0 and C0 steppings only. + * + * The wording here is unclear, and this workaround doesn't look anything + * like the internal bug report recommendations, but leave it be for now... + */ + if (GEN_GEN == 10) { + if (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) { + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; + } else if (flags & non_lri_post_sync_flags) { + flags |= PIPE_CONTROL_DEPTH_STALL; + } + } + + if (flags & PIPE_CONTROL_DEPTH_STALL) { + /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable): + * + * "This bit must be DISABLED for operations other than writing + * PS_DEPTH_COUNT." + * + * This seems like nonsense. An Ivybridge workaround requires us to + * emit a PIPE_CONTROL with a depth stall and write immediate post-sync + * operation. Gen8+ requires us to emit depth stalls and depth cache + * flushes together. So, it's hard to imagine this means anything other + * than "we originally intended this to be used for PS_DEPTH_COUNT". + * + * We ignore the supposed restriction and do nothing. + */ + } + + if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_STALL_AT_SCOREBOARD)) { + /* From the PIPE_CONTROL instruction table, bit 12 and bit 1: + * + * "This bit must be DISABLED for End-of-pipe (Read) fences, + * PS_DEPTH_COUNT or TIMESTAMP queries." + * + * TODO: Implement end-of-pipe checking. + */ + assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP))); + } + + if (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) { + /* From the PIPE_CONTROL instruction table, bit 1: + * + * "This bit is ignored if Depth Stall Enable is set. + * Further, the render cache is not flushed even if Write Cache + * Flush Enable bit is set." + * + * We assert that the caller doesn't do this combination, to try and + * prevent mistakes. It shouldn't hurt the GPU, though. 
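+       *
+       * (For example, combining PIPE_CONTROL_STALL_AT_SCOREBOARD with
+       * PIPE_CONTROL_DEPTH_STALL or PIPE_CONTROL_RENDER_TARGET_FLUSH
+       * would trip the assert below.)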
+ */ + assert(!(flags & (PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_RENDER_TARGET_FLUSH))); + } + + /* PIPE_CONTROL page workarounds ------------------------------------- */ + + if (GEN_GEN <= 8 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) { + /* From the PIPE_CONTROL page itself: + * + * "IVB, HSW, BDW + * Restriction: Pipe_control with CS-stall bit set must be issued + * before a pipe-control command that has the State Cache + * Invalidate bit set." + */ + flags |= PIPE_CONTROL_CS_STALL; + } + + if (flags & PIPE_CONTROL_FLUSH_LLC) { + /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC): + * + * "Project: ALL + * SW must always program Post-Sync Operation to "Write Immediate + * Data" when Flush LLC is set." + * + * For now, we just require the caller to do it. + */ + assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE); + } + + /* "Post-Sync Operation" workarounds -------------------------------- */ + + /* Project: All / Argument: Global Snapshot Count Reset [19] + * + * "This bit must not be exercised on any product. + * Requires stall bit ([20] of DW1) set." + * + * We don't use this, so we just assert that it isn't used. The + * PIPE_CONTROL instruction page indicates that they intended this + * as a debug feature and don't think it is useful in production, + * but it may actually be usable, should we ever want to. + */ + assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0); + + if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR | + PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) { + /* Project: All / Arguments: + * + * - Generic Media State Clear [16] + * - Indirect State Pointers Disable [16] + * + * "Requires stall bit ([20] of DW1) set." + * + * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media + * State Clear) says: + * + * "PIPECONTROL command with “Command Streamer Stall Enable” must be + * programmed prior to programming a PIPECONTROL command with "Media + * State Clear" set in GPGPU mode of operation" + * + * This is a subset of the earlier rule, so there's nothing to do. + */ + flags |= PIPE_CONTROL_CS_STALL; + } + + if (flags & PIPE_CONTROL_STORE_DATA_INDEX) { + /* Project: All / Argument: Store Data Index + * + * "Post-Sync Operation ([15:14] of DW1) must be set to something other + * than '0'." + * + * For now, we just assert that the caller does this. We might want to + * automatically add a write to the workaround BO... + */ + assert(non_lri_post_sync_flags != 0); + } + + if (flags & PIPE_CONTROL_SYNC_GFDT) { + /* Project: All / Argument: Sync GFDT + * + * "Post-Sync Operation ([15:14] of DW1) must be set to something other + * than '0' or 0x2520[13] must be set." + * + * For now, we just assert that the caller does this. + */ + assert(non_lri_post_sync_flags != 0); + } + + if (flags & PIPE_CONTROL_TLB_INVALIDATE) { + /* Project: IVB+ / Argument: TLB inv + * + * "Requires stall bit ([20] of DW1) set." + * + * Also, from the PIPE_CONTROL instruction table: + * + * "Project: SKL+ + * Post Sync Operation or CS stall must be set to ensure a TLB + * invalidation occurs. Otherwise no cycle will occur to the TLB + * cache to invalidate." + * + * This is not a subset of the earlier rule, so there's nothing to do. 
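+       *
+       * ("Nothing to do" only in the sense that no additional bit is
+       * needed: the CS stall set just below also satisfies the SKL+
+       * "Post Sync Operation or CS stall" requirement.)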
+ */ + flags |= PIPE_CONTROL_CS_STALL; + } + + if (GEN_GEN == 9 && devinfo->gt == 4) { + /* TODO: The big Skylake GT4 post sync op workaround */ + } + + /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */ + + if (IS_COMPUTE_PIPELINE(batch)) { + if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) { + /* Project: SKL+ / Argument: Tex Invalidate + * "Requires stall bit ([20] of DW) set for all GPGPU Workloads." + */ + flags |= PIPE_CONTROL_CS_STALL; + } + + if (GEN_GEN == 8 && (post_sync_flags || + (flags & (PIPE_CONTROL_NOTIFY_ENABLE | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DATA_CACHE_FLUSH)))) { + /* Project: BDW / Arguments: + * + * - LRI Post Sync Operation [23] + * - Post Sync Op [15:14] + * - Notify En [8] + * - Depth Stall [13] + * - Render Target Cache Flush [12] + * - Depth Cache Flush [0] + * - DC Flush Enable [5] + * + * "Requires stall bit ([20] of DW) set for all GPGPU and Media + * Workloads." + */ + flags |= PIPE_CONTROL_CS_STALL; + + /* Also, from the PIPE_CONTROL instruction table, bit 20: + * + * "Project: BDW + * This bit must be always set when PIPE_CONTROL command is + * programmed by GPGPU and MEDIA workloads, except for the cases + * when only Read Only Cache Invalidation bits are set (State + * Cache Invalidation Enable, Instruction cache Invalidation + * Enable, Texture Cache Invalidation Enable, Constant Cache + * Invalidation Enable). This is to WA FFDOP CG issue, this WA + * need not implemented when FF_DOP_CG is disable via "Fixed + * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register." + * + * It sounds like we could avoid CS stalls in some cases, but we + * don't currently bother. This list isn't exactly the list above, + * either... + */ + } + } + + /* "Stall" workarounds ---------------------------------------------- + * These have to come after the earlier ones because we may have added + * some additional CS stalls above. + */ + + if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) { + /* Project: PRE-SKL, VLV, CHV + * + * "[All Stepping][All SKUs]: + * + * One of the following must also be set: + * + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - DC Flush Enable ([5] of DW1)" + * + * If we don't already have one of those bits set, we choose to add + * "Stall at Pixel Scoreboard". Some of the other bits require a + * CS stall as a workaround (see above), which would send us into + * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard" + * appears to be safe, so we choose that. 
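+       *
+       * (So a caller requesting a bare PIPE_CONTROL_CS_STALL effectively
+       * gets CS_STALL | STALL_AT_SCOREBOARD from the code below.)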
+ */ + const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP | + PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DATA_CACHE_FLUSH; + if (!(flags & wa_bits)) + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; + } + + /* Emit --------------------------------------------------------------- */ + + iris_emit_cmd(batch, GENX(PIPE_CONTROL), pc) { + pc.LRIPostSyncOperation = NoLRIOperation; + pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE; + pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH; + pc.StoreDataIndex = 0; + pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL; + pc.GlobalSnapshotCountReset = + flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET; + pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE; + pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR; + pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD; + pc.RenderTargetCacheFlushEnable = + flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; + pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH; + pc.StateCacheInvalidationEnable = + flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE; + pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE; + pc.ConstantCacheInvalidationEnable = + flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE; + pc.PostSyncOperation = flags_to_post_sync_op(flags); + pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL; + pc.InstructionCacheInvalidateEnable = + flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE; + pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE; + pc.IndirectStatePointersDisable = + flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE; + pc.TextureCacheInvalidationEnable = + flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + pc.Address = ro_bo(bo, offset); + pc.ImmediateData = imm; + } } void genX(init_state)(struct iris_context *ice) { struct pipe_context *ctx = &ice->ctx; + struct iris_screen *screen = (struct iris_screen *)ctx->screen; ctx->create_blend_state = iris_create_blend_state; ctx->create_depth_stencil_alpha_state = iris_create_zsa_state; @@ -2435,17 +3380,26 @@ genX(init_state)(struct iris_context *ice) ctx->stream_output_target_destroy = iris_stream_output_target_destroy; ctx->set_stream_output_targets = iris_set_stream_output_targets; - ice->state.destroy_state = iris_destroy_state; - ice->state.init_render_context = iris_init_render_context; - ice->state.upload_render_state = iris_upload_render_state; - ice->state.derived_program_state_size = iris_derived_program_state_size; - ice->state.set_derived_program_state = iris_set_derived_program_state; - ice->state.populate_vs_key = iris_populate_vs_key; - ice->state.populate_tcs_key = iris_populate_tcs_key; - ice->state.populate_tes_key = iris_populate_tes_key; - ice->state.populate_gs_key = iris_populate_gs_key; - ice->state.populate_fs_key = iris_populate_fs_key; - + ice->vtbl.destroy_state = iris_destroy_state; + ice->vtbl.init_render_context = iris_init_render_context; + ice->vtbl.upload_render_state = iris_upload_render_state; + ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control; + ice->vtbl.derived_program_state_size = iris_derived_program_state_size; + ice->vtbl.store_derived_program_state = iris_store_derived_program_state; + ice->vtbl.populate_vs_key = iris_populate_vs_key; + ice->vtbl.populate_tcs_key = iris_populate_tcs_key; + ice->vtbl.populate_tes_key = iris_populate_tes_key; + 
ice->vtbl.populate_gs_key = iris_populate_gs_key; + ice->vtbl.populate_fs_key = iris_populate_fs_key; ice->state.dirty = ~0ull; + + ice->state.num_viewports = 1; + ice->state.genx = calloc(1, sizeof(struct iris_genx_state)); + + /* Make a 1x1x1 null surface for unbound textures */ + void *null_surf_map = + upload_state(ice->state.surface_uploader, &ice->state.unbound_tex, + 4 * GENX(RENDER_SURFACE_STATE_length), 64); + isl_null_fill_state(&screen->isl_dev, null_surf_map, isl_extent3d(1, 1, 1)); }
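+
+/* Usage sketch (editor's illustration for a hypothetical caller, not part
+ * of the original patch): flushing render targets via the helper defined
+ * above.  This flag combination satisfies the pre-SKL CS-stall rule on its
+ * own, since a render target flush is in wa_bits:
+ *
+ *    iris_emit_raw_pipe_control(batch,
+ *                               PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ *                               PIPE_CONTROL_CS_STALL,
+ *                               NULL, 0, 0);
+ */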