X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Firis%2Firis_state.c;h=fa80c5d4db9c1aa33158b5f9ed555fea96f15d85;hb=c0ab9c9890a34720b4580971c0ace16cc0cf52f9;hp=773ea358d42af42a535a4c73cde337ec8758ae99;hpb=9be4b3baafc7cb45806e35ec98f3bf65b0de078a;p=mesa.git diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 773ea358d42..fa80c5d4db9 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -23,11 +23,13 @@ #include #include -#ifdef HAVE_VALGRIND +#if HAVE_VALGRIND #include #include #define VG(x) x +#ifndef NDEBUG #define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x)) +#endif #else #define VG(x) #endif @@ -37,8 +39,12 @@ #include "pipe/p_context.h" #include "pipe/p_screen.h" #include "util/u_inlines.h" +#include "util/u_format.h" +#include "util/u_framebuffer.h" #include "util/u_transfer.h" +#include "util/u_upload_mgr.h" #include "i915_drm.h" +#include "nir.h" #include "intel/compiler/brw_compiler.h" #include "intel/common/gen_l3_config.h" #include "intel/common/gen_sample_positions.h" @@ -56,11 +62,15 @@ static uint64_t __gen_combine_address(struct iris_batch *batch, void *location, struct iris_address addr, uint32_t delta) { - if (addr.bo == NULL) - return addr.offset + delta; + uint64_t result = addr.offset + delta; + + if (addr.bo) { + iris_use_pinned_bo(batch, addr.bo, addr.write); + /* Assume this is a general address, not relative to a base. */ + result += addr.bo->gtt_offset; + } - return iris_batch_reloc(batch, location - batch->cmdbuf.map, addr.bo, - addr.offset + delta, addr.reloc_flags); + return result; } #define __genxml_cmd_length(cmd) cmd ## _length @@ -68,15 +78,6 @@ __gen_combine_address(struct iris_batch *batch, void *location, #define __genxml_cmd_header(cmd) cmd ## _header #define __genxml_cmd_pack(cmd) cmd ## _pack -static void * -get_command_space(struct iris_batch *batch, unsigned bytes) -{ - iris_require_command_space(batch, bytes); - void *map = batch->cmdbuf.map_next; - batch->cmdbuf.map_next += bytes; - return map; -} - #define _iris_pack_command(batch, cmd, dst, name) \ for (struct cmd name = { __genxml_cmd_header(cmd) }, \ *_dst = (void *)(dst); __builtin_expect(_dst != NULL, 1); \ @@ -94,36 +95,16 @@ get_command_space(struct iris_batch *batch, unsigned bytes) _dst = NULL) #define iris_emit_cmd(batch, cmd, name) \ - _iris_pack_command(batch, cmd, get_command_space(batch, 4 * __genxml_cmd_length(cmd)), name) + _iris_pack_command(batch, cmd, iris_get_command_space(batch, 4 * __genxml_cmd_length(cmd)), name) #define iris_emit_merge(batch, dwords0, dwords1, num_dwords) \ do { \ - uint32_t *dw = get_command_space(batch, 4 * num_dwords); \ + uint32_t *dw = iris_get_command_space(batch, 4 * num_dwords); \ for (uint32_t i = 0; i < num_dwords; i++) \ dw[i] = (dwords0)[i] | (dwords1)[i]; \ VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dwords)); \ } while (0) -#define iris_emit_with_addr(batch, dwords, num_dw, addr_field, addr) \ - do { \ - STATIC_ASSERT((GENX(addr_field) % 64) == 0); \ - assert(num_dw <= ARRAY_SIZE(dwords)); \ - int addr_idx = GENX(addr_field) / 32; \ - uint32_t *dw = get_command_space(batch, 4 * num_dw); \ - for (uint32_t i = 0; i < addr_idx; i++) { \ - dw[i] = (dwords)[i]; \ - } \ - uint64_t *qw = (uint64_t *) &dw[addr_idx]; \ - *qw = iris_batch_reloc(batch, (void *)qw - batch->cmdbuf.map, \ - addr.bo, \ - addr.offset + (dwords)[addr_idx + 1], \ - addr.reloc_flags); \ - for (uint32_t i = addr_idx + 1; i < num_dw; i++) { \ - dw[i] = (dwords)[i]; \ - } \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dw * 4)); \ - } while (0) - #include "genxml/genX_pack.h" #include "genxml/gen_macros.h" #include "genxml/genX_bits.h" @@ -289,14 +270,73 @@ translate_fill_mode(unsigned pipe_polymode) } static struct iris_address -ro_bo(struct iris_bo *bo, uint32_t offset) +ro_bo(struct iris_bo *bo, uint64_t offset) { + /* Not for CSOs! */ return (struct iris_address) { .bo = bo, .offset = offset }; } +static void * +upload_state(struct u_upload_mgr *uploader, + struct iris_state_ref *ref, + unsigned size, + unsigned alignment) +{ + void *p = NULL; + u_upload_alloc(uploader, 0, size, alignment, &ref->offset, &ref->res, &p); + return p; +} + +static uint32_t * +stream_state(struct iris_batch *batch, + struct u_upload_mgr *uploader, + struct pipe_resource **out_res, + unsigned size, + unsigned alignment, + uint32_t *out_offset) +{ + void *ptr = NULL; + + u_upload_alloc(uploader, 0, size, alignment, out_offset, out_res, &ptr); + + struct iris_bo *bo = iris_resource_bo(*out_res); + iris_use_pinned_bo(batch, bo, false); + + *out_offset += iris_bo_offset_from_base_address(bo); + + return ptr; +} + +static uint32_t +emit_state(struct iris_batch *batch, + struct u_upload_mgr *uploader, + struct pipe_resource **out_res, + const void *data, + unsigned size, + unsigned alignment) +{ + unsigned offset = 0; + uint32_t *map = + stream_state(batch, uploader, out_res, size, alignment, &offset); + + if (map) + memcpy(map, data, size); + + return offset; +} + +#define cso_changed(x) (!old_cso || (old_cso->x != new_cso->x)) +#define cso_changed_memcmp(x) \ + (!old_cso || memcmp(old_cso->x, new_cso->x, sizeof(old_cso->x)) != 0) + static void -iris_emit_state_base_address(struct iris_batch *batch) +iris_init_render_context(struct iris_screen *screen, + struct iris_batch *batch, + struct iris_vtable *vtbl, + struct pipe_debug_callback *dbg) { + iris_init_batch(batch, screen, vtbl, dbg, I915_EXEC_RENDER); + /* XXX: PIPE_CONTROLs */ iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) { @@ -322,23 +362,15 @@ iris_emit_state_base_address(struct iris_batch *batch) sba.IndirectObjectBufferSizeModifyEnable = true; sba.InstructionBuffersizeModifyEnable = true; - sba.SurfaceStateBaseAddress = ro_bo(batch->statebuf.bo, 0); - sba.DynamicStateBaseAddress = ro_bo(batch->statebuf.bo, 0); + sba.InstructionBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SHADER_START); + sba.SurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SURFACE_START); + sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START); sba.GeneralStateBufferSize = 0xfffff; sba.IndirectObjectBufferSize = 0xfffff; sba.InstructionBufferSize = 0xfffff; - sba.DynamicStateBufferSize = ALIGN(MAX_STATE_SIZE, 4096); + sba.DynamicStateBufferSize = 0xfffff; } -} - -static void -iris_init_render_context(struct iris_screen *screen, - struct iris_batch *batch, - struct pipe_debug_callback *dbg) -{ - batch->emit_state_base_address = iris_emit_state_base_address; - iris_init_batch(batch, screen, dbg, I915_EXEC_RENDER); iris_emit_cmd(batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) { rect.ClippedDrawingRectangleXMax = UINT16_MAX; @@ -367,6 +399,33 @@ iris_init_render_context(struct iris_screen *screen, } } +struct iris_viewport_state { + uint32_t sf_cl_vp[GENX(SF_CLIP_VIEWPORT_length) * IRIS_MAX_VIEWPORTS]; +}; + +struct iris_vertex_buffer_state { + uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)]; + struct pipe_resource *resources[33]; + unsigned num_buffers; +}; + +struct iris_depth_buffer_state { + uint32_t packets[GENX(3DSTATE_DEPTH_BUFFER_length) + + GENX(3DSTATE_STENCIL_BUFFER_length) + + GENX(3DSTATE_HIER_DEPTH_BUFFER_length) + + GENX(3DSTATE_CLEAR_PARAMS_length)]; +}; + +/** + * State that can't be stored directly in iris_context because the data + * layout varies per generation. + */ +struct iris_genx_state { + struct iris_viewport_state viewport; + struct iris_vertex_buffer_state vertex_buffers; + struct iris_depth_buffer_state depth_buffer; +}; + static void iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info) { @@ -383,7 +442,10 @@ iris_set_blend_color(struct pipe_context *ctx, } struct iris_blend_state { + /** Partial 3DSTATE_PS_BLEND */ uint32_t ps_blend[GENX(3DSTATE_PS_BLEND_length)]; + + /** Partial BLEND_STATE */ uint32_t blend_state[GENX(BLEND_STATE_length) + BRW_MAX_DRAW_BUFFERS * GENX(BLEND_STATE_ENTRY_length)]; @@ -443,10 +505,10 @@ iris_create_blend_state(struct pipe_context *ctx, be.DestinationBlendFactor = state->rt[i].rgb_dst_factor; be.DestinationAlphaBlendFactor = state->rt[i].alpha_dst_factor; - be.WriteDisableRed = state->rt[i].colormask & PIPE_MASK_R; - be.WriteDisableGreen = state->rt[i].colormask & PIPE_MASK_G; - be.WriteDisableBlue = state->rt[i].colormask & PIPE_MASK_B; - be.WriteDisableAlpha = state->rt[i].colormask & PIPE_MASK_A; + be.WriteDisableRed = !(state->rt[i].colormask & PIPE_MASK_R); + be.WriteDisableGreen = !(state->rt[i].colormask & PIPE_MASK_G); + be.WriteDisableBlue = !(state->rt[i].colormask & PIPE_MASK_B); + be.WriteDisableAlpha = !(state->rt[i].colormask & PIPE_MASK_A); } blend_state += GENX(BLEND_STATE_ENTRY_length); } @@ -459,15 +521,19 @@ iris_bind_blend_state(struct pipe_context *ctx, void *state) { struct iris_context *ice = (struct iris_context *) ctx; ice->state.cso_blend = state; - ice->state.dirty |= IRIS_DIRTY_CC_VIEWPORT; - ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL; + ice->state.dirty |= IRIS_DIRTY_PS_BLEND; + ice->state.dirty |= IRIS_DIRTY_BLEND_STATE; } struct iris_depth_stencil_alpha_state { + /** Partial 3DSTATE_WM_DEPTH_STENCIL */ uint32_t wmds[GENX(3DSTATE_WM_DEPTH_STENCIL_length)]; + + /** Complete CC_VIEWPORT */ uint32_t cc_vp[GENX(CC_VIEWPORT_length)]; - struct pipe_alpha_state alpha; /* to BLEND_STATE, 3DSTATE_PS_BLEND */ + /** Outbound to BLEND_STATE, 3DSTATE_PS_BLEND, COLOR_CALC_STATE */ + struct pipe_alpha_state alpha; }; static void * @@ -526,9 +592,11 @@ iris_bind_zsa_state(struct pipe_context *ctx, void *state) struct iris_depth_stencil_alpha_state *new_cso = state; if (new_cso) { - if (!old_cso || old_cso->alpha.ref_value != new_cso->alpha.ref_value) { + if (cso_changed(alpha.ref_value)) ice->state.dirty |= IRIS_DIRTY_COLOR_CALC_STATE; - } + + if (cso_changed(alpha.enabled)) + ice->state.dirty |= IRIS_DIRTY_PS_BLEND | IRIS_DIRTY_BLEND_STATE; } ice->state.cso_zsa = new_cso; @@ -548,6 +616,8 @@ struct iris_rasterizer_state { bool light_twoside; /* for shader state */ bool rasterizer_discard; /* for 3DSTATE_STREAMOUT */ bool half_pixel_center; /* for 3DSTATE_MULTISAMPLE */ + bool line_stipple_enable; + bool poly_stipple_enable; enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */ uint16_t sprite_coord_enable; }; @@ -579,6 +649,8 @@ iris_create_rasterizer_state(struct pipe_context *ctx, cso->half_pixel_center = state->half_pixel_center; cso->sprite_coord_mode = state->sprite_coord_mode; cso->sprite_coord_enable = state->sprite_coord_enable; + cso->line_stipple_enable = state->line_stipple_enable; + cso->poly_stipple_enable = state->poly_stipple_enable; iris_pack_command(GENX(3DSTATE_SF), cso->sf, sf) { sf.StatisticsEnable = true; @@ -593,15 +665,14 @@ iris_create_rasterizer_state(struct pipe_context *ctx, sf.PointWidth = state->point_size; if (state->flatshade_first) { + sf.TriangleFanProvokingVertexSelect = 1; + } else { sf.TriangleStripListProvokingVertexSelect = 2; sf.TriangleFanProvokingVertexSelect = 2; sf.LineStripListProvokingVertexSelect = 1; - } else { - sf.TriangleFanProvokingVertexSelect = 1; } } - /* COMPLETE! */ iris_pack_command(GENX(3DSTATE_RASTER), cso->raster, rr) { rr.FrontWinding = state->front_ccw ? CounterClockwise : Clockwise; rr.CullMode = translate_cull_mode(state->cull_face); @@ -611,7 +682,7 @@ iris_create_rasterizer_state(struct pipe_context *ctx, rr.GlobalDepthOffsetEnableSolid = state->offset_tri; rr.GlobalDepthOffsetEnableWireframe = state->offset_line; rr.GlobalDepthOffsetEnablePoint = state->offset_point; - rr.GlobalDepthOffsetConstant = state->offset_units; + rr.GlobalDepthOffsetConstant = state->offset_units * 2; rr.GlobalDepthOffsetScale = state->offset_scale; rr.GlobalDepthOffsetClamp = state->offset_clamp; rr.SmoothPointEnable = state->point_smooth; @@ -639,11 +710,11 @@ iris_create_rasterizer_state(struct pipe_context *ctx, cl.MaximumPointWidth = 255.875; if (state->flatshade_first) { + cl.TriangleFanProvokingVertexSelect = 1; + } else { cl.TriangleStripListProvokingVertexSelect = 2; cl.TriangleFanProvokingVertexSelect = 2; cl.LineStripListProvokingVertexSelect = 1; - } else { - cl.TriangleFanProvokingVertexSelect = 1; } } @@ -680,19 +751,19 @@ iris_bind_rasterizer_state(struct pipe_context *ctx, void *state) if (new_cso) { /* Try to avoid re-emitting 3DSTATE_LINE_STIPPLE, it's non-pipelined */ - if (!old_cso || memcmp(old_cso->line_stipple, new_cso->line_stipple, - sizeof(old_cso->line_stipple)) != 0) { + if (cso_changed_memcmp(line_stipple)) ice->state.dirty |= IRIS_DIRTY_LINE_STIPPLE; - } - if (!old_cso || - old_cso->half_pixel_center != new_cso->half_pixel_center) { + if (cso_changed(half_pixel_center)) ice->state.dirty |= IRIS_DIRTY_MULTISAMPLE; - } + + if (cso_changed(line_stipple_enable) || cso_changed(poly_stipple_enable)) + ice->state.dirty |= IRIS_DIRTY_WM; } ice->state.cso_rast = new_cso; ice->state.dirty |= IRIS_DIRTY_RASTER; + ice->state.dirty |= IRIS_DIRTY_CLIP; } static uint32_t @@ -705,8 +776,10 @@ translate_wrap(unsigned pipe_wrap) [PIPE_TEX_WRAP_CLAMP_TO_BORDER] = TCM_CLAMP_BORDER, [PIPE_TEX_WRAP_MIRROR_REPEAT] = TCM_MIRROR, [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE] = TCM_MIRROR_ONCE, - [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, // XXX: ??? - [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, // XXX: ??? + + /* These are unsupported. */ + [PIPE_TEX_WRAP_MIRROR_CLAMP] = -1, + [PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER] = -1, }; return map[pipe_wrap]; } @@ -740,7 +813,7 @@ struct iris_sampler_state { }; static void * -iris_create_sampler_state(struct pipe_context *pctx, +iris_create_sampler_state(struct pipe_context *ctx, const struct pipe_sampler_state *state) { struct iris_sampler_state *cso = CALLOC_STRUCT(iris_sampler_state); @@ -748,6 +821,8 @@ iris_create_sampler_state(struct pipe_context *pctx, if (!cso) return NULL; + memcpy(&cso->base, state, sizeof(*state)); + STATIC_ASSERT(PIPE_TEX_FILTER_NEAREST == MAPFILTER_NEAREST); STATIC_ASSERT(PIPE_TEX_FILTER_LINEAR == MAPFILTER_LINEAR); @@ -806,7 +881,7 @@ iris_create_sampler_state(struct pipe_context *pctx, samp.MaxLOD = CLAMP(state->max_lod, 0, hw_max_lod); samp.TextureLODBias = CLAMP(state->lod_bias, -16, 15); - //samp.BorderColorPointer = <> + /* .BorderColorPointer is filled in by iris_bind_sampler_states. */ } return cso; @@ -822,22 +897,74 @@ iris_bind_sampler_states(struct pipe_context *ctx, gl_shader_stage stage = stage_from_pipe(p_stage); assert(start + count <= IRIS_MAX_TEXTURE_SAMPLERS); + ice->state.num_samplers[stage] = + MAX2(ice->state.num_samplers[stage], start + count); for (int i = 0; i < count; i++) { ice->state.samplers[stage][start + i] = states[i]; } + /* Assemble the SAMPLER_STATEs into a contiguous table that lives + * in the dynamic state memory zone, so we can point to it via the + * 3DSTATE_SAMPLER_STATE_POINTERS_* commands. + */ + void *map = upload_state(ice->state.dynamic_uploader, + &ice->state.sampler_table[stage], + count * 4 * GENX(SAMPLER_STATE_length), 32); + if (unlikely(!map)) + return; + + struct pipe_resource *res = ice->state.sampler_table[stage].res; + ice->state.sampler_table[stage].offset += + iris_bo_offset_from_base_address(iris_resource_bo(res)); + + /* Make sure all land in the same BO */ + iris_border_color_pool_reserve(ice, IRIS_MAX_TEXTURE_SAMPLERS); + + for (int i = 0; i < count; i++) { + struct iris_sampler_state *state = ice->state.samplers[stage][i]; + + /* Save a pointer to the iris_sampler_state, a few fields need + * to inform draw-time decisions. + */ + ice->state.samplers[stage][start + i] = state; + + if (!state) { + memset(map, 0, 4 * GENX(SAMPLER_STATE_length)); + } else if (!state->needs_border_color) { + memcpy(map, state->sampler_state, 4 * GENX(SAMPLER_STATE_length)); + } else { + ice->state.need_border_colors = true; + + /* Stream out the border color and merge the pointer. */ + uint32_t offset = + iris_upload_border_color(ice, &state->base.border_color); + + uint32_t dynamic[GENX(SAMPLER_STATE_length)]; + iris_pack_state(GENX(SAMPLER_STATE), dynamic, dyns) { + dyns.BorderColorPointer = offset; + } + + for (uint32_t j = 0; j < GENX(SAMPLER_STATE_length); j++) + ((uint32_t *) map)[j] = state->sampler_state[j] | dynamic[j]; + } + + map += GENX(SAMPLER_STATE_length); + } + ice->state.dirty |= IRIS_DIRTY_SAMPLER_STATES_VS << stage; } struct iris_sampler_view { struct pipe_sampler_view pipe; struct isl_view view; - uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)]; + + /** The resource (BO) holding our SURFACE_STATE. */ + struct iris_state_ref surface_state; }; /** - * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+ + * Convert an swizzle enumeration (i.e. PIPE_SWIZZLE_X) to one of the Gen7.5+ * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED). The mappings are * * SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE @@ -860,6 +987,7 @@ iris_create_sampler_view(struct pipe_context *ctx, struct pipe_resource *tex, const struct pipe_sampler_view *tmpl) { + struct iris_context *ice = (struct iris_context *) ctx; struct iris_screen *screen = (struct iris_screen *)ctx->screen; struct iris_resource *itex = (struct iris_resource *) tex; struct iris_sampler_view *isv = calloc(1, sizeof(struct iris_sampler_view)); @@ -888,34 +1016,38 @@ iris_create_sampler_view(struct pipe_context *ctx, .b = pipe_swizzle_to_isl_channel(tmpl->swizzle_b), .a = pipe_swizzle_to_isl_channel(tmpl->swizzle_a), }, - .usage = ISL_SURF_USAGE_TEXTURE_BIT, + .usage = ISL_SURF_USAGE_TEXTURE_BIT | + (itex->surf.usage & ISL_SURF_USAGE_CUBE_BIT), }; - isl_surf_fill_state(&screen->isl_dev, isv->surface_state, + void *map = upload_state(ice->state.surface_uploader, &isv->surface_state, + 4 * GENX(RENDER_SURFACE_STATE_length), 64); + if (!unlikely(map)) + return NULL; + + struct iris_bo *state_bo = iris_resource_bo(isv->surface_state.res); + isv->surface_state.offset += iris_bo_offset_from_base_address(state_bo); + + isl_surf_fill_state(&screen->isl_dev, map, .surf = &itex->surf, .view = &isv->view, - .mocs = MOCS_WB); - // .address = ... + .mocs = MOCS_WB, + .address = itex->bo->gtt_offset); // .aux_surf = // .clear_color = clear_color, return &isv->pipe; } -struct iris_surface { - struct pipe_surface pipe; - struct isl_view view; - uint32_t surface_state[GENX(RENDER_SURFACE_STATE_length)]; -}; - static struct pipe_surface * iris_create_surface(struct pipe_context *ctx, struct pipe_resource *tex, const struct pipe_surface *tmpl) { + struct iris_context *ice = (struct iris_context *) ctx; struct iris_screen *screen = (struct iris_screen *)ctx->screen; struct iris_surface *surf = calloc(1, sizeof(struct iris_surface)); struct pipe_surface *psurf = &surf->pipe; - struct iris_resource *itex = (struct iris_resource *) tex; + struct iris_resource *res = (struct iris_resource *) tex; if (!surf) return NULL; @@ -931,6 +1063,14 @@ iris_create_surface(struct pipe_context *ctx, psurf->u.tex.last_layer = tmpl->u.tex.last_layer; psurf->u.tex.level = tmpl->u.tex.level; + unsigned usage = 0; + if (tmpl->writable) + usage = ISL_SURF_USAGE_STORAGE_BIT; + else if (util_format_is_depth_or_stencil(tmpl->format)) + usage = ISL_SURF_USAGE_DEPTH_BIT; + else + usage = ISL_SURF_USAGE_RENDER_TARGET_BIT; + surf->view = (struct isl_view) { .format = iris_isl_format_for_pipe_format(tmpl->format), .base_level = tmpl->u.tex.level, @@ -938,14 +1078,27 @@ iris_create_surface(struct pipe_context *ctx, .base_array_layer = tmpl->u.tex.first_layer, .array_len = tmpl->u.tex.last_layer - tmpl->u.tex.first_layer + 1, .swizzle = ISL_SWIZZLE_IDENTITY, - // XXX: DEPTH_BIt, STENCIL_BIT...CUBE_BIT? Other bits?! - .usage = ISL_SURF_USAGE_RENDER_TARGET_BIT, + .usage = usage, }; - isl_surf_fill_state(&screen->isl_dev, surf->surface_state, - .surf = &itex->surf, .view = &surf->view, - .mocs = MOCS_WB); - // .address = ... + /* Bail early for depth/stencil */ + if (res->surf.usage & (ISL_SURF_USAGE_DEPTH_BIT | + ISL_SURF_USAGE_STENCIL_BIT)) + return psurf; + + + void *map = upload_state(ice->state.surface_uploader, &surf->surface_state, + 4 * GENX(RENDER_SURFACE_STATE_length), 64); + if (!unlikely(map)) + return NULL; + + struct iris_bo *state_bo = iris_resource_bo(surf->surface_state.res); + surf->surface_state.offset += iris_bo_offset_from_base_address(state_bo); + + isl_surf_fill_state(&screen->isl_dev, map, + .surf = &res->surf, .view = &surf->view, + .mocs = MOCS_WB, + .address = res->bo->gtt_offset); // .aux_surf = // .clear_color = clear_color, @@ -954,10 +1107,26 @@ iris_create_surface(struct pipe_context *ctx, static void iris_set_sampler_views(struct pipe_context *ctx, - enum pipe_shader_type shader, + enum pipe_shader_type p_stage, unsigned start, unsigned count, struct pipe_sampler_view **views) { + struct iris_context *ice = (struct iris_context *) ctx; + gl_shader_stage stage = stage_from_pipe(p_stage); + + unsigned i; + for (i = 0; i < count; i++) { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ice->state.textures[stage][i], views[i]); + } + for (; i < ice->state.num_textures[stage]; i++) { + pipe_sampler_view_reference((struct pipe_sampler_view **) + &ice->state.textures[stage][i], NULL); + } + + ice->state.num_textures[stage] = count; + + ice->state.dirty |= (IRIS_DIRTY_BINDINGS_VS << stage); } static void @@ -992,8 +1161,6 @@ iris_set_scissor_states(struct pipe_context *ctx, { struct iris_context *ice = (struct iris_context *) ctx; - ice->state.num_scissors = num_scissors; - for (unsigned i = 0; i < num_scissors; i++) { ice->state.scissors[start_slot + i] = states[i]; } @@ -1010,15 +1177,10 @@ iris_set_stencil_ref(struct pipe_context *ctx, ice->state.dirty |= IRIS_DIRTY_WM_DEPTH_STENCIL; } - -struct iris_viewport_state { - uint32_t sf_cl_vp[GENX(SF_CLIP_VIEWPORT_length)]; -}; - static float -extent_from_matrix(const struct pipe_viewport_state *state, int axis) +viewport_extent(const struct pipe_viewport_state *state, int axis, float sign) { - return fabsf(state->scale[axis]) * state->translate[axis]; + return copysignf(state->scale[axis], sign) + state->translate[axis]; } #if 0 @@ -1106,25 +1268,23 @@ calculate_guardband_size(uint32_t fb_width, uint32_t fb_height, static void iris_set_viewport_states(struct pipe_context *ctx, unsigned start_slot, - unsigned num_viewports, - const struct pipe_viewport_state *state) + unsigned count, + const struct pipe_viewport_state *states) { struct iris_context *ice = (struct iris_context *) ctx; - struct iris_viewport_state *cso = - malloc(sizeof(struct iris_viewport_state)); + struct iris_viewport_state *cso = &ice->state.genx->viewport; + uint32_t *vp_map = &cso->sf_cl_vp[start_slot]; // XXX: sf_cl_vp is only big enough for one slot, we don't iterate right - for (unsigned i = start_slot; i < start_slot + num_viewports; i++) { - float x_extent = extent_from_matrix(&state[i], 0); - float y_extent = extent_from_matrix(&state[i], 1); - - iris_pack_state(GENX(SF_CLIP_VIEWPORT), cso->sf_cl_vp, vp) { - vp.ViewportMatrixElementm00 = state[i].scale[0]; - vp.ViewportMatrixElementm11 = state[i].scale[1]; - vp.ViewportMatrixElementm22 = state[i].scale[2]; - vp.ViewportMatrixElementm30 = state[i].translate[0]; - vp.ViewportMatrixElementm31 = state[i].translate[1]; - vp.ViewportMatrixElementm32 = state[i].translate[2]; + for (unsigned i = 0; i < count; i++) { + const struct pipe_viewport_state *state = &states[start_slot + i]; + iris_pack_state(GENX(SF_CLIP_VIEWPORT), vp_map, vp) { + vp.ViewportMatrixElementm00 = state->scale[0]; + vp.ViewportMatrixElementm11 = state->scale[1]; + vp.ViewportMatrixElementm22 = state->scale[2]; + vp.ViewportMatrixElementm30 = state->translate[0]; + vp.ViewportMatrixElementm31 = state->translate[1]; + vp.ViewportMatrixElementm32 = state->translate[2]; /* XXX: in i965 this is computed based on the drawbuffer size, * but we don't have that here... */ @@ -1132,31 +1292,25 @@ iris_set_viewport_states(struct pipe_context *ctx, vp.XMaxClipGuardband = 1.0; vp.YMinClipGuardband = -1.0; vp.YMaxClipGuardband = 1.0; - vp.XMinViewPort = -x_extent; - vp.XMaxViewPort = x_extent; - vp.YMinViewPort = -y_extent; - vp.YMaxViewPort = y_extent; + vp.XMinViewPort = viewport_extent(state, 0, -1.0f); + vp.XMaxViewPort = viewport_extent(state, 0, 1.0f) - 1; + vp.YMinViewPort = viewport_extent(state, 1, -1.0f); + vp.YMaxViewPort = viewport_extent(state, 1, 1.0f) - 1; } + + vp_map += GENX(SF_CLIP_VIEWPORT_length); } - ice->state.cso_vp = cso; - // XXX: start_slot - ice->state.num_viewports = num_viewports; ice->state.dirty |= IRIS_DIRTY_SF_CL_VIEWPORT; } -struct iris_depth_state -{ - uint32_t depth_buffer[GENX(3DSTATE_DEPTH_BUFFER_length)]; - uint32_t hier_depth_buffer[GENX(3DSTATE_HIER_DEPTH_BUFFER_length)]; - uint32_t stencil_buffer[GENX(3DSTATE_STENCIL_BUFFER_length)]; -}; - static void iris_set_framebuffer_state(struct pipe_context *ctx, const struct pipe_framebuffer_state *state) { struct iris_context *ice = (struct iris_context *) ctx; + struct iris_screen *screen = (struct iris_screen *)ctx->screen; + struct isl_device *isl_dev = &screen->isl_dev; struct pipe_framebuffer_state *cso = &ice->state.framebuffer; if (cso->samples != state->samples) { @@ -1167,50 +1321,138 @@ iris_set_framebuffer_state(struct pipe_context *ctx, ice->state.dirty |= IRIS_DIRTY_BLEND_STATE; } - cso->width = state->width; - cso->height = state->height; - cso->layers = state->layers; - cso->samples = state->samples; + if ((cso->layers == 0) == (state->layers == 0)) { + ice->state.dirty |= IRIS_DIRTY_CLIP; + } - unsigned i; - for (i = 0; i < state->nr_cbufs; i++) - pipe_surface_reference(&cso->cbufs[i], state->cbufs[i]); - for (; i < cso->nr_cbufs; i++) - pipe_surface_reference(&cso->cbufs[i], NULL); + util_copy_framebuffer_state(cso, state); - cso->nr_cbufs = state->nr_cbufs; + struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer; - pipe_surface_reference(&cso->zsbuf, state->zsbuf); + struct isl_view view = { + .base_level = 0, + .levels = 1, + .base_array_layer = 0, + .array_len = 1, + .swizzle = ISL_SWIZZLE_IDENTITY, + }; struct isl_depth_stencil_hiz_emit_info info = { + .view = &view, .mocs = MOCS_WB, }; - // XXX: depth buffers + struct iris_resource *zres = + (void *) (cso->zsbuf ? cso->zsbuf->texture : NULL); + + if (zres) { + view.usage |= ISL_SURF_USAGE_DEPTH_BIT; + + info.depth_surf = &zres->surf; + info.depth_address = zres->bo->gtt_offset; + + view.format = zres->surf.format; + + view.base_level = cso->zsbuf->u.tex.level; + view.base_array_layer = cso->zsbuf->u.tex.first_layer; + view.array_len = + cso->zsbuf->u.tex.last_layer - cso->zsbuf->u.tex.first_layer + 1; + + info.hiz_usage = ISL_AUX_USAGE_NONE; + } + +#if 0 + if (stencil_mt) { + view.usage |= ISL_SURF_USAGE_STENCIL_BIT; + info.stencil_surf = &stencil_mt->surf; + + if (!depth_mt) { + view.base_level = stencil_irb->mt_level - stencil_irb->mt->first_level; + view.base_array_layer = stencil_irb->mt_layer; + view.array_len = MAX2(stencil_irb->layer_count, 1); + view.format = stencil_mt->surf.format; + } + + uint32_t stencil_offset = 0; + info.stencil_address = stencil_mt->bo->gtt_offset + stencil_mt->offset; + } +#endif + + isl_emit_depth_stencil_hiz_s(isl_dev, cso_z->packets, &info); + + ice->state.dirty |= IRIS_DIRTY_DEPTH_BUFFER; + + /* Render target change */ + ice->state.dirty |= IRIS_DIRTY_BINDINGS_FS; } static void iris_set_constant_buffer(struct pipe_context *ctx, - enum pipe_shader_type shader, uint index, - const struct pipe_constant_buffer *cb) + enum pipe_shader_type p_stage, unsigned index, + const struct pipe_constant_buffer *input) { -} + struct iris_context *ice = (struct iris_context *) ctx; + struct iris_screen *screen = (struct iris_screen *)ctx->screen; + gl_shader_stage stage = stage_from_pipe(p_stage); + struct iris_shader_state *shs = &ice->shaders.state[stage]; + struct iris_const_buffer *cbuf = &shs->constbuf[index]; + + if (input && (input->buffer || input->user_buffer)) { + if (input->user_buffer) { + u_upload_data(ctx->const_uploader, 0, input->buffer_size, 32, + input->user_buffer, &cbuf->data.offset, + &cbuf->data.res); + } else { + pipe_resource_reference(&cbuf->data.res, input->buffer); + } + + // XXX: these are not retained forever, use a separate uploader? + void *map = + upload_state(ice->state.surface_uploader, &cbuf->surface_state, + 4 * GENX(RENDER_SURFACE_STATE_length), 64); + if (!unlikely(map)) { + pipe_resource_reference(&cbuf->data.res, NULL); + return; + } + + struct iris_resource *res = (void *) cbuf->data.res; + struct iris_bo *surf_bo = iris_resource_bo(cbuf->surface_state.res); + cbuf->surface_state.offset += iris_bo_offset_from_base_address(surf_bo); + + isl_buffer_fill_state(&screen->isl_dev, map, + .address = res->bo->gtt_offset + cbuf->data.offset, + .size_B = input->buffer_size, + .format = ISL_FORMAT_R32G32B32A32_FLOAT, + .stride_B = 1, + .mocs = MOCS_WB) + } else { + pipe_resource_reference(&cbuf->data.res, NULL); + pipe_resource_reference(&cbuf->surface_state.res, NULL); + } + ice->state.dirty |= IRIS_DIRTY_CONSTANTS_VS << stage; + // XXX: maybe not necessary all the time...? + ice->state.dirty |= IRIS_DIRTY_BINDINGS_VS << stage; +} static void iris_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state) { + struct iris_sampler_view *isv = (void *) state; pipe_resource_reference(&state->texture, NULL); - free(state); + pipe_resource_reference(&isv->surface_state.res, NULL); + free(isv); } static void -iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *surface) +iris_surface_destroy(struct pipe_context *ctx, struct pipe_surface *p_surf) { - pipe_resource_reference(&surface->texture, NULL); - free(surface); + struct iris_surface *surf = (void *) p_surf; + pipe_resource_reference(&p_surf->texture, NULL); + pipe_resource_reference(&surf->surface_state.res, NULL); + free(surf); } static void @@ -1219,20 +1461,11 @@ iris_delete_state(struct pipe_context *ctx, void *state) free(state); } -struct iris_vertex_buffer_state { - uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)]; - struct iris_address bos[33]; - unsigned num_buffers; -}; - static void iris_free_vertex_buffers(struct iris_vertex_buffer_state *cso) { - if (cso) { - for (unsigned i = 0; i < cso->num_buffers; i++) - iris_bo_unreference(cso->bos[i].bo); - free(cso); - } + for (unsigned i = 0; i < cso->num_buffers; i++) + pipe_resource_reference(&cso->resources[i], NULL); } static void @@ -1241,32 +1474,34 @@ iris_set_vertex_buffers(struct pipe_context *ctx, const struct pipe_vertex_buffer *buffers) { struct iris_context *ice = (struct iris_context *) ctx; - struct iris_vertex_buffer_state *cso = - malloc(sizeof(struct iris_vertex_buffer_state)); + struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers; - /* If there are no buffers, do nothing. We can leave the stale - * 3DSTATE_VERTEX_BUFFERS in place - as long as there are no vertex - * elements that point to them, it should be fine. - */ - if (!buffers) - return; + iris_free_vertex_buffers(&ice->state.genx->vertex_buffers); - iris_free_vertex_buffers(ice->state.cso_vertex_buffers); + if (!buffers) + count = 0; cso->num_buffers = count; iris_pack_command(GENX(3DSTATE_VERTEX_BUFFERS), cso->vertex_buffers, vb) { - vb.DWordLength = 4 * cso->num_buffers - 1; + vb.DWordLength = 4 * MAX2(cso->num_buffers, 1) - 1; } uint32_t *vb_pack_dest = &cso->vertex_buffers[1]; + if (count == 0) { + iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) { + vb.VertexBufferIndex = start_slot; + vb.NullVertexBuffer = true; + vb.AddressModifyEnable = true; + } + } + for (unsigned i = 0; i < count; i++) { assert(!buffers[i].is_user_buffer); - struct iris_resource *res = (void *) buffers[i].buffer.resource; - iris_bo_reference(res->bo); - cso->bos[i] = ro_bo(res->bo, buffers[i].buffer_offset); + pipe_resource_reference(&cso->resources[i], buffers[i].buffer.resource); + struct iris_resource *res = (void *) cso->resources[i]; iris_pack_state(GENX(VERTEX_BUFFER_STATE), vb_pack_dest, vb) { vb.VertexBufferIndex = start_slot + i; @@ -1274,19 +1509,19 @@ iris_set_vertex_buffers(struct pipe_context *ctx, vb.AddressModifyEnable = true; vb.BufferPitch = buffers[i].stride; vb.BufferSize = res->bo->size; - /* vb.BufferStartingAddress is filled in at draw time */ + vb.BufferStartingAddress = + ro_bo(NULL, res->bo->gtt_offset + buffers[i].buffer_offset); } vb_pack_dest += GENX(VERTEX_BUFFER_STATE_length); } - ice->state.cso_vertex_buffers = cso; ice->state.dirty |= IRIS_DIRTY_VERTEX_BUFFERS; } struct iris_vertex_element_state { uint32_t vertex_elements[1 + 33 * GENX(VERTEX_ELEMENT_STATE_length)]; - uint32_t vf_instancing[GENX(3DSTATE_VF_INSTANCING_length)][33]; + uint32_t vf_instancing[33 * GENX(3DSTATE_VF_INSTANCING_length)]; unsigned count; }; @@ -1298,20 +1533,37 @@ iris_create_vertex_elements(struct pipe_context *ctx, struct iris_vertex_element_state *cso = malloc(sizeof(struct iris_vertex_element_state)); - cso->count = count; + cso->count = MAX2(count, 1); /* TODO: * - create edge flag one * - create SGV ones * - if those are necessary, use count + 1/2/3... OR in the length */ - iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve); + iris_pack_command(GENX(3DSTATE_VERTEX_ELEMENTS), cso->vertex_elements, ve) { + ve.DWordLength = 1 + GENX(VERTEX_ELEMENT_STATE_length) * cso->count - 2; + } uint32_t *ve_pack_dest = &cso->vertex_elements[1]; + uint32_t *vfi_pack_dest = cso->vf_instancing; + + if (count == 0) { + iris_pack_state(GENX(VERTEX_ELEMENT_STATE), ve_pack_dest, ve) { + ve.Valid = true; + ve.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT; + ve.Component0Control = VFCOMP_STORE_0; + ve.Component1Control = VFCOMP_STORE_0; + ve.Component2Control = VFCOMP_STORE_0; + ve.Component3Control = VFCOMP_STORE_1_FP; + } + + iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { + } + } for (int i = 0; i < count; i++) { enum isl_format isl_format = - iris_isl_format_for_pipe_format(state[i].src_format); + iris_isl_format_for_pipe_format(state[i].src_format); unsigned comp[4] = { VFCOMP_STORE_SRC, VFCOMP_STORE_SRC, VFCOMP_STORE_SRC, VFCOMP_STORE_SRC }; @@ -1335,13 +1587,14 @@ iris_create_vertex_elements(struct pipe_context *ctx, ve.Component3Control = comp[3]; } - iris_pack_command(GENX(3DSTATE_VF_INSTANCING), cso->vf_instancing[i], vi) { + iris_pack_command(GENX(3DSTATE_VF_INSTANCING), vfi_pack_dest, vi) { vi.VertexElementIndex = i; vi.InstancingEnable = state[i].instance_divisor > 0; vi.InstanceDataStepRate = state[i].instance_divisor; } ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length); + vfi_pack_dest += GENX(3DSTATE_VF_INSTANCING_length); } return cso; @@ -1397,20 +1650,190 @@ iris_set_stream_output_targets(struct pipe_context *ctx, { } -#if 0 static void -iris_compute_sbe(const struct iris_context *ice, - const struct brw_wm_prog_data *wm_prog_data) +iris_compute_sbe_urb_read_interval(uint64_t fs_input_slots, + const struct brw_vue_map *last_vue_map, + bool two_sided_color, + unsigned *out_offset, + unsigned *out_length) +{ + /* The compiler computes the first URB slot without considering COL/BFC + * swizzling (because it doesn't know whether it's enabled), so we need + * to do that here too. This may result in a smaller offset, which + * should be safe. + */ + const unsigned first_slot = + brw_compute_first_urb_slot_required(fs_input_slots, last_vue_map); + + /* This becomes the URB read offset (counted in pairs of slots). */ + assert(first_slot % 2 == 0); + *out_offset = first_slot / 2; + + /* We need to adjust the inputs read to account for front/back color + * swizzling, as it can make the URB length longer. + */ + for (int c = 0; c <= 1; c++) { + if (fs_input_slots & (VARYING_BIT_COL0 << c)) { + /* If two sided color is enabled, the fragment shader's gl_Color + * (COL0) input comes from either the gl_FrontColor (COL0) or + * gl_BackColor (BFC0) input varyings. Mark BFC as used, too. + */ + if (two_sided_color) + fs_input_slots |= (VARYING_BIT_BFC0 << c); + + /* If front color isn't written, we opt to give them back color + * instead of an undefined value. Switch from COL to BFC. + */ + if (last_vue_map->varying_to_slot[VARYING_SLOT_COL0 + c] == -1) { + fs_input_slots &= ~(VARYING_BIT_COL0 << c); + fs_input_slots |= (VARYING_BIT_BFC0 << c); + } + } + } + + /* Compute the minimum URB Read Length necessary for the FS inputs. + * + * From the Sandy Bridge PRM, Volume 2, Part 1, documentation for + * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length": + * + * "This field should be set to the minimum length required to read the + * maximum source attribute. The maximum source attribute is indicated + * by the maximum value of the enabled Attribute # Source Attribute if + * Attribute Swizzle Enable is set, Number of Output Attributes-1 if + * enable is not set. + * read_length = ceiling((max_source_attr + 1) / 2) + * + * [errata] Corruption/Hang possible if length programmed larger than + * recommended" + * + * Similar text exists for Ivy Bridge. + * + * We find the last URB slot that's actually read by the FS. + */ + unsigned last_read_slot = last_vue_map->num_slots - 1; + while (last_read_slot > first_slot && !(fs_input_slots & + (1ull << last_vue_map->slot_to_varying[last_read_slot]))) + --last_read_slot; + + /* The URB read length is the difference of the two, counted in pairs. */ + *out_length = DIV_ROUND_UP(last_read_slot - first_slot + 1, 2); +} + +static void +iris_emit_sbe_swiz(struct iris_batch *batch, + const struct iris_context *ice, + unsigned urb_read_offset) +{ + struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) attr_overrides[16] = {}; + const struct brw_wm_prog_data *wm_prog_data = (void *) + ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; + const struct brw_vue_map *vue_map = ice->shaders.last_vue_map; + const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; + + /* XXX: this should be generated when putting programs in place */ + + // XXX: raster->sprite_coord_enable + + for (int fs_attr = 0; fs_attr < VARYING_SLOT_MAX; fs_attr++) { + const int input_index = wm_prog_data->urb_setup[fs_attr]; + if (input_index < 0 || input_index >= 16) + continue; + + struct GENX(SF_OUTPUT_ATTRIBUTE_DETAIL) *attr = + &attr_overrides[input_index]; + + /* Viewport and Layer are stored in the VUE header. We need to override + * them to zero if earlier stages didn't write them, as GL requires that + * they read back as zero when not explicitly set. + */ + switch (fs_attr) { + case VARYING_SLOT_VIEWPORT: + case VARYING_SLOT_LAYER: + attr->ComponentOverrideX = true; + attr->ComponentOverrideW = true; + attr->ConstantSource = CONST_0000; + + if (!(vue_map->slots_valid & VARYING_BIT_LAYER)) + attr->ComponentOverrideY = true; + if (!(vue_map->slots_valid & VARYING_BIT_VIEWPORT)) + attr->ComponentOverrideZ = true; + continue; + + case VARYING_SLOT_PRIMITIVE_ID: + attr->ComponentOverrideX = true; + attr->ComponentOverrideY = true; + attr->ComponentOverrideZ = true; + attr->ComponentOverrideW = true; + attr->ConstantSource = PRIM_ID; + continue; + + default: + break; + } + + int slot = vue_map->varying_to_slot[fs_attr]; + + /* If there was only a back color written but not front, use back + * as the color instead of undefined. + */ + if (slot == -1 && fs_attr == VARYING_SLOT_COL0) + slot = vue_map->varying_to_slot[VARYING_SLOT_BFC0]; + if (slot == -1 && fs_attr == VARYING_SLOT_COL1) + slot = vue_map->varying_to_slot[VARYING_SLOT_BFC1]; + + /* Not written by the previous stage - undefined. */ + if (slot == -1) { + attr->ComponentOverrideX = true; + attr->ComponentOverrideY = true; + attr->ComponentOverrideZ = true; + attr->ComponentOverrideW = true; + attr->ConstantSource = CONST_0001_FLOAT; + continue; + } + + /* Compute the location of the attribute relative to the read offset, + * which is counted in 256-bit increments (two 128-bit VUE slots). + */ + const int source_attr = slot - 2 * urb_read_offset; + assert(source_attr >= 0 && source_attr <= 32); + attr->SourceAttribute = source_attr; + + /* If we are doing two-sided color, and the VUE slot following this one + * represents a back-facing color, then we need to instruct the SF unit + * to do back-facing swizzling. + */ + if (cso_rast->light_twoside && + ((vue_map->slot_to_varying[slot] == VARYING_SLOT_COL0 && + vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC0) || + (vue_map->slot_to_varying[slot] == VARYING_SLOT_COL1 && + vue_map->slot_to_varying[slot+1] == VARYING_SLOT_BFC1))) + attr->SwizzleSelect = INPUTATTR_FACING; + } + + iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbes) { + for (int i = 0; i < 16; i++) + sbes.Attribute[i] = attr_overrides[i]; + } +} + +static void +iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice) { - uint32_t sbe_map[GENX(3DSTATE_SBE_length)]; - struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; + const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast; + const struct brw_wm_prog_data *wm_prog_data = (void *) + ice->shaders.prog[MESA_SHADER_FRAGMENT]->prog_data; + struct pipe_shader_state *p_fs = + (void *) ice->shaders.uncompiled[MESA_SHADER_FRAGMENT]; + assert(p_fs->type == PIPE_SHADER_IR_NIR); + nir_shader *fs_nir = p_fs->ir.nir; unsigned urb_read_offset, urb_read_length; - brw_compute_sbe_urb_slot_interval(fp->info.inputs_read, - ice->shaders.last_vue_map, - &urb_read_offset, &urb_read_length); + iris_compute_sbe_urb_read_interval(fs_nir->info.inputs_read, + ice->shaders.last_vue_map, + cso_rast->light_twoside, + &urb_read_offset, &urb_read_length); - iris_pack_command(GENX(3DSTATE_SBE), sbe_map, sbe) { + iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) { sbe.AttributeSwizzleEnable = true; sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs; sbe.PointSpriteTextureCoordinateOrigin = cso_rast->sprite_coord_mode; @@ -1420,23 +1843,34 @@ iris_compute_sbe(const struct iris_context *ice, sbe.ForceVertexURBEntryReadLength = true; sbe.ConstantInterpolationEnable = wm_prog_data->flat_inputs; - for (int i = 0; i < urb_read_length * 2; i++) { + for (int i = 0; i < 32; i++) { sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW; } } + + iris_emit_sbe_swiz(batch, ice, urb_read_offset); } -#endif static void iris_bind_compute_state(struct pipe_context *ctx, void *state) { } +static void +iris_populate_sampler_key(const struct iris_context *ice, + struct brw_sampler_prog_key_data *key) +{ + for (int i = 0; i < MAX_SAMPLERS; i++) { + key->swizzles[i] = 0x688; /* XYZW */ + } +} + static void iris_populate_vs_key(const struct iris_context *ice, struct brw_vs_prog_key *key) { memset(key, 0, sizeof(*key)); + iris_populate_sampler_key(ice, &key->tex); } static void @@ -1444,6 +1878,7 @@ iris_populate_tcs_key(const struct iris_context *ice, struct brw_tcs_prog_key *key) { memset(key, 0, sizeof(*key)); + iris_populate_sampler_key(ice, &key->tex); } static void @@ -1451,6 +1886,7 @@ iris_populate_tes_key(const struct iris_context *ice, struct brw_tes_prog_key *key) { memset(key, 0, sizeof(*key)); + iris_populate_sampler_key(ice, &key->tex); } static void @@ -1458,6 +1894,7 @@ iris_populate_gs_key(const struct iris_context *ice, struct brw_gs_prog_key *key) { memset(key, 0, sizeof(*key)); + iris_populate_sampler_key(ice, &key->tex); } static void @@ -1465,6 +1902,7 @@ iris_populate_fs_key(const struct iris_context *ice, struct brw_wm_prog_key *key) { memset(key, 0, sizeof(*key)); + iris_populate_sampler_key(ice, &key->tex); /* XXX: dirty flags? */ const struct pipe_framebuffer_state *fb = &ice->state.framebuffer; @@ -1479,6 +1917,9 @@ iris_populate_fs_key(const struct iris_context *ice, key->replicate_alpha = fb->nr_cbufs > 1 && (zsa->alpha.enabled || blend->alpha_to_coverage); + /* XXX: only bother if COL0/1 are read */ + key->flat_shade = rast->flatshade; + // key->force_dual_color_blend for unigine #if 0 if (cso_rast->multisample) { @@ -1494,16 +1935,20 @@ iris_populate_fs_key(const struct iris_context *ice, key->coherent_fb_fetch = true; } - //pkt.SamplerCount = \ - //DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ - //pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \ - //ffs(stage_state->per_thread_scratch) - 11; \ +#if 0 + // XXX: these need to go in INIT_THREAD_DISPATCH_FIELDS + pkt.SamplerCount = \ + DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \ + pkt.PerThreadScratchSpace = prog_data->total_scratch == 0 ? 0 : \ + ffs(stage_state->per_thread_scratch) - 11; \ + +#endif static uint64_t KSP(const struct iris_compiled_shader *shader) { - struct iris_resource *res = (void *) shader->buffer; - return res->bo->gtt_offset + shader->offset; + struct iris_resource *res = (void *) shader->assembly.res; + return iris_bo_offset_from_base_address(res->bo) + shader->assembly.offset; } #define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \ @@ -1520,8 +1965,8 @@ KSP(const struct iris_compiled_shader *shader) pkt.Enable = true; static void -iris_set_vs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) +iris_store_vs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; @@ -1536,8 +1981,8 @@ iris_set_vs_state(const struct gen_device_info *devinfo, } static void -iris_set_tcs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) +iris_store_tcs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; @@ -1553,8 +1998,8 @@ iris_set_tcs_state(const struct gen_device_info *devinfo, } static void -iris_set_tes_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) +iris_store_tes_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; @@ -1587,8 +2032,8 @@ iris_set_tes_state(const struct gen_device_info *devinfo, } static void -iris_set_gs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) +iris_store_gs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_vue_prog_data *vue_prog_data = (void *) prog_data; @@ -1602,7 +2047,7 @@ iris_set_gs_state(const struct gen_device_info *devinfo, gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords; gs.InstanceControl = gs_prog_data->invocations - 1; - gs.DispatchMode = SIMD8; + gs.DispatchMode = DISPATCH_MODE_SIMD8; gs.IncludePrimitiveID = gs_prog_data->include_primitive_id; gs.ControlDataFormat = gs_prog_data->control_data_format; gs.ReorderMode = TRAILING; @@ -1631,8 +2076,8 @@ iris_set_gs_state(const struct gen_device_info *devinfo, } static void -iris_set_fs_state(const struct gen_device_info *devinfo, - struct iris_compiled_shader *shader) +iris_store_fs_state(const struct gen_device_info *devinfo, + struct iris_compiled_shader *shader) { struct brw_stage_prog_data *prog_data = shader->prog_data; struct brw_wm_prog_data *wm_prog_data = (void *) shader->prog_data; @@ -1714,7 +2159,7 @@ iris_set_fs_state(const struct gen_device_info *devinfo, static unsigned iris_derived_program_state_size(enum iris_program_cache_id cache_id) { - assert(cache_id <= IRIS_CACHE_CS); + assert(cache_id <= IRIS_CACHE_BLORP); static const unsigned dwords[] = { [IRIS_CACHE_VS] = GENX(3DSTATE_VS_length), @@ -1724,34 +2169,35 @@ iris_derived_program_state_size(enum iris_program_cache_id cache_id) [IRIS_CACHE_FS] = GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length), [IRIS_CACHE_CS] = 0, - [IRIS_CACHE_BLORP_BLIT] = 0, + [IRIS_CACHE_BLORP] = 0, }; return sizeof(uint32_t) * dwords[cache_id]; } static void -iris_set_derived_program_state(const struct gen_device_info *devinfo, - enum iris_program_cache_id cache_id, - struct iris_compiled_shader *shader) +iris_store_derived_program_state(const struct gen_device_info *devinfo, + enum iris_program_cache_id cache_id, + struct iris_compiled_shader *shader) { switch (cache_id) { case IRIS_CACHE_VS: - iris_set_vs_state(devinfo, shader); + iris_store_vs_state(devinfo, shader); break; case IRIS_CACHE_TCS: - iris_set_tcs_state(devinfo, shader); + iris_store_tcs_state(devinfo, shader); break; case IRIS_CACHE_TES: - iris_set_tes_state(devinfo, shader); + iris_store_tes_state(devinfo, shader); break; case IRIS_CACHE_GS: - iris_set_gs_state(devinfo, shader); + iris_store_gs_state(devinfo, shader); break; case IRIS_CACHE_FS: - iris_set_fs_state(devinfo, shader); + iris_store_fs_state(devinfo, shader); break; case IRIS_CACHE_CS: + case IRIS_CACHE_BLORP: break; default: break; @@ -1803,35 +2249,239 @@ static const uint32_t push_constant_opcodes[] = { [MESA_SHADER_COMPUTE] = 0, }; +/** + * Add a surface to the validation list, as well as the buffer containing + * the corresponding SURFACE_STATE. + * + * Returns the binding table entry (offset to SURFACE_STATE). + */ static uint32_t -emit_patched_surface_state(struct iris_batch *batch, - uint32_t *surface_state, - const struct iris_resource *res, - unsigned reloc_flags) +use_surface(struct iris_batch *batch, + struct pipe_surface *p_surf, + bool writeable) { - const int num_dwords = GENX(RENDER_SURFACE_STATE_length); - uint32_t offset; - uint32_t *dw = iris_alloc_state(batch, 4 * num_dwords, 64, &offset); + struct iris_surface *surf = (void *) p_surf; - STATIC_ASSERT(GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_start) % 32 == 0); - int addr_idx = GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_start) / 32; - for (uint32_t i = 0; i < addr_idx; i++) - dw[i] = surface_state[i]; + iris_use_pinned_bo(batch, iris_resource_bo(p_surf->texture), writeable); + iris_use_pinned_bo(batch, iris_resource_bo(surf->surface_state.res), false); - uint64_t *qw = (uint64_t *) &dw[addr_idx]; - // XXX: mt->offset, if needed - *qw = iris_state_reloc(batch, (void *)qw - batch->statebuf.map, res->bo, - surface_state[addr_idx + 1], reloc_flags); + return surf->surface_state.offset; +} - for (uint32_t i = addr_idx + 1; i < num_dwords; i++) - dw[i] = surface_state[i]; +static uint32_t +use_sampler_view(struct iris_batch *batch, struct iris_sampler_view *isv) +{ + iris_use_pinned_bo(batch, iris_resource_bo(isv->pipe.texture), false); + iris_use_pinned_bo(batch, iris_resource_bo(isv->surface_state.res), false); - return offset; + return isv->surface_state.offset; } -static void -iris_upload_render_state(struct iris_context *ice, - struct iris_batch *batch, +static uint32_t +use_const_buffer(struct iris_batch *batch, struct iris_const_buffer *cbuf) +{ + iris_use_pinned_bo(batch, iris_resource_bo(cbuf->data.res), false); + iris_use_pinned_bo(batch, iris_resource_bo(cbuf->surface_state.res), false); + + return cbuf->surface_state.offset; +} + +static uint32_t +use_null_surface(struct iris_batch *batch, struct iris_context *ice) +{ + struct iris_bo *state_bo = iris_resource_bo(ice->state.unbound_tex.res); + + iris_use_pinned_bo(batch, state_bo, false); + + return ice->state.unbound_tex.offset; +} + +static void +iris_populate_binding_table(struct iris_context *ice, + struct iris_batch *batch, + gl_shader_stage stage) +{ + const struct iris_binder *binder = &batch->binder; + struct iris_compiled_shader *shader = ice->shaders.prog[stage]; + if (!shader) + return; + + // Surfaces: + // - pull constants + // - ubos/ssbos/abos + // - images + // - textures + // - render targets - write and read + + //struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; + uint32_t *bt_map = binder->map + binder->bt_offset[stage]; + int s = 0; + + if (stage == MESA_SHADER_FRAGMENT) { + struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; + for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { + bt_map[s++] = use_surface(batch, cso_fb->cbufs[i], true); + } + } + + //assert(prog_data->binding_table.texture_start == + //(ice->state.num_textures[stage] ? s : 0xd0d0d0d0)); + + for (int i = 0; i < ice->state.num_textures[stage]; i++) { + struct iris_sampler_view *view = ice->state.textures[stage][i]; + bt_map[s++] = view ? use_sampler_view(batch, view) + : use_null_surface(batch, ice); + } + + // XXX: want the number of BTE's to shorten this loop + struct iris_shader_state *shs = &ice->shaders.state[stage]; + for (int i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; i++) { + struct iris_const_buffer *cbuf = &shs->constbuf[i]; + if (!cbuf->surface_state.res) + break; + + bt_map[s++] = use_const_buffer(batch, cbuf); + } +#if 0 + // XXX: not implemented yet + assert(prog_data->binding_table.pull_constants_start == 0xd0d0d0d0); + assert(prog_data->binding_table.ubo_start == 0xd0d0d0d0); + assert(prog_data->binding_table.ssbo_start == 0xd0d0d0d0); + assert(prog_data->binding_table.image_start == 0xd0d0d0d0); + assert(prog_data->binding_table.shader_time_start == 0xd0d0d0d0); + //assert(prog_data->binding_table.plane_start[1] == 0xd0d0d0d0); + //assert(prog_data->binding_table.plane_start[2] == 0xd0d0d0d0); +#endif +} + +static void +iris_use_optional_res(struct iris_batch *batch, + struct pipe_resource *res, + bool writeable) +{ + if (res) { + struct iris_bo *bo = iris_resource_bo(res); + iris_use_pinned_bo(batch, bo, writeable); + } +} + + +/** + * Pin any BOs which were installed by a previous batch, and restored + * via the hardware logical context mechanism. + * + * We don't need to re-emit all state every batch - the hardware context + * mechanism will save and restore it for us. This includes pointers to + * various BOs...which won't exist unless we ask the kernel to pin them + * by adding them to the validation list. + * + * We can skip buffers if we've re-emitted those packets, as we're + * overwriting those stale pointers with new ones, and don't actually + * refer to the old BOs. + */ +static void +iris_restore_context_saved_bos(struct iris_context *ice, + struct iris_batch *batch, + const struct pipe_draw_info *draw) +{ + // XXX: whack IRIS_SHADER_DIRTY_BINDING_TABLE on new batch + + const uint64_t clean = ~ice->state.dirty; + + if (clean & IRIS_DIRTY_CC_VIEWPORT) { + iris_use_optional_res(batch, ice->state.last_res.cc_vp, false); + } + + if (clean & IRIS_DIRTY_SF_CL_VIEWPORT) { + iris_use_optional_res(batch, ice->state.last_res.sf_cl_vp, false); + } + + if (clean & IRIS_DIRTY_BLEND_STATE) { + iris_use_optional_res(batch, ice->state.last_res.blend, false); + } + + if (clean & IRIS_DIRTY_COLOR_CALC_STATE) { + iris_use_optional_res(batch, ice->state.last_res.color_calc, false); + } + + if (clean & IRIS_DIRTY_SCISSOR_RECT) { + iris_use_optional_res(batch, ice->state.last_res.scissor, false); + } + + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + if (clean & (IRIS_DIRTY_CONSTANTS_VS << stage)) + continue; + + struct iris_shader_state *shs = &ice->shaders.state[stage]; + struct iris_compiled_shader *shader = ice->shaders.prog[stage]; + + if (!shader) + continue; + + struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; + + for (int i = 0; i < 4; i++) { + const struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; + + if (range->length == 0) + continue; + + struct iris_const_buffer *cbuf = &shs->constbuf[range->block]; + struct iris_resource *res = (void *) cbuf->data.res; + + if (res) + iris_use_pinned_bo(batch, res->bo, false); + else + iris_use_pinned_bo(batch, batch->screen->workaround_bo, false); + } + } + + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + struct pipe_resource *res = ice->state.sampler_table[stage].res; + if (res) + iris_use_pinned_bo(batch, iris_resource_bo(res), false); + } + + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + if (clean & (IRIS_DIRTY_VS << stage)) { + struct iris_compiled_shader *shader = ice->shaders.prog[stage]; + if (shader) { + struct iris_bo *bo = iris_resource_bo(shader->assembly.res); + iris_use_pinned_bo(batch, bo, false); + } + + // XXX: scratch buffer + } + } + + // XXX: 3DSTATE_SO_BUFFER + + if (clean & IRIS_DIRTY_DEPTH_BUFFER) { + struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; + + if (cso_fb->zsbuf) { + struct iris_resource *zres = (void *) cso_fb->zsbuf->texture; + // XXX: depth might not be writable... + iris_use_pinned_bo(batch, zres->bo, true); + } + } + + if (draw->index_size > 0) { + // XXX: index buffer + } + + if (clean & IRIS_DIRTY_VERTEX_BUFFERS) { + struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers; + for (unsigned i = 0; i < cso->num_buffers; i++) { + struct iris_resource *res = (void *) cso->resources[i]; + iris_use_pinned_bo(batch, res->bo, false); + } + } +} + +static void +iris_upload_render_state(struct iris_context *ice, + struct iris_batch *batch, const struct pipe_draw_info *draw) { const uint64_t dirty = ice->state.dirty; @@ -1843,20 +2493,26 @@ iris_upload_render_state(struct iris_context *ice, struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { ptr.CCViewportPointer = - iris_emit_state(batch, cso->cc_vp, sizeof(cso->cc_vp), 32); + emit_state(batch, ice->state.dynamic_uploader, + &ice->state.last_res.cc_vp, + cso->cc_vp, sizeof(cso->cc_vp), 32); } } if (dirty & IRIS_DIRTY_SF_CL_VIEWPORT) { - struct iris_viewport_state *cso = ice->state.cso_vp; + struct iris_viewport_state *cso = &ice->state.genx->viewport; iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { ptr.SFClipViewportPointer = - iris_emit_state(batch, cso->sf_cl_vp, sizeof(cso->sf_cl_vp), 64); + emit_state(batch, ice->state.dynamic_uploader, + &ice->state.last_res.sf_cl_vp, + cso->sf_cl_vp, 4 * GENX(SF_CLIP_VIEWPORT_length) * + ice->state.num_viewports, 64); } } /* XXX: L3 State */ + // XXX: this is only flagged at setup, we assume a static configuration if (dirty & IRIS_DIRTY_URB) { iris_upload_urb_config(ice, batch); } @@ -1869,7 +2525,9 @@ iris_upload_render_state(struct iris_context *ice, cso_fb->nr_cbufs * GENX(BLEND_STATE_ENTRY_length)); uint32_t blend_offset; uint32_t *blend_map = - iris_alloc_state(batch, num_dwords, 64, &blend_offset); + stream_state(batch, ice->state.dynamic_uploader, + &ice->state.last_res.blend, + 4 * num_dwords, 64, &blend_offset); uint32_t blend_state_header; iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) { @@ -1891,9 +2549,10 @@ iris_upload_render_state(struct iris_context *ice, struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; uint32_t cc_offset; void *cc_map = - iris_alloc_state(batch, - sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length), - 64, &cc_offset); + stream_state(batch, ice->state.dynamic_uploader, + &ice->state.last_res.color_calc, + sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length), + 64, &cc_offset); iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) { cc.AlphaTestFormat = ALPHATEST_FLOAT32; cc.AlphaReferenceValueAsFLOAT32 = cso->alpha.ref_value; @@ -1909,81 +2568,88 @@ iris_upload_render_state(struct iris_context *ice, } for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + // XXX: wrong dirty tracking... if (!(dirty & (IRIS_DIRTY_CONSTANTS_VS << stage))) continue; + struct iris_shader_state *shs = &ice->shaders.state[stage]; + struct iris_compiled_shader *shader = ice->shaders.prog[stage]; + + if (!shader) + continue; + + struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; + iris_emit_cmd(batch, GENX(3DSTATE_CONSTANT_VS), pkt) { pkt._3DCommandSubOpcode = push_constant_opcodes[stage]; - if (ice->shaders.prog[stage]) { - // XXX: 3DSTATE_CONSTANT_XS + if (prog_data) { + /* The Skylake PRM contains the following restriction: + * + * "The driver must ensure The following case does not occur + * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with + * buffer 3 read length equal to zero committed followed by a + * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to + * zero committed." + * + * To avoid this, we program the buffers in the highest slots. + * This way, slot 0 is only used if slot 3 is also used. + */ + int n = 3; + + for (int i = 3; i >= 0; i--) { + const struct brw_ubo_range *range = &prog_data->ubo_ranges[i]; + + if (range->length == 0) + continue; + + // XXX: is range->block a constbuf index? it would be nice + struct iris_const_buffer *cbuf = &shs->constbuf[range->block]; + struct iris_resource *res = (void *) cbuf->data.res; + + assert(cbuf->data.offset % 32 == 0); + + pkt.ConstantBody.ReadLength[n] = range->length; + pkt.ConstantBody.Buffer[n] = + res ? ro_bo(res->bo, range->start * 32 + cbuf->data.offset) + : ro_bo(batch->screen->workaround_bo, 0); + n--; + } } } } - // Surfaces: - // - pull constants - // - ubos/ssbos/abos - // - images - // - textures - // - render targets - write and read - // XXX: 3DSTATE_BINDING_TABLE_POINTERS_XS + struct iris_binder *binder = &batch->binder; for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { - struct iris_compiled_shader *shader = ice->shaders.prog[stage]; - if (!shader) // XXX: dirty bits...also, emit a disable maybe? - continue; - - struct brw_stage_prog_data *prog_data = (void *) shader->prog_data; - uint32_t bt_offset = 0; - uint32_t *bt_map = NULL; - - if (prog_data->binding_table.size_bytes != 0) { - bt_map = iris_alloc_state(batch, prog_data->binding_table.size_bytes, - 64, &bt_offset); - } - - iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) { - ptr._3DCommandSubOpcode = 38 + stage; - ptr.PointertoVSBindingTable = bt_offset; + if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) { + iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) { + ptr._3DCommandSubOpcode = 38 + stage; + ptr.PointertoVSBindingTable = binder->bt_offset[stage]; + } } + } - if (stage == MESA_SHADER_FRAGMENT) { - struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; - for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { - struct iris_surface *surf = (void *) cso_fb->cbufs[i]; - struct iris_resource *res = (void *) surf->pipe.texture; - - *bt_map++ = emit_patched_surface_state(batch, surf->surface_state, - res, RELOC_WRITE); - } + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { + if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) { + iris_populate_binding_table(ice, batch, stage); } } + if (ice->state.need_border_colors) + iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false); + for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) { if (!(dirty & (IRIS_DIRTY_SAMPLER_STATES_VS << stage)) || !ice->shaders.prog[stage]) continue; - // XXX: get sampler count from shader; don't emit them all... - const int count = IRIS_MAX_TEXTURE_SAMPLERS; - - uint32_t offset; - uint32_t *map = iris_alloc_state(batch, - count * 4 * GENX(SAMPLER_STATE_length), - 32, &offset); - - for (int i = 0; i < count; i++) { - // XXX: when we have a correct count, these better be bound - if (!ice->state.samplers[stage][i]) - continue; - memcpy(map, ice->state.samplers[stage][i]->sampler_state, - 4 * GENX(SAMPLER_STATE_length)); - map += GENX(SAMPLER_STATE_length); - } + struct pipe_resource *res = ice->state.sampler_table[stage].res; + if (res) + iris_use_pinned_bo(batch, iris_resource_bo(res), false); iris_emit_cmd(batch, GENX(3DSTATE_SAMPLER_STATE_POINTERS_VS), ptr) { ptr._3DCommandSubOpcode = 43 + stage; - ptr.PointertoVSSamplerState = offset; + ptr.PointertoVSSamplerState = ice->state.sampler_table[stage].offset; } } @@ -1998,7 +2664,7 @@ iris_upload_render_state(struct iris_context *ice, if (dirty & IRIS_DIRTY_SAMPLE_MASK) { iris_emit_cmd(batch, GENX(3DSTATE_SAMPLE_MASK), ms) { - ms.SampleMask = ice->state.sample_mask; + ms.SampleMask = MAX2(ice->state.sample_mask, 1); } } @@ -2009,8 +2675,8 @@ iris_upload_render_state(struct iris_context *ice, struct iris_compiled_shader *shader = ice->shaders.prog[stage]; if (shader) { - struct iris_resource *cache = (void *) shader->buffer; - iris_use_pinned_bo(batch, cache->bo); + struct iris_resource *cache = (void *) shader->assembly.res; + iris_use_pinned_bo(batch, cache->bo, false); iris_batch_emit(batch, shader->derived_data, iris_derived_program_state_size(stage)); } else { @@ -2040,6 +2706,7 @@ iris_upload_render_state(struct iris_context *ice, cl.NonPerspectiveBarycentricEnable = true; cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0; + cl.MaximumVPIndex = ice->state.num_viewports - 1; } iris_emit_merge(batch, cso_rast->clip, dynamic_clip, ARRAY_SIZE(cso_rast->clip)); @@ -2052,7 +2719,8 @@ iris_upload_render_state(struct iris_context *ice, } - if (dirty & (IRIS_DIRTY_RASTER | IRIS_DIRTY_FS)) { + /* XXX: FS program updates needs to flag IRIS_DIRTY_WM */ + if (dirty & IRIS_DIRTY_WM) { struct iris_rasterizer_state *cso = ice->state.cso_rast; uint32_t dynamic_wm[GENX(3DSTATE_WM_length)]; @@ -2072,10 +2740,7 @@ iris_upload_render_state(struct iris_context *ice, // XXX: 3DSTATE_SBE, 3DSTATE_SBE_SWIZ // -> iris_raster_state (point sprite texture coordinate origin) // -> bunch of shader state... - iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) { - } - iris_emit_cmd(batch, GENX(3DSTATE_SBE_SWIZ), sbe) { - } + iris_emit_sbe(batch, ice); } if (dirty & IRIS_DIRTY_PS_BLEND) { @@ -2103,21 +2768,31 @@ iris_upload_render_state(struct iris_context *ice, iris_emit_merge(batch, cso->wmds, stencil_refs, ARRAY_SIZE(cso->wmds)); } - if (dirty & IRIS_DIRTY_SCISSOR) { + if (dirty & IRIS_DIRTY_SCISSOR_RECT) { uint32_t scissor_offset = - iris_emit_state(batch, ice->state.scissors, - sizeof(struct pipe_scissor_state) * - ice->state.num_scissors, 32); + emit_state(batch, ice->state.dynamic_uploader, + &ice->state.last_res.scissor, + ice->state.scissors, + sizeof(struct pipe_scissor_state) * + ice->state.num_viewports, 32); iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { ptr.ScissorRectPointer = scissor_offset; } } - // XXX: 3DSTATE_DEPTH_BUFFER - // XXX: 3DSTATE_HIER_DEPTH_BUFFER - // XXX: 3DSTATE_STENCIL_BUFFER - // XXX: 3DSTATE_CLEAR_PARAMS + if (dirty & IRIS_DIRTY_DEPTH_BUFFER) { + struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer; + struct iris_depth_buffer_state *cso_z = &ice->state.genx->depth_buffer; + + iris_batch_emit(batch, cso_z->packets, sizeof(cso_z->packets)); + + if (cso_fb->zsbuf) { + struct iris_resource *zres = (void *) cso_fb->zsbuf->texture; + // XXX: depth might not be writable... + iris_use_pinned_bo(batch, zres->bo, true); + } + } if (dirty & IRIS_DIRTY_POLYGON_STIPPLE) { iris_emit_cmd(batch, GENX(3DSTATE_POLY_STIPPLE_PATTERN), poly) { @@ -2140,38 +2815,38 @@ iris_upload_render_state(struct iris_context *ice, } if (draw->index_size > 0) { - struct iris_resource *res = (struct iris_resource *)draw->index.resource; + struct iris_resource *res = NULL; + unsigned offset; - assert(!draw->has_user_indices); + if (draw->has_user_indices) { + u_upload_data(ice->ctx.stream_uploader, 0, + draw->count * draw->index_size, 4, draw->index.user, + &offset, (struct pipe_resource **) &res); + } else { + res = (struct iris_resource *) draw->index.resource; + offset = 0; + } iris_emit_cmd(batch, GENX(3DSTATE_INDEX_BUFFER), ib) { - ib.IndexFormat = draw->index_size; + ib.IndexFormat = draw->index_size >> 1; ib.MOCS = MOCS_WB; ib.BufferSize = res->bo->size; - ib.BufferStartingAddress = ro_bo(res->bo, 0); + ib.BufferStartingAddress = ro_bo(res->bo, offset); } } if (dirty & IRIS_DIRTY_VERTEX_BUFFERS) { - struct iris_vertex_buffer_state *cso = ice->state.cso_vertex_buffers; - - STATIC_ASSERT(GENX(VERTEX_BUFFER_STATE_length) == 4); - STATIC_ASSERT((GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) % 32) == 0); + struct iris_vertex_buffer_state *cso = &ice->state.genx->vertex_buffers; + const unsigned vb_dwords = GENX(VERTEX_BUFFER_STATE_length); - uint64_t *addr = batch->cmdbuf.map_next + sizeof(uint32_t) * - (GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) / 32); - uint32_t *delta = cso->vertex_buffers + - (1 + GENX(VERTEX_BUFFER_STATE_BufferStartingAddress_bits) / 32); + if (cso->num_buffers > 0) { + iris_batch_emit(batch, cso->vertex_buffers, sizeof(uint32_t) * + (1 + vb_dwords * cso->num_buffers)); - iris_batch_emit(batch, cso->vertex_buffers, - sizeof(uint32_t) * (1 + 4 * cso->num_buffers)); - - for (unsigned i = 0; i < cso->num_buffers; i++) { - *addr = iris_batch_reloc(batch, (void *) addr - batch->cmdbuf.map, - cso->bos[i].bo, cso->bos[i].offset + - *delta, cso->bos[i].reloc_flags); - addr = (void *) addr + 16; - delta = (void *) delta + 16; + for (unsigned i = 0; i < cso->num_buffers; i++) { + struct iris_resource *res = (void *) cso->resources[i]; + iris_use_pinned_bo(batch, res->bo, false); + } } } @@ -2179,10 +2854,8 @@ iris_upload_render_state(struct iris_context *ice, struct iris_vertex_element_state *cso = ice->state.cso_vertex_elements; iris_batch_emit(batch, cso->vertex_elements, sizeof(uint32_t) * (1 + cso->count * GENX(VERTEX_ELEMENT_STATE_length))); - for (int i = 0; i < cso->count; i++) { - iris_batch_emit(batch, cso->vf_instancing[i], sizeof(uint32_t) * - (cso->count * GENX(3DSTATE_VF_INSTANCING_length))); - } + iris_batch_emit(batch, cso->vf_instancing, sizeof(uint32_t) * + cso->count * GENX(3DSTATE_VF_INSTANCING_length)); for (int i = 0; i < cso->count; i++) { /* TODO: vertexid, instanceid support */ iris_emit_cmd(batch, GENX(3DSTATE_VF_SGVS), sgvs); @@ -2219,22 +2892,449 @@ iris_upload_render_state(struct iris_context *ice, //prim.BaseVertexLocation = ...; } + + if (!batch->contains_draw) { + iris_restore_context_saved_bos(ice, batch, draw); + batch->contains_draw = true; + } } +/** + * State module teardown. + */ static void iris_destroy_state(struct iris_context *ice) { + iris_free_vertex_buffers(&ice->state.genx->vertex_buffers); + // XXX: unreference resources/surfaces. for (unsigned i = 0; i < ice->state.framebuffer.nr_cbufs; i++) { pipe_surface_reference(&ice->state.framebuffer.cbufs[i], NULL); } pipe_surface_reference(&ice->state.framebuffer.zsbuf, NULL); + + for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) { + pipe_resource_reference(&ice->state.sampler_table[stage].res, NULL); + } + free(ice->state.genx); + + pipe_resource_reference(&ice->state.last_res.cc_vp, NULL); + pipe_resource_reference(&ice->state.last_res.sf_cl_vp, NULL); + pipe_resource_reference(&ice->state.last_res.color_calc, NULL); + pipe_resource_reference(&ice->state.last_res.scissor, NULL); + pipe_resource_reference(&ice->state.last_res.blend, NULL); +} + +static unsigned +flags_to_post_sync_op(uint32_t flags) +{ + if (flags & PIPE_CONTROL_WRITE_IMMEDIATE) + return WriteImmediateData; + + if (flags & PIPE_CONTROL_WRITE_DEPTH_COUNT) + return WritePSDepthCount; + + if (flags & PIPE_CONTROL_WRITE_TIMESTAMP) + return WriteTimestamp; + + return 0; +} + +/** + * Do the given flags have a Post Sync or LRI Post Sync operation? + */ +static enum pipe_control_flags +get_post_sync_flags(enum pipe_control_flags flags) +{ + flags &= PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP | + PIPE_CONTROL_LRI_POST_SYNC_OP; + + /* Only one "Post Sync Op" is allowed, and it's mutually exclusive with + * "LRI Post Sync Operation". So more than one bit set would be illegal. + */ + assert(util_bitcount(flags) <= 1); + + return flags; +} + +// XXX: compute support +#define IS_COMPUTE_PIPELINE(batch) (batch->ring != I915_EXEC_RENDER) + +/** + * Emit a series of PIPE_CONTROL commands, taking into account any + * workarounds necessary to actually accomplish the caller's request. + * + * Unless otherwise noted, spec quotations in this function come from: + * + * Synchronization of the 3D Pipeline > PIPE_CONTROL Command > Programming + * Restrictions for PIPE_CONTROL. + */ +static void +iris_emit_raw_pipe_control(struct iris_batch *batch, uint32_t flags, + struct iris_bo *bo, uint32_t offset, uint64_t imm) +{ + UNUSED const struct gen_device_info *devinfo = &batch->screen->devinfo; + enum pipe_control_flags post_sync_flags = get_post_sync_flags(flags); + enum pipe_control_flags non_lri_post_sync_flags = + post_sync_flags & ~PIPE_CONTROL_LRI_POST_SYNC_OP; + + /* Recursive PIPE_CONTROL workarounds -------------------------------- + * (http://knowyourmeme.com/memes/xzibit-yo-dawg) + * + * We do these first because we want to look at the original operation, + * rather than any workarounds we set. + */ + if (GEN_GEN == 9 && (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) { + /* The PIPE_CONTROL "VF Cache Invalidation Enable" bit description + * lists several workarounds: + * + * "Project: SKL, KBL, BXT + * + * If the VF Cache Invalidation Enable is set to a 1 in a + * PIPE_CONTROL, a separate Null PIPE_CONTROL, all bitfields + * sets to 0, with the VF Cache Invalidation Enable set to 0 + * needs to be sent prior to the PIPE_CONTROL with VF Cache + * Invalidation Enable set to a 1." + */ + iris_emit_raw_pipe_control(batch, 0, NULL, 0, 0); + } + + if (GEN_GEN == 9 && IS_COMPUTE_PIPELINE(batch) && post_sync_flags) { + /* Project: SKL / Argument: LRI Post Sync Operation [23] + * + * "PIPECONTROL command with “Command Streamer Stall Enable” must be + * programmed prior to programming a PIPECONTROL command with "LRI + * Post Sync Operation" in GPGPU mode of operation (i.e when + * PIPELINE_SELECT command is set to GPGPU mode of operation)." + * + * The same text exists a few rows below for Post Sync Op. + */ + iris_emit_raw_pipe_control(batch, PIPE_CONTROL_CS_STALL, bo, offset, imm); + } + + if (GEN_GEN == 10 && (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) { + /* Cannonlake: + * "Before sending a PIPE_CONTROL command with bit 12 set, SW must issue + * another PIPE_CONTROL with Render Target Cache Flush Enable (bit 12) + * = 0 and Pipe Control Flush Enable (bit 7) = 1" + */ + iris_emit_raw_pipe_control(batch, PIPE_CONTROL_FLUSH_ENABLE, bo, + offset, imm); + } + + /* "Flush Types" workarounds --------------------------------------------- + * We do these now because they may add post-sync operations or CS stalls. + */ + + if (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE) { + /* Project: BDW, SKL+ (stopping at CNL) / Argument: VF Invalidate + * + * "'Post Sync Operation' must be enabled to 'Write Immediate Data' or + * 'Write PS Depth Count' or 'Write Timestamp'." + */ + if (!bo) { + flags |= PIPE_CONTROL_WRITE_IMMEDIATE; + post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; + non_lri_post_sync_flags |= PIPE_CONTROL_WRITE_IMMEDIATE; + bo = batch->screen->workaround_bo; + } + } + + /* #1130 from Gen10 workarounds page: + * + * "Enable Depth Stall on every Post Sync Op if Render target Cache + * Flush is not enabled in same PIPE CONTROL and Enable Pixel score + * board stall if Render target cache flush is enabled." + * + * Applicable to CNL B0 and C0 steppings only. + * + * The wording here is unclear, and this workaround doesn't look anything + * like the internal bug report recommendations, but leave it be for now... + */ + if (GEN_GEN == 10) { + if (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH) { + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; + } else if (flags & non_lri_post_sync_flags) { + flags |= PIPE_CONTROL_DEPTH_STALL; + } + } + + if (flags & PIPE_CONTROL_DEPTH_STALL) { + /* From the PIPE_CONTROL instruction table, bit 13 (Depth Stall Enable): + * + * "This bit must be DISABLED for operations other than writing + * PS_DEPTH_COUNT." + * + * This seems like nonsense. An Ivybridge workaround requires us to + * emit a PIPE_CONTROL with a depth stall and write immediate post-sync + * operation. Gen8+ requires us to emit depth stalls and depth cache + * flushes together. So, it's hard to imagine this means anything other + * than "we originally intended this to be used for PS_DEPTH_COUNT". + * + * We ignore the supposed restriction and do nothing. + */ + } + + if (flags & (PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_STALL_AT_SCOREBOARD)) { + /* From the PIPE_CONTROL instruction table, bit 12 and bit 1: + * + * "This bit must be DISABLED for End-of-pipe (Read) fences, + * PS_DEPTH_COUNT or TIMESTAMP queries." + * + * TODO: Implement end-of-pipe checking. + */ + assert(!(post_sync_flags & (PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP))); + } + + if (flags & PIPE_CONTROL_STALL_AT_SCOREBOARD) { + /* From the PIPE_CONTROL instruction table, bit 1: + * + * "This bit is ignored if Depth Stall Enable is set. + * Further, the render cache is not flushed even if Write Cache + * Flush Enable bit is set." + * + * We assert that the caller doesn't do this combination, to try and + * prevent mistakes. It shouldn't hurt the GPU, though. + */ + assert(!(flags & (PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_RENDER_TARGET_FLUSH))); + } + + /* PIPE_CONTROL page workarounds ------------------------------------- */ + + if (GEN_GEN <= 8 && (flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE)) { + /* From the PIPE_CONTROL page itself: + * + * "IVB, HSW, BDW + * Restriction: Pipe_control with CS-stall bit set must be issued + * before a pipe-control command that has the State Cache + * Invalidate bit set." + */ + flags |= PIPE_CONTROL_CS_STALL; + } + + if (flags & PIPE_CONTROL_FLUSH_LLC) { + /* From the PIPE_CONTROL instruction table, bit 26 (Flush LLC): + * + * "Project: ALL + * SW must always program Post-Sync Operation to "Write Immediate + * Data" when Flush LLC is set." + * + * For now, we just require the caller to do it. + */ + assert(flags & PIPE_CONTROL_WRITE_IMMEDIATE); + } + + /* "Post-Sync Operation" workarounds -------------------------------- */ + + /* Project: All / Argument: Global Snapshot Count Reset [19] + * + * "This bit must not be exercised on any product. + * Requires stall bit ([20] of DW1) set." + * + * We don't use this, so we just assert that it isn't used. The + * PIPE_CONTROL instruction page indicates that they intended this + * as a debug feature and don't think it is useful in production, + * but it may actually be usable, should we ever want to. + */ + assert((flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET) == 0); + + if (flags & (PIPE_CONTROL_MEDIA_STATE_CLEAR | + PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE)) { + /* Project: All / Arguments: + * + * - Generic Media State Clear [16] + * - Indirect State Pointers Disable [16] + * + * "Requires stall bit ([20] of DW1) set." + * + * Also, the PIPE_CONTROL instruction table, bit 16 (Generic Media + * State Clear) says: + * + * "PIPECONTROL command with “Command Streamer Stall Enable” must be + * programmed prior to programming a PIPECONTROL command with "Media + * State Clear" set in GPGPU mode of operation" + * + * This is a subset of the earlier rule, so there's nothing to do. + */ + flags |= PIPE_CONTROL_CS_STALL; + } + + if (flags & PIPE_CONTROL_STORE_DATA_INDEX) { + /* Project: All / Argument: Store Data Index + * + * "Post-Sync Operation ([15:14] of DW1) must be set to something other + * than '0'." + * + * For now, we just assert that the caller does this. We might want to + * automatically add a write to the workaround BO... + */ + assert(non_lri_post_sync_flags != 0); + } + + if (flags & PIPE_CONTROL_SYNC_GFDT) { + /* Project: All / Argument: Sync GFDT + * + * "Post-Sync Operation ([15:14] of DW1) must be set to something other + * than '0' or 0x2520[13] must be set." + * + * For now, we just assert that the caller does this. + */ + assert(non_lri_post_sync_flags != 0); + } + + if (flags & PIPE_CONTROL_TLB_INVALIDATE) { + /* Project: IVB+ / Argument: TLB inv + * + * "Requires stall bit ([20] of DW1) set." + * + * Also, from the PIPE_CONTROL instruction table: + * + * "Project: SKL+ + * Post Sync Operation or CS stall must be set to ensure a TLB + * invalidation occurs. Otherwise no cycle will occur to the TLB + * cache to invalidate." + * + * This is not a subset of the earlier rule, so there's nothing to do. + */ + flags |= PIPE_CONTROL_CS_STALL; + } + + if (GEN_GEN == 9 && devinfo->gt == 4) { + /* TODO: The big Skylake GT4 post sync op workaround */ + } + + /* "GPGPU specific workarounds" (both post-sync and flush) ------------ */ + + if (IS_COMPUTE_PIPELINE(batch)) { + if (GEN_GEN >= 9 && (flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE)) { + /* Project: SKL+ / Argument: Tex Invalidate + * "Requires stall bit ([20] of DW) set for all GPGPU Workloads." + */ + flags |= PIPE_CONTROL_CS_STALL; + } + + if (GEN_GEN == 8 && (post_sync_flags || + (flags & (PIPE_CONTROL_NOTIFY_ENABLE | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DATA_CACHE_FLUSH)))) { + /* Project: BDW / Arguments: + * + * - LRI Post Sync Operation [23] + * - Post Sync Op [15:14] + * - Notify En [8] + * - Depth Stall [13] + * - Render Target Cache Flush [12] + * - Depth Cache Flush [0] + * - DC Flush Enable [5] + * + * "Requires stall bit ([20] of DW) set for all GPGPU and Media + * Workloads." + */ + flags |= PIPE_CONTROL_CS_STALL; + + /* Also, from the PIPE_CONTROL instruction table, bit 20: + * + * "Project: BDW + * This bit must be always set when PIPE_CONTROL command is + * programmed by GPGPU and MEDIA workloads, except for the cases + * when only Read Only Cache Invalidation bits are set (State + * Cache Invalidation Enable, Instruction cache Invalidation + * Enable, Texture Cache Invalidation Enable, Constant Cache + * Invalidation Enable). This is to WA FFDOP CG issue, this WA + * need not implemented when FF_DOP_CG is disable via "Fixed + * Function DOP Clock Gate Disable" bit in RC_PSMI_CTRL register." + * + * It sounds like we could avoid CS stalls in some cases, but we + * don't currently bother. This list isn't exactly the list above, + * either... + */ + } + } + + /* "Stall" workarounds ---------------------------------------------- + * These have to come after the earlier ones because we may have added + * some additional CS stalls above. + */ + + if (GEN_GEN < 9 && (flags & PIPE_CONTROL_CS_STALL)) { + /* Project: PRE-SKL, VLV, CHV + * + * "[All Stepping][All SKUs]: + * + * One of the following must also be set: + * + * - Render Target Cache Flush Enable ([12] of DW1) + * - Depth Cache Flush Enable ([0] of DW1) + * - Stall at Pixel Scoreboard ([1] of DW1) + * - Depth Stall ([13] of DW1) + * - Post-Sync Operation ([13] of DW1) + * - DC Flush Enable ([5] of DW1)" + * + * If we don't already have one of those bits set, we choose to add + * "Stall at Pixel Scoreboard". Some of the other bits require a + * CS stall as a workaround (see above), which would send us into + * an infinite recursion of PIPE_CONTROLs. "Stall at Pixel Scoreboard" + * appears to be safe, so we choose that. + */ + const uint32_t wa_bits = PIPE_CONTROL_RENDER_TARGET_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_WRITE_IMMEDIATE | + PIPE_CONTROL_WRITE_DEPTH_COUNT | + PIPE_CONTROL_WRITE_TIMESTAMP | + PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DATA_CACHE_FLUSH; + if (!(flags & wa_bits)) + flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD; + } + + /* Emit --------------------------------------------------------------- */ + + iris_emit_cmd(batch, GENX(PIPE_CONTROL), pc) { + pc.LRIPostSyncOperation = NoLRIOperation; + pc.PipeControlFlushEnable = flags & PIPE_CONTROL_FLUSH_ENABLE; + pc.DCFlushEnable = flags & PIPE_CONTROL_DATA_CACHE_FLUSH; + pc.StoreDataIndex = 0; + pc.CommandStreamerStallEnable = flags & PIPE_CONTROL_CS_STALL; + pc.GlobalSnapshotCountReset = + flags & PIPE_CONTROL_GLOBAL_SNAPSHOT_COUNT_RESET; + pc.TLBInvalidate = flags & PIPE_CONTROL_TLB_INVALIDATE; + pc.GenericMediaStateClear = flags & PIPE_CONTROL_MEDIA_STATE_CLEAR; + pc.StallAtPixelScoreboard = flags & PIPE_CONTROL_STALL_AT_SCOREBOARD; + pc.RenderTargetCacheFlushEnable = + flags & PIPE_CONTROL_RENDER_TARGET_FLUSH; + pc.DepthCacheFlushEnable = flags & PIPE_CONTROL_DEPTH_CACHE_FLUSH; + pc.StateCacheInvalidationEnable = + flags & PIPE_CONTROL_STATE_CACHE_INVALIDATE; + pc.VFCacheInvalidationEnable = flags & PIPE_CONTROL_VF_CACHE_INVALIDATE; + pc.ConstantCacheInvalidationEnable = + flags & PIPE_CONTROL_CONST_CACHE_INVALIDATE; + pc.PostSyncOperation = flags_to_post_sync_op(flags); + pc.DepthStallEnable = flags & PIPE_CONTROL_DEPTH_STALL; + pc.InstructionCacheInvalidateEnable = + flags & PIPE_CONTROL_INSTRUCTION_INVALIDATE; + pc.NotifyEnable = flags & PIPE_CONTROL_NOTIFY_ENABLE; + pc.IndirectStatePointersDisable = + flags & PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE; + pc.TextureCacheInvalidationEnable = + flags & PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + pc.Address = ro_bo(bo, offset); + pc.ImmediateData = imm; + } } void genX(init_state)(struct iris_context *ice) { struct pipe_context *ctx = &ice->ctx; + struct iris_screen *screen = (struct iris_screen *)ctx->screen; ctx->create_blend_state = iris_create_blend_state; ctx->create_depth_stencil_alpha_state = iris_create_zsa_state; @@ -2280,17 +3380,26 @@ genX(init_state)(struct iris_context *ice) ctx->stream_output_target_destroy = iris_stream_output_target_destroy; ctx->set_stream_output_targets = iris_set_stream_output_targets; - ice->state.destroy_state = iris_destroy_state; - ice->state.init_render_context = iris_init_render_context; - ice->state.upload_render_state = iris_upload_render_state; - ice->state.derived_program_state_size = iris_derived_program_state_size; - ice->state.set_derived_program_state = iris_set_derived_program_state; - ice->state.populate_vs_key = iris_populate_vs_key; - ice->state.populate_tcs_key = iris_populate_tcs_key; - ice->state.populate_tes_key = iris_populate_tes_key; - ice->state.populate_gs_key = iris_populate_gs_key; - ice->state.populate_fs_key = iris_populate_fs_key; - + ice->vtbl.destroy_state = iris_destroy_state; + ice->vtbl.init_render_context = iris_init_render_context; + ice->vtbl.upload_render_state = iris_upload_render_state; + ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control; + ice->vtbl.derived_program_state_size = iris_derived_program_state_size; + ice->vtbl.store_derived_program_state = iris_store_derived_program_state; + ice->vtbl.populate_vs_key = iris_populate_vs_key; + ice->vtbl.populate_tcs_key = iris_populate_tcs_key; + ice->vtbl.populate_tes_key = iris_populate_tes_key; + ice->vtbl.populate_gs_key = iris_populate_gs_key; + ice->vtbl.populate_fs_key = iris_populate_fs_key; ice->state.dirty = ~0ull; + + ice->state.num_viewports = 1; + ice->state.genx = calloc(1, sizeof(struct iris_genx_state)); + + /* Make a 1x1x1 null surface for unbound textures */ + void *null_surf_map = + upload_state(ice->state.surface_uploader, &ice->state.unbound_tex, + 4 * GENX(RENDER_SURFACE_STATE_length), 64); + isl_null_fill_state(&screen->isl_dev, null_surf_map, isl_extent3d(1, 1, 1)); }