X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Ffreedreno_context.h;h=995e7d4c4337ba12d8fd959617662e92c0049f73;hb=6f93c75a47d80f3067e19fa3de4d54e5593a9d55;hp=bc5267aa96e5fb77d2f895d5ea098eddb8095dc4;hpb=56462a30080c1f25a81ae566d59a25d2ad6bb809;p=mesa.git

diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
index bc5267aa96e..995e7d4c433 100644
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -33,29 +33,31 @@
 #include "indices/u_primconvert.h"
 #include "util/u_blitter.h"
 #include "util/list.h"
-#include "util/u_slab.h"
+#include "util/slab.h"
 #include "util/u_string.h"
 
+#include "freedreno_batch.h"
 #include "freedreno_screen.h"
 #include "freedreno_gmem.h"
 #include "freedreno_util.h"
 
+#define BORDER_COLOR_UPLOAD_SIZE (2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE)
+
 struct fd_vertex_stateobj;
 
 struct fd_texture_stateobj {
 	struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
 	unsigned num_textures;
+	unsigned valid_textures;
 	struct pipe_sampler_state *samplers[PIPE_MAX_SAMPLERS];
 	unsigned num_samplers;
-	unsigned dirty_samplers;
+	unsigned valid_samplers;
 };
 
 struct fd_program_stateobj {
 	void *vp, *fp;
-	enum {
-		FD_SHADER_DIRTY_VP = (1 << 0),
-		FD_SHADER_DIRTY_FP = (1 << 1),
-	} dirty;
+
+	/* rest only used by fd2.. split out: */
 	uint8_t num_exports;
 	/* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index
 	 * for TGSI_SEMANTIC_GENERIC.  Special vs exports (position and point-
@@ -82,6 +84,20 @@ struct fd_vertex_stateobj {
 	unsigned num_elements;
 };
 
+struct fd_streamout_stateobj {
+	struct pipe_stream_output_target *targets[PIPE_MAX_SO_BUFFERS];
+	unsigned num_targets;
+	/* Track offset from vtxcnt for streamout data.  This counter
+	 * is just incremented by # of vertices on each draw until
+	 * reset or new streamout buffer bound.
+	 *
+	 * When we eventually have GS, the CPU won't actually know the
+	 * number of vertices per draw, so I think we'll have to do
+	 * something more clever.
+	 */
+	unsigned offsets[PIPE_MAX_SO_BUFFERS];
+};
+
 /* group together the vertex and vertexbuf state.. for ease of passing
  * around, and because various internal operations (gmem<->mem, etc)
  * need their own vertex state:
@@ -91,33 +107,6 @@ struct fd_vertex_state {
 	struct fd_vertexbuf_stateobj vertexbuf;
 };
 
-/* Bitmask of stages in rendering that a particular query query is
- * active.  Queries will be automatically started/stopped (generating
- * additional fd_hw_sample_period's) on entrance/exit from stages that
- * are applicable to the query.
- *
- * NOTE: set the stage to NULL at end of IB to ensure no query is still
- * active.  Things aren't going to work out the way you want if a query
- * is active across IB's (or between tile IB and draw IB)
- */
-enum fd_render_stage {
-	FD_STAGE_NULL = 0x00,
-	FD_STAGE_DRAW = 0x01,
-	FD_STAGE_CLEAR = 0x02,
-	/* TODO before queries which include MEM2GMEM or GMEM2MEM will
-	 * work we will need to call fd_hw_query_prepare() from somewhere
-	 * appropriate so that queries in the tiling IB get backed with
-	 * memory to write results to.
-	 */
-	FD_STAGE_MEM2GMEM = 0x04,
-	FD_STAGE_GMEM2MEM = 0x08,
-	/* used for driver internal draws (ie. util_blitter_blit()): */
-	FD_STAGE_BLIT = 0x10,
-};
-
-#define MAX_HW_SAMPLE_PROVIDERS 4
-struct fd_hw_sample_provider;
-struct fd_hw_sample;
 
 struct fd_context {
 	struct pipe_context base;
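[Editor's note: the offsets[] comment in fd_streamout_stateobj above describes purely CPU-side bookkeeping: every draw advances each bound target's offset by the draw's vertex count, until the counter is reset or a new buffer is bound. A minimal sketch of that rule; the helper name fd_streamout_advance() is hypothetical, not part of this header:]

static inline void
fd_streamout_advance(struct fd_streamout_stateobj *so, unsigned vtxcnt)
{
	unsigned i;

	/* each draw bumps every bound target by the vertex count;
	 * binding a new set of targets would reset offsets[] to zero:
	 */
	for (i = 0; i < so->num_targets; i++)
		so->offsets[i] += vtxcnt;
}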
@@ -125,49 +114,25 @@ struct fd_context {
 	struct fd_device *dev;
 	struct fd_screen *screen;
 
+	struct util_queue flush_queue;
+
 	struct blitter_context *blitter;
+	void *clear_rs_state;
 	struct primconvert_context *primconvert;
 
 	/* slab for pipe_transfer allocations: */
-	struct util_slab_mempool transfer_pool;
+	struct slab_child_pool transfer_pool;
 
 	/* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
-	struct util_slab_mempool sample_pool;
-	struct util_slab_mempool sample_period_pool;
-
-	/* next sample offset.. incremented for each sample in the batch/
-	 * submit, reset to zero on next submit.
-	 */
-	uint32_t next_sample_offset;
+	struct slab_mempool sample_pool;
+	struct slab_mempool sample_period_pool;
 
 	/* sample-providers for hw queries: */
 	const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS];
 
-	/* cached samples (in case multiple queries need to reference
-	 * the same sample snapshot)
-	 */
-	struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
-
-	/* tracking for current stage, to know when to start/stop
-	 * any active queries:
-	 */
-	enum fd_render_stage stage;
-
 	/* list of active queries: */
 	struct list_head active_queries;
 
-	/* list of queries that are not active, but were active in the
-	 * current submit:
-	 */
-	struct list_head current_queries;
-
-	/* current query result bo and tile stride: */
-	struct fd_bo *query_bo;
-	uint32_t query_tile_stride;
-
-	/* list of resources used by currently-unsubmitted renders */
-	struct list_head used_resources;
-
 	/* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to
 	 * DI_PT_x value to use for draw initiator.  There are some
 	 * slight differences between generation:
@@ -179,96 +144,38 @@ struct fd_context {
 	struct fd_program_stateobj solid_prog; // TODO move to screen?
 
 	/* shaders used by mem->gmem blits: */
-	struct fd_program_stateobj blit_prog[8]; // TODO move to screen?
+	struct fd_program_stateobj blit_prog[MAX_RENDER_TARGETS]; // TODO move to screen?
 	struct fd_program_stateobj blit_z, blit_zs;
 
-	/* do we need to mem2gmem before rendering.  We don't, if for example,
-	 * there was a glClear() that invalidated the entire previous buffer
-	 * contents.  Keep track of which buffer(s) are cleared, or needs
-	 * restore.  Masks of PIPE_CLEAR_*
-	 *
-	 * The 'cleared' bits will be set for buffers which are *entirely*
-	 * cleared, and 'partial_cleared' bits will be set if you must
-	 * check cleared_scissor.
-	 */
-	enum {
-		/* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
-		FD_BUFFER_COLOR = PIPE_CLEAR_COLOR,
-		FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
-		FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
-		FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
-	} cleared, partial_cleared, restore, resolve;
-
-	bool needs_flush;
-
-	/* To decide whether to render to system memory, keep track of the
-	 * number of draws, and whether any of them require multisample,
-	 * depth_test (or depth write), stencil_test, blending, and
-	 * color_logic_Op (since those functions are disabled when by-
-	 * passing GMEM.
-	 */
-	enum {
-		FD_GMEM_CLEARS_DEPTH_STENCIL = 0x01,
-		FD_GMEM_DEPTH_ENABLED = 0x02,
-		FD_GMEM_STENCIL_ENABLED = 0x04,
-
-		FD_GMEM_MSAA_ENABLED = 0x08,
-		FD_GMEM_BLEND_ENABLED = 0x10,
-		FD_GMEM_LOGICOP_ENABLED = 0x20,
-	} gmem_reason;
-
-	unsigned num_draws;   /* number of draws in current batch */
-
 	/* Stats/counters:
 	 */
 	struct {
 		uint64_t prims_emitted;
+		uint64_t prims_generated;
 		uint64_t draw_calls;
 		uint64_t batch_total, batch_sysmem, batch_gmem, batch_restore;
 	} stats;
 
-	/* we can't really sanely deal with wraparound point in ringbuffer
-	 * and because of the way tiling works we can't really flush at
-	 * arbitrary points (without a big performance hit).  When we get
-	 * too close to the end of the current ringbuffer, cycle to the next
-	 * one (and wait for pending rendering from next rb to complete).
-	 * We want the # of ringbuffers to be high enough that we don't
-	 * normally have to wait before resetting to the start of the next
-	 * rb.
-	 */
-	struct fd_ringbuffer *rings[8];
-	unsigned rings_idx;
-
-	/* NOTE: currently using a single ringbuffer for both draw and
-	 * tiling commands, we need to make sure we need to leave enough
-	 * room at the end to append the tiling commands when we flush.
-	 * 0x7000 dwords should be a couple times more than we ever need
-	 * so should be a nice conservative threshold.
+	/* Current batch.. the rule here is that you can deref ctx->batch
+	 * in codepaths from pipe_context entrypoints.  But not in code-
+	 * paths from fd_batch_flush() (basically, the stuff that gets
+	 * called from GMEM code), since in those code-paths the batch
+	 * you care about is not necessarily the same as ctx->batch.
 	 */
-#define FD_TILING_COMMANDS_DWORDS 0x7000
+	struct fd_batch *batch;
 
-	/* normal draw/clear cmds: */
-	struct fd_ringbuffer *ring;
-	struct fd_ringmarker *draw_start, *draw_end;
+	struct pipe_fence_handle *last_fence;
 
-	/* binning pass draw/clear cmds: */
-	struct fd_ringbuffer *binning_ring;
-	struct fd_ringmarker *binning_start, *binning_end;
-
-	/* Keep track if WAIT_FOR_IDLE is needed for registers we need
-	 * to update via RMW:
+	/* Are we in process of shadowing a resource? Used to detect recursion
+	 * in transfer_map, and skip unneeded synchronization.
 	 */
-	bool needs_wfi;
-
-	/* Do we need to re-emit RB_FRAME_BUFFER_DIMENSION?  At least on a3xx
-	 * it is not a banked context register, so it needs a WFI to update.
-	 * Keep track if it has actually changed, to avoid unneeded WFI.
-	 * */
-	bool needs_rb_fbd;
+	bool in_shadow : 1;
 
-	/* Keep track of DRAW initiators that need to be patched up depending
-	 * on whether we using binning or not:
+	/* Ie. in blit situation where we no longer care about previous framebuffer
+	 * contents.  Main point is to eliminate blits from fd_try_shadow_resource().
	 * For example, in case of texture upload + gen-mipmaps.
 	 */
-	struct util_dynarray draw_patches;
+	bool in_blit : 1;
 
 	struct pipe_scissor_state scissor;
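[Editor's note: the ctx->batch comment above encodes an ownership rule worth making concrete: pipe_context entrypoints may dereference ctx->batch, but code reached from fd_batch_flush() (the GMEM paths) must only use the batch it was handed, since that batch is not necessarily ctx->batch. Both functions below are hypothetical illustrations:]

/* reached via a pipe_context entrypoint: ctx->batch is the batch
 * currently being built, so dereferencing it is safe here: */
static bool
example_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
{
	struct fd_batch *batch = ctx->batch;
	/* ... record the draw into 'batch' ... */
	return true;
}

/* reached from fd_batch_flush()/GMEM code: only the batch argument
 * may be used, never ctx->batch: */
static void
example_emit_tile_init(struct fd_batch *batch)
{
	/* ... program tiling state from 'batch' ... */
}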
@@ -278,26 +185,15 @@ struct fd_context {
 	 */
 	struct pipe_scissor_state disabled_scissor;
 
-	/* Track the maximal bounds of the scissor of all the draws within a
-	 * batch.  Used at the tile rendering step (fd_gmem_render_tiles(),
-	 * mem2gmem/gmem2mem) to avoid needlessly moving data in/out of gmem.
-	 */
-	struct pipe_scissor_state max_scissor;
-
-	/* Track the cleared scissor for color/depth/stencil, so we know
-	 * which, if any, tiles need to be restored (mem2gmem).  Only valid
-	 * if the corresponding bit in ctx->cleared is set.
-	 */
-	struct {
-		struct pipe_scissor_state color, depth, stencil;
-	} cleared_scissor;
-
 	/* Current gmem/tiling configuration.. gets updated on render_tiles()
 	 * if out of date with current maximal-scissor/cpp:
+	 *
+	 * (NOTE: this is kind of related to the batch, but moving it there
+	 * means we'd always have to recalc tiles ever batch)
 	 */
 	struct fd_gmem_stateobj gmem;
 	struct fd_vsc_pipe pipe[8];
-	struct fd_tile tile[256];
+	struct fd_tile tile[512];
 
 	/* which state objects need to be re-emit'd: */
 	enum {
@@ -307,18 +203,26 @@ struct fd_context {
 		FD_DIRTY_FRAGTEX = (1 << 3),
 		FD_DIRTY_VERTTEX = (1 << 4),
 		FD_DIRTY_TEXSTATE = (1 << 5),
-		FD_DIRTY_PROG = (1 << 6),
-		FD_DIRTY_BLEND_COLOR = (1 << 7),
-		FD_DIRTY_STENCIL_REF = (1 << 8),
-		FD_DIRTY_SAMPLE_MASK = (1 << 9),
-		FD_DIRTY_FRAMEBUFFER = (1 << 10),
-		FD_DIRTY_STIPPLE = (1 << 11),
-		FD_DIRTY_VIEWPORT = (1 << 12),
-		FD_DIRTY_CONSTBUF = (1 << 13),
-		FD_DIRTY_VTXSTATE = (1 << 14),
-		FD_DIRTY_VTXBUF = (1 << 15),
-		FD_DIRTY_INDEXBUF = (1 << 16),
-		FD_DIRTY_SCISSOR = (1 << 17),
+
+		FD_SHADER_DIRTY_VP = (1 << 6),
+		FD_SHADER_DIRTY_FP = (1 << 7),
+		/* skip geom/tcs/tes/compute */
+		FD_DIRTY_PROG = FD_SHADER_DIRTY_FP | FD_SHADER_DIRTY_VP,
+
+		FD_DIRTY_BLEND_COLOR = (1 << 12),
+		FD_DIRTY_STENCIL_REF = (1 << 13),
+		FD_DIRTY_SAMPLE_MASK = (1 << 14),
+		FD_DIRTY_FRAMEBUFFER = (1 << 15),
+		FD_DIRTY_STIPPLE = (1 << 16),
+		FD_DIRTY_VIEWPORT = (1 << 17),
+		FD_DIRTY_CONSTBUF = (1 << 18),
+		FD_DIRTY_VTXSTATE = (1 << 19),
+		FD_DIRTY_VTXBUF = (1 << 20),
+		FD_DIRTY_INDEXBUF = (1 << 21),
+		FD_DIRTY_SCISSOR = (1 << 22),
+		FD_DIRTY_STREAMOUT = (1 << 23),
+		FD_DIRTY_UCP = (1 << 24),
+		FD_DIRTY_BLEND_DUAL = (1 << 25),
 	} dirty;
 
 	struct pipe_blend_state *blend;
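[Editor's note: with the renumbering above, FD_DIRTY_PROG is no longer its own bit but the union of the per-stage FD_SHADER_DIRTY_* bits, so a bind hook can flag only the stage that changed while emit paths test the combined mask. A sketch; both functions are hypothetical illustrations:]

static void
example_fp_state_bind(struct pipe_context *pctx, void *hwcso)
{
	struct fd_context *ctx = fd_context(pctx);
	/* ... store hwcso as the bound fragment shader ... */
	ctx->dirty |= FD_SHADER_DIRTY_FP;
}

static void
example_emit_state(struct fd_context *ctx)
{
	/* catches a change to either shader stage: */
	if (ctx->dirty & FD_DIRTY_PROG) {
		/* ... re-emit program state ... */
	}
}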
@@ -334,24 +238,33 @@ struct fd_context {
 	struct pipe_blend_color blend_color;
 	struct pipe_stencil_ref stencil_ref;
 	unsigned sample_mask;
-	struct pipe_framebuffer_state framebuffer;
 	struct pipe_poly_stipple stipple;
 	struct pipe_viewport_state viewport;
 	struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
 	struct pipe_index_buffer indexbuf;
+	struct fd_streamout_stateobj streamout;
+	struct pipe_clip_state ucp;
+
+	struct pipe_query *cond_query;
+	bool cond_cond; /* inverted rendering condition */
+	uint cond_mode;
+
+	struct pipe_debug_callback debug;
 
 	/* GMEM/tile handling fxns: */
-	void (*emit_tile_init)(struct fd_context *ctx);
-	void (*emit_tile_prep)(struct fd_context *ctx, struct fd_tile *tile);
-	void (*emit_tile_mem2gmem)(struct fd_context *ctx, struct fd_tile *tile);
-	void (*emit_tile_renderprep)(struct fd_context *ctx, struct fd_tile *tile);
-	void (*emit_tile_gmem2mem)(struct fd_context *ctx, struct fd_tile *tile);
+	void (*emit_tile_init)(struct fd_batch *batch);
+	void (*emit_tile_prep)(struct fd_batch *batch, struct fd_tile *tile);
+	void (*emit_tile_mem2gmem)(struct fd_batch *batch, struct fd_tile *tile);
+	void (*emit_tile_renderprep)(struct fd_batch *batch, struct fd_tile *tile);
+	void (*emit_tile_gmem2mem)(struct fd_batch *batch, struct fd_tile *tile);
+	void (*emit_tile_fini)(struct fd_batch *batch); /* optional */
 
 	/* optional, for GMEM bypass: */
-	void (*emit_sysmem_prep)(struct fd_context *ctx);
+	void (*emit_sysmem_prep)(struct fd_batch *batch);
+	void (*emit_sysmem_fini)(struct fd_batch *batch);
 
 	/* draw: */
-	void (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info);
+	bool (*draw_vbo)(struct fd_context *ctx, const struct pipe_draw_info *info);
 	void (*clear)(struct fd_context *ctx, unsigned buffers,
 			const union pipe_color_union *color, double depth, unsigned stencil);
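[Editor's note: the emit_tile_* hooks above now take the fd_batch being flushed rather than the context. A simplified sketch of how the generation-independent GMEM code might drive them per tile; the loop shape and the nbins_x/nbins_y field names are assumptions, not quoted from fd_gmem_render_tiles():]

static void
example_render_tiles(struct fd_batch *batch, struct fd_context *ctx)
{
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	unsigned i;

	ctx->emit_tile_init(batch);

	for (i = 0; i < gmem->nbins_x * gmem->nbins_y; i++) {
		struct fd_tile *tile = &ctx->tile[i];

		ctx->emit_tile_prep(batch, tile);
		ctx->emit_tile_mem2gmem(batch, tile);    /* restore from sysmem, if needed */
		ctx->emit_tile_renderprep(batch, tile);
		/* ... replay the batch's draw commands for this tile ... */
		ctx->emit_tile_gmem2mem(batch, tile);    /* resolve back to sysmem */
	}

	if (ctx->emit_tile_fini)
		ctx->emit_tile_fini(batch);
}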
@@ -359,8 +272,33 @@ struct fd_context {
 	void (*emit_const)(struct fd_ringbuffer *ring, enum shader_t type,
 			uint32_t regid, uint32_t offset, uint32_t sizedwords,
 			const uint32_t *dwords, struct pipe_resource *prsc);
+
+	/* emit bo addresses as constant: */
 	void (*emit_const_bo)(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
-			uint32_t regid, uint32_t num, struct fd_bo **bos, uint32_t *offsets);
+			uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets);
+
+	/* indirect-branch emit: */
+	void (*emit_ib)(struct fd_ringbuffer *ring, struct fd_ringbuffer *target);
+
+	/*
+	 * Common pre-cooked VBO state (used for a3xx and later):
+	 */
+
+	/* for clear/gmem->mem vertices, and mem->gmem */
+	struct pipe_resource *solid_vbuf;
+
+	/* for mem->gmem tex coords: */
+	struct pipe_resource *blit_texcoord_vbuf;
+
+	/* vertex state for solid_vbuf:
+	 *    - solid_vbuf / 12 / R32G32B32_FLOAT
+	 */
+	struct fd_vertex_state solid_vbuf_state;
+
+	/* vertex state for blit_prog:
+	 *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
+	 *    - solid_vbuf / 12 / R32G32B32_FLOAT
+	 */
+	struct fd_vertex_state blit_vbuf_state;
 };
 
 static inline struct fd_context *
@@ -369,55 +307,45 @@ fd_context(struct pipe_context *pctx)
 	return (struct fd_context *)pctx;
 }
 
-static inline struct pipe_scissor_state *
-fd_context_get_scissor(struct fd_context *ctx)
+static inline void
+fd_context_assert_locked(struct fd_context *ctx)
 {
-	if (ctx->rasterizer && ctx->rasterizer->scissor)
-		return &ctx->scissor;
-	return &ctx->disabled_scissor;
+	pipe_mutex_assert_locked(ctx->screen->lock);
 }
 
-static inline bool
-fd_supported_prim(struct fd_context *ctx, unsigned prim)
+static inline void
+fd_context_lock(struct fd_context *ctx)
 {
-	return (1 << prim) & ctx->primtype_mask;
+	pipe_mutex_lock(ctx->screen->lock);
 }
 
 static inline void
-fd_reset_wfi(struct fd_context *ctx)
+fd_context_unlock(struct fd_context *ctx)
 {
-	ctx->needs_wfi = true;
+	pipe_mutex_unlock(ctx->screen->lock);
 }
 
-/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already
- * been one since last draw:
- */
-static inline void
-fd_wfi(struct fd_context *ctx, struct fd_ringbuffer *ring)
+static inline struct pipe_scissor_state *
+fd_context_get_scissor(struct fd_context *ctx)
 {
-	if (ctx->needs_wfi) {
-		OUT_WFI(ring);
-		ctx->needs_wfi = false;
-	}
+	if (ctx->rasterizer && ctx->rasterizer->scissor)
+		return &ctx->scissor;
+	return &ctx->disabled_scissor;
 }
 
-/* emit a CP_EVENT_WRITE:
- */
-static inline void
-fd_event_write(struct fd_context *ctx, struct fd_ringbuffer *ring,
-		enum vgt_event_type evt)
+static inline bool
+fd_supported_prim(struct fd_context *ctx, unsigned prim)
 {
-	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
-	OUT_RING(ring, evt);
-	fd_reset_wfi(ctx);
+	return (1 << prim) & ctx->primtype_mask;
 }
 
+void fd_context_setup_common_vbos(struct fd_context *ctx);
+void fd_context_cleanup_common_vbos(struct fd_context *ctx);
+
 struct pipe_context * fd_context_init(struct fd_context *ctx,
 		struct pipe_screen *pscreen, const uint8_t *primtypes,
 		void *priv);
 
-void fd_context_render(struct pipe_context *pctx);
-
 void fd_context_destroy(struct pipe_context *pctx);
 
 #endif /* FREEDRENO_CONTEXT_H_ */
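[Editor's note: the new fd_context_lock()/fd_context_unlock() helpers wrap the screen-wide mutex (ctx->screen->lock) that the batch rework introduces for state shared across contexts. A short usage sketch; the function and its body are illustrative only:]

static void
example_locked_update(struct fd_context *ctx)
{
	fd_context_lock(ctx);
	/* ... update state shared across contexts, e.g. batch/resource
	 * dependency tracking; helpers called inside the critical section
	 * can verify the lock with fd_context_assert_locked(ctx) ...
	 */
	fd_context_unlock(ctx);
}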