From: Rob Clark Date: Tue, 8 Nov 2016 15:50:03 +0000 (-0500) Subject: freedreno/a5xx: initial support X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=946cf4eb6846767306a221eec7d0f82d20dfb6b5;p=mesa.git freedreno/a5xx: initial support Signed-off-by: Rob Clark --- diff --git a/configure.ac b/configure.ac index e7e562872c7..f62bc61e502 100644 --- a/configure.ac +++ b/configure.ac @@ -74,7 +74,7 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63 LIBDRM_INTEL_REQUIRED=2.4.61 LIBDRM_NVVIEUX_REQUIRED=2.4.66 LIBDRM_NOUVEAU_REQUIRED=2.4.66 -LIBDRM_FREEDRENO_REQUIRED=2.4.68 +LIBDRM_FREEDRENO_REQUIRED=2.4.74 LIBDRM_VC4_REQUIRED=2.4.69 DRI2PROTO_REQUIRED=2.6 DRI3PROTO_REQUIRED=1.0 diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am index 148dd0eb538..e5c344d7004 100644 --- a/src/gallium/drivers/freedreno/Makefile.am +++ b/src/gallium/drivers/freedreno/Makefile.am @@ -20,6 +20,7 @@ libfreedreno_la_SOURCES = \ $(a2xx_SOURCES) \ $(a3xx_SOURCES) \ $(a4xx_SOURCES) \ + $(a5xx_SOURCES) \ $(ir3_SOURCES) \ $(ir3_GENERATED_FILES) diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index 92d9186597c..b53a23e0480 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -120,6 +120,33 @@ a4xx_SOURCES := \ a4xx/fd4_zsa.c \ a4xx/fd4_zsa.h +a5xx_SOURCES := \ + a5xx/a5xx.xml.h \ + a5xx/fd5_blend.c \ + a5xx/fd5_blend.h \ + a5xx/fd5_context.c \ + a5xx/fd5_context.h \ + a5xx/fd5_draw.c \ + a5xx/fd5_draw.h \ + a5xx/fd5_emit.c \ + a5xx/fd5_emit.h \ + a5xx/fd5_format.c \ + a5xx/fd5_format.h \ + a5xx/fd5_gmem.c \ + a5xx/fd5_gmem.h \ + a5xx/fd5_program.c \ + a5xx/fd5_program.h \ + a5xx/fd5_query.c \ + a5xx/fd5_query.h \ + a5xx/fd5_rasterizer.c \ + a5xx/fd5_rasterizer.h \ + a5xx/fd5_screen.c \ + a5xx/fd5_screen.h \ + a5xx/fd5_texture.c \ + a5xx/fd5_texture.h \ + a5xx/fd5_zsa.c \ + a5xx/fd5_zsa.h + ir3_SOURCES := \ ir3/disasm-a3xx.c \ 
ir3/instr-a3xx.h \ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blend.c b/src/gallium/drivers/freedreno/a5xx/fd5_blend.c new file mode 100644 index 00000000000..e5107a718b1 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_blend.c @@ -0,0 +1,144 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_blend.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd5_blend.h" +#include "fd5_context.h" +#include "fd5_format.h" + +// XXX move somewhere common.. same across a3xx/a4xx/a5xx.. 
+static enum a3xx_rb_blend_opcode +blend_func(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return BLEND_DST_PLUS_SRC; + case PIPE_BLEND_MIN: + return BLEND_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return BLEND_MAX_DST_SRC; + case PIPE_BLEND_SUBTRACT: + return BLEND_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLEND_DST_MINUS_SRC; + default: + DBG("invalid blend func: %x", func); + return 0; + } +} + +void * +fd5_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + struct fd5_blend_stateobj *so; +// enum a3xx_rop_code rop = ROP_COPY; + bool reads_dest = false; + unsigned i, mrt_blend = 0; + + if (cso->logicop_enable) { +// rop = cso->logicop_func; /* maps 1:1 */ + + switch (cso->logicop_func) { + case PIPE_LOGICOP_NOR: + case PIPE_LOGICOP_AND_INVERTED: + case PIPE_LOGICOP_AND_REVERSE: + case PIPE_LOGICOP_INVERT: + case PIPE_LOGICOP_XOR: + case PIPE_LOGICOP_NAND: + case PIPE_LOGICOP_AND: + case PIPE_LOGICOP_EQUIV: + case PIPE_LOGICOP_NOOP: + case PIPE_LOGICOP_OR_INVERTED: + case PIPE_LOGICOP_OR_REVERSE: + case PIPE_LOGICOP_OR: + reads_dest = true; + break; + } + } + + so = CALLOC_STRUCT(fd5_blend_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) { + const struct pipe_rt_blend_state *rt; + + if (cso->independent_blend_enable) + rt = &cso->rt[i]; + else + rt = &cso->rt[0]; + + so->rb_mrt[i].blend_control_rgb = + A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | + A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | + A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)); + + so->rb_mrt[i].blend_control_alpha = + A5XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | + A5XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | + A5XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); + + 
so->rb_mrt[i].blend_control_no_alpha_rgb = + A5XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_src_factor))) | + A5XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | + A5XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(util_blend_dst_alpha_to_one(rt->rgb_dst_factor))); + + + so->rb_mrt[i].control = +// A5XX_RB_MRT_CONTROL_ROP_CODE(rop) | +// COND(cso->logicop_enable, A5XX_RB_MRT_CONTROL_ROP_ENABLE) | + 0x60 | /* XXX set other than RECTLIST clear blits?? */ + A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); + + if (rt->blend_enable) { + so->rb_mrt[i].control |= +// A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE | + A5XX_RB_MRT_CONTROL_BLEND | + A5XX_RB_MRT_CONTROL_BLEND2; + mrt_blend |= (1 << i); + } + + if (reads_dest) { +// so->rb_mrt[i].control |= A5XX_RB_MRT_CONTROL_READ_DEST_ENABLE; + mrt_blend |= (1 << i); + } + +// if (cso->dither) +// so->rb_mrt[i].buf_info |= A5XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS); + } + + so->rb_blend_cntl = A5XX_RB_BLEND_CNTL_ENABLE_BLEND(mrt_blend) | + COND(cso->independent_blend_enable, A5XX_RB_BLEND_CNTL_INDEPENDENT_BLEND); + + return so; +} diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_blend.h b/src/gallium/drivers/freedreno/a5xx/fd5_blend.h new file mode 100644 index 00000000000..85c615824db --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_blend.h @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) 
shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD5_BLEND_H_ +#define FD5_BLEND_H_ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +#include "freedreno_util.h" + +struct fd5_blend_stateobj { + struct pipe_blend_state base; + + struct { + uint32_t control; + uint32_t buf_info; + /* Blend control bits for color if there is an alpha channel */ + uint32_t blend_control_rgb; + /* Blend control bits for color if there is no alpha channel */ + uint32_t blend_control_no_alpha_rgb; + /* Blend control bits for alpha channel */ + uint32_t blend_control_alpha; + } rb_mrt[A5XX_MAX_RENDER_TARGETS]; + uint32_t rb_blend_cntl; +}; + +static inline struct fd5_blend_stateobj * +fd5_blend_stateobj(struct pipe_blend_state *blend) +{ + return (struct fd5_blend_stateobj *)blend; +} + +void * fd5_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso); + +#endif /* FD5_BLEND_H_ */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_context.c b/src/gallium/drivers/freedreno/a5xx/fd5_context.c new file mode 100644 index 00000000000..1e4e83c16fa --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_context.c @@ -0,0 +1,117 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without 
limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + + +#include "fd5_context.h" +#include "fd5_blend.h" +#include "fd5_draw.h" +#include "fd5_emit.h" +#include "fd5_gmem.h" +#include "fd5_program.h" +#include "fd5_query.h" +#include "fd5_rasterizer.h" +#include "fd5_texture.h" +#include "fd5_zsa.h" + +static void +fd5_context_destroy(struct pipe_context *pctx) +{ + struct fd5_context *fd5_ctx = fd5_context(fd_context(pctx)); + + fd_bo_del(fd5_ctx->vs_pvt_mem); + fd_bo_del(fd5_ctx->fs_pvt_mem); + fd_bo_del(fd5_ctx->vsc_size_mem); + fd_bo_del(fd5_ctx->blit_mem); + + fd_context_cleanup_common_vbos(&fd5_ctx->base); + + u_upload_destroy(fd5_ctx->border_color_uploader); + + fd_context_destroy(pctx); +} + +static const uint8_t primtypes[] = { + [PIPE_PRIM_POINTS] = DI_PT_POINTLIST, + [PIPE_PRIM_LINES] = DI_PT_LINELIST, + [PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP, + [PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP, + [PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST, + [PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN, + [PIPE_PRIM_MAX] = DI_PT_RECTLIST, /* internal clear blits */ +}; + 
+struct pipe_context * +fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags) +{ + struct fd_screen *screen = fd_screen(pscreen); + struct fd5_context *fd5_ctx = CALLOC_STRUCT(fd5_context); + struct pipe_context *pctx; + + if (!fd5_ctx) + return NULL; + + pctx = &fd5_ctx->base.base; + + fd5_ctx->base.dev = fd_device_ref(screen->dev); + fd5_ctx->base.screen = fd_screen(pscreen); + + pctx->destroy = fd5_context_destroy; + pctx->create_blend_state = fd5_blend_state_create; + pctx->create_rasterizer_state = fd5_rasterizer_state_create; + pctx->create_depth_stencil_alpha_state = fd5_zsa_state_create; + + fd5_draw_init(pctx); + fd5_gmem_init(pctx); + fd5_texture_init(pctx); + fd5_prog_init(pctx); + fd5_emit_init(pctx); + + pctx = fd_context_init(&fd5_ctx->base, pscreen, primtypes, priv); + if (!pctx) + return NULL; + + fd5_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd5_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd5_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd5_ctx->blit_mem = fd_bo_new(screen->dev, 0x1000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd_context_setup_common_vbos(&fd5_ctx->base); + + fd5_query_context_init(pctx); + + fd5_ctx->border_color_uploader = u_upload_create(pctx, 4096, 0, + PIPE_USAGE_STREAM); + + return pctx; +} diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_context.h b/src/gallium/drivers/freedreno/a5xx/fd5_context.h new file mode 100644 index 00000000000..30a11d0e141 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_context.h @@ -0,0 +1,90 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * 
and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD5_CONTEXT_H_ +#define FD5_CONTEXT_H_ + +#include "util/u_upload_mgr.h" + +#include "freedreno_drmif.h" + +#include "freedreno_context.h" + +#include "ir3_shader.h" + +struct fd5_context { + struct fd_context base; + + struct fd_bo *vs_pvt_mem, *fs_pvt_mem; + + /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We + * could combine it with another allocation. + * + * (upper area used as scratch bo.. see fd5_query) + * + * XXX remove if unneeded after binning r/e.. + */ + struct fd_bo *vsc_size_mem; + + /* TODO not sure what this is for.. 
*/ + struct fd_bo *blit_mem; + + struct u_upload_mgr *border_color_uploader; + struct pipe_resource *border_color_buf; + + /* if *any* of bits are set in {v,f}saturate_{s,t,r} */ + bool vsaturate, fsaturate; + + /* bitmask of sampler which needs coords clamped for vertex + * shader: + */ + uint16_t vsaturate_s, vsaturate_t, vsaturate_r; + + /* bitmask of sampler which needs coords clamped for frag + * shader: + */ + uint16_t fsaturate_s, fsaturate_t, fsaturate_r; + + /* bitmask of samplers which need astc srgb workaround: */ + uint16_t vastc_srgb, fastc_srgb; + + /* some state changes require a different shader variant. Keep + * track of this so we know when we need to re-emit shader state + * due to variant change. See fixup_shader_state() + */ + struct ir3_shader_key last_key; +}; + +static inline struct fd5_context * +fd5_context(struct fd_context *ctx) +{ + return (struct fd5_context *)ctx; +} + +struct pipe_context * +fd5_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags); + +#endif /* FD5_CONTEXT_H_ */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_draw.c b/src/gallium/drivers/freedreno/a5xx/fd5_draw.c new file mode 100644 index 00000000000..e6b42bfcfb4 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_draw.c @@ -0,0 +1,288 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + +#include "freedreno_state.h" +#include "freedreno_resource.h" + +#include "fd5_draw.h" +#include "fd5_context.h" +#include "fd5_emit.h" +#include "fd5_program.h" +#include "fd5_format.h" +#include "fd5_zsa.h" + + +static void +draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd5_emit *emit) +{ + const struct pipe_draw_info *info = emit->info; + enum pc_di_primtype primtype = ctx->primtypes[info->mode]; + + fd5_emit_state(ctx, ring, emit); + + if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) + fd5_emit_vertex_bufs(ring, emit); + + OUT_PKT4(ring, REG_A5XX_VFD_INDEX_OFFSET, 2); + OUT_RING(ring, info->indexed ? info->index_bias : info->start); /* VFD_INDEX_OFFSET */ + OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */ + + OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ + info->restart_index : 0xffffffff); + + /* points + psize -> spritelist: */ + if (ctx->rasterizer->point_size_per_vertex && + fd5_emit_get_vp(emit)->writes_psize && + (info->mode == PIPE_PRIM_POINTS)) + primtype = DI_PT_POINTLIST_PSIZE; + + fd5_emit_render_cntl(ctx, false); + fd5_draw_emit(ctx->batch, ring, primtype, + emit->key.binning_pass ? 
IGNORE_VISIBILITY : USE_VISIBILITY, + info); +} + +/* fixup dirty shader state in case some "unrelated" (from the state- + * tracker's perspective) state change causes us to switch to a + * different variant. + */ +static void +fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) +{ + struct fd5_context *fd5_ctx = fd5_context(ctx); + struct ir3_shader_key *last_key = &fd5_ctx->last_key; + + if (!ir3_shader_key_equal(last_key, key)) { + if (last_key->has_per_samp || key->has_per_samp) { + if ((last_key->vsaturate_s != key->vsaturate_s) || + (last_key->vsaturate_t != key->vsaturate_t) || + (last_key->vsaturate_r != key->vsaturate_r) || + (last_key->vastc_srgb != key->vastc_srgb)) + ctx->dirty |= FD_SHADER_DIRTY_VP; + + if ((last_key->fsaturate_s != key->fsaturate_s) || + (last_key->fsaturate_t != key->fsaturate_t) || + (last_key->fsaturate_r != key->fsaturate_r) || + (last_key->fastc_srgb != key->fastc_srgb)) + ctx->dirty |= FD_SHADER_DIRTY_FP; + } + + if (last_key->vclamp_color != key->vclamp_color) + ctx->dirty |= FD_SHADER_DIRTY_VP; + + if (last_key->fclamp_color != key->fclamp_color) + ctx->dirty |= FD_SHADER_DIRTY_FP; + + if (last_key->color_two_side != key->color_two_side) + ctx->dirty |= FD_SHADER_DIRTY_FP; + + if (last_key->half_precision != key->half_precision) + ctx->dirty |= FD_SHADER_DIRTY_FP; + + if (last_key->rasterflat != key->rasterflat) + ctx->dirty |= FD_SHADER_DIRTY_FP; + + if (last_key->ucp_enables != key->ucp_enables) + ctx->dirty |= FD_SHADER_DIRTY_FP | FD_SHADER_DIRTY_VP; + + fd5_ctx->last_key = *key; + } +} + +static bool +fd5_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) +{ + struct fd5_context *fd5_ctx = fd5_context(ctx); + struct fd5_emit emit = { + .debug = &ctx->debug, + .vtx = &ctx->vtx, + .prog = &ctx->prog, + .info = info, + .key = { + .color_two_side = ctx->rasterizer->light_twoside, + .vclamp_color = ctx->rasterizer->clamp_vertex_color, + .fclamp_color = ctx->rasterizer->clamp_fragment_color, + 
.rasterflat = ctx->rasterizer->flatshade, + .half_precision = ctx->in_blit && + fd_half_precision(&ctx->batch->framebuffer), + .ucp_enables = ctx->rasterizer->clip_plane_enable, + .has_per_samp = (fd5_ctx->fsaturate || fd5_ctx->vsaturate || + fd5_ctx->fastc_srgb || fd5_ctx->vastc_srgb), + .vsaturate_s = fd5_ctx->vsaturate_s, + .vsaturate_t = fd5_ctx->vsaturate_t, + .vsaturate_r = fd5_ctx->vsaturate_r, + .fsaturate_s = fd5_ctx->fsaturate_s, + .fsaturate_t = fd5_ctx->fsaturate_t, + .fsaturate_r = fd5_ctx->fsaturate_r, + .vastc_srgb = fd5_ctx->vastc_srgb, + .fastc_srgb = fd5_ctx->fastc_srgb, + }, + .rasterflat = ctx->rasterizer->flatshade, + .sprite_coord_enable = ctx->rasterizer->sprite_coord_enable, + .sprite_coord_mode = ctx->rasterizer->sprite_coord_mode, + }; + + fixup_shader_state(ctx, &emit.key); + + unsigned dirty = ctx->dirty; + + /* do regular pass first, since that is more likely to fail compiling: */ + + if (!(fd5_emit_get_vp(&emit) && fd5_emit_get_fp(&emit))) + return false; + + emit.key.binning_pass = false; + emit.dirty = dirty; + + draw_impl(ctx, ctx->batch->draw, &emit); + +// /* and now binning pass: */ +// emit.key.binning_pass = true; +// emit.dirty = dirty & ~(FD_DIRTY_BLEND); +// emit.vp = NULL; /* we changed key so need to refetch vp */ +// emit.fp = NULL; +// draw_impl(ctx, ctx->batch->binning, &emit); + + return true; +} + +static void +fd5_clear(struct fd_context *ctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct fd_ringbuffer *ring = ctx->batch->draw; + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; + struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); + + /* TODO handle scissor.. or fallback to slow-clear? 
*/ + + ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); + ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); + ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); + ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); + + fd5_emit_render_cntl(ctx, true); + + if (buffers & PIPE_CLEAR_COLOR) { + for (int i = 0; i < pfb->nr_cbufs; i++) { + union util_color uc = {0}; + + if (!pfb->cbufs[i]) + continue; + + if (!(buffers & (PIPE_CLEAR_COLOR0 << i))) + continue; + + // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP?? + float f[4]; + switch (fd5_pipe2swap(pfb->cbufs[i]->format)) { + case WZYX: + f[0] = color->f[0]; + f[1] = color->f[1]; + f[2] = color->f[2]; + f[3] = color->f[3]; + break; + case WXYZ: + f[2] = color->f[0]; + f[1] = color->f[1]; + f[0] = color->f[2]; + f[3] = color->f[3]; + break; + case ZYXW: + f[3] = color->f[0]; + f[0] = color->f[1]; + f[1] = color->f[2]; + f[2] = color->f[3]; + break; + case XYZW: + f[3] = color->f[0]; + f[2] = color->f[1]; + f[1] = color->f[2]; + f[0] = color->f[3]; + break; + } + util_pack_color(f, pfb->cbufs[i]->format, &uc); + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); + OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | + A5XX_RB_CLEAR_CNTL_MASK(0xf)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 4); + OUT_RING(ring, uc.ui[0]); /* RB_CLEAR_COLOR_DW0 */ + OUT_RING(ring, uc.ui[1]); /* RB_CLEAR_COLOR_DW1 */ + OUT_RING(ring, uc.ui[2]); /* RB_CLEAR_COLOR_DW2 */ + OUT_RING(ring, uc.ui[3]); /* RB_CLEAR_COLOR_DW3 */ + + fd5_emit_blit(ctx, ring); + } + } + + if (pfb->zsbuf && (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) { + uint32_t clear = + util_pack_z_stencil(pfb->zsbuf->format, depth, stencil); + uint32_t mask = 0; + + if (buffers & PIPE_CLEAR_DEPTH) + mask |= 0x1; + + if (buffers & 
PIPE_CLEAR_STENCIL) + mask |= 0x2; + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_ZS)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); + OUT_RING(ring, A5XX_RB_CLEAR_CNTL_FAST_CLEAR | + A5XX_RB_CLEAR_CNTL_MASK(mask)); + + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_COLOR_DW0, 1); + OUT_RING(ring, clear); /* RB_CLEAR_COLOR_DW0 */ + + fd5_emit_blit(ctx, ring); + } + + /* disable fast clear to not interfere w/ gmem->mem, etc.. */ + OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1); + OUT_RING(ring, 0x00000000); /* RB_CLEAR_CNTL */ +} + +void +fd5_draw_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->draw_vbo = fd5_draw_vbo; + ctx->clear = fd5_clear; +} diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_draw.h b/src/gallium/drivers/freedreno/a5xx/fd5_draw.h new file mode 100644 index 00000000000..677bedf4f1c --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_draw.h @@ -0,0 +1,113 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark
+ */
+
+#ifndef FD5_DRAW_H_
+#define FD5_DRAW_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_draw.h"
+
+/* some bits in common w/ a4xx: */
+#include "a4xx/fd4_draw.h"
+
+void fd5_draw_init(struct pipe_context *pctx);
+/* Emit one CP_DRAW_INDX_OFFSET packet, bracketed by scratch7 debug markers. */
+static inline void
+fd5_draw(struct fd_batch *batch, struct fd_ringbuffer *ring,
+		enum pc_di_primtype primtype,
+		enum pc_di_vis_cull_mode vismode,
+		enum pc_di_src_sel src_sel, uint32_t count,
+		uint32_t instances, enum a4xx_index_size idx_type,
+		uint32_t idx_size, uint32_t idx_offset,
+		struct pipe_resource *idx_buffer)
+{
+	/* for debug after a lock up, write a unique counter value
+	 * to scratch7 for each draw, to make it easier to match up
+	 * register dumps to cmdstream. The combination of IB
+	 * (scratch6) and DRAW is enough to "triangulate" the
+	 * particular draw that caused lockup.
+	 */
+	emit_marker5(ring, 7);
+	/* 3 dwords always; indexed draws add 4 more: pad, 2-dword reloc, size */
+	OUT_PKT7(ring, CP_DRAW_INDX_OFFSET, idx_buffer ? 7 : 3);
+	if (vismode == USE_VISIBILITY) {
+		/* leave vis mode blank for now, it will be patched up when
+		 * we know if we are binning or not
+		 */
+		OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
+				&batch->draw_patches);
+	} else {
+		OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
+	}
+	OUT_RING(ring, instances);         /* NumInstances */
+	OUT_RING(ring, count);             /* NumIndices */
+	if (idx_buffer) {
+		OUT_RING(ring, 0x0);           /* XXX */
+		OUT_RELOC(ring, fd_resource(idx_buffer)->bo, idx_offset, 0, 0);
+		OUT_RING (ring, idx_size);
+	}
+
+	emit_marker5(ring, 7);
+
+	fd_reset_wfi(batch);
+}
+/* Derive the index-buffer parameters from the draw info, then emit the draw. */
+static inline void
+fd5_draw_emit(struct fd_batch *batch, struct fd_ringbuffer *ring,
+		enum pc_di_primtype primtype,
+		enum pc_di_vis_cull_mode vismode,
+		const struct pipe_draw_info *info)
+{
+	struct pipe_resource *idx_buffer = NULL;
+	enum a4xx_index_size idx_type;
+	enum pc_di_src_sel src_sel;
+	uint32_t idx_size, idx_offset;
+
+	if (info->indexed) {
+		struct pipe_index_buffer *idx = &batch->ctx->indexbuf;
+
+		assert(!idx->user_buffer);
+
+		idx_buffer = idx->buffer;
+		idx_type = fd4_size2indextype(idx->index_size);
+		idx_size = idx->index_size * info->count;
+		idx_offset = idx->offset + (info->start * idx->index_size);
+		src_sel = DI_SRC_SEL_DMA;
+	} else {
+		idx_buffer = NULL;
+		idx_type = INDEX4_SIZE_32_BIT;
+		idx_size = 0;
+		idx_offset = 0;
+		src_sel = DI_SRC_SEL_AUTO_INDEX;
+	}
+
+	fd5_draw(batch, ring, primtype, vismode, src_sel,
+			info->count, info->instance_count,
+			idx_type, idx_size, idx_offset, idx_buffer);
+}
+
+#endif /* FD5_DRAW_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.c b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
new file mode 100644
index 00000000000..edb1f4f1e43
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.c
@@ -0,0 +1,729 @@
+/*
+ * Copyright (C) 2016 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_helpers.h" +#include "util/u_format.h" +#include "util/u_viewport.h" + +#include "freedreno_resource.h" +#include "freedreno_query_hw.h" + +#include "fd5_emit.h" +#include "fd5_blend.h" +#include "fd5_context.h" +#include "fd5_program.h" +#include "fd5_rasterizer.h" +#include "fd5_texture.h" +#include "fd5_format.h" +#include "fd5_zsa.h" + +static const enum adreno_state_block sb[] = { + [SHADER_VERTEX] = SB_VERT_SHADER, + [SHADER_FRAGMENT] = SB_FRAG_SHADER, +}; + +/* regid: base const register + * prsc or dwords: buffer containing constant values + * sizedwords: size of const value buffer + */ +static void +fd5_emit_const(struct fd_ringbuffer *ring, enum shader_t type, + uint32_t regid, uint32_t offset, uint32_t sizedwords, + const uint32_t *dwords, struct pipe_resource *prsc) +{ + uint32_t i, sz; + enum adreno_state_src src; + + debug_assert((regid % 4) == 0); + 
debug_assert((sizedwords % 4) == 0);
+
+	if (prsc) {
+		sz = 0;
+		src = 0x2;  // TODO ??
+	} else {
+		sz = sizedwords;
+		src = SS_DIRECT;
+	}
+
+	OUT_PKT7(ring, CP_LOAD_STATE, 3 + sz);
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+			CP_LOAD_STATE_0_STATE_SRC(src) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
+	if (prsc) {
+		struct fd_bo *bo = fd_resource(prsc)->bo;
+		OUT_RELOC(ring, bo, offset,
+				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
+	} else {
+		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+		OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
+		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
+	}
+	for (i = 0; i < sz; i++) {
+		OUT_RING(ring, dwords[i]);
+	}
+}
+/* Load an array of buffer addresses as constants, two dwords per entry. */
+static void
+fd5_emit_const_bo(struct fd_ringbuffer *ring, enum shader_t type, boolean write,
+		uint32_t regid, uint32_t num, struct pipe_resource **prscs, uint32_t *offsets)
+{
+	uint32_t i;
+
+	debug_assert((regid % 4) == 0);
+	debug_assert((num % 4) == 0);
+	/* each entry is 2 dwords: payload is 2*num dwords = num/2 four-dword units */
+	OUT_PKT7(ring, CP_LOAD_STATE, 3 + (2 * num));
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+			CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
+			CP_LOAD_STATE_0_NUM_UNIT(num/2));
+	OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+			CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+	OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
+	/* NOTE(review): assumes callers reserve two const dwords per pointer -- confirm */
+	for (i = 0; i < num; i++) {
+		if (!prscs[i]) {
+			/* poison value, padded to the same two dwords a reloc occupies */
+			OUT_RING(ring, 0xbad00000 | (i << 16));
+			OUT_RING(ring, 0xbad00000 | (i << 16));
+		} else if (write) {
+			OUT_RELOCW(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
+		} else {
+			OUT_RELOC(ring, fd_resource(prscs[i])->bo, offsets[i], 0, 0);
+		}
+	}
+}
+
+static void
+emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		enum adreno_state_block sb, struct fd_texture_stateobj *tex)
+{
+	unsigned i;
+
+	if (tex->num_samplers > 0) {
+		/* output sampler state: */
+		OUT_PKT7(ring, CP_LOAD_STATE, 3 + (4 * tex->num_samplers));
+		OUT_RING(ring,
CP_LOAD_STATE_0_DST_OFF(0) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers)); + OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) | + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0)); + for (i = 0; i < tex->num_samplers; i++) { + static const struct fd5_sampler_stateobj dummy_sampler = {}; + const struct fd5_sampler_stateobj *sampler = tex->samplers[i] ? + fd5_sampler_stateobj(tex->samplers[i]) : + &dummy_sampler; + OUT_RING(ring, sampler->texsamp0); + OUT_RING(ring, sampler->texsamp1); + OUT_RING(ring, sampler->texsamp2); + OUT_RING(ring, sampler->texsamp3); + } + } + + if (tex->num_textures > 0) { + unsigned num_textures = tex->num_textures; + + /* emit texture state: */ + OUT_PKT7(ring, CP_LOAD_STATE, 3 + (12 * num_textures)); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | + CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(num_textures)); + OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) | + CP_LOAD_STATE_1_EXT_SRC_ADDR(0)); + OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0)); + for (i = 0; i < tex->num_textures; i++) { + static const struct fd5_pipe_sampler_view dummy_view = {}; + const struct fd5_pipe_sampler_view *view = tex->textures[i] ? 
+ fd5_pipe_sampler_view(tex->textures[i]) : + &dummy_view; + + OUT_RING(ring, view->texconst0); + OUT_RING(ring, view->texconst1); + OUT_RING(ring, view->texconst2); + OUT_RING(ring, view->texconst3); + if (view->base.texture) { + struct fd_resource *rsc = fd_resource(view->base.texture); + OUT_RELOC(ring, rsc->bo, view->offset, + (uint64_t)view->texconst5 << 32, 0); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, view->texconst5); + } + OUT_RING(ring, view->texconst6); + OUT_RING(ring, view->texconst7); + OUT_RING(ring, view->texconst8); + OUT_RING(ring, view->texconst9); + OUT_RING(ring, view->texconst10); + OUT_RING(ring, view->texconst11); + } + } +} + +void +fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit) +{ /* emits VFD_FETCH/VFD_DECODE/VFD_DEST_CNTL for each non-sysval VS input with a non-zero compmask; i indexes shader inputs, j counts emitted fetch slots */ + int32_t i, j; + const struct fd_vertex_state *vtx = emit->vtx; + const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit); + + for (i = 0, j = 0; i < vp->inputs_count; i++) { /* strict '<': index inputs_count is one past the last input */ + if (vp->inputs[i].sysval) + continue; + if (vp->inputs[i].compmask) { + struct pipe_vertex_element *elem = &vtx->vtx->pipe[i]; + const struct pipe_vertex_buffer *vb = + &vtx->vertexbuf.vb[elem->vertex_buffer_index]; + struct fd_resource *rsc = fd_resource(vb->buffer); + enum pipe_format pfmt = elem->src_format; + enum a5xx_vtx_fmt fmt = fd5_pipe2vtx(pfmt); + uint32_t off = vb->buffer_offset + elem->src_offset; + uint32_t size = fd_bo_size(rsc->bo) - off; /* bytes from the element's start offset to the end of the bo */ + debug_assert(fmt != ~0); + + OUT_PKT4(ring, REG_A5XX_VFD_FETCH(j), 4); + OUT_RELOC(ring, rsc->bo, off, 0, 0); + OUT_RING(ring, size); /* VFD_FETCH[j].SIZE */ + OUT_RING(ring, vb->stride); /* VFD_FETCH[j].STRIDE */ + + OUT_PKT4(ring, REG_A5XX_VFD_DECODE(j), 2); + OUT_RING(ring, A5XX_VFD_DECODE_INSTR_IDX(j) | + A5XX_VFD_DECODE_INSTR_FORMAT(fmt) | + A5XX_VFD_DECODE_INSTR_SWAP(fd5_pipe2swap(pfmt))); + OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */ + + OUT_PKT4(ring, REG_A5XX_VFD_DEST_CNTL(j), 1); + OUT_RING(ring,
A5XX_VFD_DEST_CNTL_INSTR_WRITEMASK(vp->inputs[i].compmask) | + A5XX_VFD_DEST_CNTL_INSTR_REGID(vp->inputs[i].regid)); + + j++; + } + } + + OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_0, 1); + OUT_RING(ring, A5XX_VFD_CONTROL_0_VTXCNT(j)); +} + +void +fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd5_emit *emit) +{ + const struct ir3_shader_variant *vp = fd5_emit_get_vp(emit); + const struct ir3_shader_variant *fp = fd5_emit_get_fp(emit); + uint32_t dirty = emit->dirty; + + emit_marker5(ring, 5); + + if ((dirty & FD_DIRTY_FRAMEBUFFER) && !emit->key.binning_pass) { + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; + unsigned char mrt_comp[A5XX_MAX_RENDER_TARGETS] = {0}; + + for (unsigned i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) { + mrt_comp[i] = ((i < pfb->nr_cbufs) && pfb->cbufs[i]) ? 0xf : 0; + } + + OUT_PKT4(ring, REG_A5XX_RB_RENDER_COMPONENTS, 1); + OUT_RING(ring, A5XX_RB_RENDER_COMPONENTS_RT0(mrt_comp[0]) | + A5XX_RB_RENDER_COMPONENTS_RT1(mrt_comp[1]) | + A5XX_RB_RENDER_COMPONENTS_RT2(mrt_comp[2]) | + A5XX_RB_RENDER_COMPONENTS_RT3(mrt_comp[3]) | + A5XX_RB_RENDER_COMPONENTS_RT4(mrt_comp[4]) | + A5XX_RB_RENDER_COMPONENTS_RT5(mrt_comp[5]) | + A5XX_RB_RENDER_COMPONENTS_RT6(mrt_comp[6]) | + A5XX_RB_RENDER_COMPONENTS_RT7(mrt_comp[7])); + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) { + struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa); + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; + uint32_t rb_alpha_control = zsa->rb_alpha_control; + + if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0]))) + rb_alpha_control &= ~A5XX_RB_ALPHA_CONTROL_ALPHA_TEST; + + OUT_PKT4(ring, REG_A5XX_RB_ALPHA_CONTROL, 1); + OUT_RING(ring, rb_alpha_control); + + OUT_PKT4(ring, REG_A5XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, zsa->rb_stencil_control); + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) { + struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa); + struct pipe_stencil_ref *sr = 
&ctx->stencil_ref; + + OUT_PKT4(ring, REG_A5XX_RB_STENCILREFMASK, 1); + OUT_RING(ring, zsa->rb_stencilrefmask | + A5XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0])); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_PLANE_CNTL, 1); + OUT_RING(ring, zsa->gras_su_depth_plane_cntl); + } + + if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) { + struct fd5_zsa_stateobj *zsa = fd5_zsa_stateobj(ctx->zsa); + bool fragz = fp->has_kill | fp->writes_pos; + + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_CNTL, 1); + OUT_RING(ring, zsa->rb_depth_cntl); + + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_PLANE_CNTL, 1); + OUT_RING(ring, COND(fragz, A5XX_RB_DEPTH_PLANE_CNTL_FRAG_WRITES_Z)); + } + + if (dirty & FD_DIRTY_RASTERIZER) { + struct fd5_rasterizer_stateobj *rasterizer = + fd5_rasterizer_stateobj(ctx->rasterizer); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_CNTL, 1); + OUT_RING(ring, rasterizer->gras_su_cntl); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2); + OUT_RING(ring, rasterizer->gras_su_point_minmax); + OUT_RING(ring, rasterizer->gras_su_point_size); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_POLY_OFFSET_SCALE, 3); + OUT_RING(ring, rasterizer->gras_su_poly_offset_scale); + OUT_RING(ring, rasterizer->gras_su_poly_offset_offset); + OUT_RING(ring, rasterizer->gras_su_poly_offset_clamp); + } + + /* NOTE: since primitive_restart is not actually part of any + * state object, we need to make sure that we always emit + * PRIM_VTX_CNTL.. either that or be more clever and detect + * when it changes. 
+ */ + if (emit->info) { + struct fd5_rasterizer_stateobj *rast = + fd5_rasterizer_stateobj(ctx->rasterizer); + uint32_t val = rast->pc_prim_vtx_cntl; + + val |= COND(vp->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE); + + OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, val); + } + + if (dirty & FD_DIRTY_SCISSOR) { + struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx); + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0, 2); + OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->minx) | + A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->miny)); + OUT_RING(ring, A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_X(scissor->maxx - 1) | + A5XX_GRAS_SC_SCREEN_SCISSOR_TL_0_Y(scissor->maxy - 1)); + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0, 2); + OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->minx) | + A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->miny)); + OUT_RING(ring, A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_X(scissor->maxx - 1) | + A5XX_GRAS_SC_VIEWPORT_SCISSOR_TL_0_Y(scissor->maxy - 1)); + + ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, scissor->minx); + ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, scissor->miny); + ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, scissor->maxx); + ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, scissor->maxy); + } + + if (dirty & FD_DIRTY_VIEWPORT) { + fd_wfi(ctx->batch, ring); + OUT_PKT4(ring, REG_A5XX_GRAS_CL_VPORT_XOFFSET_0, 6); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0])); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0])); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1])); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1])); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2])); + OUT_RING(ring, A5XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2])); + } + + if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER)) 
{ + struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer; + unsigned n = pfb->nr_cbufs; + /* if we have depth/stencil, we need at least one MRT: */ + if (pfb->zsbuf) + n = MAX2(1, n); + fd5_program_emit(ring, emit, n, pfb->cbufs); + } + + if (emit->prog == &ctx->prog) { /* evil hack to deal sanely with clear path */ + ir3_emit_consts(vp, ring, ctx, emit->info, dirty); + if (!emit->key.binning_pass) + ir3_emit_consts(fp, ring, ctx, emit->info, dirty); + } + + if ((dirty & FD_DIRTY_BLEND)) { + struct fd5_blend_stateobj *blend = fd5_blend_stateobj(ctx->blend); + uint32_t i; + + for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) { + enum pipe_format format = pipe_surface_format( + ctx->batch->framebuffer.cbufs[i]); + bool is_int = util_format_is_pure_integer(format); + bool has_alpha = util_format_has_alpha(format); + uint32_t control = blend->rb_mrt[i].control; + uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha; + + if (is_int) { + control &= A5XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK; +// control |= A5XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY); + } + + if (has_alpha) { + blend_control |= blend->rb_mrt[i].blend_control_rgb; + } else { + blend_control |= blend->rb_mrt[i].blend_control_no_alpha_rgb; + control &= ~A5XX_RB_MRT_CONTROL_BLEND2; + } + + OUT_PKT4(ring, REG_A5XX_RB_MRT_CONTROL(i), 1); + OUT_RING(ring, control); + + OUT_PKT4(ring, REG_A5XX_RB_MRT_BLEND_CONTROL(i), 1); + OUT_RING(ring, blend_control); + } + + OUT_PKT4(ring, REG_A5XX_RB_BLEND_CNTL, 1); + OUT_RING(ring, blend->rb_blend_cntl | + A5XX_RB_BLEND_CNTL_SAMPLE_MASK(0xffff)); + + OUT_PKT4(ring, REG_A5XX_SP_BLEND_CNTL, 1); + OUT_RING(ring, 0x00000100); + } + + if (dirty & FD_DIRTY_BLEND_COLOR) { + struct pipe_blend_color *bcolor = &ctx->blend_color; + + OUT_PKT4(ring, REG_A5XX_RB_BLEND_RED, 8); /* 8 dwords: per channel, one dword OR-ing the FLOAT/UINT/SINT fields, then one _F32 dword */ + OUT_RING(ring, A5XX_RB_BLEND_RED_FLOAT(bcolor->color[0]) | + A5XX_RB_BLEND_RED_UINT(bcolor->color[0] * 0xff) | + A5XX_RB_BLEND_RED_SINT(bcolor->color[0] * 0x7f)); + OUT_RING(ring,
A5XX_RB_BLEND_RED_F32(bcolor->color[0])); + OUT_RING(ring, A5XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]) | + A5XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 0xff) | + A5XX_RB_BLEND_GREEN_SINT(bcolor->color[1] * 0x7f)); + OUT_RING(ring, A5XX_RB_BLEND_GREEN_F32(bcolor->color[1])); + OUT_RING(ring, A5XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]) | + A5XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 0xff) | + A5XX_RB_BLEND_BLUE_SINT(bcolor->color[2] * 0x7f)); + OUT_RING(ring, A5XX_RB_BLEND_BLUE_F32(bcolor->color[2])); + OUT_RING(ring, A5XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]) | + A5XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 0xff) | + A5XX_RB_BLEND_ALPHA_SINT(bcolor->color[3] * 0x7f)); + OUT_RING(ring, A5XX_RB_BLEND_ALPHA_F32(bcolor->color[3])); + } + + if (dirty & FD_DIRTY_VERTTEX) { + if (vp->has_samp) { + emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex); + OUT_PKT4(ring, REG_A5XX_TPL1_VS_TEX_COUNT, 1); + OUT_RING(ring, ctx->verttex.num_textures); + } else { + dirty &= ~FD_DIRTY_VERTTEX; + } + } + + if (dirty & FD_DIRTY_FRAGTEX) { + if (fp->has_samp) { + emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex); + OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 1); + OUT_RING(ring, ctx->fragtex.num_textures); + } else { + dirty &= ~FD_DIRTY_FRAGTEX; + } + } + + ctx->dirty &= ~dirty; +} + +/* emit setup at begin of new cmdstream buffer (don't rely on previous + * state, there could have been a context switch between ioctls): + */ +void +fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + struct fd_context *ctx = batch->ctx; + + fd5_set_render_mode(ctx, ring, BYPASS); + fd5_cache_flush(batch, ring); + + OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1); + OUT_RING(ring, 0xfffff); + +/* +t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) +0000000500024048: 70d08003 00000000 001c5000 00000005 +t7 opcode: CP_PERFCOUNTER_ACTION (50) (4 dwords) +0000000500024058: 70d08003 00000010 001c7000 00000005 + +t7 opcode: CP_WAIT_FOR_IDLE (26) (1 dwords) +0000000500024068: 70268000 +*/ + +
OUT_PKT4(ring, REG_A5XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, 0xffffffff); + + OUT_PKT4(ring, REG_A5XX_PC_RASTER_CNTL, 1); + OUT_RING(ring, 0x00000012); + + OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_POINT_MINMAX, 2); + OUT_RING(ring, A5XX_GRAS_SU_POINT_MINMAX_MIN(1.0) | + A5XX_GRAS_SU_POINT_MINMAX_MAX(4092.0)); + OUT_RING(ring, A5XX_GRAS_SU_POINT_SIZE(0.5)); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */ + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_SCREEN_SCISSOR_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_SC_SCREEN_SCISSOR_CNTL */ + + OUT_PKT4(ring, REG_A5XX_SP_VS_CONFIG_MAX_CONST, 1); + OUT_RING(ring, 0); /* SP_VS_CONFIG_MAX_CONST */ + + OUT_PKT4(ring, REG_A5XX_SP_FS_CONFIG_MAX_CONST, 1); + OUT_RING(ring, 0); /* SP_FS_CONFIG_MAX_CONST */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E292, 2); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E292 */ + OUT_RING(ring, 0x00000000); /* UNKNOWN_E293 */ + + OUT_PKT4(ring, REG_A5XX_RB_MODE_CNTL, 1); + OUT_RING(ring, 0x00000044); /* RB_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_RB_DBG_ECO_CNTL, 1); + OUT_RING(ring, 0x00100000); /* RB_DBG_ECO_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VFD_MODE_CNTL, 1); + OUT_RING(ring, 0x00000000); /* VFD_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_PC_MODE_CNTL, 1); + OUT_RING(ring, 0x0000001f); /* PC_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_SP_MODE_CNTL, 1); + OUT_RING(ring, 0x0000001e); /* SP_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_SP_DBG_ECO_CNTL, 1); + OUT_RING(ring, 0x40000800); /* SP_DBG_ECO_CNTL */ + + OUT_PKT4(ring, REG_A5XX_TPL1_MODE_CNTL, 1); + OUT_RING(ring, 0x00000544); /* TPL1_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_HLSQ_TIMEOUT_THRESHOLD_0, 2); + OUT_RING(ring, 0x00000080); /* HLSQ_TIMEOUT_THRESHOLD_0 */ + OUT_RING(ring, 0x00000000); /* HLSQ_TIMEOUT_THRESHOLD_1 */ + + OUT_PKT4(ring, REG_A5XX_VPC_DBG_ECO_CNTL, 1); + OUT_RING(ring, 0x00000400); /* 
VPC_DBG_ECO_CNTL */ + + OUT_PKT4(ring, REG_A5XX_HLSQ_MODE_CNTL, 1); + OUT_RING(ring, 0x00000001); /* HLSQ_MODE_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1); + OUT_RING(ring, 0x00000000); /* VPC_MODE_CNTL */ + + /* we don't use this yet.. probably best to disable.. */ + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); + OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) | + CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS | + CP_SET_DRAW_STATE__0_GROUP_ID(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0)); + OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0)); + + /* other regs not used (yet?) and always seem to have same value: */ + OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1); + OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */ + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_CONSERVATIVE_RAS_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_SU_CONSERVATIVE_RAS_CNTL */ + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_SC_BIN_CNTL */ + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_BIN_CNTL, 1); + OUT_RING(ring, 0x00000000); /* GRAS_SC_BIN_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VPC_FS_PRIMITIVEID_CNTL, 1); + OUT_RING(ring, 0x000000ff); /* VPC_FS_PRIMITIVEID_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1); + OUT_RING(ring, 0x00000001); /* VPC_SO_OVERRIDE */ + + OUT_PKT4(ring, REG_A5XX_VPC_SO_BUFFER_BASE_LO_0, 3); + OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_LO_0 */ + OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_BASE_HI_0 */ + OUT_RING(ring, 0x00000000); /* VPC_SO_BUFFER_SIZE_0 */ + + OUT_PKT4(ring, REG_A5XX_VPC_SO_FLUSH_BASE_LO_0, 2); + OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_LO_0 */ + OUT_RING(ring, 0x00000000); /* VPC_SO_FLUSH_BASE_HI_0 */ + + OUT_PKT4(ring, REG_A5XX_PC_GS_PARAM, 1); + OUT_RING(ring, 0x00000000); /* PC_GS_PARAM */ + + OUT_PKT4(ring, REG_A5XX_PC_HS_PARAM, 1); + OUT_RING(ring, 0x00000000); /* PC_HS_PARAM */ + + OUT_PKT4(ring, REG_A5XX_TPL1_TP_FS_ROTATION_CNTL, 1); + OUT_RING(ring, 0x00000000); /* TPL1_TP_FS_ROTATION_CNTL */ + + OUT_PKT4(ring, 
REG_A5XX_UNKNOWN_E001, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E001 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E004, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E004 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E093, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E093 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E1C7, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E1C7 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E29A, 1); + OUT_RING(ring, 0x00ffff00); /* UNKNOWN_E29A */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2A1, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E2A1 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2AB, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E2AB */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E389, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E389 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E38D, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E38D */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5AB, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E5AB */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5C2, 1); + OUT_RING(ring, 0x00000000); /* UNKNOWN_E5C2 */ + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2AE, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2B2, 6); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2B9, 6); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E2C0, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E5DB, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E600, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E640, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, 
REG_A5XX_TPL1_VS_TEX_COUNT, 4); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_TPL1_FS_TEX_COUNT, 2); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C0, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7C5, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CA, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7CF, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D4, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + OUT_PKT4(ring, REG_A5XX_UNKNOWN_E7D9, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + + // TODO hacks.. 
these should not be hardcoded: + OUT_PKT4(ring, REG_A5XX_GRAS_SC_CNTL, 1); + OUT_RING(ring, 0x00000008); /* GRAS_SC_CNTL */ + + fd_hw_query_enable(batch, ring); +} + +static void +fd5_emit_ib(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) +{ + __OUT_IB5(ring, target); +} + +void +fd5_emit_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->emit_const = fd5_emit_const; + ctx->emit_const_bo = fd5_emit_const_bo; + ctx->emit_ib = fd5_emit_ib; +} diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_emit.h b/src/gallium/drivers/freedreno/a5xx/fd5_emit.h new file mode 100644 index 00000000000..2c6b717d570 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_emit.h @@ -0,0 +1,162 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef FD5_EMIT_H +#define FD5_EMIT_H + +#include "pipe/p_context.h" + +#include "freedreno_context.h" +#include "fd5_context.h" +#include "fd5_format.h" +#include "fd5_program.h" +#include "ir3_shader.h" + +struct fd_ringbuffer; + +/* grouped together emit-state for prog/vertex/state emit: */ +struct fd5_emit { + struct pipe_debug_callback *debug; + const struct fd_vertex_state *vtx; + const struct fd_program_stateobj *prog; + const struct pipe_draw_info *info; + struct ir3_shader_key key; + uint32_t dirty; + + uint32_t sprite_coord_enable; /* bitmask */ + bool sprite_coord_mode; + bool rasterflat; + bool no_decode_srgb; + + /* cached to avoid repeated lookups of same variants: */ + const struct ir3_shader_variant *vp, *fp; + /* TODO: other shader stages.. */ +}; + +static inline enum a5xx_color_fmt fd5_emit_format(struct pipe_surface *surf) +{ + if (!surf) + return 0; + return fd5_pipe2color(surf->format); +} + +static inline const struct ir3_shader_variant * +fd5_emit_get_vp(struct fd5_emit *emit) +{ + if (!emit->vp) { + struct fd5_shader_stateobj *so = emit->prog->vp; + emit->vp = ir3_shader_variant(so->shader, emit->key, emit->debug); + } + return emit->vp; +} + +static inline const struct ir3_shader_variant * +fd5_emit_get_fp(struct fd5_emit *emit) +{ + if (!emit->fp) { + if (emit->key.binning_pass) { + /* use dummy stateobj to simplify binning vs non-binning: */ + static const struct ir3_shader_variant binning_fp = {}; + emit->fp = &binning_fp; + } else { + struct fd5_shader_stateobj *so = emit->prog->fp; + emit->fp = ir3_shader_variant(so->shader, emit->key, emit->debug); + } + } + return emit->fp; +} + +static inline void +fd5_cache_flush(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + fd_reset_wfi(batch); + OUT_PKT4(ring, REG_A5XX_UCHE_CACHE_INVALIDATE_MIN_LO, 5); + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_LO */ + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MIN_HI */ + 
OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_LO */ + OUT_RING(ring, 0x00000000); /* UCHE_CACHE_INVALIDATE_MAX_HI */ + OUT_RING(ring, 0x00000012); /* UCHE_CACHE_INVALIDATE */ + fd_wfi(batch, ring); +} + +static inline void +fd5_set_render_mode(struct fd_context *ctx, struct fd_ringbuffer *ring, + enum render_mode_cmd mode) +{ + /* TODO add preemption support, gmem bypass, etc */ + emit_marker5(ring, 7); + OUT_PKT7(ring, CP_SET_RENDER_MODE, 5); + OUT_RING(ring, CP_SET_RENDER_MODE_0_MODE(mode)); + OUT_RING(ring, 0x00000000); /* ADDR_LO */ + OUT_RING(ring, 0x00000000); /* ADDR_HI */ + OUT_RING(ring, COND(mode == GMEM, CP_SET_RENDER_MODE_3_GMEM_ENABLE)); + OUT_RING(ring, 0x00000000); + emit_marker5(ring, 7); +} + +static inline void +fd5_emit_blit(struct fd_context *ctx, struct fd_ringbuffer *ring) +{ + struct fd5_context *fd5_ctx = fd5_context(ctx); + + emit_marker5(ring, 7); + + OUT_PKT7(ring, CP_EVENT_WRITE, 4); + OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(BLIT)); + OUT_RELOCW(ring, fd5_ctx->blit_mem, 0, 0, 0); /* ADDR_LO/HI */ + OUT_RING(ring, 0x00000000); + + emit_marker5(ring, 7); +} + +static inline void +fd5_emit_render_cntl(struct fd_context *ctx, bool blit) +{ + struct fd_ringbuffer *ring = ctx->batch->draw; + + /* TODO eventually this partially depends on the pfb state, ie. + * which of the cbuf(s)/zsbuf has an UBWC flag buffer.. that part + * we could probably cache and just regenerate if framebuffer + * state is dirty (or something like that).. + * + * Other bits seem to depend on query state, like if samples-passed + * query is active. 
+ */ + OUT_PKT4(ring, REG_A5XX_RB_RENDER_CNTL, 1); + OUT_RING(ring, 0x00000000 | /* RB_RENDER_CNTL */ + COND(!blit, 0x8)); +} + +void fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit); + +void fd5_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd5_emit *emit); + +void fd5_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring); + +void fd5_emit_init(struct pipe_context *pctx); + +#endif /* FD5_EMIT_H */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_format.c b/src/gallium/drivers/freedreno/a5xx/fd5_format.c new file mode 100644 index 00000000000..0e22839701d --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_format.c @@ -0,0 +1,445 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "pipe/p_defines.h" +#include "util/u_format.h" + +#include "fd5_format.h" + + +/* Specifies the table of all the formats and their features. Also supplies + * the helpers that look up various data in those tables. + */ + +struct fd5_format { + enum a5xx_vtx_fmt vtx; + enum a5xx_tex_fmt tex; + enum a5xx_color_fmt rb; + enum a3xx_color_swap swap; + boolean present; +}; + +#define RB5_NONE ~0 + +/* vertex + texture */ +#define VT(pipe, fmt, rbfmt, swapfmt) \ + [PIPE_FORMAT_ ## pipe] = { \ + .present = 1, \ + .vtx = VFMT5_ ## fmt, \ + .tex = TFMT5_ ## fmt, \ + .rb = RB5_ ## rbfmt, \ + .swap = swapfmt \ + } + +/* texture-only */ +#define _T(pipe, fmt, rbfmt, swapfmt) \ + [PIPE_FORMAT_ ## pipe] = { \ + .present = 1, \ + .vtx = ~0, \ + .tex = TFMT5_ ## fmt, \ + .rb = RB5_ ## rbfmt, \ + .swap = swapfmt \ + } + +/* vertex-only */ +#define V_(pipe, fmt, rbfmt, swapfmt) \ + [PIPE_FORMAT_ ## pipe] = { \ + .present = 1, \ + .vtx = VFMT5_ ## fmt, \ + .tex = ~0, \ + .rb = RB5_ ## rbfmt, \ + .swap = swapfmt \ + } + +static struct fd5_format formats[PIPE_FORMAT_COUNT] = { + /* 8-bit */ + VT(R8_UNORM, 8_UNORM, R8_UNORM, WZYX), +// VT(R8_SNORM, 8_SNORM, R8_SNORM, WZYX), +// VT(R8_UINT, 8_UINT, R8_UINT, WZYX), +// VT(R8_SINT, 8_SINT, R8_SINT, WZYX), + V_(R8_USCALED, 8_UINT, NONE, WZYX), + V_(R8_SSCALED, 8_UINT, NONE, WZYX), + +// _T(A8_UNORM, 8_UNORM, A8_UNORM, WZYX), +// _T(L8_UNORM, 8_UNORM, R8_UNORM, WZYX), + _T(I8_UNORM, 8_UNORM, NONE, WZYX), + +// _T(A8_UINT, 8_UINT, NONE, WZYX), +// _T(A8_SINT, 8_SINT, NONE, WZYX), +// _T(L8_UINT, 8_UINT, NONE, WZYX), +// _T(L8_SINT, 8_SINT, NONE, WZYX), +// _T(I8_UINT, 8_UINT, NONE, WZYX), +// _T(I8_SINT, 8_SINT, NONE, WZYX), + +// _T(S8_UINT, 8_UINT, R8_UNORM, WZYX), + + /* 16-bit */ +// VT(R16_UNORM, 16_UNORM, R16_UNORM, WZYX), +// VT(R16_SNORM, 16_SNORM, R16_SNORM, WZYX), +// VT(R16_UINT, 16_UINT, R16_UINT, WZYX), +// VT(R16_SINT, 16_SINT, R16_SINT, WZYX), + V_(R16_USCALED, 16_UINT, NONE, 
WZYX), + V_(R16_SSCALED, 16_UINT, NONE, WZYX), + VT(R16_FLOAT, 16_FLOAT, R16_FLOAT, WZYX), + +// _T(A16_UNORM, 16_UNORM, NONE, WZYX), +// _T(A16_SNORM, 16_SNORM, NONE, WZYX), +// _T(A16_UINT, 16_UINT, NONE, WZYX), +// _T(A16_SINT, 16_SINT, NONE, WZYX), +// _T(L16_UNORM, 16_UNORM, NONE, WZYX), +// _T(L16_SNORM, 16_SNORM, NONE, WZYX), +// _T(L16_UINT, 16_UINT, NONE, WZYX), +// _T(L16_SINT, 16_SINT, NONE, WZYX), +// _T(I16_UNORM, 16_UNORM, NONE, WZYX), +// _T(I16_SNORM, 16_SNORM, NONE, WZYX), +// _T(I16_UINT, 16_UINT, NONE, WZYX), +// _T(I16_SINT, 16_SINT, NONE, WZYX), + +// VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX), +// VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX), +// VT(R8G8_UINT, 8_8_UINT, R8G8_UINT, WZYX), +// VT(R8G8_SINT, 8_8_SINT, R8G8_SINT, WZYX), + V_(R8G8_USCALED, 8_8_UINT, NONE, WZYX), + V_(R8G8_SSCALED, 8_8_SINT, NONE, WZYX), + +// _T(L8A8_UINT, 8_8_UINT, NONE, WZYX), +// _T(L8A8_SINT, 8_8_SINT, NONE, WZYX), + + _T(B5G6R5_UNORM, 5_6_5_UNORM, R5G6B5_UNORM, WXYZ), +// _T(B5G5R5A1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), +// _T(B5G5R5X1_UNORM, 5_5_5_1_UNORM, R5G5B5A1_UNORM, WXYZ), + _T(B4G4R4A4_UNORM, 4_4_4_4_UNORM, R4G4B4A4_UNORM, WXYZ), + + /* 24-bit */ + V_(R8G8B8_UNORM, 8_8_8_UNORM, NONE, WZYX), + V_(R8G8B8_SNORM, 8_8_8_SNORM, NONE, WZYX), + V_(R8G8B8_UINT, 8_8_8_UINT, NONE, WZYX), + V_(R8G8B8_SINT, 8_8_8_SINT, NONE, WZYX), + V_(R8G8B8_USCALED, 8_8_8_UINT, NONE, WZYX), + V_(R8G8B8_SSCALED, 8_8_8_SINT, NONE, WZYX), + + /* 32-bit */ +// VT(R32_UINT, 32_UINT, R32_UINT, WZYX), +// VT(R32_SINT, 32_SINT, R32_SINT, WZYX), + V_(R32_USCALED, 32_UINT, NONE, WZYX), + V_(R32_SSCALED, 32_UINT, NONE, WZYX), + VT(R32_FLOAT, 32_FLOAT, R32_FLOAT,WZYX), + V_(R32_FIXED, 32_FIXED, NONE, WZYX), + +// _T(A32_UINT, 32_UINT, NONE, WZYX), +// _T(A32_SINT, 32_SINT, NONE, WZYX), +// _T(L32_UINT, 32_UINT, NONE, WZYX), +// _T(L32_SINT, 32_SINT, NONE, WZYX), +// _T(I32_UINT, 32_UINT, NONE, WZYX), +// _T(I32_SINT, 32_SINT, NONE, WZYX), + +// VT(R16G16_UNORM, 16_16_UNORM, 
R16G16_UNORM, WZYX), +// VT(R16G16_SNORM, 16_16_SNORM, R16G16_SNORM, WZYX), +// VT(R16G16_UINT, 16_16_UINT, R16G16_UINT, WZYX), +// VT(R16G16_SINT, 16_16_SINT, R16G16_SINT, WZYX), + V_(R16G16_USCALED, 16_16_UINT, NONE, WZYX), + V_(R16G16_SSCALED, 16_16_SINT, NONE, WZYX), + VT(R16G16_FLOAT, 16_16_FLOAT, R16G16_FLOAT, WZYX), + +// _T(L16A16_UNORM, 16_16_UNORM, NONE, WZYX), +// _T(L16A16_SNORM, 16_16_SNORM, NONE, WZYX), +// _T(L16A16_UINT, 16_16_UINT, NONE, WZYX), +// _T(L16A16_SINT, 16_16_SINT, NONE, WZYX), + + VT(R8G8B8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + _T(R8G8B8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + _T(R8G8B8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), + _T(R8G8B8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WZYX), +// VT(R8G8B8A8_SNORM, 8_8_8_8_SNORM, R8G8B8A8_SNORM, WZYX), +// VT(R8G8B8A8_UINT, 8_8_8_8_UINT, R8G8B8A8_UINT, WZYX), +// VT(R8G8B8A8_SINT, 8_8_8_8_SINT, R8G8B8A8_SINT, WZYX), + V_(R8G8B8A8_USCALED, 8_8_8_8_UINT, NONE, WZYX), + V_(R8G8B8A8_SSCALED, 8_8_8_8_SINT, NONE, WZYX), + + VT(B8G8R8A8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + _T(B8G8R8X8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + VT(B8G8R8A8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + _T(B8G8R8X8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, WXYZ), + + VT(A8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + _T(X8B8G8R8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + _T(A8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + _T(X8B8G8R8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, XYZW), + + VT(A8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + _T(X8R8G8B8_UNORM, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + _T(A8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + _T(X8R8G8B8_SRGB, 8_8_8_8_UNORM, R8G8B8A8_UNORM, ZYXW), + +// VT(R10G10B10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WZYX), +// VT(B10G10R10A2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), +// _T(B10G10R10X2_UNORM, 10_10_10_2_UNORM, R10G10B10A2_UNORM, WXYZ), +// V_(R10G10B10A2_SNORM, 10_10_10_2_SNORM, NONE, 
WZYX), +// V_(B10G10R10A2_SNORM, 10_10_10_2_SNORM, NONE, WXYZ), +// VT(R10G10B10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WZYX), +// VT(B10G10R10A2_UINT, 10_10_10_2_UINT, R10G10B10A2_UINT, WXYZ), +// V_(R10G10B10A2_USCALED, 10_10_10_2_UINT, NONE, WZYX), +// V_(B10G10R10A2_USCALED, 10_10_10_2_UINT, NONE, WXYZ), +// V_(R10G10B10A2_SSCALED, 10_10_10_2_SINT, NONE, WZYX), +// V_(B10G10R10A2_SSCALED, 10_10_10_2_SINT, NONE, WXYZ), + +// VT(R11G11B10_FLOAT, 11_11_10_FLOAT, R11G11B10_FLOAT, WZYX), +// _T(R9G9B9E5_FLOAT, 9_9_9_E5_FLOAT, NONE, WZYX), + + _T(Z24X8_UNORM, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), + _T(Z24_UNORM_S8_UINT, X8Z24_UNORM, R8G8B8A8_UNORM, WZYX), + _T(Z32_FLOAT, 32_FLOAT, R8G8B8A8_UNORM, WZYX), + _T(Z32_FLOAT_S8X24_UINT, 32_FLOAT,R8G8B8A8_UNORM, WZYX), + + /* 48-bit */ + V_(R16G16B16_UNORM, 16_16_16_UNORM, NONE, WZYX), + V_(R16G16B16_SNORM, 16_16_16_SNORM, NONE, WZYX), + V_(R16G16B16_UINT, 16_16_16_UINT, NONE, WZYX), + V_(R16G16B16_SINT, 16_16_16_SINT, NONE, WZYX), + V_(R16G16B16_USCALED, 16_16_16_UINT, NONE, WZYX), + V_(R16G16B16_SSCALED, 16_16_16_SINT, NONE, WZYX), + V_(R16G16B16_FLOAT, 16_16_16_FLOAT, NONE, WZYX), + + /* 64-bit */ +// VT(R16G16B16A16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), +// VT(R16G16B16X16_UNORM, 16_16_16_16_UNORM, R16G16B16A16_UNORM, WZYX), +// VT(R16G16B16A16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), +// VT(R16G16B16X16_SNORM, 16_16_16_16_SNORM, R16G16B16A16_SNORM, WZYX), +// VT(R16G16B16A16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), +// _T(R16G16B16X16_UINT, 16_16_16_16_UINT, R16G16B16A16_UINT, WZYX), +// VT(R16G16B16A16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), +// _T(R16G16B16X16_SINT, 16_16_16_16_SINT, R16G16B16A16_SINT, WZYX), + V_(R16G16B16A16_USCALED, 16_16_16_16_UINT, NONE, WZYX), + V_(R16G16B16A16_SSCALED, 16_16_16_16_SINT, NONE, WZYX), + VT(R16G16B16A16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX), + _T(R16G16B16X16_FLOAT, 16_16_16_16_FLOAT, R16G16B16A16_FLOAT, WZYX), + +// 
VT(R32G32_UINT, 32_32_UINT, R32G32_UINT, WZYX), +// VT(R32G32_SINT, 32_32_SINT, R32G32_SINT, WZYX), + V_(R32G32_USCALED, 32_32_UINT, NONE, WZYX), + V_(R32G32_SSCALED, 32_32_SINT, NONE, WZYX), + VT(R32G32_FLOAT, 32_32_FLOAT, R32G32_FLOAT,WZYX), +// V_(R32G32_FIXED, 32_32_FIXED, NONE, WZYX), + +// _T(L32A32_UINT, 32_32_UINT, NONE, WZYX), +// _T(L32A32_SINT, 32_32_SINT, NONE, WZYX), + + /* 96-bit */ +// VT(R32G32B32_UINT, 32_32_32_UINT, NONE, WZYX), +// VT(R32G32B32_SINT, 32_32_32_SINT, NONE, WZYX), + V_(R32G32B32_USCALED, 32_32_32_UINT, NONE, WZYX), + V_(R32G32B32_SSCALED, 32_32_32_SINT, NONE, WZYX), + V_(R32G32B32_FLOAT, 32_32_32_FLOAT, NONE, WZYX), + V_(R32G32B32_FIXED, 32_32_32_FIXED, NONE, WZYX), + + /* 128-bit */ + V_(R32G32B32A32_UINT, 32_32_32_32_UINT, NONE, WZYX), +// _T(R32G32B32X32_UINT, 32_32_32_32_UINT, R32G32B32A32_UINT, WZYX), +// VT(R32G32B32A32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX), +// _T(R32G32B32X32_SINT, 32_32_32_32_SINT, R32G32B32A32_SINT, WZYX), + V_(R32G32B32A32_USCALED, 32_32_32_32_UINT, NONE, WZYX), + V_(R32G32B32A32_SSCALED, 32_32_32_32_SINT, NONE, WZYX), + V_(R32G32B32A32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), + _T(R32G32B32X32_FLOAT, 32_32_32_32_FLOAT, R32G32B32A32_FLOAT, WZYX), + V_(R32G32B32A32_FIXED, 32_32_32_32_FIXED, NONE, WZYX), + + /* compressed */ +// _T(ETC1_RGB8, ETC1, NONE, WZYX), +// _T(ETC2_RGB8, ETC2_RGB8, NONE, WZYX), +// _T(ETC2_SRGB8, ETC2_RGB8, NONE, WZYX), +// _T(ETC2_RGB8A1, ETC2_RGB8A1, NONE, WZYX), +// _T(ETC2_SRGB8A1, ETC2_RGB8A1, NONE, WZYX), +// _T(ETC2_RGBA8, ETC2_RGBA8, NONE, WZYX), +// _T(ETC2_SRGBA8, ETC2_RGBA8, NONE, WZYX), +// _T(ETC2_R11_UNORM, ETC2_R11_UNORM, NONE, WZYX), +// _T(ETC2_R11_SNORM, ETC2_R11_SNORM, NONE, WZYX), +// _T(ETC2_RG11_UNORM, ETC2_RG11_UNORM, NONE, WZYX), +// _T(ETC2_RG11_SNORM, ETC2_RG11_SNORM, NONE, WZYX), + +// _T(DXT1_RGB, DXT1, NONE, WZYX), +// _T(DXT1_SRGB, DXT1, NONE, WZYX), +// _T(DXT1_RGBA, DXT1, NONE, WZYX), +// _T(DXT1_SRGBA, DXT1, NONE, WZYX), +// 
_T(DXT3_RGBA, DXT3, NONE, WZYX), +// _T(DXT3_SRGBA, DXT3, NONE, WZYX), +// _T(DXT5_RGBA, DXT5, NONE, WZYX), +// _T(DXT5_SRGBA, DXT5, NONE, WZYX), + +// _T(BPTC_RGBA_UNORM, BPTC, NONE, WZYX), +// _T(BPTC_SRGBA, BPTC, NONE, WZYX), +// _T(BPTC_RGB_FLOAT, BPTC_FLOAT, NONE, WZYX), +// _T(BPTC_RGB_UFLOAT, BPTC_UFLOAT, NONE, WZYX), + +// _T(RGTC1_UNORM, RGTC1_UNORM, NONE, WZYX), +// _T(RGTC1_SNORM, RGTC1_SNORM, NONE, WZYX), +// _T(RGTC2_UNORM, RGTC2_UNORM, NONE, WZYX), +// _T(RGTC2_SNORM, RGTC2_SNORM, NONE, WZYX), +// _T(LATC1_UNORM, RGTC1_UNORM, NONE, WZYX), +// _T(LATC1_SNORM, RGTC1_SNORM, NONE, WZYX), +// _T(LATC2_UNORM, RGTC2_UNORM, NONE, WZYX), +// _T(LATC2_SNORM, RGTC2_SNORM, NONE, WZYX), + +// _T(ASTC_4x4, ASTC_4x4, NONE, WZYX), +// _T(ASTC_5x4, ASTC_5x4, NONE, WZYX), +// _T(ASTC_5x5, ASTC_5x5, NONE, WZYX), +// _T(ASTC_6x5, ASTC_6x5, NONE, WZYX), +// _T(ASTC_6x6, ASTC_6x6, NONE, WZYX), +// _T(ASTC_8x5, ASTC_8x5, NONE, WZYX), +// _T(ASTC_8x6, ASTC_8x6, NONE, WZYX), +// _T(ASTC_8x8, ASTC_8x8, NONE, WZYX), +// _T(ASTC_10x5, ASTC_10x5, NONE, WZYX), +// _T(ASTC_10x6, ASTC_10x6, NONE, WZYX), +// _T(ASTC_10x8, ASTC_10x8, NONE, WZYX), +// _T(ASTC_10x10, ASTC_10x10, NONE, WZYX), +// _T(ASTC_12x10, ASTC_12x10, NONE, WZYX), +// _T(ASTC_12x12, ASTC_12x12, NONE, WZYX), + +// _T(ASTC_4x4_SRGB, ASTC_4x4, NONE, WZYX), +// _T(ASTC_5x4_SRGB, ASTC_5x4, NONE, WZYX), +// _T(ASTC_5x5_SRGB, ASTC_5x5, NONE, WZYX), +// _T(ASTC_6x5_SRGB, ASTC_6x5, NONE, WZYX), +// _T(ASTC_6x6_SRGB, ASTC_6x6, NONE, WZYX), +// _T(ASTC_8x5_SRGB, ASTC_8x5, NONE, WZYX), +// _T(ASTC_8x6_SRGB, ASTC_8x6, NONE, WZYX), +// _T(ASTC_8x8_SRGB, ASTC_8x8, NONE, WZYX), +// _T(ASTC_10x5_SRGB, ASTC_10x5, NONE, WZYX), +// _T(ASTC_10x6_SRGB, ASTC_10x6, NONE, WZYX), +// _T(ASTC_10x8_SRGB, ASTC_10x8, NONE, WZYX), +// _T(ASTC_10x10_SRGB, ASTC_10x10, NONE, WZYX), +// _T(ASTC_12x10_SRGB, ASTC_12x10, NONE, WZYX), +// _T(ASTC_12x12_SRGB, ASTC_12x12, NONE, WZYX), +}; + +/* convert pipe format to vertex buffer format: */ +enum 
a5xx_vtx_fmt +fd5_pipe2vtx(enum pipe_format format) +{ + if (!formats[format].present) + return ~0; + return formats[format].vtx; +} + +/* convert pipe format to texture sampler format: */ +enum a5xx_tex_fmt +fd5_pipe2tex(enum pipe_format format) +{ + if (!formats[format].present) + return ~0; + return formats[format].tex; +} + +/* convert pipe format to MRT / copydest format used for render-target: */ +enum a5xx_color_fmt +fd5_pipe2color(enum pipe_format format) +{ + if (!formats[format].present) + return ~0; + return formats[format].rb; +} + +enum a3xx_color_swap +fd5_pipe2swap(enum pipe_format format) +{ + if (!formats[format].present) + return WZYX; + return formats[format].swap; +} + +// XXX possibly same as a4xx.. +enum a5xx_tex_fetchsize +fd5_pipe2fetchsize(enum pipe_format format) +{ + if (format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) + format = PIPE_FORMAT_Z32_FLOAT; + + if (util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC) + return TFETCH5_16_BYTE; + + switch (util_format_get_blocksizebits(format) / util_format_get_blockwidth(format)) { + case 8: return TFETCH5_1_BYTE; + case 16: return TFETCH5_2_BYTE; + case 32: return TFETCH5_4_BYTE; + case 64: return TFETCH5_8_BYTE; + case 96: return TFETCH5_1_BYTE; /* Does this matter? 
*/ + case 128: return TFETCH5_16_BYTE; + default: + debug_printf("Unknown block size for format %s: %d\n", + util_format_name(format), + util_format_get_blocksizebits(format)); + return TFETCH5_1_BYTE; + } +} + +enum a5xx_depth_format +fd5_pipe2depth(enum pipe_format format) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return DEPTH5_16; + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_Z24_UNORM_S8_UINT: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8_UINT_Z24_UNORM: + return DEPTH5_24_8; + case PIPE_FORMAT_Z32_FLOAT: + case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + return DEPTH5_32; + default: + return ~0; + } +} + +static inline enum a5xx_tex_swiz +tex_swiz(unsigned swiz) +{ + switch (swiz) { + default: + case PIPE_SWIZZLE_X: return A5XX_TEX_X; + case PIPE_SWIZZLE_Y: return A5XX_TEX_Y; + case PIPE_SWIZZLE_Z: return A5XX_TEX_Z; + case PIPE_SWIZZLE_W: return A5XX_TEX_W; + case PIPE_SWIZZLE_0: return A5XX_TEX_ZERO; + case PIPE_SWIZZLE_1: return A5XX_TEX_ONE; + } +} + +uint32_t +fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g, + unsigned swizzle_b, unsigned swizzle_a) +{ + const struct util_format_description *desc = + util_format_description(format); + unsigned char swiz[4] = { + swizzle_r, swizzle_g, swizzle_b, swizzle_a, + }, rswiz[4]; + + util_format_compose_swizzles(desc->swizzle, swiz, rswiz); + + return A5XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) | + A5XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) | + A5XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) | + A5XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3])); +} diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_format.h b/src/gallium/drivers/freedreno/a5xx/fd5_format.h new file mode 100644 index 00000000000..b052aa52960 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_format.h @@ -0,0 +1,44 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD5_UTIL_H_ +#define FD5_UTIL_H_ + +#include "freedreno_util.h" + +#include "a5xx.xml.h" + +enum a5xx_vtx_fmt fd5_pipe2vtx(enum pipe_format format); +enum a5xx_tex_fmt fd5_pipe2tex(enum pipe_format format); +enum a5xx_color_fmt fd5_pipe2color(enum pipe_format format); +enum a3xx_color_swap fd5_pipe2swap(enum pipe_format format); +enum a5xx_tex_fetchsize fd5_pipe2fetchsize(enum pipe_format format); +enum a5xx_depth_format fd5_pipe2depth(enum pipe_format format); + +uint32_t fd5_tex_swiz(enum pipe_format format, unsigned swizzle_r, + unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); + +#endif /* FD5_UTIL_H_ */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c new file mode 100644 index 00000000000..d37c9d41f66 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.c @@ -0,0 +1,482 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any 
person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "freedreno_draw.h" +#include "freedreno_state.h" +#include "freedreno_resource.h" + +#include "fd5_gmem.h" +#include "fd5_context.h" +#include "fd5_draw.h" +#include "fd5_emit.h" +#include "fd5_program.h" +#include "fd5_format.h" +#include "fd5_zsa.h" + +static void +emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs, + struct pipe_surface **bufs, struct fd_gmem_stateobj *gmem) +{ + enum a5xx_tile_mode tile_mode; + unsigned i; + + if (gmem) { + tile_mode = TILE5_2; + } else { + tile_mode = TILE5_LINEAR; + } + + for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) { + enum a5xx_color_fmt format = 0; + enum a3xx_color_swap swap = WZYX; + bool srgb = false; + struct fd_resource *rsc = NULL; + struct fd_resource_slice *slice = NULL; + uint32_t stride = 0; + uint32_t size = 0; + uint32_t base = 0; + uint32_t offset = 0; + + if ((i < nr_bufs) && bufs[i]) { + struct pipe_surface *psurf = bufs[i]; + enum pipe_format pformat = psurf->format; + + rsc = fd_resource(psurf->texture); + + slice = fd_resource_slice(rsc, psurf->u.tex.level); + format = fd5_pipe2color(pformat); + swap = fd5_pipe2swap(pformat); + srgb = util_format_is_srgb(pformat); + + debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + + offset = fd_resource_offset(rsc, psurf->u.tex.level, + psurf->u.tex.first_layer); + + if (gmem) { + stride = gmem->bin_w * rsc->cpp; + size = stride * gmem->bin_h; + base = gmem->cbuf_base[i]; + } else { + stride = slice->pitch * rsc->cpp; + size = slice->size0; + } + } + + OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5); + OUT_RING(ring, A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) | + A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) | + A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) | + 0x800 | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. 
*/ + COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB)); + OUT_RING(ring, A5XX_RB_MRT_PITCH(stride)); + OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size)); + if (gmem || (i >= nr_bufs) || !bufs[i]) { + OUT_RING(ring, base); /* RB_MRT[i].BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */ + } else { + OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */ + } + + OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1); + OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format)); + + /* when we support UBWC, these would be the system memory + * addr/pitch/etc: + */ + OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4); + OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */ + OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */ + OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0)); + OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0)); + } +} + +static void +emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, + struct fd_gmem_stateobj *gmem) +{ + if (zsbuf) { + struct fd_resource *rsc = fd_resource(zsbuf->texture); + enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format); + uint32_t cpp = rsc->cpp; + uint32_t stride = 0; + uint32_t size = 0; + + if (gmem) { + stride = cpp * gmem->bin_w; + size = stride * gmem->bin_h; + } else { + struct fd_resource_slice *slice = fd_resource_slice(rsc, 0); + stride = slice->pitch * rsc->cpp; + size = slice->size0; + } + + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5); + OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); + if (gmem) { + OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */ + } else { + OUT_RELOCW(ring, rsc->bo, 0, 0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */ + } + OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride)); + OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size)); + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); + OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt)); + + 
OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3); + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */ + + if (rsc->stencil) { + if (gmem) { + stride = 1 * gmem->bin_w; + size = stride * gmem->bin_h; + } else { + struct fd_resource_slice *slice = fd_resource_slice(rsc->stencil, 0); + stride = slice->pitch * rsc->cpp; + size = slice->size0; + } + + OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5); + OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL); + if (gmem) { + OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_STENCIL_BASE_HI */ + } else { + OUT_RELOCW(ring, rsc->stencil->bo, 0, 0, 0); /* RB_STENCIL_BASE_LO/HI */ + } + OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride)); + OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size)); + } else { + OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1); + OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */ + } + } else { + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5); + OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE)); + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */ + + OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1); + OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE)); + + OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3); + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */ + OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */ + + OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1); + OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */ + } +} + +static void +patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode) 
+{ + unsigned i; + for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i); + *patch->cs = patch->val | DRAW4(0, 0, 0, vismode); + } + util_dynarray_resize(&batch->draw_patches, 0); +} + +/* before first tile */ +static void +fd5_emit_tile_init(struct fd_batch *batch) +{ + struct fd_ringbuffer *ring = batch->gmem; + + fd5_emit_restore(batch, ring); + + OUT_PKT7(ring, CP_EVENT_WRITE, 1); + OUT_RING(ring, UNK_26); + + OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1); + OUT_RING(ring, 0x0); + + OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */ + + OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1); + OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */ + + /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */ + fd_wfi(batch, ring); + OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1); + OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */ + +/* +opcode: CP_PREEMPT_ENABLE_LOCAL (6a) (2 dwords) + */ + + fd5_set_render_mode(batch->ctx, ring, GMEM); +} + +/* before mem2gmem */ +static void +fd5_emit_tile_prep(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_ringbuffer *ring = batch->gmem; + + uint32_t x1 = tile->xoff; + uint32_t y1 = tile->yoff; + uint32_t x2 = tile->xoff + tile->bin_w - 1; + uint32_t y2 = tile->yoff + tile->bin_h - 1; + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2); + OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) | + A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1)); + OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) | + A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2)); + + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | + A5XX_RB_RESOLVE_CNTL_1_Y(y1)); + OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | + A5XX_RB_RESOLVE_CNTL_2_Y(y2)); + + OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1); + OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | + A5XX_RB_WINDOW_OFFSET_Y(y1)); +} + + +/* + * transfer from system memory to gmem + 
*/ + +static void +emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base, + struct pipe_surface *psurf, enum a5xx_blit_buf buf) +{ + struct fd_ringbuffer *ring = batch->gmem; + struct fd_resource *rsc = fd_resource(psurf->texture); + struct fd_resource_slice *slice; + + slice = fd_resource_slice(rsc, psurf->u.tex.level); + + debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4); + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */ + + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5); + OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */ + OUT_RING(ring, base); /* RB_BLIT_DST_LO */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */ + OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0)); + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf)); + + fd5_emit_blit(batch->ctx, ring); +} + +static void +fd5_emit_tile_mem2gmem(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_ringbuffer *ring = batch->gmem; + struct fd_context *ctx = batch->ctx; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + + /* + * setup mrt and zs with system memory base addresses: + */ + + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL); + emit_zs(ring, pfb->zsbuf, NULL); + + OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); + OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) | + A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | + A5XX_RB_CNTL_BYPASS); + + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) { + unsigned i; + for (i = 0; i < pfb->nr_cbufs; i++) { + if (!pfb->cbufs[i]) + continue; + if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i))) + continue; + emit_mem2gmem_surf(batch, gmem->cbuf_base[i], + 
pfb->cbufs[i], BLIT_MRT0 + i); + } + } + + if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + // XXX BLIT_ZS vs BLIT_Z32 .. need some more cmdstream traces + // with z32_x24s8.. + if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) + emit_mem2gmem_surf(batch, ctx->gmem.zsbuf_base[0], pfb->zsbuf, BLIT_ZS); + if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) + emit_mem2gmem_surf(batch, ctx->gmem.zsbuf_base[1], pfb->zsbuf, BLIT_ZS); + } +} + + +/* before IB to rendering cmds: */ +static void +fd5_emit_tile_renderprep(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_ringbuffer *ring = batch->gmem; + struct fd_gmem_stateobj *gmem = &batch->ctx->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + + OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1); + OUT_RING(ring, 0x1); + + OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1); + OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) | + A5XX_RB_CNTL_HEIGHT(gmem->bin_h)); + + patch_draws(batch, IGNORE_VISIBILITY); + + emit_zs(ring, pfb->zsbuf, gmem); + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem); + + // TODO MSAA + OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE); + + OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A5XX_RB_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE); + + OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2); + OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(MSAA_ONE)); + OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(MSAA_ONE) | + A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE); +} + + +/* + * transfer from gmem to system memory (ie. 
normal RAM) + */ + +static void +emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base, + struct pipe_surface *psurf, enum a5xx_blit_buf buf) +{ + struct fd_ringbuffer *ring = batch->gmem; + struct fd_resource *rsc = fd_resource(psurf->texture); + struct fd_resource_slice *slice; + uint32_t offset; + + slice = fd_resource_slice(rsc, psurf->u.tex.level); + offset = fd_resource_offset(rsc, psurf->u.tex.level, + psurf->u.tex.first_layer); + + debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer); + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4); + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */ + OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */ + + OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5); + OUT_RING(ring, 0x00000004); /* XXX RB_RESOLVE_CNTL_3 */ + OUT_RELOCW(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */ + OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(slice->size0)); + + OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1); + OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf)); + + fd5_emit_blit(batch->ctx, ring); +} + +static void +fd5_emit_tile_gmem2mem(struct fd_batch *batch, struct fd_tile *tile) +{ + struct fd_context *ctx = batch->ctx; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct pipe_framebuffer_state *pfb = &batch->framebuffer; + + if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture); + // XXX BLIT_ZS vs BLIT_Z32 .. need some more cmdstream traces + // with z32_x24s8.. 
+ if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) + emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS); + if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) + emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_ZS); + } + + if (batch->resolve & FD_BUFFER_COLOR) { + unsigned i; + for (i = 0; i < pfb->nr_cbufs; i++) { + if (!pfb->cbufs[i]) + continue; + if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i))) + continue; + emit_gmem2mem_surf(batch, gmem->cbuf_base[i], + pfb->cbufs[i], BLIT_MRT0 + i); + } + } +} + +static void +fd5_emit_tile_fini(struct fd_batch *batch) +{ + fd5_cache_flush(batch, batch->gmem); + fd5_set_render_mode(batch->ctx, batch->gmem, BYPASS); +} + +void +fd5_gmem_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + ctx->emit_tile_init = fd5_emit_tile_init; + ctx->emit_tile_prep = fd5_emit_tile_prep; + ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem; + ctx->emit_tile_renderprep = fd5_emit_tile_renderprep; + ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem; + ctx->emit_tile_fini = fd5_emit_tile_fini; +} diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_gmem.h b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.h new file mode 100644 index 00000000000..7794bfb3383 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_gmem.h @@ -0,0 +1,34 @@ +/* + * Copyright (C) 2015 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial 
portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD5_GMEM_H_ +#define FD5_GMEM_H_ + +#include "pipe/p_context.h" + +void fd5_gmem_init(struct pipe_context *pctx); + +#endif /* FD5_GMEM_H_ */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.c b/src/gallium/drivers/freedreno/a5xx/fd5_program.c new file mode 100644 index 00000000000..dbb1a7cfe90 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.c @@ -0,0 +1,608 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/bitset.h"
+
+#include "freedreno_program.h"
+
+#include "fd5_program.h"
+#include "fd5_emit.h"
+#include "fd5_texture.h"
+#include "fd5_format.h"
+
+/* Destroy the ir3 shader held by the state object, then free the object. */
+static void
+delete_shader_stateobj(struct fd5_shader_stateobj *so)
+{
+	ir3_shader_destroy(so->shader);
+	free(so);
+}
+
+/* Allocate a shader state object and create its ir3 shader of the given
+ * type from the gallium shader CSO.
+ *
+ * Returns NULL if the state object cannot be allocated.
+ */
+static struct fd5_shader_stateobj *
+create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso,
+		enum shader_t type)
+{
+	struct fd_context *ctx = fd_context(pctx);
+	struct ir3_compiler *compiler = ctx->screen->compiler;
+	struct fd5_shader_stateobj *so = CALLOC_STRUCT(fd5_shader_stateobj);
+	/* same allocation-failure handling as fd5_rasterizer_state_create(): */
+	if (!so)
+		return NULL;
+	so->shader = ir3_shader_create(compiler, cso, type, &ctx->debug);
+	return so;
+}
+
+/* pipe_context::create_fs_state hook */
+static void *
+fd5_fp_state_create(struct pipe_context *pctx,
+		const struct pipe_shader_state *cso)
+{
+	return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT);
+}
+
+/* pipe_context::delete_fs_state hook */
+static void
+fd5_fp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd5_shader_stateobj *so = hwcso;
+	delete_shader_stateobj(so);
+}
+
+/* pipe_context::create_vs_state hook */
+static void *
+fd5_vp_state_create(struct pipe_context *pctx,
+		const struct pipe_shader_state *cso)
+{
+	return create_shader_stateobj(pctx, cso, SHADER_VERTEX);
+}
+
+/* pipe_context::delete_vs_state hook */
+static void
+fd5_vp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+	struct fd5_shader_stateobj *so = hwcso;
+	delete_shader_stateobj(so);
+}
+
+/* Emit a CP_LOAD_STATE packet for one shader variant.
+ *
+ * With FD_DBG_DIRECT set, the shader dwords are read from the mapped bo
+ * and written inline into the packet; otherwise the packet carries a
+ * reloc to the shader bo and no inline payload (sz == 0).
+ */
+static void
+emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
+{
+	const struct ir3_info *si = &so->info;
+	enum adreno_state_block sb;
+	enum adreno_state_src src;
+	uint32_t i, sz, *bin;
+
+	if (so->type == SHADER_VERTEX) {
+		sb = SB_VERT_SHADER;
+	} else {
+		sb = SB_FRAG_SHADER;
+	}
+
+	if (fd_mesa_debug & FD_DBG_DIRECT) {
+		sz = si->sizedwords;
+		src = SS_DIRECT;
+		bin = fd_bo_map(so->bo);
+	} else {
+		sz = 0;
+		src = 2;  // enums different on a5xx..
+		bin = NULL;
+	}
+
+	OUT_PKT7(ring, CP_LOAD_STATE, 3 + sz);
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+			CP_LOAD_STATE_0_STATE_SRC(src) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
+	if (bin) {
+		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
+		OUT_RING(ring, CP_LOAD_STATE_2_EXT_SRC_ADDR_HI(0));
+	} else {
+		OUT_RELOC(ring, so->bo, 0,
+				CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
+	}
+
+	/* for how clever coverity is, it is sometimes rather dull, and
+	 * doesn't realize that the only case where bin==NULL, sz==0:
+	 */
+	assume(bin || (sz == 0));
+
+	for (i = 0; i < sz; i++) {
+		OUT_RING(ring, bin[i]);
+	}
+}
+
+/* Per-stage layout computed by setup_stages(); v and i are NULL for a
+ * stage that has no shader variant.
+ */
+struct stage {
+	const struct ir3_shader_variant *v;
+	const struct ir3_info *i;
+	/* const sizes are in units of 4 * vec4 */
+	uint8_t constoff;
+	uint8_t constlen;
+	/* instr sizes are in units of 16 instructions */
+	uint8_t instroff;
+	uint8_t instrlen;
+};
+
+/* Indices into the stage array; the const offsets are assigned in this order. */
+enum {
+	VS = 0,
+	FS = 1,
+	HS = 2,
+	DS = 3,
+	GS = 4,
+	MAX_STAGES
+};
+
+/* Fill s[0..MAX_STAGES-1] from the emit state: pick the VS/FS variants
+ * (HS/DS/GS are left NULL), derive const and instruction lengths, decide
+ * which shaders get instrlen 0 (run from external memory) and assign the
+ * const and instruction offsets.
+ */
+static void
+setup_stages(struct fd5_emit *emit, struct stage *s)
+{
+	unsigned i;
+
+	s[VS].v = fd5_emit_get_vp(emit);
+	s[FS].v = fd5_emit_get_fp(emit);
+
+	s[HS].v = s[DS].v = s[GS].v = NULL;  /* for now */
+
+	for (i = 0; i < MAX_STAGES; i++) {
+		if (s[i].v) {
+			s[i].i = &s[i].v->info;
+			/* constlen is in units of 4 * vec4: */
+			s[i].constlen = align(s[i].v->constlen, 4) / 4;
+			/* instrlen is already in units of 16 instr.. although
+			 * probably we should ditch that and not make the compiler
+			 * care about instruction group size of a3xx vs a5xx
+			 */
+			s[i].instrlen = s[i].v->instrlen;
+		} else {
+			s[i].i = NULL;
+			s[i].constlen = 0;
+			s[i].instrlen = 0;
+		}
+	}
+
+	/* NOTE: at least for gles2, blob partitions VS at bottom of const
+	 * space and FS taking entire remaining space.  We probably don't
+	 * need to do that the same way, but for now mimic what the blob
+	 * does to make it easier to diff against register values from blob
+	 *
+	 * NOTE: if VS.instrlen + FS.instrlen > 64, then one or both shaders
+	 * is run from external memory.
+	 */
+	if ((s[VS].instrlen + s[FS].instrlen) > 64) {
+		/* prioritize FS for internal memory: */
+		if (s[FS].instrlen < 64) {
+			/* if FS can fit, kick VS out to external memory: */
+			s[VS].instrlen = 0;
+		} else if (s[VS].instrlen < 64) {
+			/* otherwise if VS can fit, kick out FS: */
+			s[FS].instrlen = 0;
+		} else {
+			/* neither can fit, run both from external memory: */
+			s[VS].instrlen = 0;
+			s[FS].instrlen = 0;
+		}
+	}
+
+	/* const space is handed out contiguously, in stage-index order: */
+	unsigned constoff = 0;
+	for (i = 0; i < MAX_STAGES; i++) {
+		s[i].constoff = constoff;
+		constoff += s[i].constlen;
+	}
+
+	s[VS].instroff = 0;
+	s[FS].instroff = 64 - s[FS].instrlen;
+	s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
+}
+
+/* Emit the program state (HLSQ/SP/VPC/PC/RB/VFD registers, VS->FS linkage,
+ * varying interpolation setup and the shader binaries) for the VS/FS pair
+ * selected by 'emit'.
+ *
+ * nr is the number of render targets written to the *_FS_OUTPUT_CNTL MRT
+ * fields; it is forced to 0 for the binning pass.  bufs is not referenced
+ * in this function.
+ */
+void
+fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit,
+		int nr, struct pipe_surface **bufs)
+{
+	struct stage s[MAX_STAGES];
+	uint32_t pos_regid, posz_regid, psize_regid, color_regid[8];
+	uint32_t face_regid, coord_regid, zwcoord_regid;
+	uint32_t vcoord_regid, vertex_regid, instance_regid;
+	int i, j;
+
+	debug_assert(nr <= ARRAY_SIZE(color_regid));
+
+	if (emit->key.binning_pass)
+		nr = 0;
+
+	setup_stages(emit, s);
+
+	pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
+	posz_regid = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DEPTH);
+	psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
+	vertex_regid = ir3_find_output_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
+	instance_regid = ir3_find_output_regid(s[VS].v, SYSTEM_VALUE_INSTANCE_ID);
+
+	if (s[FS].v->color0_mrt) {
+		color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] =
+		color_regid[4] = color_regid[5] = color_regid[6] = color_regid[7] =
+			ir3_find_output_regid(s[FS].v, FRAG_RESULT_COLOR);
+	} else {
+		color_regid[0] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA0);
+		color_regid[1] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA1);
+		color_regid[2] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA2);
+		color_regid[3] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA3);
+		color_regid[4] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA4);
+		color_regid[5] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA5);
+		color_regid[6] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA6);
+		color_regid[7] = ir3_find_output_regid(s[FS].v, FRAG_RESULT_DATA7);
+	}
+
+	/* TODO get these dynamically: */
+	face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
+	coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
+	zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
+	vcoord_regid = (s[FS].v->total_in > 0) ? regid(0,0) : regid(63,0);
+
+	/* we could probably divide this up into things that need to be
+	 * emitted if frag-prog is dirty vs if vert-prog is dirty..
+	 */
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONTROL_REG, 5);
+	OUT_RING(ring, A5XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
+			A5XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff) |
+			COND(s[VS].v, A5XX_HLSQ_VS_CONTROL_REG_ENABLED));
+	OUT_RING(ring, A5XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+			A5XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff) |
+			COND(s[FS].v, A5XX_HLSQ_FS_CONTROL_REG_ENABLED));
+	OUT_RING(ring, A5XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
+			A5XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff) |
+			COND(s[HS].v, A5XX_HLSQ_HS_CONTROL_REG_ENABLED));
+	OUT_RING(ring, A5XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
+			A5XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff) |
+			COND(s[DS].v, A5XX_HLSQ_DS_CONTROL_REG_ENABLED));
+	OUT_RING(ring, A5XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
+			A5XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff) |
+			COND(s[GS].v, A5XX_HLSQ_GS_CONTROL_REG_ENABLED));
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_CS_CONFIG, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CNTL, 5);
+	OUT_RING(ring, A5XX_HLSQ_VS_CNTL_INSTRLEN(s[VS].instrlen));
+	OUT_RING(ring, A5XX_HLSQ_FS_CNTL_INSTRLEN(s[FS].instrlen));
+	OUT_RING(ring, A5XX_HLSQ_HS_CNTL_INSTRLEN(s[HS].instrlen));
+	OUT_RING(ring, A5XX_HLSQ_DS_CNTL_INSTRLEN(s[DS].instrlen));
+	OUT_RING(ring, A5XX_HLSQ_GS_CNTL_INSTRLEN(s[GS].instrlen));
+
+	OUT_PKT4(ring, REG_A5XX_SP_VS_CONTROL_REG, 5);
+	OUT_RING(ring, A5XX_SP_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
+			A5XX_SP_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff) |
+			COND(s[VS].v, A5XX_SP_VS_CONTROL_REG_ENABLED));
+	OUT_RING(ring, A5XX_SP_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+			A5XX_SP_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff) |
+			COND(s[FS].v, A5XX_SP_FS_CONTROL_REG_ENABLED));
+	OUT_RING(ring, A5XX_SP_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
+			A5XX_SP_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff) |
+			COND(s[HS].v, A5XX_SP_HS_CONTROL_REG_ENABLED));
+	OUT_RING(ring, A5XX_SP_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
+			A5XX_SP_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff) |
+			COND(s[DS].v, A5XX_SP_DS_CONTROL_REG_ENABLED));
+	OUT_RING(ring, A5XX_SP_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
+			A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff) |
+			COND(s[GS].v, A5XX_SP_GS_CONTROL_REG_ENABLED));
+
+	OUT_PKT4(ring, REG_A5XX_SP_CS_CONFIG, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_VS_CONSTLEN, 2);
+	OUT_RING(ring, s[VS].constlen);    /* HLSQ_VS_CONSTLEN */
+	OUT_RING(ring, s[VS].instrlen);    /* HLSQ_VS_INSTRLEN */
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_FS_CONSTLEN, 2);
+	OUT_RING(ring, s[FS].constlen);    /* HLSQ_FS_CONSTLEN */
+	OUT_RING(ring, s[FS].instrlen);    /* HLSQ_FS_INSTRLEN */
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_HS_CONSTLEN, 2);
+	OUT_RING(ring, s[HS].constlen);    /* HLSQ_HS_CONSTLEN */
+	OUT_RING(ring, s[HS].instrlen);    /* HLSQ_HS_INSTRLEN */
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_DS_CONSTLEN, 2);
+	OUT_RING(ring, s[DS].constlen);    /* HLSQ_DS_CONSTLEN */
+	OUT_RING(ring, s[DS].instrlen);    /* HLSQ_DS_INSTRLEN */
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_GS_CONSTLEN, 2);
+	OUT_RING(ring, s[GS].constlen);    /* HLSQ_GS_CONSTLEN */
+	OUT_RING(ring, s[GS].instrlen);    /* HLSQ_GS_INSTRLEN */
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_CONTEXT_SWITCH_CS_SW_3, 2);
+	OUT_RING(ring, 0x00000000);   /* HLSQ_CONTEXT_SWITCH_CS_SW_3 */
+	OUT_RING(ring, 0x00000000);   /* HLSQ_CONTEXT_SWITCH_CS_SW_4 */
+
+	OUT_PKT4(ring, REG_A5XX_SP_VS_CTRL_REG0, 1);
+	OUT_RING(ring, A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
+			A5XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
+			0x6 | /* XXX seems to be always set? */
+			A5XX_SP_VS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+			COND(s[VS].v->has_samp, A5XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+
+	struct ir3_shader_linkage l = {0};
+	ir3_link_shaders(&l, s[VS].v, s[FS].v);
+
+	/* a5xx appends pos/psize to end of the linkage map: */
+	if (pos_regid != regid(63,0))
+		ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
+
+	if (psize_regid != regid(63,0))
+		ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
+
+	/* two linkage entries (A/B) are packed into each SP_VS_OUT_REG: */
+	for (i = 0, j = 0; (i < 16) && (j < l.cnt); i++) {
+		uint32_t reg = 0;
+
+		OUT_PKT4(ring, REG_A5XX_SP_VS_OUT_REG(i), 1);
+
+		reg |= A5XX_SP_VS_OUT_REG_A_REGID(l.var[j].regid);
+		reg |= A5XX_SP_VS_OUT_REG_A_COMPMASK(l.var[j].compmask);
+		j++;
+
+		reg |= A5XX_SP_VS_OUT_REG_B_REGID(l.var[j].regid);
+		reg |= A5XX_SP_VS_OUT_REG_B_COMPMASK(l.var[j].compmask);
+		j++;
+
+		OUT_RING(ring, reg);
+	}
+
+	/* four output locations are packed into each SP_VS_VPC_DST_REG: */
+	for (i = 0, j = 0; (i < 8) && (j < l.cnt); i++) {
+		uint32_t reg = 0;
+
+		OUT_PKT4(ring, REG_A5XX_SP_VS_VPC_DST_REG(i), 1);
+
+		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC0(l.var[j++].loc);
+		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC1(l.var[j++].loc);
+		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC2(l.var[j++].loc);
+		reg |= A5XX_SP_VS_VPC_DST_REG_OUTLOC3(l.var[j++].loc);
+
+		OUT_RING(ring, reg);
+	}
+
+	OUT_PKT4(ring, REG_A5XX_SP_VS_OBJ_START_LO, 2);
+	OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0);  /* SP_VS_OBJ_START_LO/HI */
+
+	if (s[VS].instrlen)
+		emit_shader(ring, s[VS].v);
+
+	/* one bit per component location used by the linkage; the inverse
+	 * is written to the VPC_VAR[].DISABLE registers below:
+	 */
+	BITSET_DECLARE(varbs, 128) = {0};
+	uint32_t *varmask = (uint32_t *)varbs;
+
+	for (i = 0; i < l.cnt; i++)
+		for (j = 0; j < util_last_bit(l.var[i].compmask); j++)
+			BITSET_SET(varbs, l.var[i].loc + j);
+
+	OUT_PKT4(ring, REG_A5XX_VPC_VAR_DISABLE(0), 4);
+	OUT_RING(ring, ~varmask[0]);  /* VPC_VAR[0].DISABLE */
+	OUT_RING(ring, ~varmask[1]);  /* VPC_VAR[1].DISABLE */
+	OUT_RING(ring, ~varmask[2]);  /* VPC_VAR[2].DISABLE */
+	OUT_RING(ring, ~varmask[3]);  /* VPC_VAR[3].DISABLE */
+
+	// TODO depending on other bits in this reg (if any) set somewhere else?
+	OUT_PKT4(ring, REG_A5XX_PC_PRIM_VTX_CNTL, 1);
+	OUT_RING(ring, COND(s[VS].v->writes_psize, A5XX_PC_PRIM_VTX_CNTL_PSIZE));
+
+	if (emit->key.binning_pass) {
+		OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
+		OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_LO */
+		OUT_RING(ring, 0x00000000);    /* SP_FS_OBJ_START_HI */
+	} else {
+		uint32_t stride_in_vpc = align(s[FS].v->total_in, 4) + 4;
+
+		if (s[VS].v->writes_psize)
+			stride_in_vpc++;
+
+		// TODO if some of these other bits depend on something other than
+		// program state we should probably move these next three regs:
+
+		OUT_PKT4(ring, REG_A5XX_SP_PRIMITIVE_CNTL, 1);
+		OUT_RING(ring, A5XX_SP_PRIMITIVE_CNTL_VSOUT(l.cnt));
+
+		OUT_PKT4(ring, REG_A5XX_VPC_CNTL_0, 1);
+		OUT_RING(ring, A5XX_VPC_CNTL_0_STRIDE_IN_VPC(stride_in_vpc) |
+				COND(s[FS].v->total_in > 0, A5XX_VPC_CNTL_0_VARYING) |
+				0x10000);    // XXX
+
+		OUT_PKT4(ring, REG_A5XX_PC_PRIMITIVE_CNTL, 1);
+		OUT_RING(ring, A5XX_PC_PRIMITIVE_CNTL_STRIDE_IN_VPC(stride_in_vpc) |
+				0x400);      // XXX
+
+		OUT_PKT4(ring, REG_A5XX_SP_FS_OBJ_START_LO, 2);
+		OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0);  /* SP_FS_OBJ_START_LO/HI */
+	}
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
+	OUT_RING(ring, 0x00000881);        /* XXX HLSQ_CONTROL_0 */
+	OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63));
+	OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
+			0xfcfcfc00);               /* XXX */
+	OUT_RING(ring, A5XX_HLSQ_CONTROL_3_REG_FRAGCOORDXYREGID(vcoord_regid) |
+			0xfcfcfc00);               /* XXX */
+	OUT_RING(ring, A5XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) |
+			A5XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) |
+			0x0000fcfc);               /* XXX */
+
+	OUT_PKT4(ring, REG_A5XX_GRAS_CNTL, 1);
+	OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_GRAS_CNTL_VARYING));
+
+	OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1);
+	OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
+			0x4000e | /* XXX set pretty much everywhere */
+			A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
+			A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
+			A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) |  // XXX need to figure this out somehow..
+			COND(s[FS].v->has_samp, A5XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+
+	OUT_PKT4(ring, REG_A5XX_HLSQ_UPDATE_CNTL, 1);
+	OUT_RING(ring, 0x020fffff);        /* XXX */
+
+	OUT_PKT4(ring, REG_A5XX_VPC_GS_SIV_CNTL, 1);
+	OUT_RING(ring, 0x0000ffff);        /* XXX */
+
+	OUT_PKT4(ring, REG_A5XX_SP_SP_CNTL, 1);
+	OUT_RING(ring, 0x00000010);        /* XXX */
+
+	OUT_PKT4(ring, REG_A5XX_RB_RENDER_CONTROL0, 3);
+	OUT_RING(ring,
+			COND(s[FS].v->total_in > 0, A5XX_RB_RENDER_CONTROL0_VARYING) |
+			COND(s[FS].v->frag_coord, A5XX_RB_RENDER_CONTROL0_XCOORD |
+					A5XX_RB_RENDER_CONTROL0_YCOORD |
+					A5XX_RB_RENDER_CONTROL0_ZCOORD |
+					A5XX_RB_RENDER_CONTROL0_WCOORD));
+	OUT_RING(ring,
+			COND(s[FS].v->frag_face, A5XX_RB_RENDER_CONTROL1_FACENESS));
+	OUT_RING(ring, A5XX_RB_FS_OUTPUT_CNTL_MRT(nr) |
+			COND(s[FS].v->writes_pos, A5XX_RB_FS_OUTPUT_CNTL_FRAG_WRITES_Z));
+
+	OUT_PKT4(ring, REG_A5XX_SP_FS_OUTPUT_CNTL, 9);
+	OUT_RING(ring, A5XX_SP_FS_OUTPUT_CNTL_MRT(nr) |
+			A5XX_SP_FS_OUTPUT_CNTL_DEPTH_REGID(posz_regid) |
+			A5XX_SP_FS_OUTPUT_CNTL_SAMPLEMASK_REGID(regid(63, 0)));
+	for (i = 0; i < 8; i++) {
+		OUT_RING(ring, A5XX_SP_FS_OUTPUT_REG_REGID(color_regid[i]) |
+				COND(emit->key.half_precision,
+					A5XX_SP_FS_OUTPUT_REG_HALF_PRECISION));
+	}
+
+	if (emit->key.binning_pass) {
+		OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
+		OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(0));
+	} else {
+		uint32_t vinterp[8], vpsrepl[8];
+
+		memset(vinterp, 0, sizeof(vinterp));
+		memset(vpsrepl, 0, sizeof(vpsrepl));
+
+		/* looks like we need to do int varyings in the frag
+		 * shader on a5xx (no flatshad reg?  or a420.0 bug?):
+		 *
+		 *    (sy)(ss)nop
+		 *    (sy)ldlv.u32 r0.x,l[r0.x], 1
+		 *    ldlv.u32 r0.y,l[r0.x+1], 1
+		 *    (ss)bary.f (ei)r63.x, 0, r0.x
+		 *    (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
+		 *    (rpt5)nop
+		 *    sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
+		 *
+		 * Possibly on later a5xx variants we'll be able to use
+		 * something like the code below instead of workaround
+		 * in the shader:
+		 */
+		/* figure out VARYING_INTERP / VARYING_PS_REPL register values: */
+		for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
+			/* NOTE: varyings are packed, so if compmask is 0xb
+			 * then first, third, and fourth component occupy
+			 * three consecutive varying slots:
+			 */
+			unsigned compmask = s[FS].v->inputs[j].compmask;
+
+			uint32_t inloc = s[FS].v->inputs[j].inloc;
+
+			if ((s[FS].v->inputs[j].interpolate == INTERP_MODE_FLAT) ||
+					(s[FS].v->inputs[j].rasterflat && emit->rasterflat)) {
+				uint32_t loc = inloc;
+
+				for (i = 0; i < 4; i++) {
+					if (compmask & (1 << i)) {
+						vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
+						//flatshade[loc / 32] |= 1 << (loc % 32);
+						loc++;
+					}
+				}
+			}
+
+			gl_varying_slot slot = s[FS].v->inputs[j].slot;
+
+			/* since we don't enable PIPE_CAP_TGSI_TEXCOORD: */
+			if (slot >= VARYING_SLOT_VAR0) {
+				unsigned texmask = 1 << (slot - VARYING_SLOT_VAR0);
+				/* Replace the .xy coordinates with S/T from the point sprite. Set
+				 * interpolation bits for .zw such that they become .01
+				 */
+				if (emit->sprite_coord_enable & texmask) {
+					/* mask is two 2-bit fields, where:
+					 *   '01' -> S
+					 *   '10' -> T
+					 *   '11' -> 1 - T  (flip mode)
+					 */
+					unsigned mask = emit->sprite_coord_mode ? 0b1101 : 0b1001;
+					uint32_t loc = inloc;
+					if (compmask & 0x1) {
+						vpsrepl[loc / 16] |= ((mask >> 0) & 0x3) << ((loc % 16) * 2);
+						loc++;
+					}
+					if (compmask & 0x2) {
+						vpsrepl[loc / 16] |= ((mask >> 2) & 0x3) << ((loc % 16) * 2);
+						loc++;
+					}
+					if (compmask & 0x4) {
+						/* .z <- 0.0f */
+						vinterp[loc / 16] |= 0b10 << ((loc % 16) * 2);
+						loc++;
+					}
+					if (compmask & 0x8) {
+						/* .w <- 1.0f */
+						vinterp[loc / 16] |= 0b11 << ((loc % 16) * 2);
+						loc++;
+					}
+				}
+			}
+		}
+
+		OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
+		OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
+				(s[VS].v->writes_psize ? 0x0c00 : 0xff00)); // XXX
+
+		OUT_PKT4(ring, REG_A5XX_VPC_VARYING_INTERP_MODE(0), 8);
+		for (i = 0; i < 8; i++)
+			OUT_RING(ring, vinterp[i]);     /* VPC_VARYING_INTERP[i].MODE */
+
+		OUT_PKT4(ring, REG_A5XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+		for (i = 0; i < 8; i++)
+			OUT_RING(ring, vpsrepl[i]);     /* VPC_VARYING_PS_REPL[i] */
+	}
+
+	if (!emit->key.binning_pass)
+		if (s[FS].instrlen)
+			emit_shader(ring, s[FS].v);
+
+	OUT_PKT4(ring, REG_A5XX_VFD_CONTROL_1, 5);
+	OUT_RING(ring, A5XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
+			A5XX_VFD_CONTROL_1_REGID4INST(instance_regid) |
+			0xfc);
+	OUT_RING(ring, 0x0000fcfc);   /* VFD_CONTROL_2 */
+	OUT_RING(ring, 0x0000fcfc);   /* VFD_CONTROL_3 */
+	OUT_RING(ring, 0x000000fc);   /* VFD_CONTROL_4 */
+	OUT_RING(ring, 0x00000000);   /* VFD_CONTROL_5 */
+}
+
+/* Install the a5xx shader CSO create/delete hooks on the context, then run
+ * the common fd_prog_init().
+ */
+void
+fd5_prog_init(struct pipe_context *pctx)
+{
+	pctx->create_fs_state = fd5_fp_state_create;
+	pctx->delete_fs_state = fd5_fp_state_delete;
+
+	pctx->create_vs_state = fd5_vp_state_create;
+	pctx->delete_vs_state = fd5_vp_state_delete;
+
+	fd_prog_init(pctx);
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_program.h b/src/gallium/drivers/freedreno/a5xx/fd5_program.h
new file mode 100644
index 00000000000..cd03bc5afd8
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_program.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2016 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any
person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Authors:
+ *    Rob Clark
+ */
+
+#ifndef FD5_PROGRAM_H_
+#define FD5_PROGRAM_H_
+
+#include "pipe/p_context.h"
+#include "freedreno_context.h"
+#include "ir3_shader.h"
+
+struct fd5_shader_stateobj {  /* a5xx shader CSO wrapper; holds only the ir3 shader */
+	struct ir3_shader *shader;  /* presumably owned by this object -- confirm against the create/delete hooks in fd5_program.c */
+};
+
+struct fd5_emit;  /* forward declaration; only used by pointer in this header */
+
+void fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit,
+		int nr, struct pipe_surface **bufs);
+
+void fd5_prog_init(struct pipe_context *pctx);
+
+#endif /* FD5_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_query.c b/src/gallium/drivers/freedreno/a5xx/fd5_query.c
new file mode 100644
index 00000000000..894c6825203
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_query.c
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2016 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark
+ */
+
+#include "fd5_query.h"
+
+void fd5_query_context_init(struct pipe_context *pctx)
+{
+	/* TODO: stub -- installs no query hooks; pctx is left untouched */
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_query.h b/src/gallium/drivers/freedreno/a5xx/fd5_query.h
new file mode 100644
index 00000000000..2e563b0d5cc
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_query.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + * Authors: + * Rob Clark + */ + +#ifndef FD5_QUERY_H_ +#define FD5_QUERY_H_ + +#include "pipe/p_context.h" + +void fd5_query_context_init(struct pipe_context *pctx); + +#endif /* FD5_QUERY_H_ */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.c b/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.c new file mode 100644 index 00000000000..6741852f31b --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.c @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ *
+ * Authors:
+ *    Rob Clark
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd5_rasterizer.h"
+#include "fd5_context.h"
+#include "fd5_format.h"
+
+/* Build the a5xx rasterizer state object for a gallium rasterizer CSO.
+ *
+ * The CSO is copied into so->base and the GRAS_SU_* / GRAS_CL_* register
+ * values are precomputed from it.  Returns NULL if allocation fails.
+ */
+void *
+fd5_rasterizer_state_create(struct pipe_context *pctx,
+		const struct pipe_rasterizer_state *cso)
+{
+	struct fd5_rasterizer_stateobj *so =
+		CALLOC_STRUCT(fd5_rasterizer_stateobj);
+	float min_size, max_size;
+	uint32_t su_cntl;
+
+	if (!so)
+		return NULL;
+
+	so->base = *cso;
+
+	/* Without per-vertex point size, force the point size to be as if
+	 * the vertex output was disabled:
+	 */
+	min_size = cso->point_size;
+	max_size = cso->point_size;
+	if (cso->point_size_per_vertex) {
+		min_size = util_get_min_point_size(cso);
+		max_size = 4092;
+	}
+
+	so->gras_cl_clip_cntl = 0x80000; /* ??? */
+	so->gras_su_point_minmax =
+			A5XX_GRAS_SU_POINT_MINMAX_MIN(min_size) |
+			A5XX_GRAS_SU_POINT_MINMAX_MAX(max_size);
+	so->gras_su_point_size = A5XX_GRAS_SU_POINT_SIZE(cso->point_size);
+	so->gras_su_poly_offset_scale =
+			A5XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
+	so->gras_su_poly_offset_offset =
+			A5XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
+	so->gras_su_poly_offset_clamp =
+			A5XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(cso->offset_clamp);
+
+	/* accumulate GRAS_SU_CNTL locally, store it once at the end: */
+	su_cntl = A5XX_GRAS_SU_CNTL_LINEHALFWIDTH(cso->line_width/2.0);
+//	so->pc_prim_vtx_cntl2 =
+//			A5XX_PC_PRIM_VTX_CNTL2_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
+//			A5XX_PC_PRIM_VTX_CNTL2_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+
+//	if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
+//			cso->fill_back != PIPE_POLYGON_MODE_FILL)
+//		so->pc_prim_vtx_cntl2 |= A5XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
+//
+//	if (cso->cull_face & PIPE_FACE_FRONT)
+//		so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT;
+//	if (cso->cull_face & PIPE_FACE_BACK)
+//		so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK;
+	if (!cso->front_ccw)
+		su_cntl |= A5XX_GRAS_SU_CNTL_FRONT_CW;
+//	if (!cso->flatshade_first)
+//		so->pc_prim_vtx_cntl |= A5XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
+
+	if (cso->offset_tri)
+		su_cntl |= A5XX_GRAS_SU_CNTL_POLY_OFFSET;
+
+	so->gras_su_cntl = su_cntl;
+
+//	if (!cso->depth_clip)
+//		so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CLIP_CNTL_ZNEAR_CLIP_DISABLE |
+//			A5XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE;
+//	if (cso->clip_halfz)
+//		so->gras_cl_clip_cntl |= A5XX_GRAS_CL_CLIP_CNTL_ZERO_GB_SCALE_Z;
+
+	return so;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.h b/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.h
new file mode 100644
index 00000000000..1c8771fb1ba
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_rasterizer.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2016 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark
+ */
+
+#ifndef FD5_RASTERIZER_H_
+#define FD5_RASTERIZER_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+struct fd5_rasterizer_stateobj {  /* rasterizer CSO plus a5xx register-value fields derived from it */
+	struct pipe_rasterizer_state base;  /* first member: the cast helper below relies on this */
+
+	uint32_t gras_su_point_minmax;
+	uint32_t gras_su_point_size;
+	uint32_t gras_su_poly_offset_scale;
+	uint32_t gras_su_poly_offset_offset;
+	uint32_t gras_su_poly_offset_clamp;
+
+	uint32_t gras_su_cntl;
+	uint32_t gras_cl_clip_cntl;
+	uint32_t pc_prim_vtx_cntl;   /* NOTE(review): presumably a PC_PRIM_VTX_CNTL value -- confirm where it is set */
+	uint32_t pc_prim_vtx_cntl2;  /* NOTE(review): presumably a PC_PRIM_VTX_CNTL2 value -- confirm where it is set */
+};
+
+static inline struct fd5_rasterizer_stateobj *
+fd5_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
+{
+	return (struct fd5_rasterizer_stateobj *)rast;  /* plain pointer cast; valid only for a 'base' embedded in a stateobj */
+}
+
+void * fd5_rasterizer_state_create(struct pipe_context *pctx,
+		const struct pipe_rasterizer_state *cso);
+
+#endif /* FD5_RASTERIZER_H_ */
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_screen.c b/src/gallium/drivers/freedreno/a5xx/fd5_screen.c
new file mode 100644
index 00000000000..96f83ed3340
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_screen.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2016 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark
+ */
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+
+#include "fd5_screen.h"
+#include "fd5_context.h"
+#include "fd5_format.h"
+#include "ir3_compiler.h"
+
+/* pipe_screen::is_format_supported hook.
+ *
+ * Collects the subset of the requested bind flags that 'format' can
+ * satisfy and reports success only when that subset equals 'usage'.
+ * Multisampled requests (sample_count > 1) are always rejected.
+ */
+static boolean
+fd5_screen_is_format_supported(struct pipe_screen *pscreen,
+		enum pipe_format format,
+		enum pipe_texture_target target,
+		unsigned sample_count,
+		unsigned usage)
+{
+	/* bind flags that all go through the color render-target path: */
+	const unsigned rt_binds = PIPE_BIND_RENDER_TARGET |
+			PIPE_BIND_DISPLAY_TARGET |
+			PIPE_BIND_SCANOUT |
+			PIPE_BIND_SHARED;
+	unsigned supported = 0;
+
+	if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+			(sample_count > 1) || /* TODO add MSAA */
+			!util_format_is_supported(format, usage)) {
+		DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+				util_format_name(format), target, sample_count, usage);
+		return FALSE;
+	}
+
+	if (usage & PIPE_BIND_VERTEX_BUFFER) {
+		if (fd5_pipe2vtx(format) != (enum a5xx_vtx_fmt)~0)
+			supported |= PIPE_BIND_VERTEX_BUFFER;
+	}
+
+	if (usage & PIPE_BIND_SAMPLER_VIEW) {
+		/* 12-byte block formats are only accepted for buffers: */
+		if ((target == PIPE_BUFFER ||
+				util_format_get_blocksize(format) != 12) &&
+				(fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0))
+			supported |= PIPE_BIND_SAMPLER_VIEW;
+	}
+
+	if (usage & rt_binds) {
+		if ((fd5_pipe2color(format) != (enum a5xx_color_fmt)~0) &&
+				(fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0))
+			supported |= usage & rt_binds;
+	}
+
+	if (usage & PIPE_BIND_DEPTH_STENCIL) {
+		if ((fd5_pipe2depth(format) != (enum a5xx_depth_format)~0) &&
+				(fd5_pipe2tex(format) != (enum a5xx_tex_fmt)~0))
+			supported |= PIPE_BIND_DEPTH_STENCIL;
+	}
+
+	if (usage & PIPE_BIND_INDEX_BUFFER) {
+		if (fd_pipe2index(format) != (enum pc_di_index_size)~0)
+			supported |= PIPE_BIND_INDEX_BUFFER;
+	}
+
+	if (supported != usage) {
+		DBG("not supported: format=%s, target=%d, sample_count=%d, "
+				"usage=%x, retval=%x", util_format_name(format),
+				target, sample_count, usage, supported);
+	}
+
+	return supported == usage;
+}
+
+/* Set up the a5xx-specific parts of the screen: render-target limit,
+ * ir3 compiler instance, and the context-create / format-query hooks.
+ */
+void
+fd5_screen_init(struct pipe_screen *pscreen)
+{
+	struct fd_screen *fdscreen = fd_screen(pscreen);
+
+	fdscreen->max_rts = A5XX_MAX_RENDER_TARGETS;
+	fdscreen->compiler = ir3_compiler_create(fdscreen->dev, fdscreen->gpu_id);
+
+	pscreen->context_create = fd5_context_create;
+	pscreen->is_format_supported = fd5_screen_is_format_supported;
+}
diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_screen.h b/src/gallium/drivers/freedreno/a5xx/fd5_screen.h
new file mode 100644
index 00000000000..ba0c7f15ff9
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a5xx/fd5_screen.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2016 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ * + * Authors: + * Rob Clark + */ + +#ifndef FD5_SCREEN_H_ +#define FD5_SCREEN_H_ + +#include "pipe/p_screen.h" + +void fd5_screen_init(struct pipe_screen *pscreen); + +#endif /* FD5_SCREEN_H_ */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_texture.c b/src/gallium/drivers/freedreno/a5xx/fd5_texture.c new file mode 100644 index 00000000000..a8604b76020 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_texture.c @@ -0,0 +1,353 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "fd5_texture.h" +#include "fd5_format.h" + +static enum a5xx_tex_clamp +tex_clamp(unsigned wrap, bool clamp_to_edge, bool *needs_border) +{ + /* Hardware does not support _CLAMP, but we emulate it: */ + if (wrap == PIPE_TEX_WRAP_CLAMP) { + wrap = (clamp_to_edge) ? + PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER; + } + + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return A5XX_TEX_REPEAT; + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return A5XX_TEX_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: + *needs_border = true; + return A5XX_TEX_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: + /* only works for PoT.. need to emulate otherwise! */ + return A5XX_TEX_MIRROR_CLAMP; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return A5XX_TEX_MIRROR_REPEAT; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + /* these two we could perhaps emulate, but we currently + * just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP + */ + default: + DBG("invalid wrap: %u", wrap); + return 0; + } +} + +static enum a5xx_tex_filter +tex_filter(unsigned filter, bool aniso) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return A5XX_TEX_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return aniso ? 
A5XX_TEX_ANISO : A5XX_TEX_LINEAR; + default: + DBG("invalid filter: %u", filter); + return 0; + } +} + +static void * +fd5_sampler_state_create(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + struct fd5_sampler_stateobj *so = CALLOC_STRUCT(fd5_sampler_stateobj); + unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8)); + bool miplinear = false; + bool clamp_to_edge; + + if (!so) + return NULL; + + so->base = *cso; + + if (cso->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR) + miplinear = true; + + /* + * For nearest filtering, _CLAMP means _CLAMP_TO_EDGE; for linear + * filtering, _CLAMP means _CLAMP_TO_BORDER while additionally + * clamping the texture coordinates to [0.0, 1.0]. + * + * The clamping will be taken care of in the shaders. There are two + * filters here, but let the minification one has a say. + */ + clamp_to_edge = (cso->min_img_filter == PIPE_TEX_FILTER_NEAREST); + if (!clamp_to_edge) { + so->saturate_s = (cso->wrap_s == PIPE_TEX_WRAP_CLAMP); + so->saturate_t = (cso->wrap_t == PIPE_TEX_WRAP_CLAMP); + so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP); + } + + so->needs_border = false; + so->texsamp0 = + COND(miplinear, A5XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) | + A5XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) | + A5XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) | + A5XX_TEX_SAMP_0_ANISO(aniso) | + A5XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge, &so->needs_border)) | + A5XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge, &so->needs_border)) | + A5XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge, &so->needs_border)); + + so->texsamp1 = +// COND(miplinear, A5XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR) | + COND(!cso->seamless_cube_map, A5XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | + COND(!cso->normalized_coords, A5XX_TEX_SAMP_1_UNNORM_COORDS); + + if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + so->texsamp0 |= A5XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias); + so->texsamp1 
|= + A5XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | + A5XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); + } + + if (cso->compare_mode) + so->texsamp1 |= A5XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */ + + return so; +} + +static void +fd5_sampler_states_bind(struct pipe_context *pctx, + enum pipe_shader_type shader, unsigned start, + unsigned nr, void **hwcso) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd5_context *fd5_ctx = fd5_context(ctx); + uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0; + unsigned i; + + if (!hwcso) + nr = 0; + + for (i = 0; i < nr; i++) { + if (hwcso[i]) { + struct fd5_sampler_stateobj *sampler = + fd5_sampler_stateobj(hwcso[i]); + if (sampler->saturate_s) + saturate_s |= (1 << i); + if (sampler->saturate_t) + saturate_t |= (1 << i); + if (sampler->saturate_r) + saturate_r |= (1 << i); + } + } + + fd_sampler_states_bind(pctx, shader, start, nr, hwcso); + + if (shader == PIPE_SHADER_FRAGMENT) { + fd5_ctx->fsaturate = + (saturate_s != 0) || + (saturate_t != 0) || + (saturate_r != 0); + fd5_ctx->fsaturate_s = saturate_s; + fd5_ctx->fsaturate_t = saturate_t; + fd5_ctx->fsaturate_r = saturate_r; + } else if (shader == PIPE_SHADER_VERTEX) { + fd5_ctx->vsaturate = + (saturate_s != 0) || + (saturate_t != 0) || + (saturate_r != 0); + fd5_ctx->vsaturate_s = saturate_s; + fd5_ctx->vsaturate_t = saturate_t; + fd5_ctx->vsaturate_r = saturate_r; + } +} + +static enum a5xx_tex_type +tex_type(unsigned target) +{ + switch (target) { + default: + assert(0); + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return A5XX_TEX_1D; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + return A5XX_TEX_2D; + case PIPE_TEXTURE_3D: + return A5XX_TEX_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return A5XX_TEX_CUBE; + } +} + +static bool +use_astc_srgb_workaround(struct pipe_context *pctx, enum pipe_format format) +{ + return (fd_screen(pctx->screen)->gpu_id == 420) && + 
(util_format_description(format)->layout == UTIL_FORMAT_LAYOUT_ASTC); +} + +static struct pipe_sampler_view * +fd5_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +{ + struct fd5_pipe_sampler_view *so = CALLOC_STRUCT(fd5_pipe_sampler_view); + struct fd_resource *rsc = fd_resource(prsc); + unsigned lvl, layers; + uint32_t sz2 = 0; + + if (!so) + return NULL; + + so->base = *cso; + pipe_reference(NULL, &prsc->reference); + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + so->texconst0 = + A5XX_TEX_CONST_0_FMT(fd5_pipe2tex(cso->format)) | + fd5_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, + cso->swizzle_b, cso->swizzle_a); + + if (util_format_is_srgb(cso->format)) { + if (use_astc_srgb_workaround(pctx, cso->format)) + so->astc_srgb = true; + so->texconst0 |= A5XX_TEX_CONST_0_SRGB; + } + + if (cso->target == PIPE_BUFFER) { + unsigned elements = cso->u.buf.size / util_format_get_blocksize(cso->format); + + lvl = 0; + so->texconst1 = + A5XX_TEX_CONST_1_WIDTH(elements) | + A5XX_TEX_CONST_1_HEIGHT(1); + so->texconst2 = + A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(cso->format)) | + A5XX_TEX_CONST_2_PITCH(elements * rsc->cpp); + so->offset = cso->u.buf.offset; + } else { +// unsigned miplevels; + + lvl = fd_sampler_first_level(cso); +// miplevels = fd_sampler_last_level(cso) - lvl; + layers = cso->u.tex.last_layer - cso->u.tex.first_layer + 1; + +// so->texconst0 |= A5XX_TEX_CONST_0_MIPLVLS(miplevels); + so->texconst1 = + A5XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) | + A5XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl)); + so->texconst2 = + A5XX_TEX_CONST_2_FETCHSIZE(fd5_pipe2fetchsize(cso->format)) | + A5XX_TEX_CONST_2_PITCH( + util_format_get_nblocksx( + cso->format, rsc->slices[lvl].pitch) * rsc->cpp); + so->offset = fd_resource_offset(rsc, lvl, cso->u.tex.first_layer); + } + + so->texconst2 |= A5XX_TEX_CONST_2_TYPE(tex_type(cso->target)); + + 
switch (cso->target) { + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_2D: + so->texconst3 = + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0); + so->texconst5 = + A5XX_TEX_CONST_5_DEPTH(1); + break; + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + so->texconst3 = + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); + so->texconst5 = + A5XX_TEX_CONST_5_DEPTH(layers); + break; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + so->texconst3 = + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->layer_size); + so->texconst5 = + A5XX_TEX_CONST_5_DEPTH(layers / 6); + break; + case PIPE_TEXTURE_3D: + while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0) + sz2 = rsc->slices[++lvl].size0; + so->texconst3 = + A5XX_TEX_CONST_3_ARRAY_PITCH(rsc->slices[lvl].size0); + so->texconst5 = + A5XX_TEX_CONST_5_DEPTH(u_minify(prsc->depth0, lvl)); + break; + default: + so->texconst3 = 0x00000000; + break; + } + + return &so->base; +} + +static void +fd5_set_sampler_views(struct pipe_context *pctx, enum pipe_shader_type shader, + unsigned start, unsigned nr, + struct pipe_sampler_view **views) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd5_context *fd5_ctx = fd5_context(ctx); + uint16_t astc_srgb = 0; + unsigned i; + + for (i = 0; i < nr; i++) { + if (views[i]) { + struct fd5_pipe_sampler_view *view = + fd5_pipe_sampler_view(views[i]); + if (view->astc_srgb) + astc_srgb |= (1 << i); + } + } + + fd_set_sampler_views(pctx, shader, start, nr, views); + + if (shader == PIPE_SHADER_FRAGMENT) { + fd5_ctx->fastc_srgb = astc_srgb; + } else if (shader == PIPE_SHADER_VERTEX) { + fd5_ctx->vastc_srgb = astc_srgb; + } +} + +void +fd5_texture_init(struct pipe_context *pctx) +{ + pctx->create_sampler_state = fd5_sampler_state_create; + pctx->bind_sampler_states = fd5_sampler_states_bind; + pctx->create_sampler_view = fd5_sampler_view_create; + pctx->set_sampler_views = fd5_set_sampler_views; +} diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_texture.h 
b/src/gallium/drivers/freedreno/a5xx/fd5_texture.h new file mode 100644 index 00000000000..c4d109376e1 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_texture.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef FD5_TEXTURE_H_ +#define FD5_TEXTURE_H_ + +#include "pipe/p_context.h" + +#include "freedreno_texture.h" +#include "freedreno_resource.h" + +#include "fd5_context.h" +#include "fd5_format.h" + +struct fd5_sampler_stateobj { + struct pipe_sampler_state base; + uint32_t texsamp0, texsamp1, texsamp2, texsamp3; + bool saturate_s, saturate_t, saturate_r; + bool needs_border; +}; + +static inline struct fd5_sampler_stateobj * +fd5_sampler_stateobj(struct pipe_sampler_state *samp) +{ + return (struct fd5_sampler_stateobj *)samp; +} + +struct fd5_pipe_sampler_view { + struct pipe_sampler_view base; + uint32_t texconst0, texconst1, texconst2, texconst3, texconst5; + uint32_t texconst6, texconst7, texconst8, texconst9, texconst10, texconst11; + uint32_t offset; + bool astc_srgb; +}; + +static inline struct fd5_pipe_sampler_view * +fd5_pipe_sampler_view(struct pipe_sampler_view *pview) +{ + return (struct fd5_pipe_sampler_view *)pview; +} + +unsigned fd5_get_const_idx(struct fd_context *ctx, + struct fd_texture_stateobj *tex, unsigned samp_id); + +void fd5_texture_init(struct pipe_context *pctx); + +#endif /* FD5_TEXTURE_H_ */ diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c new file mode 100644 index 00000000000..f113a92353a --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next 
+ * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd5_zsa.h" +#include "fd5_context.h" +#include "fd5_format.h" + +void * +fd5_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct fd5_zsa_stateobj *so; + + so = CALLOC_STRUCT(fd5_zsa_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + so->rb_depth_cntl |= + A5XX_RB_DEPTH_CNTL_ZFUNC(cso->depth.func); /* maps 1:1 */ + + if (cso->depth.enabled) + so->rb_depth_cntl |= + A5XX_RB_DEPTH_CNTL_Z_ENABLE | + A5XX_RB_DEPTH_CNTL_Z_TEST_ENABLE; + + if (cso->depth.writemask) + so->rb_depth_cntl |= A5XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; + + if (cso->stencil[0].enabled) { + const struct pipe_stencil_state *s = &cso->stencil[0]; + + so->rb_stencil_control |= + A5XX_RB_STENCIL_CONTROL_STENCIL_READ | + A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A5XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */ + A5XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) | + A5XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) | + A5XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op)); + so->rb_stencilrefmask |= + A5XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) | + A5XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask); + + if (cso->stencil[1].enabled) { + const struct pipe_stencil_state *bs = &cso->stencil[1]; + + 
so->rb_stencil_control |= + A5XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A5XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */ + A5XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) | + A5XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) | + A5XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op)); +// so->rb_stencilrefmask_bf |= +// A5XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) | +// A5XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask); + } + } + + if (cso->alpha.enabled) { + uint32_t ref = cso->alpha.ref_value * 255.0; + so->gras_su_depth_plane_cntl = + A5XX_GRAS_SU_DEPTH_PLANE_CNTL_ALPHA_TEST_ENABLE; + so->rb_alpha_control = + A5XX_RB_ALPHA_CONTROL_ALPHA_TEST | + A5XX_RB_ALPHA_CONTROL_ALPHA_REF(ref) | + A5XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func); +// so->rb_depth_control |= +// A5XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE; + } + + return so; +} diff --git a/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h new file mode 100644 index 00000000000..02c116a8357 --- /dev/null +++ b/src/gallium/drivers/freedreno/a5xx/fd5_zsa.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2016 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD5_ZSA_H_ +#define FD5_ZSA_H_ + + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +#include "freedreno_util.h" + +struct fd5_zsa_stateobj { + struct pipe_depth_stencil_alpha_state base; + + uint32_t gras_su_depth_plane_cntl; + uint32_t rb_alpha_control; + uint32_t rb_depth_cntl; + uint32_t rb_stencil_control; + uint32_t rb_stencilrefmask; +}; + +static inline struct fd5_zsa_stateobj * +fd5_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa) +{ + return (struct fd5_zsa_stateobj *)zsa; +} + +void * fd5_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso); + +#endif /* FD5_ZSA_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_batch.c b/src/gallium/drivers/freedreno/freedreno_batch.c index 276f6be93d9..ec6f1cd37e5 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.c +++ b/src/gallium/drivers/freedreno/freedreno_batch.c @@ -403,3 +403,18 @@ fd_batch_check_size(struct fd_batch *batch) (fd_mesa_debug & FD_DBG_FLUSH)) fd_batch_flush(batch, true); } + +/* emit a WAIT_FOR_IDLE only if needed, ie. 
if there has not already + * been one since last draw: + */ +void +fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) +{ + if (batch->needs_wfi) { + if (batch->ctx->screen->gpu_id >= 500) + OUT_WFI5(ring); + else + OUT_WFI(ring); + batch->needs_wfi = false; + } +} diff --git a/src/gallium/drivers/freedreno/freedreno_batch.h b/src/gallium/drivers/freedreno/freedreno_batch.h index aeeb9c58ad6..1e9545971be 100644 --- a/src/gallium/drivers/freedreno/freedreno_batch.h +++ b/src/gallium/drivers/freedreno/freedreno_batch.h @@ -261,17 +261,7 @@ fd_reset_wfi(struct fd_batch *batch) batch->needs_wfi = true; } -/* emit a WAIT_FOR_IDLE only if needed, ie. if there has not already - * been one since last draw: - */ -static inline void -fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring) -{ - if (batch->needs_wfi) { - OUT_WFI(ring); - batch->needs_wfi = false; - } -} +void fd_wfi(struct fd_batch *batch, struct fd_ringbuffer *ring); /* emit a CP_EVENT_WRITE: */ diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h index c4c08a682be..e56fef94422 100644 --- a/src/gallium/drivers/freedreno/freedreno_context.h +++ b/src/gallium/drivers/freedreno/freedreno_context.h @@ -257,6 +257,7 @@ struct fd_context { void (*emit_tile_mem2gmem)(struct fd_batch *batch, struct fd_tile *tile); void (*emit_tile_renderprep)(struct fd_batch *batch, struct fd_tile *tile); void (*emit_tile_gmem2mem)(struct fd_batch *batch, struct fd_tile *tile); + void (*emit_tile_fini)(struct fd_batch *batch); /* optional */ /* optional, for GMEM bypass: */ void (*emit_sysmem_prep)(struct fd_batch *batch); diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c index b94e33d285e..3656538f26c 100644 --- a/src/gallium/drivers/freedreno/freedreno_gmem.c +++ b/src/gallium/drivers/freedreno/freedreno_gmem.c @@ -69,7 +69,7 @@ static uint32_t bin_width(struct fd_screen *screen) { - if 
(is_a4xx(screen)) + if (is_a4xx(screen) || is_a5xx(screen)) return 1024; if (is_a3xx(screen)) return 992; @@ -339,6 +339,9 @@ render_tiles(struct fd_batch *batch) /* emit gmem2mem to transfer tile back to system memory: */ ctx->emit_tile_gmem2mem(batch, tile); } + + if (ctx->emit_tile_fini) + ctx->emit_tile_fini(batch); } static void diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index f6ec5763f76..48b796406d1 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -53,6 +53,7 @@ #include "a2xx/fd2_screen.h" #include "a3xx/fd3_screen.h" #include "a4xx/fd4_screen.h" +#include "a5xx/fd5_screen.h" #include "ir3/ir3_nir.h" @@ -667,6 +668,9 @@ fd_screen_create(struct fd_device *dev) case 430: fd4_screen_init(pscreen); break; + case 530: + fd5_screen_init(pscreen); + break; default: debug_printf("unsupported GPU: a%03d\n", screen->gpu_id); goto fail; diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h index 3fc66fb9607..6a7b2a80f50 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.h +++ b/src/gallium/drivers/freedreno/freedreno_screen.h @@ -114,6 +114,12 @@ is_a4xx(struct fd_screen *screen) return (screen->gpu_id >= 400) && (screen->gpu_id < 500); } +static inline boolean +is_a5xx(struct fd_screen *screen) +{ + return (screen->gpu_id >= 500) && (screen->gpu_id < 600); +} + /* is it using the ir3 compiler (shader isa introduced with a3xx)? 
*/ static inline boolean is_ir3(struct fd_screen *screen) diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h index 30097008e2a..a2d13589158 100644 --- a/src/gallium/drivers/freedreno/freedreno_util.h +++ b/src/gallium/drivers/freedreno/freedreno_util.h @@ -57,8 +57,9 @@ enum adreno_stencil_op fd_stencil_op(unsigned op); #define A2XX_MAX_RENDER_TARGETS 1 #define A3XX_MAX_RENDER_TARGETS 4 #define A4XX_MAX_RENDER_TARGETS 8 +#define A5XX_MAX_RENDER_TARGETS 8 -#define MAX_RENDER_TARGETS A4XX_MAX_RENDER_TARGETS +#define MAX_RENDER_TARGETS A5XX_MAX_RENDER_TARGETS #define FD_DBG_MSGS 0x0001 #define FD_DBG_DISASM 0x0002 @@ -176,6 +177,7 @@ fd_half_precision(struct pipe_framebuffer_state *pfb) #define LOG_DWORDS 0 static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx); +static inline void emit_marker5(struct fd_ringbuffer *ring, int scratch_idx); static inline void OUT_RING(struct fd_ringbuffer *ring, uint32_t data) @@ -202,39 +204,45 @@ OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, })); } +/* + * NOTE: OUT_RELOC*() is 2 dwords (64b) on a5xx+ + */ + static inline void OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint32_t or, int32_t shift) + uint32_t offset, uint64_t or, int32_t shift) { if (LOG_DWORDS) { DBG("ring[%p]: OUT_RELOC %04x: %p+%u << %d", ring, (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); } debug_assert(offset < fd_bo_size(bo)); - fd_ringbuffer_reloc(ring, &(struct fd_reloc){ + fd_ringbuffer_reloc2(ring, &(struct fd_reloc){ .bo = bo, .flags = FD_RELOC_READ, .offset = offset, .or = or, .shift = shift, + .orhi = or >> 32, }); } static inline void OUT_RELOCW(struct fd_ringbuffer *ring, struct fd_bo *bo, - uint32_t offset, uint32_t or, int32_t shift) + uint32_t offset, uint64_t or, int32_t shift) { if (LOG_DWORDS) { DBG("ring[%p]: OUT_RELOCW %04x: %p+%u << %d", ring, (uint32_t)(ring->cur - ring->last_start), bo, offset, shift); } 
debug_assert(offset < fd_bo_size(bo)); - fd_ringbuffer_reloc(ring, &(struct fd_reloc){ + fd_ringbuffer_reloc2(ring, &(struct fd_reloc){ .bo = bo, .flags = FD_RELOC_READ | FD_RELOC_WRITE, .offset = offset, .or = or, .shift = shift, + .orhi = or >> 32, }); } @@ -244,9 +252,18 @@ static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords) fd_ringbuffer_grow(ring, ndwords); } +static inline uint32_t +__gpu_id(struct fd_ringbuffer *ring) +{ + uint64_t val; + fd_pipe_get_param(ring->pipe, FD_GPU_ID, &val); + return val; +} + static inline void OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { + debug_assert(__gpu_id(ring) < 500); BEGIN_RING(ring, cnt+1); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } @@ -254,6 +271,7 @@ OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) static inline void OUT_PKT2(struct fd_ringbuffer *ring) { + debug_assert(__gpu_id(ring) < 500); BEGIN_RING(ring, 1); OUT_RING(ring, CP_TYPE2_PKT); } @@ -261,10 +279,48 @@ OUT_PKT2(struct fd_ringbuffer *ring) static inline void OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { + debug_assert(__gpu_id(ring) < 500); BEGIN_RING(ring, cnt+1); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); } +/* + * Starting with a5xx, pkt4/pkt7 are used instead of pkt0/pkt3 + */ + +static inline unsigned +_odd_parity_bit(unsigned val) +{ + /* See: http://graphics.stanford.edu/~seander/bithacks.html#ParityParallel + * note that we want odd parity so 0x6996 is inverted. 
+ */ + val ^= val >> 16; + val ^= val >> 8; + val ^= val >> 4; + val &= 0xf; + return (~0x6996 >> val) & 1; +} + +static inline void +OUT_PKT4(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE4_PKT | cnt | + (_odd_parity_bit(cnt) << 7) | + ((regindx & 0x3ffff) << 8) | + ((_odd_parity_bit(regindx) << 27))); +} + +static inline void +OUT_PKT7(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt) +{ + BEGIN_RING(ring, cnt+1); + OUT_RING(ring, CP_TYPE7_PKT | cnt | + (_odd_parity_bit(cnt) << 15) | + ((opcode & 0x7f) << 16) | + ((_odd_parity_bit(opcode) << 23))); +} + static inline void OUT_WFI(struct fd_ringbuffer *ring) { @@ -272,11 +328,19 @@ OUT_WFI(struct fd_ringbuffer *ring) OUT_RING(ring, 0x00000000); } +static inline void +OUT_WFI5(struct fd_ringbuffer *ring) +{ + OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0); +} + static inline void __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target) { unsigned count = fd_ringbuffer_cmd_count(target); + debug_assert(__gpu_id(ring) < 500); + /* for debug after a lock up, write a unique counter value * to scratch6 for each IB, to make it easier to match up * register dumps to cmdstream. The combination of IB and @@ -297,7 +361,34 @@ __OUT_IB(struct fd_ringbuffer *ring, bool prefetch, struct fd_ringbuffer *target emit_marker(ring, 6); } +static inline void +__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) +{ + unsigned count = fd_ringbuffer_cmd_count(target); + + /* for debug after a lock up, write a unique counter value + * to scratch6 for each IB, to make it easier to match up + * register dumps to cmdstream. The combination of IB and + * DRAW (scratch7) is enough to "triangulate" the particular + * draw that caused lockup. 
+ */ + emit_marker5(ring, 6); + + for (unsigned i = 0; i < count; i++) { + uint32_t dwords; + OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); + dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4; + assert(dwords > 0); + OUT_RING(ring, dwords); + } + + emit_marker5(ring, 6); +} + /* CP_SCRATCH_REG4 is used to hold base address for query results: */ +// XXX annoyingly scratch regs move on a5xx.. and additionally different +// packet types.. so freedreno_query_hw is going to need a bit of +// rework.. #define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4 static inline void @@ -312,6 +403,21 @@ emit_marker(struct fd_ringbuffer *ring, int scratch_idx) OUT_RING(ring, ++marker_cnt); } +static inline void +emit_marker5(struct fd_ringbuffer *ring, int scratch_idx) +{ + extern unsigned marker_cnt; +//XXX unsigned reg = REG_A5XX_CP_SCRATCH_REG(scratch_idx); + unsigned reg = 0x00000b78 + scratch_idx; + assert(reg != HW_QUERY_BASE_REG); + if (reg == HW_QUERY_BASE_REG) + return; + OUT_WFI5(ring); + OUT_PKT4(ring, reg, 1); + OUT_RING(ring, ++marker_cnt); + OUT_WFI5(ring); +} + /* helper to get numeric value from environment variable.. mostly * just leaving this here because it is helpful to brute-force figure * out unknown formats, etc, which blob driver does not support: