From: Rob Clark Date: Thu, 31 Jul 2014 19:42:55 +0000 (-0400) Subject: freedreno: add adreno 420 support X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=61c68b69d704b5faa5ff9d2b73b24bebf7e19412;p=mesa.git freedreno: add adreno 420 support Very initial support. Basic stuff working (es2gears, es2tri, and maybe about half of glmark2). Expect broken stuff. Still missing: mem->gmem (restore), queries, mipmaps (blob segfaults!), hw binning, etc. Signed-off-by: Rob Clark --- diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am index ee5d506f2a3..df00add1cb0 100644 --- a/src/gallium/drivers/freedreno/Makefile.am +++ b/src/gallium/drivers/freedreno/Makefile.am @@ -15,6 +15,7 @@ libfreedreno_la_SOURCES = \ $(C_SOURCES) \ $(a2xx_SOURCES) \ $(a3xx_SOURCES) \ + $(a4xx_SOURCES) \ $(ir3_SOURCES) noinst_PROGRAMS = ir3_compiler diff --git a/src/gallium/drivers/freedreno/Makefile.sources b/src/gallium/drivers/freedreno/Makefile.sources index edaaadfb478..9d2710cc0fc 100644 --- a/src/gallium/drivers/freedreno/Makefile.sources +++ b/src/gallium/drivers/freedreno/Makefile.sources @@ -89,6 +89,20 @@ a3xx_SOURCES := \ a3xx/fd3_zsa.c \ a3xx/fd3_zsa.h +a4xx_SOURCES := \ + a4xx/fd4_blend.c \ + a4xx/fd4_context.c \ + a4xx/fd4_draw.c \ + a4xx/fd4_emit.c \ + a4xx/fd4_gmem.c \ + a4xx/fd4_program.c \ + a4xx/fd4_query.c \ + a4xx/fd4_rasterizer.c \ + a4xx/fd4_screen.c \ + a4xx/fd4_texture.c \ + a4xx/fd4_util.c \ + a4xx/fd4_zsa.c + ir3_SOURCES := \ ir3/disasm-a3xx.c \ ir3/instr-a3xx.h \ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c new file mode 100644 index 00000000000..f569e9313c3 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c @@ -0,0 +1,127 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this 
software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd4_blend.h" +#include "fd4_context.h" +#include "fd4_util.h" + +static enum a4xx_rb_blend_opcode +blend_func(unsigned func) +{ + switch (func) { + case PIPE_BLEND_ADD: + return BLEND_DST_PLUS_SRC; + case PIPE_BLEND_MIN: + return BLEND_MIN_DST_SRC; + case PIPE_BLEND_MAX: + return BLEND_MAX_DST_SRC; + case PIPE_BLEND_SUBTRACT: + return BLEND_SRC_MINUS_DST; + case PIPE_BLEND_REVERSE_SUBTRACT: + return BLEND_DST_MINUS_SRC; + default: + DBG("invalid blend func: %x", func); + return 0; + } +} + +void * +fd4_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso) +{ + struct fd4_blend_stateobj *so; +// enum a3xx_rop_code rop = ROP_COPY; + bool reads_dest = false; + int i; + + if (cso->logicop_enable) { +// rop = cso->logicop_func; /* maps 1:1 */ + + switch (cso->logicop_func) { + case PIPE_LOGICOP_NOR: + case PIPE_LOGICOP_AND_INVERTED: + case PIPE_LOGICOP_AND_REVERSE: + case PIPE_LOGICOP_INVERT: + case PIPE_LOGICOP_XOR: + case PIPE_LOGICOP_NAND: + case PIPE_LOGICOP_AND: + case PIPE_LOGICOP_EQUIV: + case PIPE_LOGICOP_NOOP: + case PIPE_LOGICOP_OR_INVERTED: + case PIPE_LOGICOP_OR_REVERSE: + case PIPE_LOGICOP_OR: + reads_dest = true; + break; + } + } + + if (cso->independent_blend_enable) { + DBG("Unsupported! 
independent blend state"); + return NULL; + } + + so = CALLOC_STRUCT(fd4_blend_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) { + const struct pipe_rt_blend_state *rt = &cso->rt[i]; + + so->rb_mrt[i].blend_control = + A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) | + A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) | + A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) | + A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)); + + so->rb_mrt[i].control = + 0xc00 | /* XXX ROP_CODE ?? */ + A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask); + + if (rt->blend_enable) + so->rb_mrt[i].control |= + A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE | + A4XX_RB_MRT_CONTROL_BLEND | + A4XX_RB_MRT_CONTROL_BLEND2; + + if (reads_dest) + so->rb_mrt[i].control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE; + + if (cso->dither) + so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS); + } + + return so; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h new file mode 100644 index 00000000000..68fcf23ff5c --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h @@ -0,0 +1,53 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * 
Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD4_BLEND_H_ +#define FD4_BLEND_H_ + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +struct fd4_blend_stateobj { + struct pipe_blend_state base; + struct { + uint32_t control; + uint32_t buf_info; + uint32_t blend_control; + } rb_mrt[8]; +}; + +static INLINE struct fd4_blend_stateobj * +fd4_blend_stateobj(struct pipe_blend_state *blend) +{ + return (struct fd4_blend_stateobj *)blend; +} + +void * fd4_blend_state_create(struct pipe_context *pctx, + const struct pipe_blend_state *cso); + +#endif /* FD4_BLEND_H_ */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c new file mode 100644 index 00000000000..2321876dd48 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.c @@ -0,0 +1,172 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * 
and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + + +#include "fd4_context.h" +#include "fd4_blend.h" +#include "fd4_draw.h" +#include "fd4_emit.h" +#include "fd4_gmem.h" +#include "fd4_program.h" +#include "fd4_query.h" +#include "fd4_rasterizer.h" +#include "fd4_texture.h" +#include "fd4_zsa.h" + +static void +fd4_context_destroy(struct pipe_context *pctx) +{ + struct fd4_context *fd4_ctx = fd4_context(fd_context(pctx)); + + util_dynarray_fini(&fd4_ctx->rbrc_patches); + + fd_bo_del(fd4_ctx->vs_pvt_mem); + fd_bo_del(fd4_ctx->fs_pvt_mem); + fd_bo_del(fd4_ctx->vsc_size_mem); + + pctx->delete_vertex_elements_state(pctx, fd4_ctx->solid_vbuf_state.vtx); + pctx->delete_vertex_elements_state(pctx, fd4_ctx->blit_vbuf_state.vtx); + + pipe_resource_reference(&fd4_ctx->solid_vbuf, NULL); + pipe_resource_reference(&fd4_ctx->blit_texcoord_vbuf, NULL); + + fd_context_destroy(pctx); +} + +/* TODO we could combine a few of these small buffers (solid_vbuf, + * blit_texcoord_vbuf, and vsc_size_mem, into a single buffer and + * save a tiny bit of memory + */ + +static struct pipe_resource * +create_solid_vertexbuf(struct pipe_context *pctx) +{ + static const float init_shader_const[] = { + -1.000000, 
+1.000000, +1.000000, + +1.000000, -1.000000, +1.000000, + }; + struct pipe_resource *prsc = pipe_buffer_create(pctx->screen, + PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const)); + pipe_buffer_write(pctx, prsc, 0, + sizeof(init_shader_const), init_shader_const); + return prsc; +} + +static struct pipe_resource * +create_blit_texcoord_vertexbuf(struct pipe_context *pctx) +{ + struct pipe_resource *prsc = pipe_buffer_create(pctx->screen, + PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16); + return prsc; +} + +static const uint8_t primtypes[PIPE_PRIM_MAX] = { + [PIPE_PRIM_POINTS] = DI_PT_POINTLIST_A3XX, + [PIPE_PRIM_LINES] = DI_PT_LINELIST, + [PIPE_PRIM_LINE_STRIP] = DI_PT_LINESTRIP, + [PIPE_PRIM_LINE_LOOP] = DI_PT_LINELOOP, + [PIPE_PRIM_TRIANGLES] = DI_PT_TRILIST, + [PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP, + [PIPE_PRIM_TRIANGLE_FAN] = DI_PT_TRIFAN, +}; + +struct pipe_context * +fd4_context_create(struct pipe_screen *pscreen, void *priv) +{ + struct fd_screen *screen = fd_screen(pscreen); + struct fd4_context *fd4_ctx = CALLOC_STRUCT(fd4_context); + struct pipe_context *pctx; + + if (!fd4_ctx) + return NULL; + + pctx = &fd4_ctx->base.base; + + fd4_ctx->base.dev = fd_device_ref(screen->dev); + fd4_ctx->base.screen = fd_screen(pscreen); + + pctx->destroy = fd4_context_destroy; + pctx->create_blend_state = fd4_blend_state_create; + pctx->create_rasterizer_state = fd4_rasterizer_state_create; + pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create; + + fd4_draw_init(pctx); + fd4_gmem_init(pctx); + fd4_texture_init(pctx); + fd4_prog_init(pctx); + + pctx = fd_context_init(&fd4_ctx->base, pscreen, primtypes, priv); + if (!pctx) + return NULL; + + util_dynarray_init(&fd4_ctx->rbrc_patches); + + fd4_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd4_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + + fd4_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000, + DRM_FREEDRENO_GEM_TYPE_KMEM); 
+ + fd4_ctx->solid_vbuf = create_solid_vertexbuf(pctx); + fd4_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx); + + /* setup solid_vbuf_state: */ + fd4_ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state( + pctx, 1, (struct pipe_vertex_element[]){{ + .vertex_buffer_index = 0, + .src_offset = 0, + .src_format = PIPE_FORMAT_R32G32B32_FLOAT, + }}); + fd4_ctx->solid_vbuf_state.vertexbuf.count = 1; + fd4_ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12; + fd4_ctx->solid_vbuf_state.vertexbuf.vb[0].buffer = fd4_ctx->solid_vbuf; + + /* setup blit_vbuf_state: */ + fd4_ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state( + pctx, 2, (struct pipe_vertex_element[]){{ + .vertex_buffer_index = 0, + .src_offset = 0, + .src_format = PIPE_FORMAT_R32G32_FLOAT, + }, { + .vertex_buffer_index = 1, + .src_offset = 0, + .src_format = PIPE_FORMAT_R32G32B32_FLOAT, + }}); + fd4_ctx->blit_vbuf_state.vertexbuf.count = 2; + fd4_ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8; + fd4_ctx->blit_vbuf_state.vertexbuf.vb[0].buffer = fd4_ctx->blit_texcoord_vbuf; + fd4_ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12; + fd4_ctx->blit_vbuf_state.vertexbuf.vb[1].buffer = fd4_ctx->solid_vbuf; + + fd4_query_context_init(pctx); + + return pctx; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h new file mode 100644 index 00000000000..87e69fa613a --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_context.h @@ -0,0 +1,102 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit 
persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD4_CONTEXT_H_ +#define FD4_CONTEXT_H_ + +#include "freedreno_drmif.h" + +#include "freedreno_context.h" + +#include "ir3_shader.h" + +struct fd4_context { + struct fd_context base; + + /* Keep track of writes to RB_RENDER_CONTROL which need to be patched + * once we know whether or not to use GMEM, and GMEM tile pitch. + */ + struct util_dynarray rbrc_patches; + + struct fd_bo *vs_pvt_mem, *fs_pvt_mem; + + /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes). We + * could combine it with another allocation. 
+ */ + struct fd_bo *vsc_size_mem; + + /* vertex buf used for clear/gmem->mem vertices, and mem->gmem + * vertices: + */ + struct pipe_resource *solid_vbuf; + + /* vertex buf used for mem->gmem tex coords: + */ + struct pipe_resource *blit_texcoord_vbuf; + + /* vertex state for solid_vbuf: + * - solid_vbuf / 12 / R32G32B32_FLOAT + */ + struct fd_vertex_state solid_vbuf_state; + + /* vertex state for blit_prog: + * - blit_texcoord_vbuf / 8 / R32G32_FLOAT + * - solid_vbuf / 12 / R32G32B32_FLOAT + */ + struct fd_vertex_state blit_vbuf_state; + + /* if *any* of the bits are set in {v,f}saturate_{s,t,r} */ + bool vsaturate, fsaturate; + + /* bitmask of samplers which need coords clamped for vertex + * shader: + */ + unsigned vsaturate_s, vsaturate_t, vsaturate_r; + + /* bitmask of samplers which need coords clamped for frag + * shader: + */ + unsigned fsaturate_s, fsaturate_t, fsaturate_r; + + /* some state changes require a different shader variant. Keep + * track of this so we know when we need to re-emit shader state + * due to variant change. 
See fixup_shader_state() + */ + struct ir3_shader_key last_key; +}; + +static INLINE struct fd4_context * +fd4_context(struct fd_context *ctx) +{ + return (struct fd4_context *)ctx; +} + +struct pipe_context * +fd4_context_create(struct pipe_screen *pscreen, void *priv); + +#endif /* FD4_CONTEXT_H_ */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c new file mode 100644 index 00000000000..2427a8b8a5b --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c @@ -0,0 +1,326 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_prim.h" + +#include "freedreno_state.h" +#include "freedreno_resource.h" + +#include "fd4_draw.h" +#include "fd4_context.h" +#include "fd4_emit.h" +#include "fd4_program.h" +#include "fd4_util.h" +#include "fd4_zsa.h" + + +static void +draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring, + struct fd4_emit *emit) +{ + const struct pipe_draw_info *info = emit->info; + + fd4_emit_state(ctx, ring, emit); + + if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE)) + fd4_emit_vertex_bufs(ring, emit); + + OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2); + OUT_RING(ring, info->start); /* VFD_INDEX_OFFSET */ + OUT_RING(ring, info->start_instance); /* ??? UNKNOWN_2209 */ + + OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */ + info->restart_index : 0xffffffff); + + fd4_draw_emit(ctx, ring, + emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY, + info); +} + +/* fixup dirty shader state in case some "unrelated" (from the state- + * tracker's perspective) state change causes us to switch to a + * different variant. 
+ */ +static void +fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key) +{ + struct fd4_context *fd4_ctx = fd4_context(ctx); + struct ir3_shader_key *last_key = &fd4_ctx->last_key; + + if (!ir3_shader_key_equal(last_key, key)) { + ctx->dirty |= FD_DIRTY_PROG; + + if (last_key->has_per_samp || key->has_per_samp) { + if ((last_key->vsaturate_s != key->vsaturate_s) || + (last_key->vsaturate_t != key->vsaturate_t) || + (last_key->vsaturate_r != key->vsaturate_r)) + ctx->prog.dirty |= FD_SHADER_DIRTY_VP; + + if ((last_key->fsaturate_s != key->fsaturate_s) || + (last_key->fsaturate_t != key->fsaturate_t) || + (last_key->fsaturate_r != key->fsaturate_r)) + ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + } + + if (last_key->color_two_side != key->color_two_side) + ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + + if (last_key->half_precision != key->half_precision) + ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + + if (last_key->alpha != key->alpha) + ctx->prog.dirty |= FD_SHADER_DIRTY_FP; + + fd4_ctx->last_key = *key; + } +} + +static void +fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info) +{ + struct fd4_context *fd4_ctx = fd4_context(ctx); + struct fd4_emit emit = { + .vtx = &ctx->vtx, + .prog = &ctx->prog, + .info = info, + .key = { + /* do binning pass first: */ + .binning_pass = true, + .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false, + .alpha = util_format_is_alpha(pipe_surface_format(ctx->framebuffer.cbufs[0])), + // TODO set .half_precision based on render target format, + // ie. float16 and smaller use half, float32 use full.. 
+ .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF), + .has_per_samp = fd4_ctx->fsaturate || fd4_ctx->vsaturate, + .vsaturate_s = fd4_ctx->vsaturate_s, + .vsaturate_t = fd4_ctx->vsaturate_t, + .vsaturate_r = fd4_ctx->vsaturate_r, + .fsaturate_s = fd4_ctx->fsaturate_s, + .fsaturate_t = fd4_ctx->fsaturate_t, + .fsaturate_r = fd4_ctx->fsaturate_r, + }, + .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade, + }; + unsigned dirty; + + fixup_shader_state(ctx, &emit.key); + + dirty = ctx->dirty; + emit.dirty = dirty & ~(FD_DIRTY_BLEND); + draw_impl(ctx, ctx->binning_ring, &emit); + + /* and now regular (non-binning) pass: */ + emit.key.binning_pass = false; + emit.dirty = dirty; + emit.vp = NULL; /* we changed key so need to refetch vp */ + draw_impl(ctx, ctx->ring, &emit); +} + +/* clear operations ignore viewport state, so we need to reset it + * based on framebuffer state: + */ +static void +reset_viewport(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb) +{ + float half_width = pfb->width * 0.5f; + float half_height = pfb->height * 0.5f; + + OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 4); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(half_width)); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(half_width)); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(half_height)); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-half_height)); +} + +static void +fd4_clear(struct fd_context *ctx, unsigned buffers, + const union pipe_color_union *color, double depth, unsigned stencil) +{ + struct fd4_context *fd4_ctx = fd4_context(ctx); + struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + unsigned dirty = ctx->dirty; + unsigned ce, i; + struct fd4_emit emit = { + .vtx = &fd4_ctx->solid_vbuf_state, + .prog = &ctx->solid_prog, + .key = { + .half_precision = true, + }, + }; + uint32_t colr = 0; + + if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs) + colr = pack_rgba(pfb->cbufs[0]->format, color->f); + + dirty &= 
FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR; + dirty |= FD_DIRTY_PROG; + emit.dirty = dirty; + + OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); + + /* emit generic state now: */ + fd4_emit_state(ctx, ring, &emit); + reset_viewport(ring, pfb); + + if (buffers & PIPE_CLEAR_DEPTH) { + OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE | + A4XX_RB_DEPTH_CONTROL_Z_ENABLE | + A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS)); + + fd_wfi(ctx, ring); + OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0, 2); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0)); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(depth)); + ctx->dirty |= FD_DIRTY_VIEWPORT; + } else { + OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); + } + + if (buffers & PIPE_CLEAR_STENCIL) { + OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2); + OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(stencil) | + A4XX_RB_STENCILREFMASK_STENCILMASK(stencil) | + A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) | + A4XX_RB_STENCILREFMASK_STENCILMASK(0) | + 0xff000000 | // XXX ??? 
+ A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + + OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2); + OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) | + A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) | + A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | + A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */ + } else { + OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2); + OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) | + A4XX_RB_STENCILREFMASK_STENCILMASK(0) | + A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0)); + OUT_RING(ring, A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) | + A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) | + A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0)); + + OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2); + OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | + A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | + A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */ + } + + if (buffers & PIPE_CLEAR_COLOR) { + OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1); + OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER)); + ce = 0xf; + } else { + ce = 0x0; + } + + for (i = 0; i < 8; i++) { + OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1); + OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR | + A4XX_RB_MRT_CONTROL_B11 | + A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce)); + + OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1); + OUT_RING(ring, 
A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | + A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) | + A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) | + A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) | + A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO)); + } + + fd4_emit_vertex_bufs(ring, &emit); + + OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); + OUT_RING(ring, 0x0); /* XXX GRAS_ALPHA_CONTROL */ + + OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4); + OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW0 */ + OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW1 */ + OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW2 */ + OUT_RING(ring, colr); /* RB_CLEAR_COLOR_DW3 */ + + /* until fastclear works: */ + fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL); + + OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2); + OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ + OUT_RING(ring, 0); /* ??? 
UNKNOWN_2209 */ + + OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1); + OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */ + + OUT_PKT3(ring, CP_UNKNOWN_1A, 1); + OUT_RING(ring, 0x00000001); + + fd4_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); + + OUT_PKT3(ring, CP_UNKNOWN_1A, 1); + OUT_RING(ring, 0x00000000); + + OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1); + OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR); + + OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A4XX_GRAS_SC_CONTROL_MSAA_DISABLE | + A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A4XX_GRAS_SC_CONTROL_RASTER_MODE(0)); +} + +void +fd4_draw_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + ctx->draw_vbo = fd4_draw_vbo; + ctx->clear = fd4_clear; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h new file mode 100644 index 00000000000..f775cc77795 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h @@ -0,0 +1,122 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark
+ */
+
+#ifndef FD4_DRAW_H_
+#define FD4_DRAW_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_draw.h"
+
+void fd4_draw_init(struct pipe_context *pctx);
+
+/* draw packet changed on a4xx, so cannot reuse one from a2xx/a3xx.. */
+
+/* Pack the first dword of the a4xx draw packet (see fd4_draw()):
+ *   bits 0..   prim_type
+ *   bits 6..   source_select
+ *   bits 8..   vis_cull_mode
+ *   bit  11    low bit of index_size
+ *   bits 13..  remaining (upper) bits of index_size
+ */
+static inline uint32_t DRAW4(enum pc_di_primtype prim_type,
+		enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
+		enum pc_di_vis_cull_mode vis_cull_mode)
+{
+	return (prim_type << 0) |
+			(source_select << 6) |
+			((index_size & 1) << 11) |
+			((index_size >> 1) << 13) |
+			(vis_cull_mode << 8);
+}
+
+/* Emit one CP_DRAW_INDX_OFFSET packet, bracketed by scratch7 markers.
+ *
+ * The packet carries 3 dwords for a non-indexed draw and 6 when idx_bo is
+ * set (the extra three being 0x0, the index buffer reloc and idx_size).
+ * With vismode == USE_VISIBILITY the first dword is emitted with a blank
+ * vis mode and recorded in ctx->draw_patches so it can be patched later.
+ */
+static inline void
+fd4_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		enum pc_di_primtype primtype,
+		enum pc_di_vis_cull_mode vismode,
+		enum pc_di_src_sel src_sel, uint32_t count,
+		enum pc_di_index_size idx_type,
+		uint32_t idx_size, uint32_t idx_offset,
+		struct fd_bo *idx_bo)
+{
+	/* for debug after a lock up, write a unique counter value
+	 * to scratch7 for each draw, to make it easier to match up
+	 * register dumps to cmdstream. The combination of IB
+	 * (scratch6) and DRAW is enough to "triangulate" the
+	 * particular draw that caused lockup.
+	 */
+	emit_marker(ring, 7);
+
+	OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, idx_bo ? 6 : 3);
+	if (vismode == USE_VISIBILITY) {
+		/* leave vis mode blank for now, it will be patched up when
+		 * we know if we are binning or not
+		 */
+		OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
+				&ctx->draw_patches);
+	} else {
+		OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
+	}
+	OUT_RING(ring, 0x1); /* XXX */
+	OUT_RING(ring, count); /* NumIndices */
+	if (idx_bo) {
+		OUT_RING(ring, 0x0); /* XXX */
+		OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
+		OUT_RING (ring, idx_size);
+	}
+
+	emit_marker(ring, 7);
+
+	fd_reset_wfi(ctx);
+}
+
+/* Translate a gallium draw (info) into an fd4_draw() call.
+ *
+ * For indexed draws the index buffer object, index type, byte size and
+ * byte offset are derived from ctx->indexbuf and the source is DMA;
+ * otherwise auto-generated indices are used and no index buffer is passed.
+ * User-pointer index buffers are not handled here (asserted).
+ */
+static inline void
+fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		enum pc_di_vis_cull_mode vismode,
+		const struct pipe_draw_info *info)
+{
+	struct pipe_index_buffer *idx = &ctx->indexbuf;
+	struct fd_bo *idx_bo = NULL;
+	enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
+	enum pc_di_src_sel src_sel;
+	uint32_t idx_size, idx_offset;
+
+	if (info->indexed) {
+		assert(!idx->user_buffer);
+
+		idx_bo = fd_resource(idx->buffer)->bo;
+		idx_type = size2indextype(idx->index_size);
+		idx_size = idx->index_size * info->count;
+		idx_offset = idx->offset + (info->start * idx->index_size);
+		src_sel = DI_SRC_SEL_DMA;
+	} else {
+		idx_bo = NULL;
+		idx_type = INDEX_SIZE_IGN;
+		idx_size = 0;
+		idx_offset = 0;
+		src_sel = DI_SRC_SEL_AUTO_INDEX;
+	}
+
+	fd4_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
+			info->count, idx_type, idx_size, idx_offset, idx_bo);
+}
+
+#endif /* FD4_DRAW_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
new file mode 100644
index 00000000000..1a0986a1925
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -0,0 +1,625 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_helpers.h"
+#include "util/u_format.h"
+
+#include "freedreno_resource.h"
+
+#include "fd4_emit.h"
+#include "fd4_blend.h"
+#include "fd4_context.h"
+#include "fd4_program.h"
+#include "fd4_rasterizer.h"
+#include "fd4_texture.h"
+#include "fd4_util.h"
+#include "fd4_zsa.h"
+
+/* regid: base const register
+ * prsc or dwords: buffer containing constant values
+ * sizedwords: size of const value buffer
+ *
+ * Emits a single CP_LOAD_STATE packet. When prsc is given, the packet
+ * only carries a reloc to the buffer (at 'offset') and no inline payload;
+ * otherwise 'sizedwords' dwords are copied inline from 'dwords', starting
+ * 'offset' bytes into that array.
+ */
+void
+fd4_emit_constant(struct fd_ringbuffer *ring,
+		enum adreno_state_block sb,
+		uint32_t regid, uint32_t offset, uint32_t sizedwords,
+		const uint32_t *dwords, struct pipe_resource *prsc)
+{
+	uint32_t i, sz;
+	enum adreno_state_src src;
+
+	if (prsc) {
+		sz = 0;
+		src = 0x2; // TODO ??
+	} else {
+		sz = sizedwords;
+		src = SS_DIRECT;
+	}
+
+	OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz);
+	OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+			CP_LOAD_STATE_0_STATE_SRC(src) |
+			CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+			CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
+	if (prsc) {
+		struct fd_bo *bo = fd_resource(prsc)->bo;
+		OUT_RELOC(ring, bo, offset,
+				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
+	} else {
+		OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+				CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+		/* 'offset' is in bytes, so step through a byte pointer: */
+		dwords = (uint32_t *)&((uint8_t *)dwords)[offset];
+	}
+	for (i = 0; i < sz; i++) {
+		OUT_RING(ring, dwords[i]);
+	}
+}
+
+/* Emit the enabled user constant buffers for one shader stage, followed
+ * (if 'shader' is given) by that variant's immediates.
+ *
+ * 'shader' may be NULL, in which case immediates are not (re)emitted.
+ * Without a variant there is nothing to derive the first_immediate /
+ * constlen limit from, so user constants are then emitted untruncated.
+ * NOTE(review): a stricter fix would have the caller always pass the
+ * variant plus a separate "emit immediates" flag, so the limit is always
+ * known - that needs a matching change at the call sites.
+ */
+static void
+emit_constants(struct fd_ringbuffer *ring,
+		enum adreno_state_block sb,
+		struct fd_constbuf_stateobj *constbuf,
+		struct ir3_shader_variant *shader)
+{
+	uint32_t enabled_mask = constbuf->enabled_mask;
+	/* upper bound, in dwords, for user constants (unbounded by default): */
+	uint32_t max_user_dwords = ~0u;
+	uint32_t base = 0;
+
+	// XXX TODO only emit dirty consts.. but we need to keep track if
+	// they are clobbered by a clear, gmem2mem, or mem2gmem..
+	constbuf->dirty_mask = enabled_mask;
+
+	/* in particular, with binning shader we may end up with unused
+	 * consts, ie. we could end up w/ constlen that is smaller
+	 * than first_immediate. In that case truncate the user consts
+	 * early to avoid HLSQ lockup caused by writing too many consts
+	 */
+	if (shader) {
+		uint32_t first_immediate =
+				MIN2(shader->first_immediate, shader->constlen);
+		/* convert out of vec4: */
+		max_user_dwords = 4 * first_immediate;
+	}
+
+	/* emit user constants: */
+	while (enabled_mask) {
+		unsigned index = ffs(enabled_mask) - 1;
+		struct pipe_constant_buffer *cb = &constbuf->cb[index];
+		unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+		// I expect that size should be a multiple of vec4's:
+		assert(size == align(size, 4));
+
+		/* gallium could leave const buffers bound above what the
+		 * current shader uses.. don't let that confuse us.
+		 */
+		if (base >= max_user_dwords)
+			break;
+
+		if (constbuf->dirty_mask & (1 << index)) {
+			/* and even if the start of the const buffer is before
+			 * first_immediate, the end may not be:
+			 */
+			size = MIN2(size, max_user_dwords - base);
+			fd4_emit_constant(ring, sb, base,
+					cb->buffer_offset, size,
+					cb->user_buffer, cb->buffer);
+			constbuf->dirty_mask &= ~(1 << index);
+		}
+
+		base += size;
+		enabled_mask &= ~(1 << index);
+	}
+
+	/* emit shader immediates: */
+	if (shader) {
+		int size = shader->immediates_count;
+		base = shader->first_immediate;
+
+		/* truncate size to avoid writing constants that shader
+		 * does not use:
+		 */
+		size = MIN2(size + base, shader->constlen) - base;
+
+		/* convert out of vec4: */
+		base *= 4;
+		size *= 4;
+
+		if (size > 0) {
+			fd4_emit_constant(ring, sb, base,
+					0, size, shader->immediates[0].val, NULL);
+		}
+	}
+}
+
+/* Emit sampler and texture state for one texture state block.
+ *
+ * Unbound sampler / texture slots are filled from zero-initialized dummy
+ * state objects so that the per-slot layout of the packets is preserved.
+ */
+static void
+emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		enum adreno_state_block sb, struct fd_texture_stateobj *tex)
+{
+	unsigned i;
+
+	if (tex->num_samplers > 0) {
+		/* output sampler state: */
+		OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 + (2 * tex->num_samplers));
+		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+				CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
+		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+		for (i = 0; i < tex->num_samplers; i++) {
+			static const struct fd4_sampler_stateobj dummy_sampler = {};
+			const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ?
+					fd4_sampler_stateobj(tex->samplers[i]) :
+					&dummy_sampler;
+			OUT_RING(ring, sampler->texsamp0);
+			OUT_RING(ring, sampler->texsamp1);
+		}
+		/* maybe an a420.0 (or a4xx.0) workaround?? or just driver bug? */
+		OUT_RING(ring, 0x00000000);
+		OUT_RING(ring, 0x00000000);
+	}
+
+	if (tex->num_textures > 0) {
+		/* emit texture state: */
+		OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * tex->num_textures));
+		OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+				CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+				CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+				CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
+		OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+				CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+		for (i = 0; i < tex->num_textures; i++) {
+			static const struct fd4_pipe_sampler_view dummy_view = {};
+			const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
+					fd4_pipe_sampler_view(tex->textures[i]) :
+					&dummy_view;
+			struct fd_resource *rsc = view->tex_resource;
+			OUT_RING(ring, view->texconst0);
+			OUT_RING(ring, view->texconst1);
+			OUT_RING(ring, view->texconst2);
+			OUT_RING(ring, view->texconst3);
+			if (rsc) {
+				struct fd_resource_slice *slice = fd_resource_slice(rsc, 0);
+				OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+			} else {
+				/* unbound slot (dummy_view has no resource): there is
+				 * no bo to relocate against, so emit a zero address
+				 * to keep the 8 dwords per texture layout intact.
+				 */
+				OUT_RING(ring, 0x00000000);
+			}
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+			OUT_RING(ring, 0x00000000);
+		}
+	}
+}
+
+/* emit texture state for mem->gmem restore operation.. eventually it would
+ * be good to get rid of this and use normal CSO/etc state for more of these
+ * special cases..
+ */
+void
+fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
+{
+	/* TODO */
+}
+
+
+/* Emit vertex fetch/decode state for the vertex shader variant of 'emit'.
+ *
+ * One VFD_FETCH + VFD_DECODE_INSTR pair is emitted for each shader input
+ * with a non-zero compmask (packed into consecutive slots j), followed by
+ * VFD_CONTROL_0..4. Nothing is emitted when there are no vertex elements.
+ */
+void
+fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
+{
+	uint32_t i, j, last = 0;
+	uint32_t total_in = 0;
+	const struct fd_vertex_state *vtx = emit->vtx;
+	struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
+	unsigned n = MIN2(vtx->vtx->num_elements, vp->inputs_count);
+
+	/* hw doesn't like to be configured for zero vbo's, it seems: */
+	if (vtx->vtx->num_elements == 0)
+		return;
+
+	/* find the last input that is actually used, so that SWITCHNEXT
+	 * can be left off of the final fetch/decode instruction:
+	 */
+	for (i = 0; i < n; i++)
+		if (vp->inputs[i].compmask)
+			last = i;
+
+	for (i = 0, j = 0; i <= last; i++) {
+		if (vp->inputs[i].compmask) {
+			struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
+			const struct pipe_vertex_buffer *vb =
+					&vtx->vertexbuf.vb[elem->vertex_buffer_index];
+			struct fd_resource *rsc = fd_resource(vb->buffer);
+			enum pipe_format pfmt = elem->src_format;
+			enum a4xx_vtx_fmt fmt = fd4_pipe2vtx(pfmt);
+			bool switchnext = (i != last);
+			uint32_t fs = util_format_get_blocksize(pfmt);
+			uint32_t off = vb->buffer_offset + elem->src_offset;
+			/* bytes remaining in the buffer object past 'off': */
+			uint32_t size = fd_bo_size(rsc->bo) - off;
+			debug_assert(fmt != ~0);
+
+			OUT_PKT0(ring, REG_A4XX_VFD_FETCH(j), 4);
+			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
+					A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
+					COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
+			OUT_RELOC(ring, rsc->bo, off, 0, 0);
+			OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
+			OUT_RING(ring, 0x00000001);
+
+			OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(j), 1);
+			OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
+					A4XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
+					A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+					A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
+					A4XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
+					A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
+					A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+					COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+			total_in += vp->inputs[i].ncomp;
+			j++;
+		}
+	}
+
+	/* j is now the number of fetch/decode instructions emitted above */
+	OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
+	OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
+			0xa0000 | /* XXX */
+			A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
+			A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
+	OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
+			A4XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
+			A4XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
+	OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_2 */
+	OUT_RING(ring, 0x0000fc00); /* XXX VFD_CONTROL_3 */
+	OUT_RING(ring, 0x00000000); /* XXX VFD_CONTROL_4 */
+}
+
+/* Emit state for the groups flagged in emit->dirty, then clear the bits
+ * that were handled from ctx->dirty.
+ *
+ * The VERTTEX / FRAGTEX bits are dropped from the local 'dirty' mask when
+ * the corresponding shader variant has no samplers, so in that case they
+ * are left set in ctx->dirty.
+ */
+void
+fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		struct fd4_emit *emit)
+{
+	struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
+	struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
+	uint32_t dirty = emit->dirty;
+
+	emit_marker(ring, 5);
+
+	if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
+		uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control;
+
+		/* I suppose if we needed to (which I don't *think* we need
+		 * to), we could emit this for binning pass too. But we
+		 * would need to keep a different patch-list for binning
+		 * vs render pass.
+		 */
+
+		OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
+		OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
+	}
+
+	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+		struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
+		struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+		OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+		OUT_RING(ring, zsa->gras_alpha_control);
+
+		OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 1);
+		OUT_RING(ring, zsa->rb_stencil_control);
+
+		OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+		OUT_RING(ring, zsa->rb_stencilrefmask |
+				A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+		OUT_RING(ring, zsa->rb_stencilrefmask_bf |
+				A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
+	}
+
+	if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+		uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_depth_control;
+		/* depth control also depends on the fragment shader variant: */
+		if (fp->writes_pos) {
+			val |= A4XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
+			val |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+		}
+		if (fp->has_kill) {
+			val |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+		}
+		OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+		OUT_RING(ring, val);
+	}
+
+	if (dirty & FD_DIRTY_RASTERIZER) {
+		struct fd4_rasterizer_stateobj *rasterizer =
+				fd4_rasterizer_stateobj(ctx->rasterizer);
+
+		OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
+		OUT_RING(ring, rasterizer->gras_su_mode_control |
+				A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
+
+		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
+		OUT_RING(ring, rasterizer->gras_su_point_minmax);
+		OUT_RING(ring, rasterizer->gras_su_point_size);
+
+		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
+		OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
+		OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
+	}
+
+	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+		uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
+				->gras_cl_clip_cntl;
+		OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+		OUT_RING(ring, val);
+	}
+
+	/* NOTE: since primitive_restart is not actually part of any
+	 * state object, we need to make sure that we always emit
+	 * PRIM_VTX_CNTL.. either that or be more clever and detect
+	 * when it changes.
+	 */
+	if (emit->info) {
+		uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
+				->pc_prim_vtx_cntl;
+
+		val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
+		val |= COND(fp->total_in > 0, A4XX_PC_PRIM_VTX_CNTL_VAROUT);
+
+		OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
+		OUT_RING(ring, val);
+		OUT_RING(ring, 0x12); /* XXX UNKNOWN_21C5 */
+	}
+
+	if (dirty & FD_DIRTY_SCISSOR) {
+		struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+
+		OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
+		OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
+				A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
+		OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
+				A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
+
+		/* grow ctx->max_scissor to also cover this scissor rect: */
+		ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
+		ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
+		ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
+		ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
+	}
+
+	if (dirty & FD_DIRTY_VIEWPORT) {
+		fd_wfi(ctx, ring);
+		OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+		OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
+		OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
+		OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
+		OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
+		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
+		OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
+	}
+
+	if (dirty & FD_DIRTY_PROG)
+		fd4_program_emit(ring, emit);
+
+	if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
+			/* evil hack to deal sanely with clear path: */
+			(emit->prog == &ctx->prog)) {
+		fd_wfi(ctx, ring);
+		/* NOTE: the shader argument is NULL when that stage of the
+		 * program is not flagged dirty
+		 */
+		emit_constants(ring, SB_VERT_SHADER,
+				&ctx->constbuf[PIPE_SHADER_VERTEX],
+				(emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
+		if (!emit->key.binning_pass) {
+			emit_constants(ring, SB_FRAG_SHADER,
+					&ctx->constbuf[PIPE_SHADER_FRAGMENT],
+					(emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+		}
+	}
+
+	if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
+		struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
+		uint32_t i;
+
+		for (i = 0; i < 8; i++) {
+			OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
+			OUT_RING(ring, blend->rb_mrt[i].control);
+
+			OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
+			OUT_RING(ring, blend->rb_mrt[i].blend_control);
+		}
+	}
+
+	if (dirty & FD_DIRTY_VERTTEX) {
+		if (vp->has_samp)
+			emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex);
+		else
+			dirty &= ~FD_DIRTY_VERTTEX;
+	}
+
+	if (dirty & FD_DIRTY_FRAGTEX) {
+		if (fp->has_samp)
+			emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex);
+		else
+			dirty &= ~FD_DIRTY_FRAGTEX;
+	}
+
+	ctx->dirty &= ~dirty;
+}
+
+/* emit setup at begin of new cmdstream buffer (don't rely on previous
+ * state, there could have been a context switch between ioctls):
+ *
+ * Writes fixed values to the registers listed below on ctx->ring, points
+ * the VS/FS private memory registers at the context's buffers, and sets
+ * ctx->needs_rb_fbd.
+ */
+void
+fd4_emit_restore(struct fd_context *ctx)
+{
+	struct fd4_context *fd4_ctx = fd4_context(ctx);
+	struct fd_ringbuffer *ring = ctx->ring;
+
+	OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
+	OUT_RING(ring, 0x00000001);
+
+	OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC3, 1);
+	OUT_RING(ring, 0x00000006);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0F03, 1);
+	OUT_RING(ring, 0x0000003a);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
+	OUT_RING(ring, 0x00000001);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
+	OUT_RING(ring, 0x00000007);
+
+	OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
+	OUT_RING(ring, 0x00000000);
+	OUT_RING(ring, 0x00000012);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E05, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
+	OUT_RING(ring, 0x00000006);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
+	OUT_RING(ring, 0x00040000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+	OUT_RING(ring, 0x00001000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F3, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F4, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F5, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F6, 1);
+	OUT_RING(ring, 0x3c007fff);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1);
+	OUT_RING(ring, 0x3f800000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
+	OUT_RING(ring, 0x0000001d);
+
+	OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
+	OUT_RING(ring, 0x00000001);
+
+	OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
+	OUT_RING(ring, 0x00000000);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_2381, 1);
+	OUT_RING(ring, 0x00000010);
+
+	OUT_PKT0(ring, REG_A4XX_UNKNOWN_23A0, 1);
+	OUT_RING(ring, 0x00000010);
+
+	/* we don't use this yet.. probably best to disable.. */
+	OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
+	OUT_RING(ring, CP_SET_DRAW_STATE_0_COUNT(0) |
+			CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS |
+			CP_SET_DRAW_STATE_0_GROUP_ID(0));
+	OUT_RING(ring, CP_SET_DRAW_STATE_1_ADDR(0));
+
+	OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
+	OUT_RING(ring, 0x08000001); /* SP_VS_PVT_MEM_PARAM */
+	OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */
+
+	OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
+	OUT_RING(ring, 0x08000001); /* SP_FS_PVT_MEM_PARAM */
+	OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */
+
+	OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+	OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+			A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+			A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+			A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+	OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
+	OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
+			A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));
+
+	OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
+	OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
+			A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
+
+	OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
+	OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
+
+	OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
+	OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
+
+	OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL3, 1);
+	OUT_RING(ring, A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE(0xf));
+
+	OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
+	OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_COLOR_PIPE_ENABLE);
+
+	ctx->needs_rb_fbd = true;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
new file mode 100644
index 00000000000..c5fb24d8d13
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
@@ -0,0 +1,91 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark
+ */
+
+#ifndef FD4_EMIT_H
+#define FD4_EMIT_H
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+#include "fd4_util.h"
+#include "fd4_program.h"
+#include "ir3_shader.h"
+
+struct fd_ringbuffer;
+/* NOTE(review): forward-declaring an enum is a compiler extension, not
+ * ISO C - presumably the definition comes in via the includes above.
+ */
+enum adreno_state_block;
+
+/* Emit a CP_LOAD_STATE packet loading 'sizedwords' dwords of constants at
+ * const register 'regid', sourced either from 'prsc' (by reloc) or inline
+ * from 'dwords'; 'offset' is a byte offset into whichever source is used.
+ */
+void fd4_emit_constant(struct fd_ringbuffer *ring,
+		enum adreno_state_block sb,
+		uint32_t regid, uint32_t offset, uint32_t sizedwords,
+		const uint32_t *dwords, struct pipe_resource *prsc);
+
+void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
+		struct pipe_surface *psurf);
+
+/* grouped together emit-state for prog/vertex/state emit: */
+struct fd4_emit {
+	const struct fd_vertex_state *vtx;
+	const struct fd_program_stateobj *prog;
+	const struct pipe_draw_info *info;
+	/* key used to look up the shader variants below: */
+	struct ir3_shader_key key;
+	uint32_t dirty;
+	bool rasterflat;
+
+	/* cached to avoid repeated lookups of same variants: */
+	struct ir3_shader_variant *vp, *fp;
+	/* TODO: other shader stages.. */
+};
+
+/* Return the vertex shader variant for emit->key, looking it up on first
+ * use and caching the result in emit->vp.
+ */
+static inline struct ir3_shader_variant *
+fd4_emit_get_vp(struct fd4_emit *emit)
+{
+	if (!emit->vp) {
+		struct fd4_shader_stateobj *so = emit->prog->vp;
+		emit->vp = ir3_shader_variant(so->shader, emit->key);
+	}
+	return emit->vp;
+}
+
+/* Return the fragment shader variant for emit->key, looking it up on
+ * first use and caching the result in emit->fp.
+ */
+static inline struct ir3_shader_variant *
+fd4_emit_get_fp(struct fd4_emit *emit)
+{
+	if (!emit->fp) {
+		struct fd4_shader_stateobj *so = emit->prog->fp;
+		emit->fp = ir3_shader_variant(so->shader, emit->key);
+	}
+	return emit->fp;
+}
+
+void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit);
+
+void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+		struct fd4_emit *emit);
+
+void fd4_emit_restore(struct fd_context *ctx);
+
+#endif /* FD4_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
new file mode 100644
index 00000000000..8cb6bc4cb6c
--- /dev/null
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -0,0 +1,415 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4;
indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Rob Clark
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "freedreno_draw.h"
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd4_gmem.h"
+#include "fd4_context.h"
+#include "fd4_draw.h"
+#include "fd4_emit.h"
+#include "fd4_program.h"
+#include "fd4_util.h"
+#include "fd4_zsa.h"
+
+/* shader key used for the internal (non-user) draws in this file: */
+static const struct ir3_shader_key key = {
+	// XXX should set this based on render target format! We don't
+	// want half_precision if float32 render target!!!
+	.half_precision = true,
+};
+
+/* Program RB_MRT_BUF_INFO (3 dwords each) for all 8 MRT slots.
+ *
+ * nr_bufs/bufs: the bound color surfaces; entries may be NULL
+ * bases:        optional per-MRT gmem base, only used when bin_w != 0
+ * bin_w:        non-zero to set up for rendering into gmem (pitch is
+ *               derived from the bin width); zero to point the MRT at
+ *               the surface's buffer object instead
+ *
+ * Slots without a surface (i >= nr_bufs, or a NULL entry) get a zero
+ * format/stride and a plain base dword rather than a reloc.
+ */
+static void
+emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
+		struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
+{
+	unsigned i;
+
+	for (i = 0; i < 8; i++) {
+		enum a4xx_color_fmt format = 0;
+		enum a3xx_color_swap swap = WZYX;
+		struct fd_resource *rsc = NULL;
+		struct fd_resource_slice *slice = NULL;
+		uint32_t stride = 0;
+		uint32_t base = 0;
+		uint32_t layer_offset = 0;
+
+		if ((i < nr_bufs) && bufs[i]) {
+			struct pipe_surface *psurf = bufs[i];
+
+			rsc = fd_resource(psurf->texture);
+			slice = &rsc->slices[psurf->u.tex.level];
+			format = fd4_pipe2color(psurf->format);
+			swap = fd4_pipe2swap(psurf->format);
+
+			debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+			layer_offset = slice->size0 * psurf->u.tex.first_layer;
+
+			if (bin_w) {
+				stride = bin_w * rsc->cpp;
+
+				if (bases) {
+					base = bases[i];
+				}
+			} else {
+				stride = slice->pitch * rsc->cpp;
+			}
+		}
+
+		OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
+		OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+				0x80 | /* XXX not on gmem2mem?? tile-mode? */
+				A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
+				A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
+		/* a NULL entry inside nr_bufs leaves rsc/slice unset, so it
+		 * must not take the reloc path below:
+		 */
+		if (bin_w || (i >= nr_bufs) || !bufs[i]) {
+			OUT_RING(ring, base);
+		} else {
+			OUT_RELOCW(ring, rsc->bo,
+					slice->offset + layer_offset, 0, -1);
+		}
+		OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
+	}
+}
+
+/* Offset of the depth buffer in gmem: the size of one bin of color data
+ * (bin_w * bin_h * cpp, with cpp taken from cbufs[0] or 4 if there is no
+ * color buffer) rounded up to a multiple of 0x4000.
+ */
+static uint32_t
+depth_base(struct fd_context *ctx)
+{
+	struct fd_gmem_stateobj *gmem = &ctx->gmem;
+	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+	uint32_t cpp = 4;
+	if (pfb->cbufs[0]) {
+		struct fd_resource *rsc =
+				fd_resource(pfb->cbufs[0]->texture);
+		cpp = rsc->cpp;
+	}
+	return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
+}
+
+/* transfer from gmem to system memory (ie.
normal RAM) */ + +static void +emit_gmem2mem_surf(struct fd_context *ctx, + uint32_t base, struct pipe_surface *psurf) +{ + struct fd_ringbuffer *ring = ctx->ring; + struct fd_resource *rsc = fd_resource(psurf->texture); + struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level]; + + OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4); + OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) | + A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) | + A4XX_RB_COPY_CONTROL_GMEM_BASE(base)); + OUT_RELOCW(ring, rsc->bo, slice->offset, 0, 0); /* RB_COPY_DEST_BASE */ + OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp)); + OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) | + A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) | + A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) | + A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) | + A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(psurf->format))); + + fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, + DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL); +} + +static void +fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile) +{ + struct fd4_context *fd4_ctx = fd4_context(ctx); + struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd4_emit emit = { + .vtx = &fd4_ctx->solid_vbuf_state, + .prog = &ctx->solid_prog, + .key = key, + }; + + OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1); + OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER)); + + OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 1); + OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) | + A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) | + A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) | + A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP)); + + OUT_PKT0(ring, 
REG_A4XX_RB_STENCILREFMASK, 2); + OUT_RING(ring, 0xff000000 | + A4XX_RB_STENCILREFMASK_STENCILREF(0) | + A4XX_RB_STENCILREFMASK_STENCILMASK(0) | + A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); + OUT_RING(ring, 0xff000000 | + A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) | + A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) | + A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); + + OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1); + OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0)); + + fd_wfi(ctx, ring); + + OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1); + OUT_RING(ring, 0x80000); /* GRAS_CL_CLIP_CNTL */ + + OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0)); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0)); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0)); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0)); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0)); + OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0)); + + OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1); + OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE | + 0xa); /* XXX */ + + OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) | + A4XX_GRAS_SC_CONTROL_MSAA_DISABLE | + A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A4XX_GRAS_SC_CONTROL_RASTER_MODE(1)); + + OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1); + OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST); + + OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1); + OUT_RING(ring, 0x00000002); + + OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2); + OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) | + A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1)); + OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) | + A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0)); + + OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2); + OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */ + 
OUT_RING(ring, 0); /* ??? UNKNOWN_2209 */ + + fd4_program_emit(ring, &emit); + fd4_emit_vertex_bufs(ring, &emit); + + if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) { + uint32_t base = depth_base(ctx); + emit_gmem2mem_surf(ctx, base, pfb->zsbuf); + } + + if (ctx->resolve & FD_BUFFER_COLOR) { + emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]); + } + + OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1); + OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) | + A4XX_GRAS_SC_CONTROL_MSAA_DISABLE | + A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) | + A4XX_GRAS_SC_CONTROL_RASTER_MODE(0)); +} + +/* transfer from system memory to gmem */ + +static void +fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile) +{ + /* TODO */ +} + +static void +patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode) +{ + unsigned i; + for (i = 0; i < fd_patch_num_elements(&ctx->draw_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&ctx->draw_patches, i); + *patch->cs = patch->val | DRAW4(0, 0, 0, vismode); + } + util_dynarray_resize(&ctx->draw_patches, 0); +} + +static void +patch_rbrc(struct fd_context *ctx, uint32_t val) +{ + struct fd4_context *fd4_ctx = fd4_context(ctx); + unsigned i; + for (i = 0; i < fd_patch_num_elements(&fd4_ctx->rbrc_patches); i++) { + struct fd_cs_patch *patch = fd_patch_element(&fd4_ctx->rbrc_patches, i); + *patch->cs = patch->val | val; + } + util_dynarray_resize(&fd4_ctx->rbrc_patches, 0); +} + +static void +update_vsc_pipe(struct fd_context *ctx) +{ + struct fd4_context *fd4_ctx = fd4_context(ctx); + struct fd_ringbuffer *ring = ctx->ring; + int i; + + OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1); + OUT_RELOCW(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */ + + OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8); + for (i = 0; i < 8; i++) { + struct fd_vsc_pipe *pipe = &ctx->pipe[i]; + OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) | + A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) | + 
A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) | + A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h)); + } + + OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8); + for (i = 0; i < 8; i++) { + struct fd_vsc_pipe *pipe = &ctx->pipe[i]; + if (!pipe->bo) { + pipe->bo = fd_bo_new(ctx->dev, 0x40000, + DRM_FREEDRENO_GEM_TYPE_KMEM); + } + OUT_RELOCW(ring, pipe->bo, 0, 0, 0); /* VSC_PIPE_DATA_ADDRESS[i] */ + } + + OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8); + for (i = 0; i < 8; i++) { + struct fd_vsc_pipe *pipe = &ctx->pipe[i]; + OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */ + } +} + +/* before first tile */ +static void +fd4_emit_tile_init(struct fd_context *ctx) +{ + struct fd_ringbuffer *ring = ctx->ring; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + uint32_t rb_render_control; + + fd4_emit_restore(ctx); + + OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1); + OUT_RING(ring, A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) | + A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h)); + + OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1); + OUT_RING(ring, A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) | + A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) | + 0x00010000); /* XXX */ + + update_vsc_pipe(ctx); + patch_draws(ctx, IGNORE_VISIBILITY); + + rb_render_control = 0; // XXX or BINNING_PASS.. 
but maybe we can emit only from gmem + patch_rbrc(ctx, rb_render_control); +} + +/* before mem2gmem */ +static void +fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile) +{ + struct fd_ringbuffer *ring = ctx->ring; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + uint32_t reg; + + OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3); + reg = A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx)); + if (pfb->zsbuf) { + reg |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format)); + } + OUT_RING(ring, reg); + if (pfb->zsbuf) { + OUT_RING(ring, A4XX_RB_DEPTH_PITCH(gmem->bin_w)); + OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(gmem->bin_w)); + } else { + OUT_RING(ring, 0x00000000); + OUT_RING(ring, 0x00000000); + } + + if (pfb->zsbuf) { + OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1); + OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT( + fd_pipe2depth(pfb->zsbuf->format))); + } + + if (ctx->needs_rb_fbd) { + fd_wfi(ctx, ring); + OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1); + OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) | + A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height)); + ctx->needs_rb_fbd = false; + } +} + +/* before IB to rendering cmds: */ +static void +fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile) +{ + struct fd_ringbuffer *ring = ctx->ring; + struct fd_gmem_stateobj *gmem = &ctx->gmem; + struct pipe_framebuffer_state *pfb = &ctx->framebuffer; + + uint32_t x1 = tile->xoff; + uint32_t y1 = tile->yoff; + uint32_t x2 = tile->xoff + tile->bin_w - 1; + uint32_t y2 = tile->yoff + tile->bin_h - 1; + + OUT_PKT3(ring, CP_SET_BIN, 3); + OUT_RING(ring, 0x00000000); + OUT_RING(ring, CP_SET_BIN_1_X1(x1) | CP_SET_BIN_1_Y1(y1)); + OUT_RING(ring, CP_SET_BIN_2_X2(x2) | CP_SET_BIN_2_Y2(y2)); + + emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w); + + /* setup scissor/offset for current tile: */ + OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1); + OUT_RING(ring, 
A4XX_RB_BIN_OFFSET_X(tile->xoff) | + A4XX_RB_BIN_OFFSET_Y(tile->yoff)); + + OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2); + OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(x1) | + A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(y1)); + OUT_RING(ring, A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(x2) | + A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(y2)); +} + /* Install the a4xx tile callbacks (init, prep, mem2gmem, renderprep and + * gmem2mem) on the fd_context that backs pctx. + */ +void +fd4_gmem_init(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + + ctx->emit_tile_init = fd4_emit_tile_init; + ctx->emit_tile_prep = fd4_emit_tile_prep; + ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem; + ctx->emit_tile_renderprep = fd4_emit_tile_renderprep; + ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.h b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.h new file mode 100644 index 00000000000..8964714d79b --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.h @@ -0,0 +1,36 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD4_GMEM_H_ +#define FD4_GMEM_H_ + +#include "pipe/p_context.h" + +void fd4_gmem_init(struct pipe_context *pctx); + +#endif /* FD4_GMEM_H_ */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c new file mode 100644 index 00000000000..591a1d87012 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c @@ -0,0 +1,480 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" + +#include "freedreno_program.h" + +#include "fd4_program.h" +#include "fd4_emit.h" +#include "fd4_texture.h" +#include "fd4_util.h" + +/* Destroy the ir3 shader owned by so, then free so itself. */ +static void +delete_shader_stateobj(struct fd4_shader_stateobj *so) +{ + ir3_shader_destroy(so->shader); + free(so); +} + +/* Allocate a shader state object and create its ir3 shader of the given type + * from the TGSI tokens in cso. + * + * Returns the new object, or NULL if the state object cannot be allocated; + * the NULL is handed back unchanged by fd4_fp_state_create() and + * fd4_vp_state_create(). + */ +static struct fd4_shader_stateobj * +create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, + enum shader_t type) +{ + struct fd4_shader_stateobj *so = CALLOC_STRUCT(fd4_shader_stateobj); + if (!so) + return NULL; + so->shader = ir3_shader_create(pctx, cso->tokens, type); + return so; +} + +static void * +fd4_fp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); +} + +static void +fd4_fp_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd4_shader_stateobj *so = hwcso; + delete_shader_stateobj(so); +} + +static void * +fd4_vp_state_create(struct pipe_context *pctx, + const struct pipe_shader_state *cso) +{ + return create_shader_stateobj(pctx, cso, SHADER_VERTEX); +} + +static void +fd4_vp_state_delete(struct pipe_context *pctx, void *hwcso) +{ + struct fd4_shader_stateobj *so = hwcso; + delete_shader_stateobj(so); +} + +static void +emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) +{ + const struct ir3_info *si = &so->info; + enum adreno_state_block sb; + enum adreno_state_src src; + uint32_t i, sz, *bin; + + if (so->type == SHADER_VERTEX) { + sb = SB_VERT_SHADER; + } else { + sb = SB_FRAG_SHADER; + } + + if (fd_mesa_debug & FD_DBG_DIRECT) { + sz = si->sizedwords; + src = SS_DIRECT; + bin = fd_bo_map(so->bo); + } else { + sz = 0; + src = 2; // enums different on a4xx.. 
+ bin = NULL; + } + + OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); + OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | + CP_LOAD_STATE_0_STATE_SRC(src) | + CP_LOAD_STATE_0_STATE_BLOCK(sb) | + CP_LOAD_STATE_0_NUM_UNIT(so->instrlen)); + if (bin) { + OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | + CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER)); + } else { + OUT_RELOC(ring, so->bo, 0, + CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0); + } + for (i = 0; i < sz; i++) { + OUT_RING(ring, bin[i]); + } +} + +struct stage { + const struct ir3_shader_variant *v; + const struct ir3_info *i; + /* const sizes are in units of 4 * vec4 */ + uint8_t constoff; + uint8_t constlen; + /* instr sizes are in units of 16 instructions */ + uint8_t instroff; + uint8_t instrlen; +}; + +enum { + VS = 0, + FS = 1, + HS = 2, + DS = 3, + GS = 4, + MAX_STAGES +}; + +static void +setup_stages(struct fd4_emit *emit, struct stage *s) +{ + unsigned i; + + s[VS].v = fd4_emit_get_vp(emit); + + if (emit->key.binning_pass) { + /* use dummy stateobj to simplify binning vs non-binning: */ + static const struct ir3_shader_variant binning_fp = {}; + s[FS].v = &binning_fp; + } else { + s[FS].v = fd4_emit_get_fp(emit); + } + + s[HS].v = s[DS].v = s[GS].v = NULL; /* for now */ + + for (i = 0; i < MAX_STAGES; i++) { + if (s[i].v) { + s[i].i = &s[i].v->info; + /* constlen is in units of 4 * vec4: */ + s[i].constlen = align(s[i].v->constlen, 4) / 4; + /* instrlen is already in units of 16 instr.. although + * probably we should ditch that and not make the compiler + * care about instruction group size of a3xx vs a4xx + */ + s[i].instrlen = s[i].v->instrlen; + } else { + s[i].i = NULL; + s[i].constlen = 0; + s[i].instrlen = 0; + } + } + + /* NOTE: at least for gles2, blob partitions VS at bottom of const + * space and FS taking entire remaining space. 
We probably don't + * need to do that the same way, but for now mimic what the blob + * does to make it easier to diff against register values from blob + */ + s[VS].constlen = 66; + s[FS].constlen = 128 - s[VS].constlen; + s[VS].instroff = 0; + s[VS].constoff = 0; + s[FS].instroff = 64 - s[FS].instrlen; + s[FS].constoff = s[VS].constlen; + s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff; + s[HS].constoff = s[DS].constoff = s[GS].constoff = s[FS].constoff; +} + +void +fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit) +{ + struct stage s[MAX_STAGES]; + uint32_t pos_regid, posz_regid, psize_regid, color_regid; + int constmode; + int i, j, k; + + setup_stages(emit, s); + + /* blob seems to always use constmode currently: */ + constmode = 1; + + pos_regid = ir3_find_output_regid(s[VS].v, + ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); + posz_regid = ir3_find_output_regid(s[FS].v, + ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); + psize_regid = ir3_find_output_regid(s[VS].v, + ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); + color_regid = ir3_find_output_regid(s[FS].v, + ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); + + /* we could probably divide this up into things that need to be + * emitted if frag-prog is dirty vs if vert-prog is dirty.. + */ + + OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1); + OUT_RING(ring, 0x00000003); + + OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 4); + OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | + A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | + A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE | + /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe + * flush some caches? I think we only need to set those + * bits if we have updated const or shader.. 
+ */ + A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | + A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); + OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | + 0xfcfc0000 | /* XXX */ + A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | + COND(s[FS].v->frag_coord, A4XX_HLSQ_CONTROL_1_REG_ZWCOORD)); + OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); + OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid)); + + OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5); + OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) | + A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) | + A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) | + A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff)); + OUT_RING(ring, A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) | + A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) | + A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) | + A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff)); + OUT_RING(ring, A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) | + A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) | + A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) | + A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff)); + OUT_RING(ring, A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) | + A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) | + A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) | + A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff)); + OUT_RING(ring, A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) | + A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) | + A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) | + A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff)); + + OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1); + OUT_RING(ring, 0x140010 | /* XXX */ + COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS)); + + OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1); + OUT_RING(ring, 0x1c3); /* XXX 
SP_INSTR_CACHE_CTRL */ + + OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1); + OUT_RING(ring, s[VS].v->instrlen); /* SP_VS_LENGTH_REG */ + + OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3); + OUT_RING(ring, A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) | + A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) | + A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) | + A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | + A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | + A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | + COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE)); + OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) | + A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in)); + OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | + A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | + A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(s[FS].v->total_in, 4) / 4)); + + for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) { + uint32_t reg = 0; + + OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1); + + j = ir3_next_varying(s[FS].v, j); + if (j < s[FS].v->inputs_count) { + k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic); + reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid); + reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask); + } + + j = ir3_next_varying(s[FS].v, j); + if (j < s[FS].v->inputs_count) { + k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic); + reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid); + reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask); + } + + OUT_RING(ring, reg); + } + + for (i = 0, j = -1; (i < 8) && (j < (int)s[FS].v->inputs_count); i++) { + uint32_t reg = 0; + + OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1); + + j = ir3_next_varying(s[FS].v, j); + if (j < s[FS].v->inputs_count) + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(s[FS].v->inputs[j].inloc); + j = ir3_next_varying(s[FS].v, j); + if (j < s[FS].v->inputs_count) + reg |= 
A4XX_SP_VS_VPC_DST_REG_OUTLOC1(s[FS].v->inputs[j].inloc); + j = ir3_next_varying(s[FS].v, j); + if (j < s[FS].v->inputs_count) + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(s[FS].v->inputs[j].inloc); + j = ir3_next_varying(s[FS].v, j); + if (j < s[FS].v->inputs_count) + reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(s[FS].v->inputs[j].inloc); + + OUT_RING(ring, reg); + } + + OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2); + OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) | + A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff)); + OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ + + OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1); + OUT_RING(ring, s[FS].v->instrlen); /* SP_FS_LENGTH_REG */ + + OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2); + OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | + COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) | + A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) | + A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) | + A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | + A4XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | + A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | + COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE)); + OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) | + 0x80000000 | /* XXX */ + COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING)); + + OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2); + OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) | + A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff)); + if (emit->key.binning_pass) + OUT_RING(ring, 0x00000000); + else + OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ + + OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1); + OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) | + A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff)); + + OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1); + OUT_RING(ring, 
A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) | + A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff)); + + OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1); + OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) | + A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff)); + + OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL2, 1); + OUT_RING(ring, A4XX_RB_MSAA_CONTROL2_MSAA_SAMPLES(0) | + COND(s[FS].v->total_in > 0, A4XX_RB_MSAA_CONTROL2_VARYING)); + + OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1); + if (s[FS].v->writes_pos) { + OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE | + A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid)); + } else { + OUT_RING(ring, 0x00000001); + } + + OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8); + OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid) | +// XXX do we need to patch? or update when RT format changes.. maybe +// move this to emit?? + A4XX_SP_FS_MRT_REG_MRTFORMAT(RB4_R8G8B8A8_UNORM) | // XXX patch? + COND(s[FS].v->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION)); + OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); + OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); + OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); + OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); + OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); + OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); + OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0)); + + if (emit->key.binning_pass) { + OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); + OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) | + 0x40000000 | /* XXX */ + COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE)); + OUT_RING(ring, 0x00000000); + } else { + uint32_t vinterp[8] = {0}, flatshade[2] = {0}; + + /* figure out VARYING_INTERP / FLAT_SHAD register values: */ + for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) { + uint32_t interp = s[FS].v->inputs[j].interpolate; + if ((interp == TGSI_INTERPOLATE_CONSTANT) || + ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) { + /* TODO 
might be cleaner to just +8 in SP_VS_VPC_DST_REG + * instead.. rather than -8 everywhere else.. + */ + uint32_t loc = s[FS].v->inputs[j].inloc - 8; + + /* currently assuming varyings aligned to 4 (not + * packed): + */ + debug_assert((loc % 4) == 0); + + for (i = 0; i < 4; i++, loc++) { + vinterp[loc / 16] |= 1 << ((loc % 16) * 2); + flatshade[loc / 32] |= 1 << (loc % 32); + } + } + } + + OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2); + OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) | + A4XX_VPC_ATTR_THRDASSIGN(1) | + COND(s[FS].v->total_in > 0, A4XX_VPC_ATTR_ENABLE) | + 0x40000000 | /* XXX */ + COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE)); + OUT_RING(ring, A4XX_VPC_PACK_NUMFPNONPOSVAR(s[FS].v->total_in) | + A4XX_VPC_PACK_NUMNONPOSVSVAR(s[FS].v->total_in)); + + OUT_PKT0(ring, REG_A4XX_VPC_VARYING_INTERP_MODE(0), 8); + for (i = 0; i < 8; i++) + OUT_RING(ring, vinterp[i]); /* VPC_VARYING_INTERP[i].MODE */ + + OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8); + for (i = 0; i < 8; i++) + OUT_RING(ring, s[FS].v->shader->vpsrepl[i]); /* VPC_VARYING_PS_REPL[i] */ + } + + emit_shader(ring, s[VS].v); + + if (!emit->key.binning_pass) + emit_shader(ring, s[FS].v); +} + +/* hack.. until we figure out how to deal w/ vpsrepl properly.. 
*/ +static void +fix_blit_fp(struct pipe_context *pctx) +{ + struct fd_context *ctx = fd_context(pctx); + struct fd4_shader_stateobj *so = ctx->blit_prog.fp; + + so->shader->vpsrepl[0] = 0x99999999; + so->shader->vpsrepl[1] = 0x99999999; + so->shader->vpsrepl[2] = 0x99999999; + so->shader->vpsrepl[3] = 0x99999999; +} + /* Hook up the a4xx fragment/vertex shader create and delete callbacks on + * pctx, run the shared fd_prog_init(), then apply the fix_blit_fp() hack. + * NOTE(review): fix_blit_fp() dereferences ctx->blit_prog.fp, so it + * presumably relies on fd_prog_init() having set up the blit program + * first; confirm before reordering these calls. + */ +void +fd4_prog_init(struct pipe_context *pctx) +{ + pctx->create_fs_state = fd4_fp_state_create; + pctx->delete_fs_state = fd4_fp_state_delete; + + pctx->create_vs_state = fd4_vp_state_create; + pctx->delete_vs_state = fd4_vp_state_delete; + + fd_prog_init(pctx); + + fix_blit_fp(pctx); +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.h b/src/gallium/drivers/freedreno/a4xx/fd4_program.h new file mode 100644 index 00000000000..52306a4c60d --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.h @@ -0,0 +1,46 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD4_PROGRAM_H_ +#define FD4_PROGRAM_H_ + +#include "pipe/p_context.h" +#include "freedreno_context.h" +#include "ir3_shader.h" + +struct fd4_shader_stateobj { + struct ir3_shader *shader; +}; + +struct fd4_emit; + +void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit); + +void fd4_prog_init(struct pipe_context *pctx); + +#endif /* FD4_PROGRAM_H_ */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c new file mode 100644 index 00000000000..9a50626aaeb --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.c @@ -0,0 +1,39 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "freedreno_query_hw.h" +#include "freedreno_context.h" +#include "freedreno_util.h" + +#include "fd4_query.h" +#include "fd4_util.h" + +void fd4_query_context_init(struct pipe_context *pctx) +{ + /* TODO */ +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.h b/src/gallium/drivers/freedreno/a4xx/fd4_query.h new file mode 100644 index 00000000000..a2e91569a46 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_query.h @@ -0,0 +1,36 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD4_QUERY_H_ +#define FD4_QUERY_H_ + +#include "pipe/p_context.h" + +void fd4_query_context_init(struct pipe_context *pctx); + +#endif /* FD4_QUERY_H_ */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c new file mode 100644 index 00000000000..b363cb79a28 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c @@ -0,0 +1,94 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd4_rasterizer.h" +#include "fd4_context.h" +#include "fd4_util.h" + /* Build the a4xx rasterizer state object for cso. + * + * Copies cso into so->base and precomputes the GRAS_SU_*, GRAS_CL_CLIP_CNTL + * and PC_PRIM_VTX_CNTL register values from its fields. Returns the new + * object, or NULL if the allocation fails. + */ +void * +fd4_rasterizer_state_create(struct pipe_context *pctx, + const struct pipe_rasterizer_state *cso) +{ + struct fd4_rasterizer_stateobj *so; + float psize_min, psize_max; + + so = CALLOC_STRUCT(fd4_rasterizer_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + if (cso->point_size_per_vertex) { /* per-vertex size: min from util_get_min_point_size(), max fixed at 8192 */ + psize_min = util_get_min_point_size(cso); + psize_max = 8192; + } else { + /* Force the point size to be as if the vertex output was disabled. */ + psize_min = cso->point_size; + psize_max = cso->point_size; + } + +/* + if (cso->line_stipple_enable) { + ??? TODO line stipple + } + TODO cso->half_pixel_center + if (cso->multisample) + TODO +*/ + so->gras_cl_clip_cntl = 0x80000; /* ??? */ + so->gras_su_point_minmax = /* point sizes are halved before being packed */ + A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) | + A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2); + so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size/2); + so->gras_su_poly_offset_scale = + A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale); + so->gras_su_poly_offset_offset = + A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units); + + so->gras_su_mode_control = + A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0); + + if (cso->cull_face & PIPE_FACE_FRONT) + so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT; + if (cso->cull_face & PIPE_FACE_BACK) + so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK; + if (!cso->front_ccw) + so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW; + if (!cso->flatshade_first) /* pc_prim_vtx_cntl has no earlier assignment in this function; NOTE(review): presumably zeroed by CALLOC_STRUCT -- confirm */ + so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST; + + if (cso->offset_tri) + so->gras_su_mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET; + + return so; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h 
b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h new file mode 100644 index 00000000000..06c728f2f1f --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h @@ -0,0 +1,56 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
#ifndef FD4_RASTERIZER_H_
#define FD4_RASTERIZER_H_

#include "pipe/p_state.h"
#include "pipe/p_context.h"

/* a4xx rasterizer state: the gallium CSO plus register words packed
 * from it by fd4_rasterizer_state_create().
 */
struct fd4_rasterizer_stateobj {
	/* copy of the CSO; kept as the first member, which is what the
	 * pointer cast in fd4_rasterizer_stateobj() relies on
	 */
	struct pipe_rasterizer_state base;
	uint32_t gras_su_point_minmax;
	uint32_t gras_su_point_size;
	uint32_t gras_su_poly_offset_scale;
	uint32_t gras_su_poly_offset_offset;

	uint32_t gras_su_mode_control;
	uint32_t gras_cl_clip_cntl;
	uint32_t pc_prim_vtx_cntl;
};

/* Reinterpret a rasterizer CSO pointer as the enclosing a4xx state
 * object.  No checking is done; the caller must pass the 'base' of an
 * object created by fd4_rasterizer_state_create().
 */
static INLINE struct fd4_rasterizer_stateobj *
fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
{
	return (struct fd4_rasterizer_stateobj *)rast;
}

/* Allocate and fill a fd4_rasterizer_stateobj from 'cso'. */
void * fd4_rasterizer_state_create(struct pipe_context *pctx,
		const struct pipe_rasterizer_state *cso);

#endif /* FD4_RASTERIZER_H_ */
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "pipe/p_screen.h" +#include "util/u_format.h" + +#include "fd4_screen.h" +#include "fd4_context.h" +#include "fd4_util.h" + +static boolean +fd4_screen_is_format_supported(struct pipe_screen *pscreen, + enum pipe_format format, + enum pipe_texture_target target, + unsigned sample_count, + unsigned usage) +{ + unsigned retval = 0; + + if ((target >= PIPE_MAX_TEXTURE_TYPES) || + (sample_count > 1) || /* TODO add MSAA */ + !util_format_is_supported(format, usage)) { + DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x", + util_format_name(format), target, sample_count, usage); + return FALSE; + } + + if ((usage & PIPE_BIND_VERTEX_BUFFER) && + (fd4_pipe2vtx(format) != ~0)) { + retval |= PIPE_BIND_VERTEX_BUFFER; + } + + if ((usage & PIPE_BIND_SAMPLER_VIEW) && + (fd4_pipe2tex(format) != ~0)) { + retval |= PIPE_BIND_SAMPLER_VIEW; + } + + if ((usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED)) && + (fd4_pipe2color(format) != ~0) && + (fd4_pipe2tex(format) != ~0)) { + retval |= usage & (PIPE_BIND_RENDER_TARGET | + PIPE_BIND_DISPLAY_TARGET | + PIPE_BIND_SCANOUT | + PIPE_BIND_SHARED); + } + + if ((usage & PIPE_BIND_DEPTH_STENCIL) && + (fd_pipe2depth(format) != ~0) && + (fd4_pipe2tex(format) != ~0)) { + retval |= PIPE_BIND_DEPTH_STENCIL; + } + + if ((usage & PIPE_BIND_INDEX_BUFFER) && + (fd_pipe2index(format) != ~0)) { + retval |= PIPE_BIND_INDEX_BUFFER; + } + + if (usage & PIPE_BIND_TRANSFER_READ) + retval |= PIPE_BIND_TRANSFER_READ; + if (usage & PIPE_BIND_TRANSFER_WRITE) + retval |= PIPE_BIND_TRANSFER_WRITE; + + if (retval != usage) { + DBG("not supported: format=%s, target=%d, 
sample_count=%d, " + "usage=%x, retval=%x", util_format_name(format), + target, sample_count, usage, retval); + } + + return retval == usage; +} + +void +fd4_screen_init(struct pipe_screen *pscreen) +{ + pscreen->context_create = fd4_context_create; + pscreen->is_format_supported = fd4_screen_is_format_supported; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.h b/src/gallium/drivers/freedreno/a4xx/fd4_screen.h new file mode 100644 index 00000000000..09b68ef20c4 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_screen.h @@ -0,0 +1,36 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef FD4_SCREEN_H_ +#define FD4_SCREEN_H_ + +#include "pipe/p_screen.h" + +void fd4_screen_init(struct pipe_screen *pscreen); + +#endif /* FD4_SCREEN_H_ */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c new file mode 100644 index 00000000000..fc9c8735815 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c @@ -0,0 +1,190 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_format.h" + +#include "fd4_texture.h" +#include "fd4_util.h" + +/* TODO do we need to emulate clamp-to-edge like a3xx? 
*/ +static enum a4xx_tex_clamp +tex_clamp(unsigned wrap) +{ + /* hardware probably supports more, but we can't coax all the + * wrap/clamp modes out of the GLESv2 blob driver. + * + * TODO once we have basics working, go back and just try + * different values and see what happens + */ + switch (wrap) { + case PIPE_TEX_WRAP_REPEAT: + return A4XX_TEX_REPEAT; + case PIPE_TEX_WRAP_CLAMP: + case PIPE_TEX_WRAP_CLAMP_TO_EDGE: + return A4XX_TEX_CLAMP_TO_EDGE; + case PIPE_TEX_WRAP_CLAMP_TO_BORDER: +// TODO +// return A4XX_TEX_CLAMP_TO_BORDER; + case PIPE_TEX_WRAP_MIRROR_CLAMP: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: + case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: +// TODO +// return A4XX_TEX_MIRROR_CLAMP; + case PIPE_TEX_WRAP_MIRROR_REPEAT: + return A4XX_TEX_MIRROR_REPEAT; + default: + DBG("invalid wrap: %u", wrap); + return 0; + } +} + +static enum a4xx_tex_filter +tex_filter(unsigned filter) +{ + switch (filter) { + case PIPE_TEX_FILTER_NEAREST: + return A4XX_TEX_NEAREST; + case PIPE_TEX_FILTER_LINEAR: + return A4XX_TEX_LINEAR; + default: + DBG("invalid filter: %u", filter); + return 0; + } +} + +static void * +fd4_sampler_state_create(struct pipe_context *pctx, + const struct pipe_sampler_state *cso) +{ + struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj); + + if (!so) + return NULL; + + so->base = *cso; + + so->texsamp0 = + A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) | + A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) | + A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) | + A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) | + A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r)); + + if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) { + so->texsamp1 = + A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) | + A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod); + } else { + so->texsamp1 = 0x00000000; + } + + if (cso->compare_mode) + so->texsamp1 |= A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */ + + return so; +} + +static enum a4xx_tex_type 
+tex_type(unsigned target) +{ + switch (target) { + default: + assert(0); + case PIPE_BUFFER: + case PIPE_TEXTURE_1D: + case PIPE_TEXTURE_1D_ARRAY: + return A4XX_TEX_1D; + case PIPE_TEXTURE_RECT: + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_2D_ARRAY: + return A4XX_TEX_2D; + case PIPE_TEXTURE_3D: + return A4XX_TEX_3D; + case PIPE_TEXTURE_CUBE: + case PIPE_TEXTURE_CUBE_ARRAY: + return A4XX_TEX_CUBE; + } +} + +static struct pipe_sampler_view * +fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc, + const struct pipe_sampler_view *cso) +{ + struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view); + struct fd_resource *rsc = fd_resource(prsc); + unsigned lvl = cso->u.tex.first_level; + + if (!so) + return NULL; + + so->base = *cso; + pipe_reference(NULL, &prsc->reference); + so->base.texture = prsc; + so->base.reference.count = 1; + so->base.context = pctx; + + so->tex_resource = rsc; + + so->texconst0 = + A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) | + A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) | + fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g, + cso->swizzle_b, cso->swizzle_a); + + so->texconst1 = + A4XX_TEX_CONST_1_WIDTH(prsc->width0) | + A4XX_TEX_CONST_1_HEIGHT(prsc->height0); + so->texconst2 = + A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp); + + switch (prsc->target) { + case PIPE_TEXTURE_1D_ARRAY: + case PIPE_TEXTURE_2D_ARRAY: + case PIPE_TEXTURE_3D: + so->texconst3 = + A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0); + break; + default: + so->texconst3 = 0x00000000; + break; + } + + return &so->base; +} + +void +fd4_texture_init(struct pipe_context *pctx) +{ + pctx->create_sampler_state = fd4_sampler_state_create; + pctx->bind_sampler_states = fd_sampler_states_bind; + pctx->create_sampler_view = fd4_sampler_view_create; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h new file mode 100644 index 00000000000..3592b1049b1 --- 
/dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h @@ -0,0 +1,68 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
#ifndef FD4_TEXTURE_H_
#define FD4_TEXTURE_H_

#include "pipe/p_context.h"

#include "freedreno_texture.h"
#include "freedreno_resource.h"

#include "fd4_context.h"
#include "fd4_util.h"

/* a4xx sampler state: the gallium CSO plus the two sampler words
 * (filter/wrap in texsamp0, LOD range/compare func in texsamp1) packed
 * by the create_sampler_state hook.
 */
struct fd4_sampler_stateobj {
	/* first member, which the cast in fd4_sampler_stateobj() relies on */
	struct pipe_sampler_state base;
	uint32_t texsamp0, texsamp1;
};

/* Reinterpret a sampler CSO pointer as the enclosing a4xx state object
 * (unchecked cast).
 */
static INLINE struct fd4_sampler_stateobj *
fd4_sampler_stateobj(struct pipe_sampler_state *samp)
{
	return (struct fd4_sampler_stateobj *)samp;
}

/* a4xx sampler view: the gallium view, the freedreno resource it
 * samples from, and the four texture constant words packed by the
 * create_sampler_view hook.
 */
struct fd4_pipe_sampler_view {
	/* first member, which the cast in fd4_pipe_sampler_view() relies on */
	struct pipe_sampler_view base;
	struct fd_resource *tex_resource;
	uint32_t texconst0, texconst1, texconst2, texconst3;
};

/* Reinterpret a sampler view pointer as the enclosing a4xx view
 * (unchecked cast).
 */
static INLINE struct fd4_pipe_sampler_view *
fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
{
	return (struct fd4_pipe_sampler_view *)pview;
}

/* NOTE(review): only declared here; no definition is visible alongside
 * this header, so the semantics of the returned index need confirming
 * against the implementation.
 */
unsigned fd4_get_const_idx(struct fd_context *ctx,
		struct fd_texture_stateobj *tex, unsigned samp_id);

/* Install the a4xx sampler state / sampler view hooks into 'pctx'. */
void fd4_texture_init(struct pipe_context *pctx);

#endif /* FD4_TEXTURE_H_ */
#include "pipe/p_defines.h"
#include "util/u_format.h"

#include "fd4_util.h"

/* convert pipe format to vertex buffer format:
 *
 * Returns ~0 for any format without a known a4xx vertex fetch format;
 * callers test for support by comparing the result against ~0.
 */
enum a4xx_vtx_fmt
fd4_pipe2vtx(enum pipe_format format)
{
	switch (format) {
	/* 8-bit buffers. */
	case PIPE_FORMAT_R8_UNORM:
		return VFMT4_NORM_UBYTE_8;

	case PIPE_FORMAT_R8_SNORM:
		return VFMT4_NORM_BYTE_8;

	case PIPE_FORMAT_R8_UINT:
	case PIPE_FORMAT_R8_USCALED:
		return VFMT4_UBYTE_8;

	case PIPE_FORMAT_R8_SINT:
	case PIPE_FORMAT_R8_SSCALED:
		return VFMT4_BYTE_8;

	/* 16-bit buffers. */
	case PIPE_FORMAT_R16_UNORM:
	case PIPE_FORMAT_Z16_UNORM:
		return VFMT4_NORM_USHORT_16;

	case PIPE_FORMAT_R16_SNORM:
		return VFMT4_NORM_SHORT_16;

	case PIPE_FORMAT_R16_UINT:
	case PIPE_FORMAT_R16_USCALED:
		return VFMT4_USHORT_16;

	case PIPE_FORMAT_R16_SINT:
	case PIPE_FORMAT_R16_SSCALED:
		return VFMT4_SHORT_16;

	case PIPE_FORMAT_R16_FLOAT:
		return VFMT4_FLOAT_16;

	case PIPE_FORMAT_R8G8_UNORM:
		return VFMT4_NORM_UBYTE_8_8;

	case PIPE_FORMAT_R8G8_SNORM:
		return VFMT4_NORM_BYTE_8_8;

	case PIPE_FORMAT_R8G8_UINT:
	case PIPE_FORMAT_R8G8_USCALED:
		return VFMT4_UBYTE_8_8;

	case PIPE_FORMAT_R8G8_SINT:
	case PIPE_FORMAT_R8G8_SSCALED:
		return VFMT4_BYTE_8_8;

	/* 24-bit buffers. */
	case PIPE_FORMAT_R8G8B8_UNORM:
		return VFMT4_NORM_UBYTE_8_8_8;

	case PIPE_FORMAT_R8G8B8_SNORM:
		return VFMT4_NORM_BYTE_8_8_8;

	case PIPE_FORMAT_R8G8B8_UINT:
	case PIPE_FORMAT_R8G8B8_USCALED:
		return VFMT4_UBYTE_8_8_8;

	case PIPE_FORMAT_R8G8B8_SINT:
	case PIPE_FORMAT_R8G8B8_SSCALED:
		return VFMT4_BYTE_8_8_8;

	/* 32-bit buffers. */
	case PIPE_FORMAT_A8B8G8R8_UNORM:
	case PIPE_FORMAT_A8R8G8B8_UNORM:
	case PIPE_FORMAT_B8G8R8A8_UNORM:
	case PIPE_FORMAT_R8G8B8A8_UNORM:
		return VFMT4_NORM_UBYTE_8_8_8_8;

	case PIPE_FORMAT_R8G8B8A8_SNORM:
		return VFMT4_NORM_BYTE_8_8_8_8;

	case PIPE_FORMAT_R8G8B8A8_UINT:
	case PIPE_FORMAT_R8G8B8A8_USCALED:
		return VFMT4_UBYTE_8_8_8_8;

	case PIPE_FORMAT_R8G8B8A8_SINT:
	case PIPE_FORMAT_R8G8B8A8_SSCALED:
		return VFMT4_BYTE_8_8_8_8;

	case PIPE_FORMAT_R16G16_SSCALED:
	case PIPE_FORMAT_R16G16_SINT:
		return VFMT4_SHORT_16_16;

	case PIPE_FORMAT_R16G16_FLOAT:
		return VFMT4_FLOAT_16_16;

	case PIPE_FORMAT_R16G16_UINT:
	case PIPE_FORMAT_R16G16_USCALED:
		return VFMT4_USHORT_16_16;

	case PIPE_FORMAT_R16G16_UNORM:
		return VFMT4_NORM_USHORT_16_16;

	case PIPE_FORMAT_R16G16_SNORM:
		return VFMT4_NORM_SHORT_16_16;

	case PIPE_FORMAT_R10G10B10A2_UNORM:
		return VFMT4_NORM_UINT_10_10_10_2;

	case PIPE_FORMAT_R10G10B10A2_SNORM:
		return VFMT4_NORM_INT_10_10_10_2;

	case PIPE_FORMAT_R10G10B10A2_UINT:
	case PIPE_FORMAT_R10G10B10A2_USCALED:
		return VFMT4_UINT_10_10_10_2;

	case PIPE_FORMAT_R10G10B10A2_SSCALED:
		return VFMT4_INT_10_10_10_2;

	/* 48-bit buffers. */
	case PIPE_FORMAT_R16G16B16_FLOAT:
		return VFMT4_FLOAT_16_16_16;

	case PIPE_FORMAT_R16G16B16_SINT:
	case PIPE_FORMAT_R16G16B16_SSCALED:
		return VFMT4_SHORT_16_16_16;

	case PIPE_FORMAT_R16G16B16_UINT:
	case PIPE_FORMAT_R16G16B16_USCALED:
		return VFMT4_USHORT_16_16_16;

	case PIPE_FORMAT_R16G16B16_SNORM:
		return VFMT4_NORM_SHORT_16_16_16;

	case PIPE_FORMAT_R16G16B16_UNORM:
		return VFMT4_NORM_USHORT_16_16_16;

	/* (single 32-bit channel formats, listed here in the 48-bit group) */
	case PIPE_FORMAT_R32_FLOAT:
	case PIPE_FORMAT_Z32_FLOAT:
		return VFMT4_FLOAT_32;

	case PIPE_FORMAT_R32_FIXED:
		return VFMT4_FIXED_32;

	/* 64-bit buffers. */
	case PIPE_FORMAT_R16G16B16A16_UNORM:
		return VFMT4_NORM_USHORT_16_16_16_16;

	case PIPE_FORMAT_R16G16B16A16_SNORM:
		return VFMT4_NORM_SHORT_16_16_16_16;

	case PIPE_FORMAT_R16G16B16A16_UINT:
	case PIPE_FORMAT_R16G16B16A16_USCALED:
		return VFMT4_USHORT_16_16_16_16;

	case PIPE_FORMAT_R16G16B16A16_SINT:
	case PIPE_FORMAT_R16G16B16A16_SSCALED:
		return VFMT4_SHORT_16_16_16_16;

	case PIPE_FORMAT_R32G32_FLOAT:
		return VFMT4_FLOAT_32_32;

	case PIPE_FORMAT_R32G32_FIXED:
		return VFMT4_FIXED_32_32;

	case PIPE_FORMAT_R16G16B16A16_FLOAT:
		return VFMT4_FLOAT_16_16_16_16;

	/* 96-bit buffers. */
	case PIPE_FORMAT_R32G32B32_FLOAT:
		return VFMT4_FLOAT_32_32_32;

	case PIPE_FORMAT_R32G32B32_FIXED:
		return VFMT4_FIXED_32_32_32;

	/* 128-bit buffers. */
	case PIPE_FORMAT_R32G32B32A32_FLOAT:
		return VFMT4_FLOAT_32_32_32_32;

	case PIPE_FORMAT_R32G32B32A32_FIXED:
		return VFMT4_FIXED_32_32_32_32;

/* TODO probably need gles3 blob drivers to find the 32bit int formats:
	case PIPE_FORMAT_R32G32B32A32_SNORM:
	case PIPE_FORMAT_R32G32B32A32_UNORM:
	case PIPE_FORMAT_R32G32B32A32_SINT:
	case PIPE_FORMAT_R32G32B32A32_UINT:

	case PIPE_FORMAT_R32_UINT:
	case PIPE_FORMAT_R32_SINT:
	case PIPE_FORMAT_A32_UINT:
	case PIPE_FORMAT_A32_SINT:
	case PIPE_FORMAT_L32_UINT:
	case PIPE_FORMAT_L32_SINT:
	case PIPE_FORMAT_I32_UINT:
	case PIPE_FORMAT_I32_SINT:

	case PIPE_FORMAT_R32G32_SINT:
	case PIPE_FORMAT_R32G32_UINT:
	case PIPE_FORMAT_L32A32_UINT:
	case PIPE_FORMAT_L32A32_SINT:
*/

	default:
		/* not a known vertex format */
		return ~0;
	}
}

/* convert pipe format to texture sampler format:
 *
 * Returns ~0 for any format not listed.  sRGB variants map to the same
 * sampler format as their UNORM counterparts here.
 */
enum a4xx_tex_fmt
fd4_pipe2tex(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_L8_UNORM:
	case PIPE_FORMAT_A8_UNORM:
	case PIPE_FORMAT_I8_UNORM:
		return TFMT4_NORM_UINT_8;

	case PIPE_FORMAT_B8G8R8A8_UNORM:
	case PIPE_FORMAT_B8G8R8X8_UNORM:
	case PIPE_FORMAT_R8G8B8A8_UNORM:
	case PIPE_FORMAT_R8G8B8X8_UNORM:
	case PIPE_FORMAT_B8G8R8A8_SRGB:
	case PIPE_FORMAT_B8G8R8X8_SRGB:
	case PIPE_FORMAT_R8G8B8A8_SRGB:
	case PIPE_FORMAT_R8G8B8X8_SRGB:
		return TFMT4_NORM_UINT_8_8_8_8;

	case PIPE_FORMAT_Z24X8_UNORM:
		return TFMT4_NORM_UINT_X8Z24;

	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
		return TFMT4_NORM_UINT_8_8_8_8;

//	case PIPE_FORMAT_Z16_UNORM:
//		return TFMT4_NORM_UINT_8_8;
//
	case PIPE_FORMAT_R16G16B16A16_FLOAT:
	case PIPE_FORMAT_R16G16B16X16_FLOAT:
		return TFMT4_FLOAT_16_16_16_16;

	case PIPE_FORMAT_R32G32B32A32_FLOAT:
	case PIPE_FORMAT_R32G32B32X32_FLOAT:
		return TFMT4_FLOAT_32_32_32_32;

	// TODO add more..

	default:
		/* not a known texture format */
		return ~0;
	}
}

/* convert pipe format to MRT / copydest format used for render-target:
 *
 * Returns ~0 for any format not listed.
 */
enum a4xx_color_fmt
fd4_pipe2color(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_B8G8R8A8_UNORM:
	case PIPE_FORMAT_B8G8R8X8_UNORM:
	case PIPE_FORMAT_R8G8B8A8_UNORM:
		return RB4_R8G8B8A8_UNORM;

	case PIPE_FORMAT_Z16_UNORM:
		return RB4_Z16_UNORM;

	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
		/* for DEPTHX_24_8, blob driver also seems to use R8G8B8A8 fmt.. */
		return RB4_R8G8B8A8_UNORM;

	case PIPE_FORMAT_R8_UNORM:
	case PIPE_FORMAT_L8_UNORM:
	case PIPE_FORMAT_A8_UNORM:
		return RB4_A8_UNORM;
//
//	case PIPE_FORMAT_R16G16B16A16_FLOAT:
//	case PIPE_FORMAT_R16G16B16X16_FLOAT:
//		return RB4_R16G16B16A16_FLOAT;
//
//	case PIPE_FORMAT_R32G32B32A32_FLOAT:
//	case PIPE_FORMAT_R32G32B32X32_FLOAT:
//		return RB4_R32G32B32A32_FLOAT;

	// TODO add more..

	default:
		/* not a known render target format */
		return ~0;
	}
}

/* we need to special case a bit the depth/stencil restore, because we are
 * using the texture sampler to blit into the depth/stencil buffer, *not*
 * into a color buffer.  Otherwise fd4_tex_swiz() will do the wrong thing,
 * as it is assuming that you are sampling into normal render target..
 *
 * Depth/stencil formats are replaced by PIPE_FORMAT_B8G8R8A8_UNORM; every
 * other format is returned unchanged.
 */
enum pipe_format
fd4_gmem_restore_format(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
	case PIPE_FORMAT_Z16_UNORM:
		return PIPE_FORMAT_B8G8R8A8_UNORM;
	default:
		return format;
	}
}

/* TODO share w/ a3xx?? */
/* Select the component swap for a pipe format.  Formats that are not
 * listed explicitly (and the RGBA / depth formats listed with 'default')
 * use WZYX.
 */
enum a3xx_color_swap
fd4_pipe2swap(enum pipe_format format)
{
	switch (format) {
	case PIPE_FORMAT_B8G8R8A8_UNORM:
	case PIPE_FORMAT_B8G8R8X8_UNORM:
	case PIPE_FORMAT_B8G8R8A8_SRGB:
	case PIPE_FORMAT_B8G8R8X8_SRGB:
		return WXYZ;

	case PIPE_FORMAT_A8R8G8B8_UNORM:
	case PIPE_FORMAT_X8R8G8B8_UNORM:
	case PIPE_FORMAT_A8R8G8B8_SRGB:
	case PIPE_FORMAT_X8R8G8B8_SRGB:
		return ZYXW;

	case PIPE_FORMAT_A8B8G8R8_UNORM:
	case PIPE_FORMAT_X8B8G8R8_UNORM:
	case PIPE_FORMAT_A8B8G8R8_SRGB:
	case PIPE_FORMAT_X8B8G8R8_SRGB:
		return XYZW;

	case PIPE_FORMAT_R8G8B8A8_UNORM:
	case PIPE_FORMAT_R8G8B8X8_UNORM:
	case PIPE_FORMAT_Z24X8_UNORM:
	case PIPE_FORMAT_Z24_UNORM_S8_UINT:
	default:
		return WZYX;
	}
}

/* Map one PIPE_SWIZZLE_x selector to the a4xx swizzle encoding.  Any
 * unrecognized selector is treated like PIPE_SWIZZLE_RED (the 'default'
 * label shares the RED case).
 */
static inline enum a4xx_tex_swiz
tex_swiz(unsigned swiz)
{
	switch (swiz) {
	default:
	case PIPE_SWIZZLE_RED:   return A4XX_TEX_X;
	case PIPE_SWIZZLE_GREEN: return A4XX_TEX_Y;
	case PIPE_SWIZZLE_BLUE:  return A4XX_TEX_Z;
	case PIPE_SWIZZLE_ALPHA: return A4XX_TEX_W;
	case PIPE_SWIZZLE_ZERO:  return A4XX_TEX_ZERO;
	case PIPE_SWIZZLE_ONE:   return A4XX_TEX_ONE;
	}
}

/* Build the swizzle bits of TEX_CONST_0 for a sampler view: the view's
 * requested swizzle is composed with the format's own swizzle (from the
 * format description) and the result is packed into the SWIZ_X..SWIZ_W
 * fields.
 */
uint32_t
fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
		unsigned swizzle_b, unsigned swizzle_a)
{
	const struct util_format_description *desc =
			util_format_description(format);
	unsigned char swiz[4] = {
			swizzle_r, swizzle_g, swizzle_b, swizzle_a,
	}, rswiz[4];

	/* rswiz = view swizzle applied on top of the format swizzle */
	util_format_compose_swizzles(desc->swizzle, swiz, rswiz);

	return A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
			A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
			A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
			A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
}
+ * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Rob Clark + */ + +#ifndef FD4_UTIL_H_ +#define FD4_UTIL_H_ + +#include "freedreno_util.h" + +#include "a4xx.xml.h" + +enum a4xx_vtx_fmt fd4_pipe2vtx(enum pipe_format format); +enum a4xx_tex_fmt fd4_pipe2tex(enum pipe_format format); +enum a4xx_color_fmt fd4_pipe2color(enum pipe_format format); +enum pipe_format fd4_gmem_restore_format(enum pipe_format format); +enum a3xx_color_swap fd4_pipe2swap(enum pipe_format format); + +uint32_t fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, + unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a); + +#endif /* FD4_UTIL_H_ */ diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_zsa.c b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.c new file mode 100644 index 00000000000..6f09ec9d047 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.c @@ -0,0 +1,105 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + + +#include "pipe/p_state.h" +#include "util/u_string.h" +#include "util/u_memory.h" + +#include "fd4_zsa.h" +#include "fd4_context.h" +#include "fd4_util.h" + +void * +fd4_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso) +{ + struct fd4_zsa_stateobj *so; + + so = CALLOC_STRUCT(fd4_zsa_stateobj); + if (!so) + return NULL; + + so->base = *cso; + + so->rb_depth_control |= + A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */ + + if (cso->depth.enabled) + so->rb_depth_control |= + A4XX_RB_DEPTH_CONTROL_Z_ENABLE | + A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE; + + if (cso->depth.writemask) + so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE; + + if (cso->stencil[0].enabled) { + const struct pipe_stencil_state *s = &cso->stencil[0]; + + so->rb_stencil_control |= + A4XX_RB_STENCIL_CONTROL_STENCIL_READ | + A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | + A4XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */ + A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) | + A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) | + A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op)); + so->rb_stencilrefmask |= + 0xff000000 | /* ??? 
*/ + A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) | + A4XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask); + + if (cso->stencil[1].enabled) { + const struct pipe_stencil_state *bs = &cso->stencil[1]; + + so->rb_stencil_control |= + A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | + A4XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */ + A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) | + A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) | + A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op)); + so->rb_stencilrefmask_bf |= + 0xff000000 | /* ??? */ + A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(bs->writemask) | + A4XX_RB_STENCILREFMASK_BF_STENCILMASK(bs->valuemask); + } + } + + if (cso->alpha.enabled) { + so->gras_alpha_control = + A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE; + so->rb_alpha_control = + A4XX_RB_ALPHA_CONTROL_ALPHA_TEST | + A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func); + so->rb_depth_control |= + A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE; + } + + so->rb_render_control = 0x8; /* XXX */ + + return so; +} diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h new file mode 100644 index 00000000000..aea12047c49 --- /dev/null +++ b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h @@ -0,0 +1,58 @@ +/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ + +/* + * Copyright (C) 2014 Rob Clark + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be 
included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#ifndef FD4_ZSA_H_ +#define FD4_ZSA_H_ + + +#include "pipe/p_state.h" +#include "pipe/p_context.h" + +#include "freedreno_util.h" + +struct fd4_zsa_stateobj { + struct pipe_depth_stencil_alpha_state base; + uint32_t gras_alpha_control; + uint32_t rb_alpha_control; + uint32_t rb_render_control; + uint32_t rb_depth_control; + uint32_t rb_stencil_control; + uint32_t rb_stencilrefmask; + uint32_t rb_stencilrefmask_bf; +}; + +static INLINE struct fd4_zsa_stateobj * +fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa) +{ + return (struct fd4_zsa_stateobj *)zsa; +} + +void * fd4_zsa_state_create(struct pipe_context *pctx, + const struct pipe_depth_stencil_alpha_state *cso); + +#endif /* FD4_ZSA_H_ */ diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c index e873af92943..ce105b8786b 100644 --- a/src/gallium/drivers/freedreno/freedreno_screen.c +++ b/src/gallium/drivers/freedreno/freedreno_screen.c @@ -52,6 +52,7 @@ #include "a2xx/fd2_screen.h" #include "a3xx/fd3_screen.h" +#include "a4xx/fd4_screen.h" /* XXX this should go away */ #include "state_tracker/drm_driver.h" @@ -514,7 +515,7 @@ fd_screen_create(struct fd_device *dev) * before enabling: * * If you have a different adreno version, feel free to add it to one - * of the two cases below and see what happens. 
And if it works, please + * of the cases below and see what happens. And if it works, please * send a patch ;-) */ switch (screen->gpu_id) { @@ -525,6 +526,9 @@ fd_screen_create(struct fd_device *dev) case 330: fd3_screen_init(pscreen); break; + case 420: + fd4_screen_init(pscreen); + break; default: debug_printf("unsupported GPU: a%03d\n", screen->gpu_id); goto fail; diff --git a/src/gallium/drivers/freedreno/ir3/ir3.c b/src/gallium/drivers/freedreno/ir3/ir3.c index 60d4e4a15d5..41112460155 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.c +++ b/src/gallium/drivers/freedreno/ir3/ir3.c @@ -540,7 +540,8 @@ static int (*emit[])(struct ir3_instruction *instr, void *ptr, emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6, }; -void * ir3_assemble(struct ir3 *shader, struct ir3_info *info) +void * ir3_assemble(struct ir3 *shader, struct ir3_info *info, + uint32_t gpu_id) { uint32_t *ptr, *dwords; uint32_t i; @@ -550,11 +551,15 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info) info->max_const = -1; info->instrs_count = 0; - /* need a integer number of instruction "groups" (sets of four - * instructions), so pad out w/ NOPs if needed: - * (each instruction is 64bits) + /* need an integer number of instruction "groups" (sets of 16 + * instructions on a4xx or sets of 4 instructions on a3xx), + * so pad out w/ NOPs if needed: (NOTE each instruction is 64 bits) */ - info->sizedwords = 2 * align(shader->instrs_count, 4); + if (gpu_id >= 400) { + info->sizedwords = 2 * align(shader->instrs_count, 16); + } else { + info->sizedwords = 2 * align(shader->instrs_count, 4); + } ptr = dwords = calloc(4, info->sizedwords); diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index 8a5e9fd687c..06bad6e26fc 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -264,7 +264,7 @@ struct ir3_block { struct ir3 * ir3_create(void); void ir3_destroy(struct ir3 
*shader); void * ir3_assemble(struct ir3 *shader, - struct ir3_info *info); + struct ir3_info *info, uint32_t gpu_id); void * ir3_alloc(struct ir3 *shader, int sz); struct ir3_block * ir3_block_create(struct ir3 *shader, diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c index 7de29f33d88..f28ce27a00d 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c @@ -49,7 +49,8 @@ static void dump_info(struct ir3_shader_variant *so, const char *str) const char *type = (so->type == SHADER_VERTEX) ? "VERT" : "FRAG"; // for debug, dump some before/after info: - bin = ir3_assemble(so->ir, &info); + // TODO make gpu_id configurable on cmdline + bin = ir3_assemble(so->ir, &info, 320); if (fd_mesa_debug & FD_DBG_DISASM) { struct ir3_block *block = so->ir->block; struct ir3_register *reg; diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c index 1f7e869d9f3..0c74f2f26f2 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c @@ -56,7 +56,7 @@ assemble_variant(struct ir3_shader_variant *v) struct fd_context *ctx = fd_context(v->shader->pctx); uint32_t sz, *bin; - bin = ir3_assemble(v->ir, &v->info); + bin = ir3_assemble(v->ir, &v->info, ctx->screen->gpu_id); sz = v->info.sizedwords * 4; v->bo = fd_bo_new(ctx->dev, sz, @@ -67,7 +67,11 @@ assemble_variant(struct ir3_shader_variant *v) free(bin); - v->instrlen = v->info.sizedwords / 8; + if (ctx->screen->gpu_id >= 400) { + v->instrlen = v->info.sizedwords / (2 * 16); + } else { + v->instrlen = v->info.sizedwords / (2 * 4); + } /* NOTE: if relative addressing is used, we set constlen in * the compiler (to worst-case value) since we don't know in diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h index 3d51603fcfb..f70886e2d3b 100644 --- 
a/src/gallium/drivers/freedreno/ir3/ir3_shader.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h @@ -111,7 +111,8 @@ struct ir3_shader_variant { struct ir3 *ir; /* the instructions length is in units of instruction groups - * (4 instructions, 8 dwords): + * (4 instructions for a3xx, 16 instructions for a4xx.. each + * instruction is 2 dwords): */ unsigned instrlen; @@ -203,7 +204,7 @@ struct ir3_shader { /* so far, only used for blit_prog shader.. values for * VPC_VARYING_PS_REPL[i].MODE */ - uint32_t vpsrepl[4]; + uint32_t vpsrepl[8]; };