freedreno: add adreno 420 support
author Rob Clark <robclark@freedesktop.org>
Thu, 31 Jul 2014 19:42:55 +0000 (15:42 -0400)
committer Rob Clark <robclark@freedesktop.org>
Sat, 15 Nov 2014 13:30:31 +0000 (08:30 -0500)
Very initial support.  Basic stuff working (es2gears, es2tri, and maybe
about half of glmark2).  Expect broken stuff.  Still missing: mem->gmem
(restore), queries, mipmaps (blob segfaults!), hw binning, etc.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
32 files changed:
src/gallium/drivers/freedreno/Makefile.am
src/gallium/drivers/freedreno/Makefile.sources
src/gallium/drivers/freedreno/a4xx/fd4_blend.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_blend.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_context.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_context.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_draw.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_draw.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_emit.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_emit.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_gmem.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_gmem.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_program.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_program.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_query.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_query.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_screen.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_screen.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_texture.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_texture.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_util.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_util.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_zsa.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a4xx/fd4_zsa.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/freedreno/ir3/ir3.c
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
src/gallium/drivers/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index ee5d506f2a388f8cd13ea52371e30312792b1cac..df00add1cb05a251da4e82664cfa094852e8c6f0 100644 (file)
@@ -15,6 +15,7 @@ libfreedreno_la_SOURCES = \
        $(C_SOURCES) \
        $(a2xx_SOURCES) \
        $(a3xx_SOURCES) \
+       $(a4xx_SOURCES) \
        $(ir3_SOURCES)
 
 noinst_PROGRAMS = ir3_compiler
index edaaadfb4781ae57fd28efc309c5668da510f2d1..9d2710cc0fc0ae2946d303b45021b2c18b0f5aaf 100644 (file)
@@ -89,6 +89,20 @@ a3xx_SOURCES := \
        a3xx/fd3_zsa.c \
        a3xx/fd3_zsa.h
 
+# a4xx backend sources.  Headers are listed next to their .c files, the
+# same way a3xx_SOURCES does, so that they are part of the source list
+# too (and are picked up for distribution).
+a4xx_SOURCES := \
+       a4xx/fd4_blend.c \
+       a4xx/fd4_blend.h \
+       a4xx/fd4_context.c \
+       a4xx/fd4_context.h \
+       a4xx/fd4_draw.c \
+       a4xx/fd4_draw.h \
+       a4xx/fd4_emit.c \
+       a4xx/fd4_emit.h \
+       a4xx/fd4_gmem.c \
+       a4xx/fd4_gmem.h \
+       a4xx/fd4_program.c \
+       a4xx/fd4_program.h \
+       a4xx/fd4_query.c \
+       a4xx/fd4_query.h \
+       a4xx/fd4_rasterizer.c \
+       a4xx/fd4_rasterizer.h \
+       a4xx/fd4_screen.c \
+       a4xx/fd4_screen.h \
+       a4xx/fd4_texture.c \
+       a4xx/fd4_texture.h \
+       a4xx/fd4_util.c \
+       a4xx/fd4_util.h \
+       a4xx/fd4_zsa.c \
+       a4xx/fd4_zsa.h
+
 ir3_SOURCES := \
        ir3/disasm-a3xx.c \
        ir3/instr-a3xx.h \
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.c b/src/gallium/drivers/freedreno/a4xx/fd4_blend.c
new file mode 100644 (file)
index 0000000..f569e93
--- /dev/null
@@ -0,0 +1,127 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd4_blend.h"
+#include "fd4_context.h"
+#include "fd4_util.h"
+
+/* Translate a gallium PIPE_BLEND_* equation into the a4xx blend opcode.
+ * Unrecognised values are logged via DBG() and mapped to opcode 0.
+ */
+static enum a4xx_rb_blend_opcode
+blend_func(unsigned func)
+{
+       enum a4xx_rb_blend_opcode opcode = 0;
+
+       switch (func) {
+       case PIPE_BLEND_ADD:
+               opcode = BLEND_DST_PLUS_SRC;
+               break;
+       case PIPE_BLEND_MIN:
+               opcode = BLEND_MIN_DST_SRC;
+               break;
+       case PIPE_BLEND_MAX:
+               opcode = BLEND_MAX_DST_SRC;
+               break;
+       case PIPE_BLEND_SUBTRACT:
+               opcode = BLEND_SRC_MINUS_DST;
+               break;
+       case PIPE_BLEND_REVERSE_SUBTRACT:
+               opcode = BLEND_DST_MINUS_SRC;
+               break;
+       default:
+               DBG("invalid blend func: %x", func);
+               break;
+       }
+
+       return opcode;
+}
+
+/* Create the a4xx blend CSO from gallium blend state.
+ *
+ * Pre-computes the A4XX_RB_MRT_CONTROL, A4XX_RB_MRT_BUF_INFO and
+ * A4XX_RB_MRT_BLEND_CONTROL values for every rb_mrt[] slot.  Independent
+ * (per render target) blend is not supported, so every slot receives
+ * the state described by cso->rt[0].
+ *
+ * Returns the new fd4_blend_stateobj, or NULL if independent blend was
+ * requested or the allocation failed.
+ */
+void *
+fd4_blend_state_create(struct pipe_context *pctx,
+               const struct pipe_blend_state *cso)
+{
+       const struct pipe_rt_blend_state *rt;
+       struct fd4_blend_stateobj *so;
+       uint32_t control, blend_control, buf_info;
+       bool reads_dest = false;
+       unsigned i;
+
+       if (cso->independent_blend_enable) {
+               DBG("Unsupported! independent blend state");
+               return NULL;
+       }
+
+       /* TODO: logicop_func is not yet programmed into a ROP code field
+        * (see the 0xc00 placeholder below).  For now we only track whether
+        * the selected logic op needs the destination value:
+        */
+       if (cso->logicop_enable) {
+               switch (cso->logicop_func) {
+               case PIPE_LOGICOP_NOR:
+               case PIPE_LOGICOP_AND_INVERTED:
+               case PIPE_LOGICOP_AND_REVERSE:
+               case PIPE_LOGICOP_INVERT:
+               case PIPE_LOGICOP_XOR:
+               case PIPE_LOGICOP_NAND:
+               case PIPE_LOGICOP_AND:
+               case PIPE_LOGICOP_EQUIV:
+               case PIPE_LOGICOP_NOOP:
+               case PIPE_LOGICOP_OR_INVERTED:
+               case PIPE_LOGICOP_OR_REVERSE:
+               case PIPE_LOGICOP_OR:
+                       reads_dest = true;
+                       break;
+               default:
+                       /* remaining logic ops do not read the destination */
+                       break;
+               }
+       }
+
+       so = CALLOC_STRUCT(fd4_blend_stateobj);
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+
+       /* Independent blend was rejected above, and with it disabled only
+        * rt[0] carries valid data (it applies to all render targets).  So
+        * build the register values once from rt[0] rather than reading
+        * rt[i], whose contents are not meaningful for i > 0:
+        */
+       rt = &cso->rt[0];
+
+       blend_control =
+                       A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
+                       A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(blend_func(rt->rgb_func)) |
+                       A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
+                       A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
+                       A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(blend_func(rt->alpha_func)) |
+                       A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor));
+
+       control =
+                       0xc00 | /* XXX ROP_CODE ?? */
+                       A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
+
+       if (rt->blend_enable)
+               control |=
+                               A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
+                               A4XX_RB_MRT_CONTROL_BLEND |
+                               A4XX_RB_MRT_CONTROL_BLEND2;
+
+       if (reads_dest)
+               control |= A4XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
+
+       buf_info = 0;
+       if (cso->dither)
+               buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
+
+       for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
+               so->rb_mrt[i].control = control;
+               so->rb_mrt[i].buf_info = buf_info;
+               so->rb_mrt[i].blend_control = blend_control;
+       }
+
+       return so;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_blend.h b/src/gallium/drivers/freedreno/a4xx/fd4_blend.h
new file mode 100644 (file)
index 0000000..68fcf23
--- /dev/null
@@ -0,0 +1,53 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_BLEND_H_
+#define FD4_BLEND_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+/* a4xx blend CSO: a copy of the gallium blend state plus pre-computed
+ * register values for each of the rb_mrt[] slots.
+ */
+struct fd4_blend_stateobj {
+       /* must stay the first member: fd4_blend_stateobj() casts a
+        * pipe_blend_state pointer to this struct
+        */
+       struct pipe_blend_state base;
+       struct {
+               uint32_t control;        /* A4XX_RB_MRT_CONTROL_* bits */
+               uint32_t buf_info;       /* A4XX_RB_MRT_BUF_INFO_* bits */
+               uint32_t blend_control;  /* A4XX_RB_MRT_BLEND_CONTROL_* bits */
+       } rb_mrt[8];
+};
+
+/* Downcast gallium blend state to the a4xx CSO that embeds it as its
+ * first member.
+ */
+static INLINE struct fd4_blend_stateobj *
+fd4_blend_stateobj(struct pipe_blend_state *blend)
+{
+       struct fd4_blend_stateobj *so = (struct fd4_blend_stateobj *)blend;
+
+       return so;
+}
+
+/* Build an fd4_blend_stateobj from gallium blend state.  Returns NULL
+ * on allocation failure or when independent blend is requested.
+ */
+void * fd4_blend_state_create(struct pipe_context *pctx,
+               const struct pipe_blend_state *cso);
+
+#endif /* FD4_BLEND_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.c b/src/gallium/drivers/freedreno/a4xx/fd4_context.c
new file mode 100644 (file)
index 0000000..2321876
--- /dev/null
@@ -0,0 +1,172 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "fd4_context.h"
+#include "fd4_blend.h"
+#include "fd4_draw.h"
+#include "fd4_emit.h"
+#include "fd4_gmem.h"
+#include "fd4_program.h"
+#include "fd4_query.h"
+#include "fd4_rasterizer.h"
+#include "fd4_texture.h"
+#include "fd4_zsa.h"
+
+/* pipe_context destroy hook: release the a4xx-specific objects that
+ * fd4_context_create() set up, then hand the context over to the generic
+ * fd_context_destroy().
+ */
+static void
+fd4_context_destroy(struct pipe_context *pctx)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd4_context *fd4 = fd4_context(ctx);
+       struct fd_bo *bos[] = {
+               fd4->vs_pvt_mem,
+               fd4->fs_pvt_mem,
+               fd4->vsc_size_mem,
+       };
+       unsigned n;
+
+       util_dynarray_fini(&fd4->rbrc_patches);
+
+       /* private/vsc buffer objects, in the order they were allocated: */
+       for (n = 0; n < sizeof(bos) / sizeof(bos[0]); n++)
+               fd_bo_del(bos[n]);
+
+       /* internal vertex-element CSOs: */
+       pctx->delete_vertex_elements_state(pctx, fd4->solid_vbuf_state.vtx);
+       pctx->delete_vertex_elements_state(pctx, fd4->blit_vbuf_state.vtx);
+
+       /* drop our references on the internal vertex buffers: */
+       pipe_resource_reference(&fd4->solid_vbuf, NULL);
+       pipe_resource_reference(&fd4->blit_texcoord_vbuf, NULL);
+
+       fd_context_destroy(pctx);
+}
+
+/* TODO we could combine a few of these small buffers (solid_vbuf,
+ * blit_texcoord_vbuf, and vsc_size_mem, into a single buffer and
+ * save a tiny bit of memory
+ */
+
+/* Create the immutable vertex buffer holding the six floats below and
+ * upload them.  The buffer is consumed as R32G32B32_FLOAT vertices with
+ * a 12 byte stride (see the solid_vbuf_state setup), i.e. two vertices.
+ *
+ * Returns the new resource, or NULL if the buffer could not be created.
+ */
+static struct pipe_resource *
+create_solid_vertexbuf(struct pipe_context *pctx)
+{
+       static const float init_shader_const[] = {
+                       -1.000000, +1.000000, +1.000000,
+                       +1.000000, -1.000000, +1.000000,
+       };
+       struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
+                       PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
+
+       /* don't try to upload into a buffer that failed to allocate: */
+       if (!prsc)
+               return NULL;
+
+       pipe_buffer_write(pctx, prsc, 0,
+                       sizeof(init_shader_const), init_shader_const);
+       return prsc;
+}
+
+/* Create the 16 byte dynamic buffer used for blit texcoords.  Nothing
+ * is uploaded here.
+ */
+static struct pipe_resource *
+create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
+{
+       return pipe_buffer_create(pctx->screen,
+                       PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
+}
+
+/* Gallium PIPE_PRIM_* -> DI_PT_* primitive type table, handed to
+ * fd_context_init().  Primitive types without an entry here are left
+ * zero-initialized.
+ */
+static const uint8_t primtypes[PIPE_PRIM_MAX] = {
+               [PIPE_PRIM_POINTS]         = DI_PT_POINTLIST_A3XX,
+               [PIPE_PRIM_LINES]          = DI_PT_LINELIST,
+               [PIPE_PRIM_LINE_STRIP]     = DI_PT_LINESTRIP,
+               [PIPE_PRIM_LINE_LOOP]      = DI_PT_LINELOOP,
+               [PIPE_PRIM_TRIANGLES]      = DI_PT_TRILIST,
+               [PIPE_PRIM_TRIANGLE_STRIP] = DI_PT_TRISTRIP,
+               [PIPE_PRIM_TRIANGLE_FAN]   = DI_PT_TRIFAN,
+};
+
+/* Allocate and initialise an a4xx context for the given screen.
+ *
+ * Installs the a4xx state-object constructors and per-module hooks, runs
+ * the generic fd_context_init(), then allocates the private buffers and
+ * the two internal vertex states (solid and blit).
+ *
+ * Returns the new pipe_context, or NULL if the context could not be
+ * allocated or fd_context_init() failed.
+ */
+struct pipe_context *
+fd4_context_create(struct pipe_screen *pscreen, void *priv)
+{
+       /* vertex layout for solid_vbuf_state: */
+       struct pipe_vertex_element solid_elems[] = {{
+                       .vertex_buffer_index = 0,
+                       .src_offset = 0,
+                       .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+               }};
+       /* vertex layout for blit_vbuf_state: */
+       struct pipe_vertex_element blit_elems[] = {{
+                       .vertex_buffer_index = 0,
+                       .src_offset = 0,
+                       .src_format = PIPE_FORMAT_R32G32_FLOAT,
+               }, {
+                       .vertex_buffer_index = 1,
+                       .src_offset = 0,
+                       .src_format = PIPE_FORMAT_R32G32B32_FLOAT,
+               }};
+       struct fd_screen *screen = fd_screen(pscreen);
+       struct fd4_context *fd4 = CALLOC_STRUCT(fd4_context);
+       struct fd_vertex_state *solid, *blit;
+       struct pipe_context *pctx;
+
+       if (!fd4)
+               return NULL;
+
+       pctx = &fd4->base.base;
+
+       fd4->base.dev = fd_device_ref(screen->dev);
+       fd4->base.screen = fd_screen(pscreen);
+
+       /* a4xx specific hooks, installed before the generic init: */
+       pctx->destroy = fd4_context_destroy;
+       pctx->create_blend_state = fd4_blend_state_create;
+       pctx->create_rasterizer_state = fd4_rasterizer_state_create;
+       pctx->create_depth_stencil_alpha_state = fd4_zsa_state_create;
+
+       fd4_draw_init(pctx);
+       fd4_gmem_init(pctx);
+       fd4_texture_init(pctx);
+       fd4_prog_init(pctx);
+
+       pctx = fd_context_init(&fd4->base, pscreen, primtypes, priv);
+       if (!pctx)
+               return NULL;
+
+       util_dynarray_init(&fd4->rbrc_patches);
+
+       /* private memory for vertex/fragment shaders, and vsc size buffer: */
+       fd4->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+       fd4->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+       fd4->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+       fd4->solid_vbuf = create_solid_vertexbuf(pctx);
+       fd4->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
+
+       /* solid_vbuf_state: one R32G32B32_FLOAT stream from solid_vbuf */
+       solid = &fd4->solid_vbuf_state;
+       solid->vtx = pctx->create_vertex_elements_state(pctx, 1, solid_elems);
+       solid->vertexbuf.count = 1;
+       solid->vertexbuf.vb[0].stride = 12;
+       solid->vertexbuf.vb[0].buffer = fd4->solid_vbuf;
+
+       /* blit_vbuf_state: texcoords from blit_texcoord_vbuf plus
+        * positions from solid_vbuf
+        */
+       blit = &fd4->blit_vbuf_state;
+       blit->vtx = pctx->create_vertex_elements_state(pctx, 2, blit_elems);
+       blit->vertexbuf.count = 2;
+       blit->vertexbuf.vb[0].stride = 8;
+       blit->vertexbuf.vb[0].buffer = fd4->blit_texcoord_vbuf;
+       blit->vertexbuf.vb[1].stride = 12;
+       blit->vertexbuf.vb[1].buffer = fd4->solid_vbuf;
+
+       fd4_query_context_init(pctx);
+
+       return pctx;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_context.h b/src/gallium/drivers/freedreno/a4xx/fd4_context.h
new file mode 100644 (file)
index 0000000..87e69fa
--- /dev/null
@@ -0,0 +1,102 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_CONTEXT_H_
+#define FD4_CONTEXT_H_
+
+#include "freedreno_drmif.h"
+
+#include "freedreno_context.h"
+
+#include "ir3_shader.h"
+
+/* a4xx context: the generic fd_context plus a4xx-specific buffers,
+ * internal vertex state and shader-variant tracking.
+ */
+struct fd4_context {
+       /* must stay the first member: fd4_context() casts an fd_context
+        * pointer to this struct
+        */
+       struct fd_context base;
+
+       /* Keep track of writes to RB_RENDER_CONTROL which need to be patched
+        * once we know whether or not to use GMEM, and GMEM tile pitch.
+        */
+       struct util_dynarray rbrc_patches;
+
+       /* private memory buffers for the vertex and fragment shader: */
+       struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
+
+       /* This only needs to be 4 * num_of_pipes bytes (ie. 32 bytes).  We
+        * could combine it with another allocation.
+        */
+       struct fd_bo *vsc_size_mem;
+
+       /* vertex buf used for clear/gmem->mem vertices, and mem->gmem
+        * vertices:
+        */
+       struct pipe_resource *solid_vbuf;
+
+       /* vertex buf used for mem->gmem tex coords:
+        */
+       struct pipe_resource *blit_texcoord_vbuf;
+
+       /* vertex state for solid_vbuf:
+        *    - solid_vbuf / 12 / R32G32B32_FLOAT
+        */
+       struct fd_vertex_state solid_vbuf_state;
+
+       /* vertex state for blit_prog:
+        *    - blit_texcoord_vbuf / 8 / R32G32_FLOAT
+        *    - solid_vbuf / 12 / R32G32B32_FLOAT
+        */
+       struct fd_vertex_state blit_vbuf_state;
+
+       /* true if *any* bits are set in {v,f}saturate_{s,t,r} */
+       bool vsaturate, fsaturate;
+
+       /* bitmask of samplers which need coords clamped for the vertex
+        * shader:
+        */
+       unsigned vsaturate_s, vsaturate_t, vsaturate_r;
+
+       /* bitmask of samplers which need coords clamped for the frag
+        * shader:
+        */
+       unsigned fsaturate_s, fsaturate_t, fsaturate_r;
+
+       /* some state changes require a different shader variant.  Keep
+        * track of this so we know when we need to re-emit shader state
+        * due to variant change.  See fixup_shader_state()
+        */
+       struct ir3_shader_key last_key;
+};
+
+/* Downcast the generic fd_context to the a4xx context that embeds it as
+ * its first member.
+ */
+static INLINE struct fd4_context *
+fd4_context(struct fd_context *ctx)
+{
+       struct fd4_context *fd4 = (struct fd4_context *)ctx;
+
+       return fd4;
+}
+
+/* Create an a4xx pipe_context for the given screen; returns NULL on
+ * failure.
+ */
+struct pipe_context *
+fd4_context_create(struct pipe_screen *pscreen, void *priv);
+
+#endif /* FD4_CONTEXT_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
new file mode 100644 (file)
index 0000000..2427a8b
--- /dev/null
@@ -0,0 +1,326 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd4_draw.h"
+#include "fd4_context.h"
+#include "fd4_emit.h"
+#include "fd4_program.h"
+#include "fd4_util.h"
+#include "fd4_zsa.h"
+
+
+/* Emit one draw into 'ring': state, vertex buffers (only if dirty), the
+ * index offset and restart index registers, then the draw itself.  The
+ * draw ignores visibility when emit->key.binning_pass is set and uses
+ * it otherwise.
+ */
+static void
+draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               struct fd4_emit *emit)
+{
+       const struct pipe_draw_info *info = emit->info;
+       const unsigned vtx_dirty = FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE;
+       uint32_t restart_index;
+
+       fd4_emit_state(ctx, ring, emit);
+
+       if (emit->dirty & vtx_dirty)
+               fd4_emit_vertex_bufs(ring, emit);
+
+       OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
+       OUT_RING(ring, info->start);            /* VFD_INDEX_OFFSET */
+       OUT_RING(ring, info->start_instance);   /* ??? UNKNOWN_2209 */
+
+       /* all-ones is written when primitive restart is not in use: */
+       if (info->primitive_restart)
+               restart_index = info->restart_index;
+       else
+               restart_index = 0xffffffff;
+
+       OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
+       OUT_RING(ring, restart_index);          /* PC_RESTART_INDEX */
+
+       fd4_draw_emit(ctx, ring,
+                       emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
+                       info);
+}
+
+/* A state change that looks "unrelated" from the state-tracker's point
+ * of view can still select a different shader variant.  Compare the new
+ * key against the one used for the previous draw; if they differ, flag
+ * the program (and the affected shader stage) dirty and remember the
+ * new key.
+ */
+static void
+fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
+{
+       struct ir3_shader_key *prev = &fd4_context(ctx)->last_key;
+
+       /* same variant as last time, nothing to re-emit: */
+       if (ir3_shader_key_equal(prev, key))
+               return;
+
+       ctx->dirty |= FD_DIRTY_PROG;
+
+       /* per-sampler saturate masks only matter if either key uses them: */
+       if (prev->has_per_samp || key->has_per_samp) {
+               if ((prev->vsaturate_s != key->vsaturate_s) ||
+                               (prev->vsaturate_t != key->vsaturate_t) ||
+                               (prev->vsaturate_r != key->vsaturate_r))
+                       ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
+
+               if ((prev->fsaturate_s != key->fsaturate_s) ||
+                               (prev->fsaturate_t != key->fsaturate_t) ||
+                               (prev->fsaturate_r != key->fsaturate_r))
+                       ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+       }
+
+       /* the remaining key bits all dirty the fragment shader: */
+       if ((prev->color_two_side != key->color_two_side) ||
+                       (prev->half_precision != key->half_precision) ||
+                       (prev->alpha != key->alpha))
+               ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+
+       *prev = *key;
+}
+
+/* Draw entry point: build the shader key from current context state,
+ * then emit the draw twice -- first into ctx->binning_ring with the
+ * binning-pass key, then into ctx->ring with the regular key.
+ */
+static void
+fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
+{
+       struct fd4_context *fd4_ctx = fd4_context(ctx);
+       struct fd4_emit emit = {
+               .vtx  = &ctx->vtx,
+               .prog = &ctx->prog,
+               .info = info,
+               .key = {
+                       /* do binning pass first: */
+                       .binning_pass = true,
+                       .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
+                       .alpha = util_format_is_alpha(pipe_surface_format(ctx->framebuffer.cbufs[0])),
+                       // TODO set .half_precision based on render target format,
+                       // ie. float16 and smaller use half, float32 use full..
+                       .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
+                       .has_per_samp = fd4_ctx->fsaturate || fd4_ctx->vsaturate,
+                       .vsaturate_s = fd4_ctx->vsaturate_s,
+                       .vsaturate_t = fd4_ctx->vsaturate_t,
+                       .vsaturate_r = fd4_ctx->vsaturate_r,
+                       .fsaturate_s = fd4_ctx->fsaturate_s,
+                       .fsaturate_t = fd4_ctx->fsaturate_t,
+                       .fsaturate_r = fd4_ctx->fsaturate_r,
+               },
+               .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
+       };
+       unsigned dirty;
+
+       /* may set additional dirty bits if the key changed since last draw: */
+       fixup_shader_state(ctx, &emit.key);
+
+       /* snapshot after the fixup so both passes see the same dirty bits;
+        * the binning pass leaves out FD_DIRTY_BLEND:
+        */
+       dirty = ctx->dirty;
+       emit.dirty = dirty & ~(FD_DIRTY_BLEND);
+       draw_impl(ctx, ctx->binning_ring, &emit);
+
+       /* and now regular (non-binning) pass: */
+       emit.key.binning_pass = false;
+       emit.dirty = dirty;
+       emit.vp = NULL;   /* we changed key so need to refetch vp */
+       draw_impl(ctx, ctx->ring, &emit);
+}
+
+/* Clears are not affected by the current viewport state, so program an
+ * x/y viewport derived from the framebuffer dimensions instead: offsets
+ * and x scale are half the width/height, y scale is negated.
+ */
+static void
+reset_viewport(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb)
+{
+       const float xhalf = pfb->width * 0.5f;
+       const float yhalf = pfb->height * 0.5f;
+
+       /* four consecutive registers starting at GRAS_CL_VPORT_XOFFSET_0: */
+       OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 4);
+       OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(xhalf));
+       OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(xhalf));
+       OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(yhalf));
+       OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-yhalf));
+}
+
+static void
+fd4_clear(struct fd_context *ctx, unsigned buffers,
+               const union pipe_color_union *color, double depth, unsigned stencil)
+{
+       struct fd4_context *fd4_ctx = fd4_context(ctx);
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+       unsigned dirty = ctx->dirty;
+       unsigned ce, i;
+       struct fd4_emit emit = {
+               .vtx  = &fd4_ctx->solid_vbuf_state,
+               .prog = &ctx->solid_prog,
+               .key = {
+                       .half_precision = true,
+               },
+       };
+       uint32_t colr = 0;
+
+       if ((buffers & PIPE_CLEAR_COLOR) && pfb->nr_cbufs)
+               colr  = pack_rgba(pfb->cbufs[0]->format, color->f);
+
+       dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
+       dirty |= FD_DIRTY_PROG;
+       emit.dirty = dirty;
+
+       OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
+       OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+       /* emit generic state now: */
+       fd4_emit_state(ctx, ring, &emit);
+       reset_viewport(ring, pfb);
+
+       if (buffers & PIPE_CLEAR_DEPTH) {
+               OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+               OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
+                               A4XX_RB_DEPTH_CONTROL_Z_ENABLE |
+                               A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));
+
+               fd_wfi(ctx, ring);
+               OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_ZOFFSET_0, 2);
+               OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
+               OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(depth));
+               ctx->dirty |= FD_DIRTY_VIEWPORT;
+       } else {
+               OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+               OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+       }
+
+       if (buffers & PIPE_CLEAR_STENCIL) {
+               OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+               OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(stencil) |
+                               A4XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
+                               A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+               OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
+                               A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
+                               0xff000000 | // XXX ???
+                               A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+
+               OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
+               OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+                               A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
+                               A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+                               A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
+                               A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+                               A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+                               A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+                               A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+                               A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+               OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
+       } else {
+               OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+               OUT_RING(ring, A4XX_RB_STENCILREFMASK_STENCILREF(0) |
+                               A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
+                               A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
+               OUT_RING(ring, A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
+                               A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
+                               A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));
+
+               OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
+               OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+                               A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+                               A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+                               A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+                               A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+                               A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+                               A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+                               A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+               OUT_RING(ring, 0x00000000); /* RB_STENCIL_CONTROL2 */
+       }
+
+       if (buffers & PIPE_CLEAR_COLOR) {
+               OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
+               OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+               ce = 0xf;
+       } else {
+               ce = 0x0;
+       }
+
+       for (i = 0; i < 8; i++) {
+               OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
+               OUT_RING(ring, A4XX_RB_MRT_CONTROL_FASTCLEAR |
+                               A4XX_RB_MRT_CONTROL_B11 |
+                               A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
+
+               OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
+               OUT_RING(ring, A4XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
+                               A4XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+                               A4XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
+                               A4XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
+                               A4XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+                               A4XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO));
+       }
+
+       fd4_emit_vertex_bufs(ring, &emit);
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+       OUT_RING(ring, 0x0);          /* XXX GRAS_ALPHA_CONTROL */
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_RB_CLEAR_COLOR_DW0, 4);
+       OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW0 */
+       OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW1 */
+       OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW2 */
+       OUT_RING(ring, colr);         /* RB_CLEAR_COLOR_DW3 */
+
+       /* until fastclear works: */
+       fd4_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+
+       OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
+       OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
+       OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */
+
+       OUT_PKT0(ring, REG_A4XX_PC_RESTART_INDEX, 1);
+       OUT_RING(ring, 0xffffffff);   /* PC_RESTART_INDEX */
+
+       OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
+       OUT_RING(ring, 0x00000001);
+
+       fd4_draw(ctx, ring, DI_PT_RECTLIST, USE_VISIBILITY,
+                       DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
+
+       OUT_PKT3(ring, CP_UNKNOWN_1A, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_CLEAR_CNTL, 1);
+       OUT_RING(ring, A4XX_GRAS_CLEAR_CNTL_NOT_FASTCLEAR);
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+       OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                       A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+                       A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+                       A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+}
+
+/* Install the a4xx implementations of the draw and clear hooks on the
+ * freedreno context wrapping 'pctx'.
+ */
+void
+fd4_draw_init(struct pipe_context *pctx)
+{
+       struct fd_context *fd_ctx = fd_context(pctx);
+
+       fd_ctx->clear = fd4_clear;
+       fd_ctx->draw_vbo = fd4_draw_vbo;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_draw.h b/src/gallium/drivers/freedreno/a4xx/fd4_draw.h
new file mode 100644 (file)
index 0000000..f775cc7
--- /dev/null
@@ -0,0 +1,122 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_DRAW_H_
+#define FD4_DRAW_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_draw.h"
+
+/* Hooks the a4xx draw_vbo/clear implementations into the context. */
+void fd4_draw_init(struct pipe_context *pctx);
+
+/* draw packet changed on a4xx, so cannot reuse one from a2xx/a3xx.. */
+
+/* Pack the first dword of the a4xx draw packet.
+ *
+ * Field placement: prim_type at bit 0, source_select at bit 6 and
+ * vis_cull_mode at bit 8.  index_size is split in two: its lowest bit
+ * lands at bit 11 and its remaining upper bits start at bit 13.
+ */
+static inline uint32_t DRAW4(enum pc_di_primtype prim_type,
+               enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
+               enum pc_di_vis_cull_mode vis_cull_mode)
+{
+       const uint32_t index_lo = index_size & 1;
+       const uint32_t index_hi = index_size >> 1;
+       uint32_t dword = 0;
+
+       dword |= prim_type     << 0;
+       dword |= source_select << 6;
+       dword |= vis_cull_mode << 8;
+       dword |= index_lo      << 11;
+       dword |= index_hi      << 13;
+
+       return dword;
+}
+
+/* Emit a single CP_DRAW_INDX_OFFSET packet into 'ring'.
+ *
+ * The packet is 3 dwords for a draw without an index buffer and 6
+ * dwords when 'idx_bo' is given (index buffer address + size follow).
+ * When 'vismode' is USE_VISIBILITY the vis-cull field of the first
+ * dword is left as zero and the dword is recorded in
+ * ctx->draw_patches so it can be patched up later, once it is known
+ * whether we are binning or not.
+ */
+static inline void
+fd4_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               enum pc_di_primtype primtype,
+               enum pc_di_vis_cull_mode vismode,
+               enum pc_di_src_sel src_sel, uint32_t count,
+               enum pc_di_index_size idx_type,
+               uint32_t idx_size, uint32_t idx_offset,
+               struct fd_bo *idx_bo)
+{
+       const uint32_t pkt_dwords = idx_bo ? 6 : 3;
+
+       /* A unique counter value is written to scratch7 around each
+        * draw.  After a lockup, the combination of IB (scratch6) and
+        * DRAW is enough to "triangulate" the offending draw when
+        * matching register dumps against the cmdstream.
+        */
+       emit_marker(ring, 7);
+
+       OUT_PKT3(ring, CP_DRAW_INDX_OFFSET, pkt_dwords);
+
+       if (vismode != USE_VISIBILITY) {
+               OUT_RING(ring, DRAW4(primtype, src_sel, idx_type, vismode));
+       } else {
+               /* vis mode stays blank here and gets patched later */
+               OUT_RINGP(ring, DRAW4(primtype, src_sel, idx_type, 0),
+                               &ctx->draw_patches);
+       }
+
+       OUT_RING(ring, 0x1);               /* XXX */
+       OUT_RING(ring, count);             /* NumIndices */
+
+       if (idx_bo) {
+               OUT_RING(ring, 0x0);           /* XXX */
+               OUT_RELOC(ring, idx_bo, idx_offset, 0, 0);
+               OUT_RING (ring, idx_size);
+       }
+
+       emit_marker(ring, 7);
+
+       fd_reset_wfi(ctx);
+}
+
+/* Translate a gallium pipe_draw_info into an fd4_draw() call.
+ *
+ * Indexed draws source their indices from the context's bound index
+ * buffer (user index buffers are not expected here); everything else
+ * uses auto-generated indices with no index buffer.
+ */
+static inline void
+fd4_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               enum pc_di_vis_cull_mode vismode,
+               const struct pipe_draw_info *info)
+{
+       /* defaults describe the non-indexed (auto-index) case: */
+       struct fd_bo *index_bo = NULL;
+       enum pc_di_index_size index_type = INDEX_SIZE_IGN;
+       enum pc_di_src_sel source = DI_SRC_SEL_AUTO_INDEX;
+       uint32_t index_bytes = 0;
+       uint32_t index_start = 0;
+
+       if (info->indexed) {
+               struct pipe_index_buffer *ib = &ctx->indexbuf;
+
+               assert(!ib->user_buffer);
+
+               index_bo = fd_resource(ib->buffer)->bo;
+               index_type = size2indextype(ib->index_size);
+               index_bytes = ib->index_size * info->count;
+               index_start = ib->offset + (info->start * ib->index_size);
+               source = DI_SRC_SEL_DMA;
+       }
+
+       fd4_draw(ctx, ring, ctx->primtypes[info->mode], vismode, source,
+                       info->count, index_type, index_bytes, index_start, index_bo);
+}
+
+#endif /* FD4_DRAW_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
new file mode 100644 (file)
index 0000000..1a0986a
--- /dev/null
@@ -0,0 +1,625 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_helpers.h"
+#include "util/u_format.h"
+
+#include "freedreno_resource.h"
+
+#include "fd4_emit.h"
+#include "fd4_blend.h"
+#include "fd4_context.h"
+#include "fd4_program.h"
+#include "fd4_rasterizer.h"
+#include "fd4_texture.h"
+#include "fd4_util.h"
+#include "fd4_zsa.h"
+
+/* Emit a CP_LOAD_STATE packet that loads shader constants.
+ *
+ * sb:             state block to load
+ * regid:          base const register
+ * offset:         byte offset into the source (prsc or dwords)
+ * sizedwords:     size of const value buffer
+ * prsc or dwords: buffer containing constant values; when prsc is
+ *                 non-NULL the packet only carries a reloc to it,
+ *                 otherwise the values are copied inline from dwords
+ */
+void
+fd4_emit_constant(struct fd_ringbuffer *ring,
+               enum adreno_state_block sb,
+               uint32_t regid, uint32_t offset, uint32_t sizedwords,
+               const uint32_t *dwords, struct pipe_resource *prsc)
+{
+       /* only the direct path carries the payload inside the packet: */
+       const uint32_t inline_dwords = prsc ? 0 : sizedwords;
+       enum adreno_state_src src;
+       const uint32_t *payload;
+       uint32_t n;
+
+       if (prsc)
+               src = 0x2;  // TODO ??
+       else
+               src = SS_DIRECT;
+
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + inline_dwords);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/4) |
+                       CP_LOAD_STATE_0_STATE_SRC(src) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                       CP_LOAD_STATE_0_NUM_UNIT(sizedwords/4));
+
+       if (prsc) {
+               /* nothing follows the reloc in this case */
+               OUT_RELOC(ring, fd_resource(prsc)->bo, offset,
+                               CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
+               return;
+       }
+
+       OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+                       CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+
+       /* 'offset' is in bytes, so step through a byte pointer: */
+       payload = (const uint32_t *)((const uint8_t *)dwords + offset);
+       for (n = 0; n < inline_dwords; n++)
+               OUT_RING(ring, payload[n]);
+}
+
+/* Emit the bound user constant buffers, followed by the shader's
+ * immediates, for state block 'sb'.
+ *
+ * ring:     cmdstream to emit into
+ * sb:       state block to load the constants into
+ * constbuf: bound constant buffers; every enabled buffer is treated
+ *           as dirty and (re)emitted
+ * shader:   variant used to bound the user constants and whose
+ *           immediates get emitted.  May be NULL (callers pass NULL
+ *           when the program is not dirty), in which case only the
+ *           user constants are emitted and no truncation is applied.
+ */
+static void
+emit_constants(struct fd_ringbuffer *ring,
+               enum adreno_state_block sb,
+               struct fd_constbuf_stateobj *constbuf,
+               struct ir3_shader_variant *shader)
+{
+       uint32_t enabled_mask = constbuf->enabled_mask;
+       uint32_t max_const;   /* upper bound for user consts, in dwords */
+       uint32_t base = 0;
+
+       // XXX TODO only emit dirty consts.. but we need to keep track if
+       // they are clobbered by a clear, gmem2mem, or mem2gmem..
+       constbuf->dirty_mask = enabled_mask;
+
+       /* in particular, with binning shader we may end up with unused
+        * consts, ie. we could end up w/ constlen that is smaller
+        * than first_immediate.  In that case truncate the user consts
+        * early to avoid HLSQ lockup caused by writing too many consts
+        *
+        * Without a shader there is nothing to derive the limit from,
+        * so do not dereference it and leave the user consts untruncated.
+        */
+       if (shader)
+               max_const = 4 * MIN2(shader->first_immediate, shader->constlen);
+       else
+               max_const = ~0u;
+
+       /* emit user constants: */
+       while (enabled_mask) {
+               unsigned index = ffs(enabled_mask) - 1;
+               struct pipe_constant_buffer *cb = &constbuf->cb[index];
+               unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+               // I expect that size should be a multiple of vec4's:
+               assert(size == align(size, 4));
+
+               /* gallium could leave const buffers bound above what the
+                * current shader uses.. don't let that confuse us.
+                */
+               if (base >= max_const)
+                       break;
+
+               /* unsigned shift: index may be 31 */
+               if (constbuf->dirty_mask & (1u << index)) {
+                       /* and even if the start of the const buffer is before
+                        * first_immediate, the end may not be:
+                        */
+                       size = MIN2(size, max_const - base);
+                       fd4_emit_constant(ring, sb, base,
+                                       cb->buffer_offset, size,
+                                       cb->user_buffer, cb->buffer);
+                       constbuf->dirty_mask &= ~(1u << index);
+               }
+
+               base += size;
+               enabled_mask &= ~(1u << index);
+       }
+
+       /* emit shader immediates: */
+       if (shader) {
+               int size = shader->immediates_count;
+               base = shader->first_immediate;
+
+               /* truncate size to avoid writing constants that shader
+                * does not use:
+                */
+               size = MIN2(size + base, shader->constlen) - base;
+
+               /* convert out of vec4: */
+               base *= 4;
+               size *= 4;
+
+               if (size > 0) {
+                       fd4_emit_constant(ring, sb, base,
+                               0, size, shader->immediates[0].val, NULL);
+               }
+       }
+}
+
+/* Emit sampler and texture state for one shader stage.
+ *
+ * ctx:  context (currently unused here)
+ * ring: cmdstream to emit into
+ * sb:   texture state block to load
+ * tex:  bound samplers / sampler views; NULL entries are replaced by
+ *       zero-initialised dummy state
+ */
+static void
+emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               enum adreno_state_block sb, struct fd_texture_stateobj *tex)
+{
+       unsigned i;
+
+       if (tex->num_samplers > 0) {
+               /* output sampler state: */
+               OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 + (2 * tex->num_samplers));
+               OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+                               CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                               CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                               CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
+               OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+                               CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+               for (i = 0; i < tex->num_samplers; i++) {
+                       static const struct fd4_sampler_stateobj dummy_sampler = {};
+                       const struct fd4_sampler_stateobj *sampler = tex->samplers[i] ?
+                                       fd4_sampler_stateobj(tex->samplers[i]) :
+                                       &dummy_sampler;
+                       OUT_RING(ring, sampler->texsamp0);
+                       OUT_RING(ring, sampler->texsamp1);
+               }
+               /* maybe an a420.0 (or a4xx.0) workaround?? or just driver bug? */
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+       }
+
+       if (tex->num_textures > 0) {
+               /* emit texture state (8 dwords per texture): */
+               OUT_PKT3(ring, CP_LOAD_STATE, 2 + (8 * tex->num_textures));
+               OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+                               CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                               CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                               CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
+               OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+                               CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+               for (i = 0; i < tex->num_textures; i++) {
+                       static const struct fd4_pipe_sampler_view dummy_view = {};
+                       const struct fd4_pipe_sampler_view *view = tex->textures[i] ?
+                                       fd4_pipe_sampler_view(tex->textures[i]) :
+                                       &dummy_view;
+                       struct fd_resource *rsc = view->tex_resource;
+
+                       OUT_RING(ring, view->texconst0);
+                       OUT_RING(ring, view->texconst1);
+                       OUT_RING(ring, view->texconst2);
+                       OUT_RING(ring, view->texconst3);
+                       if (rsc) {
+                               struct fd_resource_slice *slice = fd_resource_slice(rsc, 0);
+                               OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
+                       } else {
+                               /* unbound slot (dummy view): there is no buffer to
+                                * relocate against, so emit a zero address dword
+                                * instead of dereferencing a NULL resource.  This
+                                * keeps the per-texture size at 8 dwords.
+                                */
+                               OUT_RING(ring, 0x00000000);
+                       }
+                       OUT_RING(ring, 0x00000000);
+                       OUT_RING(ring, 0x00000000);
+                       OUT_RING(ring, 0x00000000);
+               }
+       }
+}
+
+/* emit texture state for mem->gmem restore operation.. eventually it would
+ * be good to get rid of this and use normal CSO/etc state for more of these
+ * special cases..
+ *
+ * Not implemented yet: the body is empty, so nothing is emitted into
+ * 'ring' and 'psurf' is ignored.
+ */
+void
+fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
+{
+       /* TODO */
+}
+
+
+/* Emit vertex fetch/decode state for the vertex shader variant selected
+ * by 'emit'.
+ *
+ * One VFD_FETCH + VFD_DECODE_INSTR pair is written for every vertex
+ * shader input with a non-zero component mask, packed into consecutive
+ * hw slots, followed by the VFD_CONTROL_0..4 registers.  Nothing at all
+ * is emitted when no vertex elements are bound.
+ */
+void
+fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit)
+{
+       const struct fd_vertex_state *vstate = emit->vtx;
+       struct ir3_shader_variant *vs = fd4_emit_get_vp(emit);
+       unsigned count = MIN2(vstate->vtx->num_elements, vs->inputs_count);
+       uint32_t last_used = 0;   /* highest input index that is consumed */
+       uint32_t slot = 0;        /* next fetch/decode slot to program */
+       uint32_t attr_total = 0;  /* running sum of input component counts */
+       uint32_t i;
+
+       /* hw doesn't like to be configured for zero vbo's, it seems: */
+       if (vstate->vtx->num_elements == 0)
+               return;
+
+       for (i = 0; i < count; i++) {
+               if (vs->inputs[i].compmask)
+                       last_used = i;
+       }
+
+       for (i = 0; i <= last_used; i++) {
+               struct pipe_vertex_element *elem;
+               const struct pipe_vertex_buffer *vb;
+               struct fd_resource *rsc;
+               enum pipe_format pfmt;
+               enum a4xx_vtx_fmt fmt;
+               bool switchnext;
+               uint32_t fs, off, size;
+
+               /* inputs the shader does not read get no slot: */
+               if (!vs->inputs[i].compmask)
+                       continue;
+
+               elem = &vstate->vtx->pipe[i];
+               vb = &vstate->vertexbuf.vb[elem->vertex_buffer_index];
+               rsc = fd_resource(vb->buffer);
+               pfmt = elem->src_format;
+               fmt = fd4_pipe2vtx(pfmt);
+               switchnext = (i != last_used);
+               fs = util_format_get_blocksize(pfmt);
+               off = vb->buffer_offset + elem->src_offset;
+               size = fd_bo_size(rsc->bo) - off;
+               debug_assert(fmt != ~0);
+
+               OUT_PKT0(ring, REG_A4XX_VFD_FETCH(slot), 4);
+               OUT_RING(ring, A4XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
+                               A4XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
+                               COND(switchnext, A4XX_VFD_FETCH_INSTR_0_SWITCHNEXT));
+               OUT_RELOC(ring, rsc->bo, off, 0, 0);
+               OUT_RING(ring, A4XX_VFD_FETCH_INSTR_2_SIZE(size));
+               OUT_RING(ring, 0x00000001);
+
+               OUT_PKT0(ring, REG_A4XX_VFD_DECODE_INSTR(slot), 1);
+               OUT_RING(ring, A4XX_VFD_DECODE_INSTR_CONSTFILL |
+                               A4XX_VFD_DECODE_INSTR_WRITEMASK(vs->inputs[i].compmask) |
+                               A4XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+                               A4XX_VFD_DECODE_INSTR_SWAP(fd4_pipe2swap(pfmt)) |
+                               A4XX_VFD_DECODE_INSTR_REGID(vs->inputs[i].regid) |
+                               A4XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
+                               A4XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+                               COND(switchnext, A4XX_VFD_DECODE_INSTR_SWITCHNEXT));
+
+               attr_total += vs->inputs[i].ncomp;
+               slot++;
+       }
+
+       OUT_PKT0(ring, REG_A4XX_VFD_CONTROL_0, 5);
+       OUT_RING(ring, A4XX_VFD_CONTROL_0_TOTALATTRTOVS(attr_total) |
+                       0xa0000 | /* XXX */
+                       A4XX_VFD_CONTROL_0_STRMDECINSTRCNT(slot) |
+                       A4XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(slot));
+       OUT_RING(ring, A4XX_VFD_CONTROL_1_MAXSTORAGE(129) | // XXX
+                       A4XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
+                       A4XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
+       OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_2 */
+       OUT_RING(ring, 0x0000fc00);   /* XXX VFD_CONTROL_3 */
+       OUT_RING(ring, 0x00000000);   /* XXX VFD_CONTROL_4 */
+}
+
+/* Emit the pieces of context state selected by emit->dirty into 'ring'.
+ *
+ * Each section below is guarded by the dirty bits it depends on.  On
+ * return the handled bits are cleared from ctx->dirty; the texture bits
+ * are the exception when the current shader has no samplers, in which
+ * case they are left pending (see the end of the function).
+ */
+void
+fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               struct fd4_emit *emit)
+{
+       struct ir3_shader_variant *vp = fd4_emit_get_vp(emit);
+       struct ir3_shader_variant *fp = fd4_emit_get_fp(emit);
+       uint32_t dirty = emit->dirty;
+
+       emit_marker(ring, 5);
+
+       if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
+               uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_render_control;
+
+               /* I suppose if we needed to (which I don't *think* we need
+                * to), we could emit this for binning pass too.  But we
+                * would need to keep a different patch-list for binning
+                * vs render pass.
+                */
+
+               OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
+               /* recorded in rbrc_patches rather than emitted as a plain dword */
+               OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
+       }
+
+       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+               struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
+               struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+               OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+               OUT_RING(ring, zsa->gras_alpha_control);
+
+               OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 1);
+               OUT_RING(ring, zsa->rb_stencil_control);
+
+               /* front and back-face ref values are or'd into the CSO values */
+               OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+               OUT_RING(ring, zsa->rb_stencilrefmask |
+                               A4XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+               OUT_RING(ring, zsa->rb_stencilrefmask_bf |
+                               A4XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
+       }
+
+       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) {
+               uint32_t val = fd4_zsa_stateobj(ctx->zsa)->rb_depth_control;
+               /* depth control also depends on what the frag shader does: */
+               if (fp->writes_pos) {
+                       val |= A4XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
+                       val |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+               }
+               if (fp->has_kill) {
+                       val |= A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+               }
+               OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+               OUT_RING(ring, val);
+       }
+
+       if (dirty & FD_DIRTY_RASTERIZER) {
+               struct fd4_rasterizer_stateobj *rasterizer =
+                               fd4_rasterizer_stateobj(ctx->rasterizer);
+
+               OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
+               OUT_RING(ring, rasterizer->gras_su_mode_control |
+                               A4XX_GRAS_SU_MODE_CONTROL_RENDERING_PASS);
+
+               OUT_PKT0(ring, REG_A4XX_GRAS_SU_POINT_MINMAX, 2);
+               OUT_RING(ring, rasterizer->gras_su_point_minmax);
+               OUT_RING(ring, rasterizer->gras_su_point_size);
+
+               OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
+               OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
+               OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
+       }
+
+       if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+               uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
+                               ->gras_cl_clip_cntl;
+               OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+               OUT_RING(ring, val);
+       }
+
+       /* NOTE: since primitive_restart is not actually part of any
+        * state object, we need to make sure that we always emit
+        * PRIM_VTX_CNTL.. either that or be more clever and detect
+        * when it changes.
+        */
+       if (emit->info) {
+               uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
+                               ->pc_prim_vtx_cntl;
+
+               val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
+               val |= COND(fp->total_in > 0, A4XX_PC_PRIM_VTX_CNTL_VAROUT);
+
+               OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
+               OUT_RING(ring, val);
+               OUT_RING(ring, 0x12);     /* XXX UNKNOWN_21C5 */
+       }
+
+       if (dirty & FD_DIRTY_SCISSOR) {
+               struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
+
+               OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
+               OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(scissor->maxx - 1) |
+                               A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(scissor->maxy - 1));
+               OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(scissor->minx) |
+                               A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(scissor->miny));
+
+               /* grow the accumulated max_scissor to include this scissor: */
+               ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, scissor->minx);
+               ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, scissor->miny);
+               ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, scissor->maxx);
+               ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, scissor->maxy);
+       }
+
+       if (dirty & FD_DIRTY_VIEWPORT) {
+               fd_wfi(ctx, ring);
+               OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+               OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0(ctx->viewport.translate[0]));
+               OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0(ctx->viewport.scale[0]));
+               OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0(ctx->viewport.translate[1]));
+               OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(ctx->viewport.scale[1]));
+               OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(ctx->viewport.translate[2]));
+               OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(ctx->viewport.scale[2]));
+       }
+
+       if (dirty & FD_DIRTY_PROG)
+               fd4_program_emit(ring, emit);
+
+       /* NOTE(review): the shader argument below is NULL whenever the
+        * corresponding FD_SHADER_DIRTY_* bit is clear, so emit_constants()
+        * has to cope with a NULL shader.
+        */
+       if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
+                       /* evil hack to deal sanely with clear path: */
+                       (emit->prog == &ctx->prog)) {
+               fd_wfi(ctx, ring);
+               emit_constants(ring,  SB_VERT_SHADER,
+                               &ctx->constbuf[PIPE_SHADER_VERTEX],
+                               (emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
+               if (!emit->key.binning_pass) {
+                       emit_constants(ring, SB_FRAG_SHADER,
+                                       &ctx->constbuf[PIPE_SHADER_FRAGMENT],
+                                       (emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
+               }
+       }
+
+       if ((dirty & FD_DIRTY_BLEND) && ctx->blend) {
+               struct fd4_blend_stateobj *blend = fd4_blend_stateobj(ctx->blend);
+               uint32_t i;
+
+               for (i = 0; i < 8; i++) {
+                       OUT_PKT0(ring, REG_A4XX_RB_MRT_CONTROL(i), 1);
+                       OUT_RING(ring, blend->rb_mrt[i].control);
+
+                       OUT_PKT0(ring, REG_A4XX_RB_MRT_BLEND_CONTROL(i), 1);
+                       OUT_RING(ring, blend->rb_mrt[i].blend_control);
+               }
+       }
+
+       /* If the shader has no samplers, skip the texture emit and drop
+        * the bit from the local mask so that it stays set in ctx->dirty.
+        */
+       if (dirty & FD_DIRTY_VERTTEX) {
+               if (vp->has_samp)
+                       emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex);
+               else
+                       dirty &= ~FD_DIRTY_VERTTEX;
+       }
+
+       if (dirty & FD_DIRTY_FRAGTEX) {
+               if (fp->has_samp)
+                       emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex);
+               else
+                       dirty &= ~FD_DIRTY_FRAGTEX;
+       }
+
+       /* everything still in the local mask has been emitted: */
+       ctx->dirty &= ~dirty;
+}
+
+/* emit setup at begin of new cmdstream buffer (don't rely on previous
+ * state, there could have been a context switch between ioctls):
+ *
+ * Everything is written to ctx->ring.  Many of the registers programmed
+ * here are not identified yet (REG_A4XX_UNKNOWN_*); NOTE(review): the
+ * values are presumably taken from cmdstream traces of the blob driver,
+ * confirm before changing any of them.  On return ctx->needs_rb_fbd is
+ * set.
+ */
+void
+fd4_emit_restore(struct fd_context *ctx)
+{
+       struct fd4_context *fd4_ctx = fd4_context(ctx);
+       struct fd_ringbuffer *ring = ctx->ring;
+
+       OUT_PKT0(ring, REG_A4XX_RBBM_PERFCTR_CTL, 1);
+       OUT_RING(ring, 0x00000001);
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_DEBUG_ECO_CONTROL, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC3, 1);
+       OUT_RING(ring, 0x00000006);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_0F03, 1);
+       OUT_RING(ring, 0x0000003a);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_0D01, 1);
+       OUT_RING(ring, 0x00000001);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E42, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_WAYS_VFD, 1);
+       OUT_RING(ring, 0x00000007);
+
+       OUT_PKT0(ring, REG_A4XX_UCHE_CACHE_MODE_CONTROL, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UCHE_INVALIDATE0, 2);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000012);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_0E05, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC5, 1);
+       OUT_RING(ring, 0x00000006);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_0CC6, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_0EC2, 1);
+       OUT_RING(ring, 0x00040000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_2001, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+       OUT_RING(ring, 0x00001000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_20EF, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F0, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F1, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F2, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F3, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F4, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F5, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F6, 1);
+       OUT_RING(ring, 0x3c007fff);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_20F7, 1);
+       OUT_RING(ring, 0x3f800000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_2152, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_2153, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_2154, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_2155, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_2156, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_2157, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_21C3, 1);
+       OUT_RING(ring, 0x0000001d);
+
+       OUT_PKT0(ring, REG_A4XX_PC_GS_PARAM, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_21E6, 1);
+       OUT_RING(ring, 0x00000001);
+
+       OUT_PKT0(ring, REG_A4XX_PC_HS_PARAM, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_22D7, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_TPL1_TP_TEX_OFFSET, 1);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_2381, 1);
+       OUT_RING(ring, 0x00000010);
+
+       OUT_PKT0(ring, REG_A4XX_UNKNOWN_23A0, 1);
+       OUT_RING(ring, 0x00000010);
+
+       /* we don't use this yet.. probably best to disable.. */
+       OUT_PKT3(ring, CP_SET_DRAW_STATE, 2);
+       OUT_RING(ring, CP_SET_DRAW_STATE_0_COUNT(0) |
+                       CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS |
+                       CP_SET_DRAW_STATE_0_GROUP_ID(0));
+       OUT_RING(ring, CP_SET_DRAW_STATE_1_ADDR(0));
+
+       OUT_PKT0(ring, REG_A4XX_SP_VS_PVT_MEM_PARAM, 2);
+       OUT_RING(ring, 0x08000001);                  /* SP_VS_PVT_MEM_PARAM */
+       OUT_RELOC(ring, fd4_ctx->vs_pvt_mem, 0,0,0); /* SP_VS_PVT_MEM_ADDR */
+
+       OUT_PKT0(ring, REG_A4XX_SP_FS_PVT_MEM_PARAM, 2);
+       OUT_RING(ring, 0x08000001);                  /* SP_FS_PVT_MEM_PARAM */
+       OUT_RELOC(ring, fd4_ctx->fs_pvt_mem, 0,0,0); /* SP_FS_PVT_MEM_ADDR */
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+       OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                       A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+                       A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+                       A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+       OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL, 1);
+       OUT_RING(ring, A4XX_RB_MSAA_CONTROL_DISABLE |
+                       A4XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE));
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_CL_GB_CLIP_ADJ, 1);
+       OUT_RING(ring, A4XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
+                       A4XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
+
+       OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
+       OUT_RING(ring, A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
+
+       OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT, 1);
+       OUT_RING(ring, A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
+
+       OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL3, 1);
+       OUT_RING(ring, A4XX_RB_RENDER_CONTROL3_COMPONENT_ENABLE(0xf));
+
+       OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
+       OUT_RING(ring, A4XX_RB_FS_OUTPUT_REG_COLOR_PIPE_ENABLE);
+
+       ctx->needs_rb_fbd = true;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
new file mode 100644 (file)
index 0000000..c5fb24d
--- /dev/null
@@ -0,0 +1,91 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_EMIT_H
+#define FD4_EMIT_H
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+#include "fd4_util.h"
+#include "fd4_program.h"
+#include "ir3_shader.h"
+
+struct fd_ringbuffer;
+enum adreno_state_block;
+
+void fd4_emit_constant(struct fd_ringbuffer *ring,
+               enum adreno_state_block sb,
+               uint32_t regid, uint32_t offset, uint32_t sizedwords,
+               const uint32_t *dwords, struct pipe_resource *prsc);
+
+void fd4_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
+               struct pipe_surface *psurf);
+
+/*
+ * Grouped together emit-state for prog/vertex/state emit.
+ *
+ * The resolve path in fd4_gmem.c builds one of these on the stack with
+ * designated initializers, so any field it does not name starts out
+ * zero/NULL.
+ */
+struct fd4_emit {
+       /* vertex state to emit */
+       const struct fd_vertex_state *vtx;
+       /* program state; its vp/fp members are what fd4_emit_get_vp() and
+        * fd4_emit_get_fp() look the shader variants up from
+        */
+       const struct fd_program_stateobj *prog;
+       const struct pipe_draw_info *info;
+       /* variant key passed to ir3_shader_variant() by the getters below */
+       struct ir3_shader_key key;
+       uint32_t dirty;
+       bool rasterflat;
+
+       /* cached to avoid repeated lookups of same variants: */
+       /* (NULL until the first fd4_emit_get_vp()/fd4_emit_get_fp() call) */
+       struct ir3_shader_variant *vp, *fp;
+       /* TODO: other shader stages.. */
+};
+
+/*
+ * Return the vertex shader variant for this emit, looking it up via
+ * ir3_shader_variant() on first use and caching it in emit->vp.
+ */
+static inline struct ir3_shader_variant *
+fd4_emit_get_vp(struct fd4_emit *emit)
+{
+       struct fd4_shader_stateobj *stateobj;
+
+       if (emit->vp) {
+               return emit->vp;
+       }
+
+       stateobj = emit->prog->vp;
+       emit->vp = ir3_shader_variant(stateobj->shader, emit->key);
+
+       return emit->vp;
+}
+
+/*
+ * Return the fragment shader variant for this emit, looking it up via
+ * ir3_shader_variant() on first use and caching it in emit->fp.
+ */
+static inline struct ir3_shader_variant *
+fd4_emit_get_fp(struct fd4_emit *emit)
+{
+       struct fd4_shader_stateobj *stateobj;
+
+       if (emit->fp) {
+               return emit->fp;
+       }
+
+       stateobj = emit->prog->fp;
+       emit->fp = ir3_shader_variant(stateobj->shader, emit->key);
+
+       return emit->fp;
+}
+
+void fd4_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd4_emit *emit);
+
+void fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               struct fd4_emit *emit);
+
+void fd4_emit_restore(struct fd_context *ctx);
+
+#endif /* FD4_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
new file mode 100644 (file)
index 0000000..8cb6bc4
--- /dev/null
@@ -0,0 +1,415 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "freedreno_draw.h"
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd4_gmem.h"
+#include "fd4_context.h"
+#include "fd4_draw.h"
+#include "fd4_emit.h"
+#include "fd4_program.h"
+#include "fd4_util.h"
+#include "fd4_zsa.h"
+
+/*
+ * Shader variant key used in this file when emitting the internal solid
+ * program for the gmem2mem (resolve) pass.
+ */
+static const struct ir3_shader_key key = {
+               // XXX should set this based on render target format!  We don't
+               // want half_precision if float32 render target!!!
+               .half_precision = true,
+};
+
+/*
+ * Emit the RB_MRT_BUF_INFO register group (3 dwords) for each of the 8
+ * MRT slots.
+ *
+ * ring:    ringbuffer the packets are written to
+ * nr_bufs: number of entries in bufs
+ * bufs:    color surfaces; NULL entries and slots >= nr_bufs are emitted
+ *          with format 0, stride 0 and base 0
+ * bases:   optional per-slot base values, only consulted when bin_w != 0
+ * bin_w:   when non-zero, the stride is derived from bin_w and the base is
+ *          emitted as a plain value (presumably a GMEM offset -- confirm
+ *          against callers); when zero, the stride comes from the resource
+ *          slice and the base is a relocation against the resource's bo
+ */
+static void
+emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
+               struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
+{
+       unsigned i;
+
+       for (i = 0; i < 8; i++) {
+               enum a4xx_color_fmt format = 0;
+               enum a3xx_color_swap swap = WZYX;
+               struct fd_resource *rsc = NULL;
+               struct fd_resource_slice *slice = NULL;
+               uint32_t stride = 0;
+               uint32_t base = 0;
+               uint32_t layer_offset = 0;
+
+               if ((i < nr_bufs) && bufs[i]) {
+                       struct pipe_surface *psurf = bufs[i];
+
+                       rsc = fd_resource(psurf->texture);
+                       slice = &rsc->slices[psurf->u.tex.level];
+                       format = fd4_pipe2color(psurf->format);
+                       swap = fd4_pipe2swap(psurf->format);
+
+                       debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
+
+                       layer_offset = slice->size0 * psurf->u.tex.first_layer;
+
+                       if (bin_w) {
+                               stride = bin_w * rsc->cpp;
+
+                               if (bases) {
+                                       base = bases[i];
+                               }
+                       } else {
+                               stride = slice->pitch * rsc->cpp;
+                       }
+               }
+
+               OUT_PKT0(ring, REG_A4XX_RB_MRT_BUF_INFO(i), 3);
+               OUT_RING(ring, A4XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+                               0x80 | /* XXX not on gmem2mem?? tile-mode? */
+                               A4XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
+                               A4XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
+               /* rsc is only set for a bound surface, so testing it (rather
+                * than just i >= nr_bufs) also covers a NULL bufs[i] inside
+                * the nr_bufs range, which would otherwise dereference NULL
+                * rsc/slice in the reloc below:
+                */
+               if (bin_w || !rsc) {
+                       OUT_RING(ring, base);
+               } else {
+                       OUT_RELOCW(ring, rsc->bo,
+                                       slice->offset + layer_offset, 0, -1);
+               }
+               OUT_RING(ring, A4XX_RB_MRT_CONTROL3_STRIDE(stride));
+       }
+}
+
+/*
+ * Compute the base value used for the depth buffer: the size of one bin
+ * (bin_w * bin_h * cpp of the first color buffer, or 4 bytes/pixel when
+ * no color buffer is bound), aligned up to 0x4000.
+ */
+static uint32_t
+depth_base(struct fd_context *ctx)
+{
+       struct pipe_surface *cbuf0 = ctx->framebuffer.cbufs[0];
+       struct fd_gmem_stateobj *gmem = &ctx->gmem;
+       uint32_t cpp;
+
+       if (cbuf0) {
+               cpp = fd_resource(cbuf0->texture)->cpp;
+       } else {
+               cpp = 4;
+       }
+
+       return align(gmem->bin_w * gmem->bin_h * cpp, 0x4000);
+}
+
+/* transfer from gmem to system memory (ie. normal RAM) */
+
+/*
+ * Emit the RB_COPY_* setup and the draw that copies one surface out of
+ * gmem.
+ *
+ * ctx:   context whose ring the commands are written to
+ * base:  value programmed into RB_COPY_CONTROL.GMEM_BASE
+ * psurf: destination surface; the copy targets the mip level and the
+ *        first layer the surface refers to
+ */
+static void
+emit_gmem2mem_surf(struct fd_context *ctx,
+               uint32_t base, struct pipe_surface *psurf)
+{
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct fd_resource *rsc = fd_resource(psurf->texture);
+       struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
+       /* same per-layer offset as emit_mrt() uses, so that a surface that
+        * refers to a non-zero layer is resolved to that layer rather than
+        * to layer 0:
+        */
+       uint32_t layer_offset = slice->size0 * psurf->u.tex.first_layer;
+
+       OUT_PKT0(ring, REG_A4XX_RB_COPY_CONTROL, 4);
+       OUT_RING(ring, A4XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
+                       A4XX_RB_COPY_CONTROL_MODE(RB_COPY_RESOLVE) |
+                       A4XX_RB_COPY_CONTROL_GMEM_BASE(base));
+       OUT_RELOCW(ring, rsc->bo, slice->offset + layer_offset, 0, 0);   /* RB_COPY_DEST_BASE */
+       OUT_RING(ring, A4XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
+       OUT_RING(ring, A4XX_RB_COPY_DEST_INFO_TILE(TILE4_LINEAR) |
+                       A4XX_RB_COPY_DEST_INFO_FORMAT(fd4_pipe2color(psurf->format)) |
+                       A4XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
+                       A4XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
+                       A4XX_RB_COPY_DEST_INFO_SWAP(fd4_pipe2swap(psurf->format)));
+
+       /* the copy itself is triggered by a 2-vertex rectlist draw: */
+       fd4_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
+                       DI_SRC_SEL_AUTO_INDEX, 2, INDEX_SIZE_IGN, 0, 0, NULL);
+}
+
+/*
+ * Installed as ctx->emit_tile_gmem2mem: emits the commands that resolve
+ * the buffers selected by ctx->resolve out of gmem.
+ *
+ * Sequence: set depth/stencil functions to FUNC_NEVER, program a viewport
+ * and window scissor covering the whole framebuffer, switch
+ * GRAS_SC_CONTROL to RB_RESOLVE_PASS, emit the solid program and its
+ * vertex buffers, issue one copy per selected buffer, then switch
+ * GRAS_SC_CONTROL back to RB_RENDERING_PASS.
+ *
+ * The tile argument is not used here.
+ */
+static void
+fd4_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
+{
+       struct fd4_context *fd4_ctx = fd4_context(ctx);
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+       /* fields not named here (vp/fp cache, info, dirty, ..) start zeroed */
+       struct fd4_emit emit = {
+                       .vtx = &fd4_ctx->solid_vbuf_state,
+                       .prog = &ctx->solid_prog,
+                       .key = key,
+       };
+
+       OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
+       OUT_RING(ring, A4XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+
+       OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 1);
+       OUT_RING(ring, A4XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+                       A4XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+                       A4XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+                       A4XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+                       A4XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+                       A4XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+                       A4XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+                       A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+       /* front-face dword followed by the back-face (_BF) dword: */
+       OUT_PKT0(ring, REG_A4XX_RB_STENCILREFMASK, 2);
+       OUT_RING(ring, 0xff000000 |
+                       A4XX_RB_STENCILREFMASK_STENCILREF(0) |
+                       A4XX_RB_STENCILREFMASK_STENCILMASK(0) |
+                       A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+       OUT_RING(ring, 0xff000000 |
+                       A4XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
+                       A4XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
+                       A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff));
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_SU_MODE_CONTROL, 1);
+       OUT_RING(ring, A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
+
+       fd_wfi(ctx, ring);
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
+       OUT_RING(ring, 0x80000);      /* GRAS_CL_CLIP_CNTL */
+
+       /* viewport spanning the full framebuffer, with the y scale negated: */
+       OUT_PKT0(ring, REG_A4XX_GRAS_CL_VPORT_XOFFSET_0, 6);
+       OUT_RING(ring, A4XX_GRAS_CL_VPORT_XOFFSET_0((float)pfb->width/2.0));
+       OUT_RING(ring, A4XX_GRAS_CL_VPORT_XSCALE_0((float)pfb->width/2.0));
+       OUT_RING(ring, A4XX_GRAS_CL_VPORT_YOFFSET_0((float)pfb->height/2.0));
+       OUT_RING(ring, A4XX_GRAS_CL_VPORT_YSCALE_0(-(float)pfb->height/2.0));
+       OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZOFFSET_0(0.0));
+       OUT_RING(ring, A4XX_GRAS_CL_VPORT_ZSCALE_0(1.0));
+
+       OUT_PKT0(ring, REG_A4XX_RB_RENDER_CONTROL, 1);
+       OUT_RING(ring, A4XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+                       0xa);       /* XXX */
+
+       /* enter resolve pass (undone at the end of this function): */
+       OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+       OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+                       A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+                       A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+                       A4XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+       OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
+       OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
+       OUT_RING(ring, 0x00000002);
+
+       /* note register order: the BR dword is written first, then TL */
+       OUT_PKT0(ring, REG_A4XX_GRAS_SC_WINDOW_SCISSOR_BR, 2);
+       OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
+                       A4XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
+       OUT_RING(ring, A4XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+                       A4XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+
+       OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
+       OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
+       OUT_RING(ring, 0);            /* ??? UNKNOWN_2209 */
+
+       fd4_program_emit(ring, &emit);
+       fd4_emit_vertex_bufs(ring, &emit);
+
+       /* depth/stencil is copied from the base computed by depth_base().
+        * NOTE(review): pfb->zsbuf is not checked for NULL here; presumably
+        * the depth/stencil resolve bits are only set when one is bound --
+        * confirm.
+        */
+       if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
+               uint32_t base = depth_base(ctx);
+               emit_gmem2mem_surf(ctx, base, pfb->zsbuf);
+       }
+
+       /* color is copied from base 0, and only cbufs[0] is resolved: */
+       if (ctx->resolve & FD_BUFFER_COLOR) {
+               emit_gmem2mem_surf(ctx, 0, pfb->cbufs[0]);
+       }
+
+       /* back to the normal rendering pass: */
+       OUT_PKT0(ring, REG_A4XX_GRAS_SC_CONTROL, 1);
+       OUT_RING(ring, A4XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                       A4XX_GRAS_SC_CONTROL_MSAA_DISABLE |
+                       A4XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+                       A4XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+}
+
+/* transfer from system memory to gmem */
+
+static void
+fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
+{
+       /* TODO: not implemented yet, so nothing is emitted for the
+        * system memory -> gmem transfer.
+        */
+       (void)ctx;
+       (void)tile;
+}
+
+/*
+ * Go back over every recorded draw patch, writing the saved value OR'd
+ * with the requested visibility mode into the saved command-stream
+ * location, then empty the patch list.
+ */
+static void
+patch_draws(struct fd_context *ctx, enum pc_di_vis_cull_mode vismode)
+{
+       struct util_dynarray *patches = &ctx->draw_patches;
+       unsigned idx = 0;
+
+       while (idx < fd_patch_num_elements(patches)) {
+               struct fd_cs_patch *p = fd_patch_element(patches, idx);
+
+               *p->cs = p->val | DRAW4(0, 0, 0, vismode);
+               idx++;
+       }
+
+       util_dynarray_resize(patches, 0);
+}
+
+/*
+ * Go back over every recorded rbrc patch, writing the saved value OR'd
+ * with val into the saved command-stream location, then empty the patch
+ * list.
+ */
+static void
+patch_rbrc(struct fd_context *ctx, uint32_t val)
+{
+       struct util_dynarray *patches = &fd4_context(ctx)->rbrc_patches;
+       unsigned idx = 0;
+
+       while (idx < fd_patch_num_elements(patches)) {
+               struct fd_cs_patch *p = fd_patch_element(patches, idx);
+
+               *p->cs = p->val | val;
+               idx++;
+       }
+
+       util_dynarray_resize(patches, 0);
+}
+
+/*
+ * Emit the VSC setup: the size address, then for each of the 8 entries of
+ * ctx->pipe[] its config (x/y/w/h), its data buffer address and its data
+ * buffer length.  The three per-pipe register arrays are each written as
+ * one 8-dword packet, which is why there are three separate loops.
+ */
+static void
+update_vsc_pipe(struct fd_context *ctx)
+{
+       struct fd4_context *fd4_ctx = fd4_context(ctx);
+       struct fd_ringbuffer *ring = ctx->ring;
+       int i;
+
+       OUT_PKT0(ring, REG_A4XX_VSC_SIZE_ADDRESS, 1);
+       OUT_RELOCW(ring, fd4_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS */
+
+       OUT_PKT0(ring, REG_A4XX_VSC_PIPE_CONFIG_REG(0), 8);
+       for (i = 0; i < 8; i++) {
+               struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+               OUT_RING(ring, A4XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
+                               A4XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
+                               A4XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
+                               A4XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
+       }
+
+       OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_ADDRESS_REG(0), 8);
+       for (i = 0; i < 8; i++) {
+               struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+               /* the per-pipe buffer is allocated on first use and then kept.
+                * NOTE(review): the fd_bo_new() result is not checked before
+                * it is used below.
+                */
+               if (!pipe->bo) {
+                       pipe->bo = fd_bo_new(ctx->dev, 0x40000,
+                                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+               }
+               OUT_RELOCW(ring, pipe->bo, 0, 0, 0);       /* VSC_PIPE_DATA_ADDRESS[i] */
+       }
+
+       OUT_PKT0(ring, REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(0), 8);
+       for (i = 0; i < 8; i++) {
+               struct fd_vsc_pipe *pipe = &ctx->pipe[i];
+               /* programmed length is 32 less than the bo size */
+               OUT_RING(ring, fd_bo_size(pipe->bo) - 32); /* VSC_PIPE_DATA_LENGTH[i] */
+       }
+}
+
+/*
+ * before first tile: emit the restore state, program the bin size into
+ * VSC_BIN_SIZE and RB_MODE_CONTROL, set up the VSC pipes, and resolve the
+ * draw / RB_RENDER_CONTROL patches recorded so far.
+ */
+static void
+fd4_emit_tile_init(struct fd_context *ctx)
+{
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct fd_gmem_stateobj *gmem = &ctx->gmem;
+
+       fd4_emit_restore(ctx);
+
+       OUT_PKT0(ring, REG_A4XX_VSC_BIN_SIZE, 1);
+       OUT_RING(ring,
+                       A4XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+                       A4XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
+
+       OUT_PKT0(ring, REG_A4XX_RB_MODE_CONTROL, 1);
+       OUT_RING(ring,
+                       A4XX_RB_MODE_CONTROL_WIDTH(gmem->bin_w) |
+                       A4XX_RB_MODE_CONTROL_HEIGHT(gmem->bin_h) |
+                       0x00010000);  /* XXX */
+
+       update_vsc_pipe(ctx);
+
+       /* no hw binning yet, so all draws ignore visibility: */
+       patch_draws(ctx, IGNORE_VISIBILITY);
+
+       /* XXX or BINNING_PASS.. but maybe we can emit only from gmem */
+       patch_rbrc(ctx, 0);
+}
+
+/*
+ * before mem2gmem: program the depth buffer info/pitch (zeroed pitch when
+ * no depth/stencil buffer is bound), the GRAS depth format, and, if still
+ * pending, the frame buffer dimensions.
+ */
+static void
+fd4_emit_tile_prep(struct fd_context *ctx, struct fd_tile *tile)
+{
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+       struct fd_gmem_stateobj *gmem = &ctx->gmem;
+       struct pipe_surface *zsbuf = pfb->zsbuf;
+       uint32_t depth_info;
+
+       OUT_PKT0(ring, REG_A4XX_RB_DEPTH_INFO, 3);
+       depth_info = A4XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(ctx));
+
+       if (zsbuf) {
+               depth_info |= A4XX_RB_DEPTH_INFO_DEPTH_FORMAT(
+                               fd_pipe2depth(zsbuf->format));
+
+               OUT_RING(ring, depth_info);
+               OUT_RING(ring, A4XX_RB_DEPTH_PITCH(gmem->bin_w));
+               OUT_RING(ring, A4XX_RB_DEPTH_PITCH2(gmem->bin_w));
+
+               OUT_PKT0(ring, REG_A4XX_GRAS_DEPTH_CONTROL, 1);
+               OUT_RING(ring, A4XX_GRAS_DEPTH_CONTROL_FORMAT(
+                               fd_pipe2depth(zsbuf->format)));
+       } else {
+               OUT_RING(ring, depth_info);
+               OUT_RING(ring, 0x00000000);
+               OUT_RING(ring, 0x00000000);
+       }
+
+       if (!ctx->needs_rb_fbd) {
+               return;
+       }
+
+       /* emitted once, then the flag is cleared until it is set again: */
+       fd_wfi(ctx, ring);
+       OUT_PKT0(ring, REG_A4XX_RB_FRAME_BUFFER_DIMENSION, 1);
+       OUT_RING(ring, A4XX_RB_FRAME_BUFFER_DIMENSION_WIDTH(pfb->width) |
+                       A4XX_RB_FRAME_BUFFER_DIMENSION_HEIGHT(pfb->height));
+       ctx->needs_rb_fbd = false;
+}
+
+/*
+ * before IB to rendering cmds: select the current bin, point the MRTs at
+ * it, and set the bin offset and screen scissor to the tile's bounds.
+ */
+static void
+fd4_emit_tile_renderprep(struct fd_context *ctx, struct fd_tile *tile)
+{
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct fd_gmem_stateobj *gmem = &ctx->gmem;
+       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+
+       /* inclusive tile bounds: */
+       uint32_t left = tile->xoff;
+       uint32_t top = tile->yoff;
+       uint32_t right = tile->xoff + tile->bin_w - 1;
+       uint32_t bottom = tile->yoff + tile->bin_h - 1;
+
+       OUT_PKT3(ring, CP_SET_BIN, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, CP_SET_BIN_1_X1(left) | CP_SET_BIN_1_Y1(top));
+       OUT_RING(ring, CP_SET_BIN_2_X2(right) | CP_SET_BIN_2_Y2(bottom));
+
+       /* non-zero bin width, no explicit per-buffer bases: */
+       emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
+
+       /* setup scissor/offset for current tile: */
+       OUT_PKT0(ring, REG_A4XX_RB_BIN_OFFSET, 1);
+       OUT_RING(ring,
+                       A4XX_RB_BIN_OFFSET_X(tile->xoff) |
+                       A4XX_RB_BIN_OFFSET_Y(tile->yoff));
+
+       OUT_PKT0(ring, REG_A4XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+       OUT_RING(ring,
+                       A4XX_GRAS_SC_SCREEN_SCISSOR_TL_X(left) |
+                       A4XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(top));
+       OUT_RING(ring,
+                       A4XX_GRAS_SC_SCREEN_SCISSOR_BR_X(right) |
+                       A4XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bottom));
+}
+
+/*
+ * Install the a4xx tile-emit callbacks on the context.
+ */
+void
+fd4_gmem_init(struct pipe_context *pctx)
+{
+       struct fd_context *fd_ctx = fd_context(pctx);
+
+       /* once per batch of tiles: */
+       fd_ctx->emit_tile_init = fd4_emit_tile_init;
+
+       /* per tile: */
+       fd_ctx->emit_tile_prep = fd4_emit_tile_prep;
+       fd_ctx->emit_tile_renderprep = fd4_emit_tile_renderprep;
+
+       /* transfers between system memory and gmem: */
+       fd_ctx->emit_tile_mem2gmem = fd4_emit_tile_mem2gmem;
+       fd_ctx->emit_tile_gmem2mem = fd4_emit_tile_gmem2mem;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.h b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.h
new file mode 100644 (file)
index 0000000..8964714
--- /dev/null
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_GMEM_H_
+#define FD4_GMEM_H_
+
+#include "pipe/p_context.h"
+
+/* Install the a4xx emit_tile_* callbacks on the context behind pctx. */
+void fd4_gmem_init(struct pipe_context *pctx);
+
+#endif /* FD4_GMEM_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
new file mode 100644 (file)
index 0000000..591a1d8
--- /dev/null
@@ -0,0 +1,480 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "freedreno_program.h"
+
+#include "fd4_program.h"
+#include "fd4_emit.h"
+#include "fd4_texture.h"
+#include "fd4_util.h"
+
+/*
+ * Release a shader state object: destroys the ir3 shader it wraps, then
+ * frees the wrapper itself.
+ */
+static void
+delete_shader_stateobj(struct fd4_shader_stateobj *so)
+{
+       ir3_shader_destroy(so->shader);
+       /* the wrapper is allocated with CALLOC_STRUCT(), so release it with
+        * the matching FREE() rather than plain free():
+        */
+       FREE(so);
+}
+
+/*
+ * Allocate a shader state object and create the ir3 shader for the given
+ * TGSI tokens.
+ *
+ * pctx: context passed through to ir3_shader_create()
+ * cso:  shader state; only cso->tokens is used
+ * type: shader stage passed through to ir3_shader_create()
+ *
+ * Returns the new state object, or NULL if the allocation failed.
+ */
+static struct fd4_shader_stateobj *
+create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso,
+               enum shader_t type)
+{
+       struct fd4_shader_stateobj *so = CALLOC_STRUCT(fd4_shader_stateobj);
+       if (!so) {
+               /* report the failure instead of dereferencing NULL below */
+               return NULL;
+       }
+       so->shader = ir3_shader_create(pctx, cso->tokens, type);
+       return so;
+}
+
+/* Create a fragment shader state object from cso. */
+static void *
+fd4_fp_state_create(struct pipe_context *pctx,
+               const struct pipe_shader_state *cso)
+{
+       struct fd4_shader_stateobj *fp =
+                       create_shader_stateobj(pctx, cso, SHADER_FRAGMENT);
+
+       return fp;
+}
+
+/* Destroy a fragment shader state object; hwcso is treated as a
+ * struct fd4_shader_stateobj.
+ */
+static void
+fd4_fp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       delete_shader_stateobj(hwcso);
+}
+
+/* Create a vertex shader state object from cso. */
+static void *
+fd4_vp_state_create(struct pipe_context *pctx,
+               const struct pipe_shader_state *cso)
+{
+       struct fd4_shader_stateobj *vp =
+                       create_shader_stateobj(pctx, cso, SHADER_VERTEX);
+
+       return vp;
+}
+
+/* Destroy a vertex shader state object; hwcso is treated as a
+ * struct fd4_shader_stateobj.
+ */
+static void
+fd4_vp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       delete_shader_stateobj(hwcso);
+}
+
+/*
+ * Emit a CP_LOAD_STATE packet that loads a shader variant's instructions
+ * into the vertex or fragment shader state block.
+ *
+ * With FD_DBG_DIRECT set in fd_mesa_debug, the instruction dwords are read
+ * from the mapped bo and written inline after the packet header;
+ * otherwise the packet only carries a relocation to the shader bo.
+ */
+static void
+emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so)
+{
+       const bool direct = !!(fd_mesa_debug & FD_DBG_DIRECT);
+       const enum adreno_state_block sb =
+                       (so->type == SHADER_VERTEX) ? SB_VERT_SHADER : SB_FRAG_SHADER;
+       const uint32_t ndwords = direct ? so->info.sizedwords : 0;
+       /* indirect source is hard-coded to 2: enums different on a4xx.. */
+       const enum adreno_state_src src = direct ? SS_DIRECT : 2;
+       const uint32_t *code = direct ? fd_bo_map(so->bo) : NULL;
+       uint32_t n;
+
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + ndwords);
+       OUT_RING(ring,
+                       CP_LOAD_STATE_0_DST_OFF(0) |
+                       CP_LOAD_STATE_0_STATE_SRC(src) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                       CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
+
+       if (code) {
+               OUT_RING(ring,
+                               CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+                               CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER));
+       } else {
+               OUT_RELOC(ring, so->bo, 0,
+                               CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0);
+       }
+
+       /* inline payload, only present in direct mode (ndwords is 0 otherwise) */
+       for (n = 0; n < ndwords; n++) {
+               OUT_RING(ring, code[n]);
+       }
+}
+
+/*
+ * Per-shader-stage values gathered by setup_stages() and then written
+ * into the HLSQ/SP control registers by fd4_program_emit().
+ */
+struct stage {
+       /* variant for this stage, or NULL if the stage is not used */
+       const struct ir3_shader_variant *v;
+       /* points at v->info, or NULL when v is NULL */
+       const struct ir3_info *i;
+       /* const sizes are in units of 4 * vec4 */
+       uint8_t constoff;
+       uint8_t constlen;
+       /* instr sizes are in units of 16 instructions */
+       uint8_t instroff;
+       uint8_t instrlen;
+};
+
+/* indices into the struct stage array; MAX_STAGES is the array size */
+enum {
+       VS = 0,
+       FS = 1,
+       HS = 2,
+       DS = 3,
+       GS = 4,
+       MAX_STAGES
+};
+
+/*
+ * Fill in the MAX_STAGES entries of s[] from the emit state: pick the
+ * variant for each stage, derive its const/instr lengths, then assign the
+ * const and instruction offsets.
+ *
+ * Only VS and FS get a variant; HS/DS/GS are set to NULL and end up with
+ * zero lengths.
+ */
+static void
+setup_stages(struct fd4_emit *emit, struct stage *s)
+{
+       unsigned i;
+
+       s[VS].v = fd4_emit_get_vp(emit);
+
+       if (emit->key.binning_pass) {
+               /* use dummy stateobj to simplify binning vs non-binning: */
+               static const struct ir3_shader_variant binning_fp = {};
+               s[FS].v = &binning_fp;
+       } else {
+               s[FS].v = fd4_emit_get_fp(emit);
+       }
+
+       s[HS].v = s[DS].v = s[GS].v = NULL;  /* for now */
+
+       for (i = 0; i < MAX_STAGES; i++) {
+               if (s[i].v) {
+                       s[i].i = &s[i].v->info;
+                       /* constlen is in units of 4 * vec4: */
+                       s[i].constlen = align(s[i].v->constlen, 4) / 4;
+                       /* instrlen is already in units of 16 instr.. although
+                        * probably we should ditch that and not make the compiler
+                        * care about instruction group size of a3xx vs a4xx
+                        */
+                       s[i].instrlen = s[i].v->instrlen;
+               } else {
+                       s[i].i = NULL;
+                       s[i].constlen = 0;
+                       s[i].instrlen = 0;
+               }
+       }
+
+       /* NOTE: at least for gles2, blob partitions VS at bottom of const
+        * space and FS taking entire remaining space.  We probably don't
+        * need to do that the same way, but for now mimic what the blob
+        * does to make it easier to diff against register values from blob
+        */
+       /* these two assignments replace the VS/FS constlen values computed
+        * in the loop above with a fixed 66 / 62 split of 128 units:
+        */
+       s[VS].constlen = 66;
+       s[FS].constlen = 128 - s[VS].constlen;
+       s[VS].instroff = 0;
+       s[VS].constoff = 0;
+       /* FS instructions are placed so that they end at offset 64: */
+       s[FS].instroff = 64 - s[FS].instrlen;
+       s[FS].constoff = s[VS].constlen;
+       /* the unused stages simply reuse the FS offsets: */
+       s[HS].instroff = s[DS].instroff = s[GS].instroff = s[FS].instroff;
+       s[HS].constoff = s[DS].constoff = s[GS].constoff = s[FS].constoff;
+}
+
+void
+fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
+{
+       struct stage s[MAX_STAGES];
+       uint32_t pos_regid, posz_regid, psize_regid, color_regid;
+       int constmode;
+       int i, j, k;
+
+       setup_stages(emit, s);
+
+       /* blob seems to always use constmode currently: */
+       constmode = 1;
+
+       pos_regid = ir3_find_output_regid(s[VS].v,
+               ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
+       posz_regid = ir3_find_output_regid(s[FS].v,
+               ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
+       psize_regid = ir3_find_output_regid(s[VS].v,
+               ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0));
+       color_regid = ir3_find_output_regid(s[FS].v,
+               ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0));
+
+       /* we could probably divide this up into things that need to be
+        * emitted if frag-prog is dirty vs if vert-prog is dirty..
+        */
+
+       OUT_PKT0(ring, REG_A4XX_HLSQ_UPDATE_CONTROL, 1);
+       OUT_RING(ring, 0x00000003);
+
+       OUT_PKT0(ring, REG_A4XX_HLSQ_CONTROL_0_REG, 4);
+       OUT_RING(ring, A4XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+                       A4XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
+                       A4XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE |
+                       /* NOTE:  I guess SHADERRESTART and CONSTFULLUPDATE maybe
+                        * flush some caches? I think we only need to set those
+                        * bits if we have updated const or shader..
+                        */
+                       A4XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
+                       A4XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
+       OUT_RING(ring, A4XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
+                       0xfcfc0000 |          /* XXX */
+                       A4XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
+                       COND(s[FS].v->frag_coord, A4XX_HLSQ_CONTROL_1_REG_ZWCOORD));
+       OUT_RING(ring, A4XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
+       OUT_RING(ring, A4XX_HLSQ_CONTROL_3_REG_REGID(s[FS].v->pos_regid));
+
+       OUT_PKT0(ring, REG_A4XX_HLSQ_VS_CONTROL_REG, 5);
+       OUT_RING(ring, A4XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(s[VS].constlen) |
+                       A4XX_HLSQ_VS_CONTROL_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
+                       A4XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(s[VS].instrlen) |
+                       A4XX_HLSQ_VS_CONTROL_REG_SHADEROBJOFFSET(s[VS].instroff));
+       OUT_RING(ring, A4XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(s[FS].constlen) |
+                       A4XX_HLSQ_FS_CONTROL_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+                       A4XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(s[FS].instrlen) |
+                       A4XX_HLSQ_FS_CONTROL_REG_SHADEROBJOFFSET(s[FS].instroff));
+       OUT_RING(ring, A4XX_HLSQ_HS_CONTROL_REG_CONSTLENGTH(s[HS].constlen) |
+                       A4XX_HLSQ_HS_CONTROL_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
+                       A4XX_HLSQ_HS_CONTROL_REG_INSTRLENGTH(s[HS].instrlen) |
+                       A4XX_HLSQ_HS_CONTROL_REG_SHADEROBJOFFSET(s[HS].instroff));
+       OUT_RING(ring, A4XX_HLSQ_DS_CONTROL_REG_CONSTLENGTH(s[DS].constlen) |
+                       A4XX_HLSQ_DS_CONTROL_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
+                       A4XX_HLSQ_DS_CONTROL_REG_INSTRLENGTH(s[DS].instrlen) |
+                       A4XX_HLSQ_DS_CONTROL_REG_SHADEROBJOFFSET(s[DS].instroff));
+       OUT_RING(ring, A4XX_HLSQ_GS_CONTROL_REG_CONSTLENGTH(s[GS].constlen) |
+                       A4XX_HLSQ_GS_CONTROL_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
+                       A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(s[GS].instrlen) |
+                       A4XX_HLSQ_GS_CONTROL_REG_SHADEROBJOFFSET(s[GS].instroff));
+
+       OUT_PKT0(ring, REG_A4XX_SP_SP_CTRL_REG, 1);
+       OUT_RING(ring, 0x140010 | /* XXX */
+                       COND(emit->key.binning_pass, A4XX_SP_SP_CTRL_REG_BINNING_PASS));
+
+       OUT_PKT0(ring, REG_A4XX_SP_INSTR_CACHE_CTRL, 1);
+       OUT_RING(ring, 0x1c3);   /* XXX SP_INSTR_CACHE_CTRL */
+
+       OUT_PKT0(ring, REG_A4XX_SP_VS_LENGTH_REG, 1);
+       OUT_RING(ring, s[VS].v->instrlen);      /* SP_VS_LENGTH_REG */
+
+       OUT_PKT0(ring, REG_A4XX_SP_VS_CTRL_REG0, 3);
+       OUT_RING(ring, A4XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
+                       A4XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(s[VS].i->max_half_reg + 1) |
+                       A4XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(s[VS].i->max_reg + 1) |
+                       A4XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
+                       A4XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
+                       A4XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
+                       COND(s[VS].v->has_samp, A4XX_SP_VS_CTRL_REG0_PIXLODENABLE));
+       OUT_RING(ring, A4XX_SP_VS_CTRL_REG1_CONSTLENGTH(s[VS].constlen) |
+                       A4XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(s[VS].v->total_in));
+       OUT_RING(ring, A4XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
+                       A4XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
+                       A4XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(s[FS].v->total_in, 4) / 4));
+
+       for (i = 0, j = -1; (i < 16) && (j < (int)s[FS].v->inputs_count); i++) {
+               uint32_t reg = 0;
+
+               OUT_PKT0(ring, REG_A4XX_SP_VS_OUT_REG(i), 1);
+
+               j = ir3_next_varying(s[FS].v, j);
+               if (j < s[FS].v->inputs_count) {
+                       k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
+                       reg |= A4XX_SP_VS_OUT_REG_A_REGID(s[VS].v->outputs[k].regid);
+                       reg |= A4XX_SP_VS_OUT_REG_A_COMPMASK(s[FS].v->inputs[j].compmask);
+               }
+
+               j = ir3_next_varying(s[FS].v, j);
+               if (j < s[FS].v->inputs_count) {
+                       k = ir3_find_output(s[VS].v, s[FS].v->inputs[j].semantic);
+                       reg |= A4XX_SP_VS_OUT_REG_B_REGID(s[VS].v->outputs[k].regid);
+                       reg |= A4XX_SP_VS_OUT_REG_B_COMPMASK(s[FS].v->inputs[j].compmask);
+               }
+
+               OUT_RING(ring, reg);
+       }
+
+       for (i = 0, j = -1; (i < 8) && (j < (int)s[FS].v->inputs_count); i++) {
+               uint32_t reg = 0;
+
+               OUT_PKT0(ring, REG_A4XX_SP_VS_VPC_DST_REG(i), 1);
+
+               j = ir3_next_varying(s[FS].v, j);
+               if (j < s[FS].v->inputs_count)
+                       reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC0(s[FS].v->inputs[j].inloc);
+               j = ir3_next_varying(s[FS].v, j);
+               if (j < s[FS].v->inputs_count)
+                       reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC1(s[FS].v->inputs[j].inloc);
+               j = ir3_next_varying(s[FS].v, j);
+               if (j < s[FS].v->inputs_count)
+                       reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC2(s[FS].v->inputs[j].inloc);
+               j = ir3_next_varying(s[FS].v, j);
+               if (j < s[FS].v->inputs_count)
+                       reg |= A4XX_SP_VS_VPC_DST_REG_OUTLOC3(s[FS].v->inputs[j].inloc);
+
+               OUT_RING(ring, reg);
+       }
+
+       OUT_PKT0(ring, REG_A4XX_SP_VS_OBJ_OFFSET_REG, 2);
+       OUT_RING(ring, A4XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[VS].constoff) |
+                       A4XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[VS].instroff));
+       OUT_RELOC(ring, s[VS].v->bo, 0, 0, 0);  /* SP_VS_OBJ_START_REG */
+
+       OUT_PKT0(ring, REG_A4XX_SP_FS_LENGTH_REG, 1);
+       OUT_RING(ring, s[FS].v->instrlen);  /* SP_FS_LENGTH_REG */
+
+       OUT_PKT0(ring, REG_A4XX_SP_FS_CTRL_REG0, 2);
+       OUT_RING(ring, A4XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+                       COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG0_VARYING) |
+                       A4XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
+                       A4XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
+                       A4XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
+                       A4XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
+                       A4XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
+                       COND(s[FS].v->has_samp, A4XX_SP_FS_CTRL_REG0_PIXLODENABLE));
+       OUT_RING(ring, A4XX_SP_FS_CTRL_REG1_CONSTLENGTH(s[FS].constlen) |
+                       0x80000000 |      /* XXX */
+                       COND(s[FS].v->total_in > 0, A4XX_SP_FS_CTRL_REG1_VARYING));
+
+       OUT_PKT0(ring, REG_A4XX_SP_FS_OBJ_OFFSET_REG, 2);
+       OUT_RING(ring, A4XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[FS].constoff) |
+                       A4XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[FS].instroff));
+       if (emit->key.binning_pass)
+               OUT_RING(ring, 0x00000000);
+       else
+               OUT_RELOC(ring, s[FS].v->bo, 0, 0, 0);  /* SP_FS_OBJ_START_REG */
+
+       OUT_PKT0(ring, REG_A4XX_SP_HS_OBJ_OFFSET_REG, 1);
+       OUT_RING(ring, A4XX_SP_HS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[HS].constoff) |
+                       A4XX_SP_HS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[HS].instroff));
+
+       OUT_PKT0(ring, REG_A4XX_SP_DS_OBJ_OFFSET_REG, 1);
+       OUT_RING(ring, A4XX_SP_DS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[DS].constoff) |
+                       A4XX_SP_DS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[DS].instroff));
+
+       OUT_PKT0(ring, REG_A4XX_SP_GS_OBJ_OFFSET_REG, 1);
+       OUT_RING(ring, A4XX_SP_GS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(s[GS].constoff) |
+                       A4XX_SP_GS_OBJ_OFFSET_REG_SHADEROBJOFFSET(s[GS].instroff));
+
+       OUT_PKT0(ring, REG_A4XX_RB_MSAA_CONTROL2, 1);
+       OUT_RING(ring, A4XX_RB_MSAA_CONTROL2_MSAA_SAMPLES(0) |
+                       COND(s[FS].v->total_in > 0, A4XX_RB_MSAA_CONTROL2_VARYING));
+
+       OUT_PKT0(ring, REG_A4XX_SP_FS_OUTPUT_REG, 1);
+       if (s[FS].v->writes_pos) {
+               OUT_RING(ring, A4XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
+                               A4XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid));
+       } else {
+               OUT_RING(ring, 0x00000001);
+       }
+
+       OUT_PKT0(ring, REG_A4XX_SP_FS_MRT_REG(0), 8);
+       OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(color_regid) |
+// XXX do we need to patch? or update when RT format changes..  maybe
+// move this to emit??
+                       A4XX_SP_FS_MRT_REG_MRTFORMAT(RB4_R8G8B8A8_UNORM) |  // XXX patch?
+                       COND(s[FS].v->key.half_precision, A4XX_SP_FS_MRT_REG_HALF_PRECISION));
+       OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+       OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+       OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+       OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+       OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+       OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+       OUT_RING(ring, A4XX_SP_FS_MRT_REG_REGID(0));
+
+       if (emit->key.binning_pass) {
+               OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
+               OUT_RING(ring, A4XX_VPC_ATTR_THRDASSIGN(1) |
+                               0x40000000 |      /* XXX */
+                               COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
+               OUT_RING(ring, 0x00000000);
+       } else {
+               uint32_t vinterp[8] = {0}, flatshade[2] = {0};
+
+               /* figure out VARYING_INTERP / FLAT_SHAD register values: */
+               for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
+                       uint32_t interp = s[FS].v->inputs[j].interpolate;
+                       if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
+                                       ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
+                               /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
+                                * instead.. rather than -8 everywhere else..
+                                */
+                               uint32_t loc = s[FS].v->inputs[j].inloc - 8;
+
+                               /* currently assuming varyings aligned to 4 (not
+                                * packed):
+                                */
+                               debug_assert((loc % 4) == 0);
+
+                               for (i = 0; i < 4; i++, loc++) {
+                                       vinterp[loc / 16] |= 1 << ((loc % 16) * 2);
+                                       flatshade[loc / 32] |= 1 << (loc % 32);
+                               }
+                       }
+               }
+
+               OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
+               OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
+                               A4XX_VPC_ATTR_THRDASSIGN(1) |
+                               COND(s[FS].v->total_in > 0, A4XX_VPC_ATTR_ENABLE) |
+                               0x40000000 |      /* XXX */
+                               COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
+               OUT_RING(ring, A4XX_VPC_PACK_NUMFPNONPOSVAR(s[FS].v->total_in) |
+                               A4XX_VPC_PACK_NUMNONPOSVSVAR(s[FS].v->total_in));
+
+               OUT_PKT0(ring, REG_A4XX_VPC_VARYING_INTERP_MODE(0), 8);
+               for (i = 0; i < 8; i++)
+                       OUT_RING(ring, vinterp[i]);     /* VPC_VARYING_INTERP[i].MODE */
+
+               OUT_PKT0(ring, REG_A4XX_VPC_VARYING_PS_REPL_MODE(0), 8);
+               for (i = 0; i < 8; i++)
+                       OUT_RING(ring, s[FS].v->shader->vpsrepl[i]);   /* VPC_VARYING_PS_REPL[i] */
+       }
+
+       emit_shader(ring, s[VS].v);
+
+       if (!emit->key.binning_pass)
+               emit_shader(ring, s[FS].v);
+}
+
+/* hack.. until we figure out how to deal w/ vpsrepl properly..
+ *
+ * Overwrites the first four vpsrepl words of the context's blit
+ * program fp stateobj with 0x99999999.
+ */
+static void
+fix_blit_fp(struct pipe_context *pctx)
+{
+       struct fd4_shader_stateobj *blit_fp = fd_context(pctx)->blit_prog.fp;
+       unsigned n;
+
+       for (n = 0; n < 4; n++)
+               blit_fp->shader->vpsrepl[n] = 0x99999999;
+}
+
+/* Install the a4xx vertex/fragment shader state hooks on the context,
+ * run the common fd_prog_init(), and then patch the blit fragment
+ * program via fix_blit_fp().
+ */
+void
+fd4_prog_init(struct pipe_context *pctx)
+{
+       pctx->create_vs_state = fd4_vp_state_create;
+       pctx->delete_vs_state = fd4_vp_state_delete;
+
+       pctx->create_fs_state = fd4_fp_state_create;
+       pctx->delete_fs_state = fd4_fp_state_delete;
+
+       /* fix_blit_fp() dereferences ctx->blit_prog.fp, which is presumably
+        * set up by fd_prog_init() -- keep this order:
+        */
+       fd_prog_init(pctx);
+       fix_blit_fp(pctx);
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.h b/src/gallium/drivers/freedreno/a4xx/fd4_program.h
new file mode 100644 (file)
index 0000000..52306a4
--- /dev/null
@@ -0,0 +1,46 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_PROGRAM_H_
+#define FD4_PROGRAM_H_
+
+#include "pipe/p_context.h"
+#include "freedreno_context.h"
+#include "ir3_shader.h"
+
+/* a4xx shader state object: a thin wrapper holding the ir3 shader. */
+struct fd4_shader_stateobj {
+       struct ir3_shader *shader;
+};
+
+struct fd4_emit;
+
+/* Emit program (shader) register state into 'ring' for the given emit
+ * state.
+ */
+void fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit);
+
+/* Install the a4xx create/delete hooks for vertex and fragment shader
+ * state on 'pctx'.
+ */
+void fd4_prog_init(struct pipe_context *pctx);
+
+#endif /* FD4_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.c b/src/gallium/drivers/freedreno/a4xx/fd4_query.c
new file mode 100644 (file)
index 0000000..9a50626
--- /dev/null
@@ -0,0 +1,39 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "freedreno_query_hw.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+#include "fd4_query.h"
+#include "fd4_util.h"
+
+/* Query setup hook for a4xx contexts.  Currently an empty stub: nothing
+ * is installed on 'pctx' yet.
+ */
+void fd4_query_context_init(struct pipe_context *pctx)
+{
+       /* TODO */
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_query.h b/src/gallium/drivers/freedreno/a4xx/fd4_query.h
new file mode 100644 (file)
index 0000000..a2e9156
--- /dev/null
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_QUERY_H_
+#define FD4_QUERY_H_
+
+#include "pipe/p_context.h"
+
+/* Set up query support on an a4xx context. */
+void fd4_query_context_init(struct pipe_context *pctx);
+
+#endif /* FD4_QUERY_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.c
new file mode 100644 (file)
index 0000000..b363cb7
--- /dev/null
@@ -0,0 +1,94 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd4_rasterizer.h"
+#include "fd4_context.h"
+#include "fd4_util.h"
+
+/* Create the a4xx rasterizer state object for 'cso'.
+ *
+ * A copy of the gallium state is kept in so->base, and the remaining
+ * fields are filled with register values pre-computed from it.
+ * Returns NULL if the allocation fails.
+ */
+void *
+fd4_rasterizer_state_create(struct pipe_context *pctx,
+               const struct pipe_rasterizer_state *cso)
+{
+       struct fd4_rasterizer_stateobj *so =
+                       CALLOC_STRUCT(fd4_rasterizer_stateobj);
+       float psize_lo, psize_hi;
+       uint32_t mode_control;
+
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+
+       /* Without per-vertex point size, pin both ends of the range to the
+        * fixed size, as if the vertex output was disabled:
+        */
+       psize_lo = cso->point_size_per_vertex ?
+                       util_get_min_point_size(cso) : cso->point_size;
+       psize_hi = cso->point_size_per_vertex ? 8192 : cso->point_size;
+
+       /* TODO: line stipple (cso->line_stipple_enable),
+        * cso->half_pixel_center, and cso->multisample are not handled yet.
+        */
+
+       /* cull / winding / polygon-offset bits on top of the line half-width: */
+       mode_control =
+                       A4XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2.0);
+       if (cso->cull_face & PIPE_FACE_FRONT)
+               mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
+       if (cso->cull_face & PIPE_FACE_BACK)
+               mode_control |= A4XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
+       if (!cso->front_ccw)
+               mode_control |= A4XX_GRAS_SU_MODE_CONTROL_FRONT_CW;
+       if (cso->offset_tri)
+               mode_control |= A4XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
+       so->gras_su_mode_control = mode_control;
+
+       if (!cso->flatshade_first)
+               so->pc_prim_vtx_cntl |= A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
+
+       so->gras_cl_clip_cntl = 0x80000; /* ??? */
+
+       so->gras_su_point_minmax =
+                       A4XX_GRAS_SU_POINT_MINMAX_MIN(psize_lo/2) |
+                       A4XX_GRAS_SU_POINT_MINMAX_MAX(psize_hi/2);
+       so->gras_su_point_size = A4XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
+
+       so->gras_su_poly_offset_scale =
+                       A4XX_GRAS_SU_POLY_OFFSET_SCALE(cso->offset_scale);
+       so->gras_su_poly_offset_offset =
+                       A4XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
+
+       return so;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h b/src/gallium/drivers/freedreno/a4xx/fd4_rasterizer.h
new file mode 100644 (file)
index 0000000..06c728f
--- /dev/null
@@ -0,0 +1,56 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_RASTERIZER_H_
+#define FD4_RASTERIZER_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+/* a4xx rasterizer state object: a copy of the gallium rasterizer state
+ * followed by values pre-computed in fd4_rasterizer_state_create().
+ */
+struct fd4_rasterizer_stateobj {
+       /* must stay the first member: fd4_rasterizer_stateobj() casts a
+        * pipe_rasterizer_state pointer directly to this struct
+        */
+       struct pipe_rasterizer_state base;
+       uint32_t gras_su_point_minmax;
+       uint32_t gras_su_point_size;
+       uint32_t gras_su_poly_offset_scale;
+       uint32_t gras_su_poly_offset_offset;
+
+       uint32_t gras_su_mode_control;
+       uint32_t gras_cl_clip_cntl;
+       uint32_t pc_prim_vtx_cntl;
+};
+
+/* Downcast a gallium rasterizer state pointer to the a4xx state object
+ * it is embedded in (as the first member).
+ */
+static INLINE struct fd4_rasterizer_stateobj *
+fd4_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
+{
+       struct fd4_rasterizer_stateobj *so =
+                       (struct fd4_rasterizer_stateobj *)rast;
+
+       return so;
+}
+
+/* Allocate and fill an fd4_rasterizer_stateobj from 'cso'; returns NULL
+ * if the allocation fails.
+ */
+void * fd4_rasterizer_state_create(struct pipe_context *pctx,
+               const struct pipe_rasterizer_state *cso);
+
+#endif /* FD4_RASTERIZER_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.c b/src/gallium/drivers/freedreno/a4xx/fd4_screen.c
new file mode 100644 (file)
index 0000000..8ee246b
--- /dev/null
@@ -0,0 +1,105 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+
+#include "fd4_screen.h"
+#include "fd4_context.h"
+#include "fd4_util.h"
+
+/* pipe_screen::is_format_supported hook for a4xx.
+ *
+ * Collects the subset of the requested 'usage' bind flags that 'format'
+ * can satisfy, and reports support only when every requested flag was
+ * satisfied.  Requests with sample_count > 1 are rejected up front.
+ */
+static boolean
+fd4_screen_is_format_supported(struct pipe_screen *pscreen,
+               enum pipe_format format,
+               enum pipe_texture_target target,
+               unsigned sample_count,
+               unsigned usage)
+{
+       const unsigned rt_binds = PIPE_BIND_RENDER_TARGET |
+                       PIPE_BIND_DISPLAY_TARGET |
+                       PIPE_BIND_SCANOUT |
+                       PIPE_BIND_SHARED;
+       const unsigned xfer_binds = PIPE_BIND_TRANSFER_READ |
+                       PIPE_BIND_TRANSFER_WRITE;
+       unsigned supported = 0;
+
+       if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+                       (sample_count > 1) || /* TODO add MSAA */
+                       !util_format_is_supported(format, usage)) {
+               DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+                               util_format_name(format), target, sample_count, usage);
+               return FALSE;
+       }
+
+       if ((usage & PIPE_BIND_VERTEX_BUFFER) &&
+                       (fd4_pipe2vtx(format) != ~0))
+               supported |= PIPE_BIND_VERTEX_BUFFER;
+
+       if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
+                       (fd4_pipe2tex(format) != ~0))
+               supported |= PIPE_BIND_SAMPLER_VIEW;
+
+       /* render-target style binds need both a color and a texture format: */
+       if ((usage & rt_binds) &&
+                       (fd4_pipe2color(format) != ~0) &&
+                       (fd4_pipe2tex(format) != ~0))
+               supported |= usage & rt_binds;
+
+       if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+                       (fd_pipe2depth(format) != ~0) &&
+                       (fd4_pipe2tex(format) != ~0))
+               supported |= PIPE_BIND_DEPTH_STENCIL;
+
+       if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+                       (fd_pipe2index(format) != ~0))
+               supported |= PIPE_BIND_INDEX_BUFFER;
+
+       /* transfer binds are granted without any per-format lookup: */
+       supported |= usage & xfer_binds;
+
+       if (supported != usage) {
+               DBG("not supported: format=%s, target=%d, sample_count=%d, "
+                               "usage=%x, retval=%x", util_format_name(format),
+                               target, sample_count, usage, supported);
+       }
+
+       return supported == usage;
+}
+
+/* Hook the a4xx format-support query and context constructor into
+ * 'pscreen'.
+ */
+void
+fd4_screen_init(struct pipe_screen *pscreen)
+{
+       pscreen->is_format_supported = fd4_screen_is_format_supported;
+       pscreen->context_create = fd4_context_create;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_screen.h b/src/gallium/drivers/freedreno/a4xx/fd4_screen.h
new file mode 100644 (file)
index 0000000..09b68ef
--- /dev/null
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_SCREEN_H_
+#define FD4_SCREEN_H_
+
+#include "pipe/p_screen.h"
+
+/* Install the a4xx context_create and is_format_supported hooks. */
+void fd4_screen_init(struct pipe_screen *pscreen);
+
+#endif /* FD4_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
new file mode 100644 (file)
index 0000000..fc9c873
--- /dev/null
@@ -0,0 +1,190 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_math.h"
+
+#include "fd4_texture.h"
+#include "fd4_util.h"
+
+/* Map a gallium PIPE_TEX_WRAP_* mode onto the a4xx sampler wrap/clamp
+ * encoding.  Unknown modes are logged and encoded as 0.
+ *
+ * TODO do we need to emulate clamp-to-edge like a3xx?
+ */
+static enum a4xx_tex_clamp
+tex_clamp(unsigned wrap)
+{
+       /* hardware probably supports more, but we can't coax all the
+        * wrap/clamp modes out of the GLESv2 blob driver.
+        *
+        * TODO once we have basics working, go back and just try
+        * different values and see what happens
+        */
+       switch (wrap) {
+       case PIPE_TEX_WRAP_REPEAT:
+               return A4XX_TEX_REPEAT;
+       case PIPE_TEX_WRAP_CLAMP:
+       case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+               return A4XX_TEX_CLAMP_TO_EDGE;
+       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+               /* TODO should eventually be A4XX_TEX_CLAMP_TO_BORDER; until
+                * then this deliberately falls through, so clamp-to-border
+                * currently ends up as A4XX_TEX_MIRROR_REPEAT.
+                */
+       case PIPE_TEX_WRAP_MIRROR_CLAMP:
+       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+               /* TODO should eventually be A4XX_TEX_MIRROR_CLAMP; until
+                * then the mirror-clamp modes also fall through to
+                * A4XX_TEX_MIRROR_REPEAT.
+                */
+       case PIPE_TEX_WRAP_MIRROR_REPEAT:
+               return A4XX_TEX_MIRROR_REPEAT;
+       default:
+               DBG("invalid wrap: %u", wrap);
+               return 0;
+       }
+}
+
+/* Map a gallium PIPE_TEX_FILTER_* value onto the a4xx sampler filter
+ * encoding.  Unknown values are logged and encoded as 0.
+ */
+static enum a4xx_tex_filter
+tex_filter(unsigned filter)
+{
+       if (filter == PIPE_TEX_FILTER_NEAREST)
+               return A4XX_TEX_NEAREST;
+
+       if (filter == PIPE_TEX_FILTER_LINEAR)
+               return A4XX_TEX_LINEAR;
+
+       DBG("invalid filter: %u", filter);
+       return 0;
+}
+
+/* Create the a4xx sampler state object: keeps a copy of the gallium
+ * sampler state and pre-computes the two TEX_SAMP dwords from it.
+ * Returns NULL if the allocation fails.
+ */
+static void *
+fd4_sampler_state_create(struct pipe_context *pctx,
+               const struct pipe_sampler_state *cso)
+{
+       struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
+       uint32_t samp1 = 0x00000000;
+
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+
+       /* filter and per-axis wrap modes: */
+       so->texsamp0 =
+               A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
+               A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) |
+               A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
+               A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
+               A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
+
+       /* the LOD range is only programmed when mip filtering is enabled: */
+       if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
+               samp1 |= A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod);
+               samp1 |= A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
+       }
+
+       if (cso->compare_mode)
+               samp1 |= A4XX_TEX_SAMP_1_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
+
+       so->texsamp1 = samp1;
+
+       return so;
+}
+
+/* Map a gallium texture target onto the a4xx texture type.  Array
+ * targets use the type of their non-array counterpart, and buffers are
+ * treated as 1D.
+ */
+static enum a4xx_tex_type
+tex_type(unsigned target)
+{
+       switch (target) {
+       case PIPE_TEXTURE_RECT:
+       case PIPE_TEXTURE_2D:
+       case PIPE_TEXTURE_2D_ARRAY:
+               return A4XX_TEX_2D;
+       case PIPE_TEXTURE_3D:
+               return A4XX_TEX_3D;
+       case PIPE_TEXTURE_CUBE:
+       case PIPE_TEXTURE_CUBE_ARRAY:
+               return A4XX_TEX_CUBE;
+       case PIPE_BUFFER:
+       case PIPE_TEXTURE_1D:
+       case PIPE_TEXTURE_1D_ARRAY:
+               return A4XX_TEX_1D;
+       default:
+               /* unknown target: asserts, and if asserts are compiled out
+                * it is treated as 1D
+                */
+               assert(0);
+               return A4XX_TEX_1D;
+       }
+}
+
+/* Create an a4xx sampler view for 'prsc' from the template 'cso'.
+ *
+ * The view takes its own reference on the resource and pre-computes the
+ * TEX_CONST dwords.  Returns NULL if the allocation fails.
+ */
+static struct pipe_sampler_view *
+fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
+               const struct pipe_sampler_view *cso)
+{
+       struct fd4_pipe_sampler_view *so = CALLOC_STRUCT(fd4_pipe_sampler_view);
+       struct fd_resource *rsc = fd_resource(prsc);
+       unsigned lvl = cso->u.tex.first_level;
+
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+       /* the view holds its own reference on the underlying resource: */
+       pipe_reference(NULL, &prsc->reference);
+       so->base.texture = prsc;
+       so->base.reference.count = 1;
+       so->base.context = pctx;
+
+       so->tex_resource =  rsc;
+
+       so->texconst0 =
+               A4XX_TEX_CONST_0_TYPE(tex_type(prsc->target)) |
+               A4XX_TEX_CONST_0_FMT(fd4_pipe2tex(cso->format)) |
+               fd4_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
+                               cso->swizzle_b, cso->swizzle_a);
+
+       /* The pitch below is that of the view's first miplevel, so the
+        * dimensions have to describe that same level rather than level
+        * zero.  (No change when first_level is zero.)
+        */
+       so->texconst1 =
+               A4XX_TEX_CONST_1_WIDTH(u_minify(prsc->width0, lvl)) |
+               A4XX_TEX_CONST_1_HEIGHT(u_minify(prsc->height0, lvl));
+       so->texconst2 =
+               A4XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
+
+       /* only array and 3d textures need a layer size: */
+       switch (prsc->target) {
+       case PIPE_TEXTURE_1D_ARRAY:
+       case PIPE_TEXTURE_2D_ARRAY:
+       case PIPE_TEXTURE_3D:
+               so->texconst3 =
+                       A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0);
+               break;
+       default:
+               so->texconst3 = 0x00000000;
+               break;
+       }
+
+       return &so->base;
+}
+
+/* Hook up the texture related pipe_context entry points for a4xx.
+ * Sampler binding uses the generation independent freedreno helper.
+ */
+void
+fd4_texture_init(struct pipe_context *pctx)
+{
+       /* a4xx specific state object constructors: */
+       pctx->create_sampler_view = fd4_sampler_view_create;
+       pctx->create_sampler_state = fd4_sampler_state_create;
+
+       /* shared across adreno generations: */
+       pctx->bind_sampler_states = fd_sampler_states_bind;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_texture.h b/src/gallium/drivers/freedreno/a4xx/fd4_texture.h
new file mode 100644 (file)
index 0000000..3592b10
--- /dev/null
@@ -0,0 +1,68 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_TEXTURE_H_
+#define FD4_TEXTURE_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_texture.h"
+#include "freedreno_resource.h"
+
+#include "fd4_context.h"
+#include "fd4_util.h"
+
+/* a4xx sampler state object: a copy of the gallium sampler state plus
+ * the pre-computed TEX_SAMP dwords.
+ */
+struct fd4_sampler_stateobj {
+       /* must stay the first member, the fd4_sampler_stateobj() helper
+        * casts a pipe_sampler_state pointer to this struct:
+        */
+       struct pipe_sampler_state base;
+       uint32_t texsamp0, texsamp1;
+};
+
+/* Downcast from the gallium sampler state to the a4xx state object that
+ * embeds it as its first member.
+ */
+static INLINE struct fd4_sampler_stateobj *
+fd4_sampler_stateobj(struct pipe_sampler_state *samp)
+{
+       return (struct fd4_sampler_stateobj *)samp;
+}
+
+/* a4xx sampler view: the gallium sampler view, the freedreno resource
+ * it samples from, and the pre-computed TEX_CONST dwords.
+ */
+struct fd4_pipe_sampler_view {
+       /* must stay the first member, the fd4_pipe_sampler_view() helper
+        * casts a pipe_sampler_view pointer to this struct:
+        */
+       struct pipe_sampler_view base;
+       struct fd_resource *tex_resource;
+       uint32_t texconst0, texconst1, texconst2, texconst3;
+};
+
+/* Downcast from the gallium sampler view to the a4xx sampler view that
+ * embeds it as its first member.
+ */
+static INLINE struct fd4_pipe_sampler_view *
+fd4_pipe_sampler_view(struct pipe_sampler_view *pview)
+{
+       return (struct fd4_pipe_sampler_view *)pview;
+}
+
+/* NOTE(review): fd4_texture.c as added by this patch does not define
+ * fd4_get_const_idx() -- confirm it is provided elsewhere, otherwise
+ * this prototype is stale.
+ */
+unsigned fd4_get_const_idx(struct fd_context *ctx,
+               struct fd_texture_stateobj *tex, unsigned samp_id);
+
+/* Install the a4xx sampler-state / sampler-view hooks on the context. */
+void fd4_texture_init(struct pipe_context *pctx);
+
+#endif /* FD4_TEXTURE_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_util.c b/src/gallium/drivers/freedreno/a4xx/fd4_util.c
new file mode 100644 (file)
index 0000000..ddff977
--- /dev/null
@@ -0,0 +1,401 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_format.h"
+
+#include "fd4_util.h"
+
+/* convert pipe format to vertex buffer format.
+ *
+ * Returns ~0 for any format that has no mapping in the table below, so
+ * callers can use that value to detect an unsupported format.  The
+ * *_USCALED / *_SSCALED formats share the encoding of the matching
+ * *_UINT / *_SINT format.
+ */
+enum a4xx_vtx_fmt
+fd4_pipe2vtx(enum pipe_format format)
+{
+       switch (format) {
+       /* 8-bit buffers. */
+       case PIPE_FORMAT_R8_UNORM:
+               return VFMT4_NORM_UBYTE_8;
+
+       case PIPE_FORMAT_R8_SNORM:
+               return VFMT4_NORM_BYTE_8;
+
+       case PIPE_FORMAT_R8_UINT:
+       case PIPE_FORMAT_R8_USCALED:
+               return VFMT4_UBYTE_8;
+
+       case PIPE_FORMAT_R8_SINT:
+       case PIPE_FORMAT_R8_SSCALED:
+               return VFMT4_BYTE_8;
+
+       /* 16-bit buffers. */
+       case PIPE_FORMAT_R16_UNORM:
+       case PIPE_FORMAT_Z16_UNORM:
+               return VFMT4_NORM_USHORT_16;
+
+       case PIPE_FORMAT_R16_SNORM:
+               return VFMT4_NORM_SHORT_16;
+
+       case PIPE_FORMAT_R16_UINT:
+       case PIPE_FORMAT_R16_USCALED:
+               return VFMT4_USHORT_16;
+
+       case PIPE_FORMAT_R16_SINT:
+       case PIPE_FORMAT_R16_SSCALED:
+               return VFMT4_SHORT_16;
+
+       case PIPE_FORMAT_R16_FLOAT:
+               return VFMT4_FLOAT_16;
+
+       case PIPE_FORMAT_R8G8_UNORM:
+               return VFMT4_NORM_UBYTE_8_8;
+
+       case PIPE_FORMAT_R8G8_SNORM:
+               return VFMT4_NORM_BYTE_8_8;
+
+       case PIPE_FORMAT_R8G8_UINT:
+       case PIPE_FORMAT_R8G8_USCALED:
+               return VFMT4_UBYTE_8_8;
+
+       case PIPE_FORMAT_R8G8_SINT:
+       case PIPE_FORMAT_R8G8_SSCALED:
+               return VFMT4_BYTE_8_8;
+
+       /* 24-bit buffers. */
+       case PIPE_FORMAT_R8G8B8_UNORM:
+               return VFMT4_NORM_UBYTE_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8_SNORM:
+               return VFMT4_NORM_BYTE_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8_UINT:
+       case PIPE_FORMAT_R8G8B8_USCALED:
+               return VFMT4_UBYTE_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8_SINT:
+       case PIPE_FORMAT_R8G8B8_SSCALED:
+               return VFMT4_BYTE_8_8_8;
+
+       /* 32-bit buffers. */
+       /* NOTE all four component orders share one encoding here: */
+       case PIPE_FORMAT_A8B8G8R8_UNORM:
+       case PIPE_FORMAT_A8R8G8B8_UNORM:
+       case PIPE_FORMAT_B8G8R8A8_UNORM:
+       case PIPE_FORMAT_R8G8B8A8_UNORM:
+               return VFMT4_NORM_UBYTE_8_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8A8_SNORM:
+               return VFMT4_NORM_BYTE_8_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8A8_UINT:
+       case PIPE_FORMAT_R8G8B8A8_USCALED:
+               return VFMT4_UBYTE_8_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8A8_SINT:
+       case PIPE_FORMAT_R8G8B8A8_SSCALED:
+               return VFMT4_BYTE_8_8_8_8;
+
+       case PIPE_FORMAT_R16G16_SSCALED:
+       case PIPE_FORMAT_R16G16_SINT:
+               return VFMT4_SHORT_16_16;
+
+       case PIPE_FORMAT_R16G16_FLOAT:
+               return VFMT4_FLOAT_16_16;
+
+       case PIPE_FORMAT_R16G16_UINT:
+       case PIPE_FORMAT_R16G16_USCALED:
+               return VFMT4_USHORT_16_16;
+
+       case PIPE_FORMAT_R16G16_UNORM:
+               return VFMT4_NORM_USHORT_16_16;
+
+       case PIPE_FORMAT_R16G16_SNORM:
+               return VFMT4_NORM_SHORT_16_16;
+
+       case PIPE_FORMAT_R10G10B10A2_UNORM:
+               return VFMT4_NORM_UINT_10_10_10_2;
+
+       case PIPE_FORMAT_R10G10B10A2_SNORM:
+               return VFMT4_NORM_INT_10_10_10_2;
+
+       case PIPE_FORMAT_R10G10B10A2_UINT:
+       case PIPE_FORMAT_R10G10B10A2_USCALED:
+               return VFMT4_UINT_10_10_10_2;
+
+       case PIPE_FORMAT_R10G10B10A2_SSCALED:
+               return VFMT4_INT_10_10_10_2;
+
+       /* 48-bit buffers. */
+       case PIPE_FORMAT_R16G16B16_FLOAT:
+               return VFMT4_FLOAT_16_16_16;
+
+       case PIPE_FORMAT_R16G16B16_SINT:
+       case PIPE_FORMAT_R16G16B16_SSCALED:
+               return VFMT4_SHORT_16_16_16;
+
+       case PIPE_FORMAT_R16G16B16_UINT:
+       case PIPE_FORMAT_R16G16B16_USCALED:
+               return VFMT4_USHORT_16_16_16;
+
+       case PIPE_FORMAT_R16G16B16_SNORM:
+               return VFMT4_NORM_SHORT_16_16_16;
+
+       case PIPE_FORMAT_R16G16B16_UNORM:
+               return VFMT4_NORM_USHORT_16_16_16;
+
+       /* NOTE the single-channel 32-bit formats below are listed under
+        * the 48-bit heading, but are 32 bits wide:
+        */
+       case PIPE_FORMAT_R32_FLOAT:
+       case PIPE_FORMAT_Z32_FLOAT:
+               return VFMT4_FLOAT_32;
+
+       case PIPE_FORMAT_R32_FIXED:
+               return VFMT4_FIXED_32;
+
+       /* 64-bit buffers. */
+       case PIPE_FORMAT_R16G16B16A16_UNORM:
+               return VFMT4_NORM_USHORT_16_16_16_16;
+
+       case PIPE_FORMAT_R16G16B16A16_SNORM:
+               return VFMT4_NORM_SHORT_16_16_16_16;
+
+       case PIPE_FORMAT_R16G16B16A16_UINT:
+       case PIPE_FORMAT_R16G16B16A16_USCALED:
+               return VFMT4_USHORT_16_16_16_16;
+
+       case PIPE_FORMAT_R16G16B16A16_SINT:
+       case PIPE_FORMAT_R16G16B16A16_SSCALED:
+               return VFMT4_SHORT_16_16_16_16;
+
+       case PIPE_FORMAT_R32G32_FLOAT:
+               return VFMT4_FLOAT_32_32;
+
+       case PIPE_FORMAT_R32G32_FIXED:
+               return VFMT4_FIXED_32_32;
+
+       case PIPE_FORMAT_R16G16B16A16_FLOAT:
+               return VFMT4_FLOAT_16_16_16_16;
+
+       /* 96-bit buffers. */
+       case PIPE_FORMAT_R32G32B32_FLOAT:
+               return VFMT4_FLOAT_32_32_32;
+
+       case PIPE_FORMAT_R32G32B32_FIXED:
+               return VFMT4_FIXED_32_32_32;
+
+       /* 128-bit buffers. */
+       case PIPE_FORMAT_R32G32B32A32_FLOAT:
+               return VFMT4_FLOAT_32_32_32_32;
+
+       case PIPE_FORMAT_R32G32B32A32_FIXED:
+               return VFMT4_FIXED_32_32_32_32;
+
+/* TODO probably need gles3 blob drivers to find the 32bit int formats:
+       case PIPE_FORMAT_R32G32B32A32_SNORM:
+       case PIPE_FORMAT_R32G32B32A32_UNORM:
+       case PIPE_FORMAT_R32G32B32A32_SINT:
+       case PIPE_FORMAT_R32G32B32A32_UINT:
+
+       case PIPE_FORMAT_R32_UINT:
+       case PIPE_FORMAT_R32_SINT:
+       case PIPE_FORMAT_A32_UINT:
+       case PIPE_FORMAT_A32_SINT:
+       case PIPE_FORMAT_L32_UINT:
+       case PIPE_FORMAT_L32_SINT:
+       case PIPE_FORMAT_I32_UINT:
+       case PIPE_FORMAT_I32_SINT:
+
+       case PIPE_FORMAT_R32G32_SINT:
+       case PIPE_FORMAT_R32G32_UINT:
+       case PIPE_FORMAT_L32A32_UINT:
+       case PIPE_FORMAT_L32A32_SINT:
+*/
+
+       default:
+               /* no known encoding for this format: */
+               return ~0;
+       }
+}
+
+/* convert pipe format to texture sampler format.
+ *
+ * Returns ~0 for any format that has no mapping in the table below.
+ * Only the storage layout is selected here: the sRGB and X8 variants
+ * return the same value as their plain UNORM / alpha counterparts.
+ */
+enum a4xx_tex_fmt
+fd4_pipe2tex(enum pipe_format format)
+{
+       switch (format) {
+       case PIPE_FORMAT_L8_UNORM:
+       case PIPE_FORMAT_A8_UNORM:
+       case PIPE_FORMAT_I8_UNORM:
+               return TFMT4_NORM_UINT_8;
+
+       case PIPE_FORMAT_B8G8R8A8_UNORM:
+       case PIPE_FORMAT_B8G8R8X8_UNORM:
+       case PIPE_FORMAT_R8G8B8A8_UNORM:
+       case PIPE_FORMAT_R8G8B8X8_UNORM:
+       case PIPE_FORMAT_B8G8R8A8_SRGB:
+       case PIPE_FORMAT_B8G8R8X8_SRGB:
+       case PIPE_FORMAT_R8G8B8A8_SRGB:
+       case PIPE_FORMAT_R8G8B8X8_SRGB:
+               return TFMT4_NORM_UINT_8_8_8_8;
+
+       case PIPE_FORMAT_Z24X8_UNORM:
+               return TFMT4_NORM_UINT_X8Z24;
+
+       /* depth+stencil is sampled using the plain 8_8_8_8 layout: */
+       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+               return TFMT4_NORM_UINT_8_8_8_8;
+
+//     case PIPE_FORMAT_Z16_UNORM:
+//             return TFMT4_NORM_UINT_8_8;
+//
+       case PIPE_FORMAT_R16G16B16A16_FLOAT:
+       case PIPE_FORMAT_R16G16B16X16_FLOAT:
+               return TFMT4_FLOAT_16_16_16_16;
+
+       case PIPE_FORMAT_R32G32B32A32_FLOAT:
+       case PIPE_FORMAT_R32G32B32X32_FLOAT:
+               return TFMT4_FLOAT_32_32_32_32;
+
+       // TODO add more..
+
+       default:
+               /* no known encoding for this format: */
+               return ~0;
+       }
+}
+
+/* convert pipe format to MRT / copydest format used for render-target.
+ *
+ * Returns ~0 for any format that has no mapping in the table below.
+ */
+enum a4xx_color_fmt
+fd4_pipe2color(enum pipe_format format)
+{
+       switch (format) {
+       case PIPE_FORMAT_B8G8R8A8_UNORM:
+       case PIPE_FORMAT_B8G8R8X8_UNORM:
+       case PIPE_FORMAT_R8G8B8A8_UNORM:
+               return RB4_R8G8B8A8_UNORM;
+
+       case PIPE_FORMAT_Z16_UNORM:
+               return RB4_Z16_UNORM;
+
+       case PIPE_FORMAT_Z24X8_UNORM:
+       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+               /* for DEPTHX_24_8, blob driver also seems to use R8G8B8A8 fmt.. */
+               return RB4_R8G8B8A8_UNORM;
+
+       /* all single 8-bit channel formats use the A8 encoding: */
+       case PIPE_FORMAT_R8_UNORM:
+       case PIPE_FORMAT_L8_UNORM:
+       case PIPE_FORMAT_A8_UNORM:
+               return RB4_A8_UNORM;
+//
+//     case PIPE_FORMAT_R16G16B16A16_FLOAT:
+//     case PIPE_FORMAT_R16G16B16X16_FLOAT:
+//             return RB4_R16G16B16A16_FLOAT;
+//
+//     case PIPE_FORMAT_R32G32B32A32_FLOAT:
+//     case PIPE_FORMAT_R32G32B32X32_FLOAT:
+//             return RB4_R32G32B32A32_FLOAT;
+
+       // TODO add more..
+
+       default:
+               /* no known encoding for this format: */
+               return ~0;
+       }
+}
+
+/* Pick the format used to sample a surface when restoring it to gmem.
+ *
+ * Depth/stencil restore is a special case: the texture sampler is used
+ * to blit into the depth/stencil buffer, *not* into a color buffer, and
+ * fd4_tex_swiz() would otherwise do the wrong thing since it assumes a
+ * normal render target.  So depth/stencil formats are restored as
+ * B8G8R8A8_UNORM; every other format is returned unchanged.
+ */
+enum pipe_format
+fd4_gmem_restore_format(enum pipe_format format)
+{
+       if ((format == PIPE_FORMAT_Z24X8_UNORM) ||
+                       (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) ||
+                       (format == PIPE_FORMAT_Z16_UNORM))
+               return PIPE_FORMAT_B8G8R8A8_UNORM;
+
+       return format;
+}
+
+/* Pick the component swap for a pipe format, based on its channel
+ * order.  Any format that is not listed explicitly gets WZYX.
+ *
+ * TODO share w/ a3xx??
+ */
+enum a3xx_color_swap
+fd4_pipe2swap(enum pipe_format format)
+{
+       switch (format) {
+       case PIPE_FORMAT_B8G8R8A8_UNORM:
+       case PIPE_FORMAT_B8G8R8X8_UNORM:
+       case PIPE_FORMAT_B8G8R8A8_SRGB:
+       case PIPE_FORMAT_B8G8R8X8_SRGB:
+               return WXYZ;
+
+       case PIPE_FORMAT_A8R8G8B8_UNORM:
+       case PIPE_FORMAT_X8R8G8B8_UNORM:
+       case PIPE_FORMAT_A8R8G8B8_SRGB:
+       case PIPE_FORMAT_X8R8G8B8_SRGB:
+               return ZYXW;
+
+       case PIPE_FORMAT_A8B8G8R8_UNORM:
+       case PIPE_FORMAT_X8B8G8R8_UNORM:
+       case PIPE_FORMAT_A8B8G8R8_SRGB:
+       case PIPE_FORMAT_X8B8G8R8_SRGB:
+               return XYZW;
+
+       /* these are listed for documentation only, they share the
+        * default:
+        */
+       case PIPE_FORMAT_R8G8B8A8_UNORM:
+       case PIPE_FORMAT_R8G8B8X8_UNORM:
+       case PIPE_FORMAT_Z24X8_UNORM:
+       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+       default:
+               return WZYX;
+       }
+}
+
+/* Map a single PIPE_SWIZZLE_* selector onto the a4xx texture swizzle
+ * encoding.  Anything unrecognized selects the X component.
+ */
+static inline enum a4xx_tex_swiz
+tex_swiz(unsigned swiz)
+{
+       switch (swiz) {
+       case PIPE_SWIZZLE_GREEN:
+               return A4XX_TEX_Y;
+       case PIPE_SWIZZLE_BLUE:
+               return A4XX_TEX_Z;
+       case PIPE_SWIZZLE_ALPHA:
+               return A4XX_TEX_W;
+       case PIPE_SWIZZLE_ZERO:
+               return A4XX_TEX_ZERO;
+       case PIPE_SWIZZLE_ONE:
+               return A4XX_TEX_ONE;
+       case PIPE_SWIZZLE_RED:
+       default:
+               return A4XX_TEX_X;
+       }
+}
+
+/* Build the SWIZ_X/Y/Z/W bits of TEX_CONST_0 for sampling 'format'
+ * through a view with the given per-channel swizzles.  The view swizzle
+ * is composed with the format's own swizzle before being encoded.
+ */
+uint32_t
+fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
+               unsigned swizzle_b, unsigned swizzle_a)
+{
+       const struct util_format_description *desc =
+                       util_format_description(format);
+       unsigned char view_swiz[4];
+       unsigned char composed[4];
+       uint32_t bits;
+
+       view_swiz[0] = swizzle_r;
+       view_swiz[1] = swizzle_g;
+       view_swiz[2] = swizzle_b;
+       view_swiz[3] = swizzle_a;
+
+       util_format_compose_swizzles(desc->swizzle, view_swiz, composed);
+
+       bits  = A4XX_TEX_CONST_0_SWIZ_X(tex_swiz(composed[0]));
+       bits |= A4XX_TEX_CONST_0_SWIZ_Y(tex_swiz(composed[1]));
+       bits |= A4XX_TEX_CONST_0_SWIZ_Z(tex_swiz(composed[2]));
+       bits |= A4XX_TEX_CONST_0_SWIZ_W(tex_swiz(composed[3]));
+
+       return bits;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_util.h b/src/gallium/drivers/freedreno/a4xx/fd4_util.h
new file mode 100644 (file)
index 0000000..359882f
--- /dev/null
@@ -0,0 +1,45 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_UTIL_H_
+#define FD4_UTIL_H_
+
+#include "freedreno_util.h"
+
+#include "a4xx.xml.h"
+
+/* pipe_format -> hw format lookups; the vtx, tex and color variants
+ * return ~0 for formats they have no mapping for:
+ */
+enum a4xx_vtx_fmt fd4_pipe2vtx(enum pipe_format format);
+enum a4xx_tex_fmt fd4_pipe2tex(enum pipe_format format);
+enum a4xx_color_fmt fd4_pipe2color(enum pipe_format format);
+/* format to sample a surface as when restoring it to gmem: */
+enum pipe_format fd4_gmem_restore_format(enum pipe_format format);
+/* component swap for a format (WZYX if not otherwise listed): */
+enum a3xx_color_swap fd4_pipe2swap(enum pipe_format format);
+
+/* TEX_CONST_0 swizzle bits for a format combined with a view swizzle: */
+uint32_t fd4_tex_swiz(enum pipe_format format, unsigned swizzle_r,
+               unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+
+#endif /* FD4_UTIL_H_ */
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_zsa.c b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.c
new file mode 100644 (file)
index 0000000..6f09ec9
--- /dev/null
@@ -0,0 +1,105 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd4_zsa.h"
+#include "fd4_context.h"
+#include "fd4_util.h"
+
+/* Create the a4xx depth/stencil/alpha state object: keeps a copy of the
+ * gallium state and pre-computes the register values derived from it.
+ * Returns NULL if the allocation fails.
+ */
+void *
+fd4_zsa_state_create(struct pipe_context *pctx,
+               const struct pipe_depth_stencil_alpha_state *cso)
+{
+       const struct pipe_stencil_state *front = &cso->stencil[0];
+       const struct pipe_stencil_state *back = &cso->stencil[1];
+       struct fd4_zsa_stateobj *so = CALLOC_STRUCT(fd4_zsa_stateobj);
+
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+       so->rb_render_control = 0x8;  /* XXX */
+
+       /* depth: the compare func is always programmed, the enable bits
+        * only when requested
+        */
+       so->rb_depth_control |=
+                       A4XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */
+
+       if (cso->depth.enabled) {
+               so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_ENABLE;
+               so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
+       }
+
+       if (cso->depth.writemask)
+               so->rb_depth_control |= A4XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
+
+       /* alpha test, which also disables early-z: */
+       if (cso->alpha.enabled) {
+               so->gras_alpha_control =
+                       A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE;
+               so->rb_alpha_control =
+                       A4XX_RB_ALPHA_CONTROL_ALPHA_TEST |
+                       A4XX_RB_ALPHA_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
+               so->rb_depth_control |=
+                       A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
+       }
+
+       /* stencil: nothing to do unless the front face is enabled */
+       if (!front->enabled)
+               return so;
+
+       so->rb_stencil_control |=
+               A4XX_RB_STENCIL_CONTROL_STENCIL_READ |
+               A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+               A4XX_RB_STENCIL_CONTROL_FUNC(front->func) | /* maps 1:1 */
+               A4XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(front->fail_op)) |
+               A4XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(front->zpass_op)) |
+               A4XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(front->zfail_op));
+       so->rb_stencilrefmask |=
+               0xff000000 | /* ??? */
+               A4XX_RB_STENCILREFMASK_STENCILWRITEMASK(front->writemask) |
+               A4XX_RB_STENCILREFMASK_STENCILMASK(front->valuemask);
+
+       /* back-face state is only considered together with the front: */
+       if (back->enabled) {
+               so->rb_stencil_control |=
+                       A4XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+                       A4XX_RB_STENCIL_CONTROL_FUNC_BF(back->func) | /* maps 1:1 */
+                       A4XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(back->fail_op)) |
+                       A4XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(back->zpass_op)) |
+                       A4XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(back->zfail_op));
+               so->rb_stencilrefmask_bf |=
+                       0xff000000 | /* ??? */
+                       A4XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(back->writemask) |
+                       A4XX_RB_STENCILREFMASK_BF_STENCILMASK(back->valuemask);
+       }
+
+       return so;
+}
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h b/src/gallium/drivers/freedreno/a4xx/fd4_zsa.h
new file mode 100644 (file)
index 0000000..aea1204
--- /dev/null
@@ -0,0 +1,58 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD4_ZSA_H_
+#define FD4_ZSA_H_
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+#include "freedreno_util.h"
+
+/* a4xx depth/stencil/alpha state object: a copy of the gallium state
+ * plus the register values pre-computed from it at create time.
+ */
+struct fd4_zsa_stateobj {
+       /* must stay the first member, the fd4_zsa_stateobj() helper casts
+        * a pipe_depth_stencil_alpha_state pointer to this struct:
+        */
+       struct pipe_depth_stencil_alpha_state base;
+       uint32_t gras_alpha_control;
+       uint32_t rb_alpha_control;
+       uint32_t rb_render_control;
+       uint32_t rb_depth_control;
+       uint32_t rb_stencil_control;
+       uint32_t rb_stencilrefmask;
+       uint32_t rb_stencilrefmask_bf;
+};
+
+/* Downcast from the gallium depth/stencil/alpha state to the a4xx state
+ * object that embeds it as its first member.
+ */
+static INLINE struct fd4_zsa_stateobj *
+fd4_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
+{
+       return (struct fd4_zsa_stateobj *)zsa;
+}
+
+/* Allocate a fd4_zsa_stateobj for the given gallium state; returns NULL
+ * on allocation failure.
+ */
+void * fd4_zsa_state_create(struct pipe_context *pctx,
+               const struct pipe_depth_stencil_alpha_state *cso);
+
+#endif /* FD4_ZSA_H_ */
index e873af9294370ca6187c0115221de79223124f11..ce105b8786b90722feaa5c7529b0b128d72d1267 100644 (file)
@@ -52,6 +52,7 @@
 
 #include "a2xx/fd2_screen.h"
 #include "a3xx/fd3_screen.h"
+#include "a4xx/fd4_screen.h"
 
 /* XXX this should go away */
 #include "state_tracker/drm_driver.h"
@@ -514,7 +515,7 @@ fd_screen_create(struct fd_device *dev)
         * before enabling:
         *
         * If you have a different adreno version, feel free to add it to one
-        * of the two cases below and see what happens.  And if it works, please
+        * of the cases below and see what happens.  And if it works, please
         * send a patch ;-)
         */
        switch (screen->gpu_id) {
@@ -525,6 +526,9 @@ fd_screen_create(struct fd_device *dev)
        case 330:
                fd3_screen_init(pscreen);
                break;
+       case 420:
+               fd4_screen_init(pscreen);
+               break;
        default:
                debug_printf("unsupported GPU: a%03d\n", screen->gpu_id);
                goto fail;
index 60d4e4a15d5f0a51189fbcde09fce115e1ecdd60..41112460155619e040d618ca4a88d29778f1dabb 100644 (file)
@@ -540,7 +540,8 @@ static int (*emit[])(struct ir3_instruction *instr, void *ptr,
        emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
 };
 
-void * ir3_assemble(struct ir3 *shader, struct ir3_info *info)
+void * ir3_assemble(struct ir3 *shader, struct ir3_info *info,
+               uint32_t gpu_id)
 {
        uint32_t *ptr, *dwords;
        uint32_t i;
@@ -550,11 +551,15 @@ void * ir3_assemble(struct ir3 *shader, struct ir3_info *info)
        info->max_const     = -1;
        info->instrs_count  = 0;
 
-       /* need a integer number of instruction "groups" (sets of four
-        * instructions), so pad out w/ NOPs if needed:
-        * (each instruction is 64bits)
+       /* need an integer number of instruction "groups" (sets of 16
+        * instructions on a4xx or sets of 4 instructions on a3xx),
+        * so pad out w/ NOPs if needed: (NOTE each instruction is 64bits)
         */
-       info->sizedwords = 2 * align(shader->instrs_count, 4);
+       if (gpu_id >= 400) {
+               info->sizedwords = 2 * align(shader->instrs_count, 16);
+       } else {
+               info->sizedwords = 2 * align(shader->instrs_count, 4);
+       }
 
        ptr = dwords = calloc(4, info->sizedwords);
 
index 8a5e9fd687c5594e1389ae6b61a58153c564d048..06bad6e26fcfe65ba9d1107543f81faf50f468f4 100644 (file)
@@ -264,7 +264,7 @@ struct ir3_block {
 struct ir3 * ir3_create(void);
 void ir3_destroy(struct ir3 *shader);
 void * ir3_assemble(struct ir3 *shader,
-               struct ir3_info *info);
+               struct ir3_info *info, uint32_t gpu_id);
 void * ir3_alloc(struct ir3 *shader, int sz);
 
 struct ir3_block * ir3_block_create(struct ir3 *shader,
index 7de29f33d881ca901de4c89e0c69fd53bf9a5e33..f28ce27a00dba9035eb6f3f6b13b0ef025c3b786 100644 (file)
@@ -49,7 +49,8 @@ static void dump_info(struct ir3_shader_variant *so, const char *str)
        const char *type = (so->type == SHADER_VERTEX) ? "VERT" : "FRAG";
 
        // for debug, dump some before/after info:
-       bin = ir3_assemble(so->ir, &info);
+       // TODO make gpu_id configurable on cmdline
+       bin = ir3_assemble(so->ir, &info, 320);
        if (fd_mesa_debug & FD_DBG_DISASM) {
                struct ir3_block *block = so->ir->block;
                struct ir3_register *reg;
index 1f7e869d9f3f4c60adf1519e49fa48c9f3a2c4cd..0c74f2f26f27171a2c78a9815be2c7f88160dc4c 100644 (file)
@@ -56,7 +56,7 @@ assemble_variant(struct ir3_shader_variant *v)
        struct fd_context *ctx = fd_context(v->shader->pctx);
        uint32_t sz, *bin;
 
-       bin = ir3_assemble(v->ir, &v->info);
+       bin = ir3_assemble(v->ir, &v->info, ctx->screen->gpu_id);
        sz = v->info.sizedwords * 4;
 
        v->bo = fd_bo_new(ctx->dev, sz,
@@ -67,7 +67,11 @@ assemble_variant(struct ir3_shader_variant *v)
 
        free(bin);
 
-       v->instrlen = v->info.sizedwords / 8;
+       if (ctx->screen->gpu_id >= 400) {
+               v->instrlen = v->info.sizedwords / (2 * 16);
+       } else {
+               v->instrlen = v->info.sizedwords / (2 * 4);
+       }
 
        /* NOTE: if relative addressing is used, we set constlen in
         * the compiler (to worst-case value) since we don't know in
index 3d51603fcfbc3477dc0119519a87a935cc5b49da..f70886e2d3bab2eae8be9a6fdbbaa7e15352a97a 100644 (file)
@@ -111,7 +111,8 @@ struct ir3_shader_variant {
        struct ir3 *ir;
 
        /* the instructions length is in units of instruction groups
-        * (4 instructions, 8 dwords):
+        * (4 instructions for a3xx, 16 instructions for a4xx.. each
+        * instruction is 2 dwords):
         */
        unsigned instrlen;
 
@@ -203,7 +204,7 @@ struct ir3_shader {
        /* so far, only used for blit_prog shader.. values for
         * VPC_VARYING_PS_REPL[i].MODE
         */
-       uint32_t vpsrepl[4];
+       uint32_t vpsrepl[8];
 };