freedreno: gallium driver for adreno
author Rob Clark <robclark@freedesktop.org>
Sat, 27 Oct 2012 16:07:34 +0000 (11:07 -0500)
committer Rob Clark <robdclark@gmail.com>
Tue, 12 Mar 2013 01:53:24 +0000 (21:53 -0400)
Currently works on a220.  Others in the a2xx family look pretty similar
and should be pretty straightforward to support with the same driver.

The a3xx has a new shader ISA, and while many registers appear similar,
the register addresses have been completely shuffled around.  I am not
sure yet whether it is best to support with the same driver, but
different compiler, or whether it should be split into a different
driver.

v1: original
v2: build file updates from review comments, and remove GPL licensed
    header files from msm kernel
v3: smarter temp/pred register assignment, fix clear and depth/stencil
    format issues, resource_transfer fixes, scissor fixes

Signed-off-by: Rob Clark <robdclark@gmail.com>
49 files changed:
configure.ac
src/gallium/drivers/freedreno/Makefile.am [new file with mode: 0644]
src/gallium/drivers/freedreno/disasm.c [new file with mode: 0644]
src/gallium/drivers/freedreno/disasm.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_a2xx_reg.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_blend.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_blend.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_clear.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_clear.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_compiler.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_compiler.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_context.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_context.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_fence.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_fence.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_gmem.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_gmem.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_pm4.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_program.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_program.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_rasterizer.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_rasterizer.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_resource.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_resource.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_screen.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_screen.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_state.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_state.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_surface.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_surface.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_texture.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_texture.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_util.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_util.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_vbo.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_vbo.h [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_zsa.c [new file with mode: 0644]
src/gallium/drivers/freedreno/freedreno_zsa.h [new file with mode: 0644]
src/gallium/drivers/freedreno/instr.h [new file with mode: 0644]
src/gallium/drivers/freedreno/ir.c [new file with mode: 0644]
src/gallium/drivers/freedreno/ir.h [new file with mode: 0644]
src/gallium/targets/dri-freedreno/Makefile.am [new file with mode: 0644]
src/gallium/targets/dri-freedreno/target.c [new file with mode: 0644]
src/gallium/targets/egl-static/Makefile.am
src/gallium/targets/egl-static/egl_pipe.c
src/gallium/winsys/freedreno/drm/.gitignore [new file with mode: 0644]
src/gallium/winsys/freedreno/drm/Makefile.am [new file with mode: 0644]
src/gallium/winsys/freedreno/drm/freedreno_drm_public.h [new file with mode: 0644]
src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c [new file with mode: 0644]

index ddca0022f8cfea92e433123c5d20fcff88f85e1c..5bc0684b15dd9253aecb333073f79e569db516f8 100644 (file)
@@ -35,6 +35,7 @@ LIBDRM_RADEON_REQUIRED=2.4.42
 LIBDRM_INTEL_REQUIRED=2.4.38
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED="2.4.33 libdrm >= 2.4.41"
+LIBDRM_FREEDRENO_REQUIRED=2.4.39
 DRI2PROTO_REQUIRED=2.6
 GLPROTO_REQUIRED=1.4.14
 LIBDRM_XORG_REQUIRED=2.4.24
@@ -653,7 +654,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
 AC_ARG_WITH([gallium-drivers],
     [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
         [comma delimited Gallium drivers list, e.g.
-        "i915,nouveau,r300,r600,radeonsi,svga,swrast"
+        "i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast"
         @<:@default=r300,r600,svga,swrast@:>@])],
     [with_gallium_drivers="$withval"],
     [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -1840,6 +1841,13 @@ if test "x$with_gallium_drivers" != x; then
             GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS nouveau nv30 nv50 nvc0"
             gallium_check_st "nouveau/drm" "dri-nouveau" "xorg-nouveau" "" "xvmc-nouveau" "vdpau-nouveau"
             ;;
+        xfreedreno)
+            HAVE_GALLIUM_FREEDRENO=yes
+            PKG_CHECK_MODULES([FREEDRENO], [libdrm_freedreno >= $LIBDRM_FREEDRENO_REQUIRED])
+            gallium_require_drm_loader
+            GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS freedreno"
+            gallium_check_st "freedreno/drm" "dri-freedreno" "" "" "" ""
+            ;;
         xswrast)
             HAVE_GALLIUM_SOFTPIPE=yes
             GALLIUM_DRIVERS_DIRS="$GALLIUM_DRIVERS_DIRS softpipe"
@@ -1920,6 +1928,7 @@ AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_NOUVEAU, test "x$HAVE_GALLIUM_NOUVEAU" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes)
 
@@ -2051,6 +2060,7 @@ AC_CONFIG_FILES([Makefile
                src/gallium/drivers/rbug/Makefile
                src/gallium/drivers/softpipe/Makefile
                src/gallium/drivers/svga/Makefile
+               src/gallium/drivers/freedreno/Makefile
                src/gallium/drivers/trace/Makefile
                src/gallium/state_trackers/Makefile
                src/gallium/state_trackers/clover/Makefile
@@ -2071,6 +2081,7 @@ AC_CONFIG_FILES([Makefile
                src/gallium/targets/dri-r300/Makefile
                src/gallium/targets/dri-r600/Makefile
                src/gallium/targets/dri-radeonsi/Makefile
+               src/gallium/targets/dri-freedreno/Makefile
                src/gallium/targets/dri-swrast/Makefile
                src/gallium/targets/dri-vmwgfx/Makefile
                src/gallium/targets/egl-static/Makefile
@@ -2101,6 +2112,7 @@ AC_CONFIG_FILES([Makefile
                src/gallium/winsys/nouveau/drm/Makefile
                src/gallium/winsys/radeon/drm/Makefile
                src/gallium/winsys/svga/drm/Makefile
+               src/gallium/winsys/freedreno/drm/Makefile
                src/gallium/winsys/sw/Makefile
                src/gallium/winsys/sw/dri/Makefile
                src/gallium/winsys/sw/fbdev/Makefile
diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am
new file mode 100644 (file)
index 0000000..9bb532d
--- /dev/null
@@ -0,0 +1,32 @@
+include $(top_srcdir)/src/gallium/Automake.inc
+
+noinst_LTLIBRARIES = libfreedreno.la
+
+AM_CFLAGS = \
+       -Wno-packed-bitfield-compat \
+       -I$(top_srcdir)/src/gallium/drivers \
+       $(GALLIUM_CFLAGS) \
+       $(FREEDRENO_CFLAGS) \
+       $(PIC_FLAGS) \
+       $(VISIBILITY_CFLAGS)
+
+libfreedreno_la_SOURCES = \
+       freedreno_util.c \
+       freedreno_fence.c \
+       freedreno_resource.c \
+       freedreno_surface.c \
+       freedreno_vbo.c \
+       freedreno_blend.c \
+       freedreno_rasterizer.c \
+       freedreno_zsa.c \
+       freedreno_state.c \
+       freedreno_clear.c \
+       freedreno_program.c \
+       freedreno_texture.c \
+       freedreno_context.c \
+       freedreno_screen.c \
+       freedreno_gmem.c \
+       freedreno_compiler.c \
+       ir.c \
+       disasm.c
+
diff --git a/src/gallium/drivers/freedreno/disasm.c b/src/gallium/drivers/freedreno/disasm.c
new file mode 100644 (file)
index 0000000..ee14ced
--- /dev/null
@@ -0,0 +1,632 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+
+#include "disasm.h"
+#include "instr.h"
+
+/*
+ * Indentation prefixes, indexed by the 'level' argument that disasm() hands
+ * down to the print helpers.  Entries 0..8 hold 1..9 tabs; the remaining
+ * entries hold a literal "x" (presumably a visible marker for unexpectedly
+ * deep nesting).
+ * NOTE(review): 'level' is not range-checked before indexing this table.
+ */
+static const char *levels[] = {
+               "\t",
+               "\t\t",
+               "\t\t\t",
+               "\t\t\t\t",
+               "\t\t\t\t\t",
+               "\t\t\t\t\t\t",
+               "\t\t\t\t\t\t\t",
+               "\t\t\t\t\t\t\t\t",
+               "\t\t\t\t\t\t\t\t\t",
+               "x",
+               "x",
+               "x",
+               "x",
+               "x",
+               "x",
+};
+
+/* Bitmask of enum debug_t flags; written only by disasm_set_debug(). */
+static enum debug_t debug;
+
+/*
+ * ALU instructions:
+ */
+
+/*
+ * Channel letters used when printing swizzles and write masks.  Source
+ * swizzles are masked to two bits and therefore only reach the first four
+ * entries; fetch destination swizzles are masked to three bits and can reach
+ * all eight.
+ */
+static const char chan_names[] = {
+               'x', 'y', 'z', 'w',
+               /* these only apply to FETCH dst's: */
+               '0', '1', '?', '_',
+};
+
+/*
+ * Print an ALU source operand: an optional leading '-', the register wrapped
+ * in '|' bars when 'abs' is set, 'R' or 'C' (selected by 'type') followed by
+ * the register number, and a swizzle suffix when 'swiz' is non-zero.
+ */
+static void print_srcreg(uint32_t num, uint32_t type,
+               uint32_t swiz, uint32_t negate, uint32_t abs)
+{
+       const char bank = type ? 'R' : 'C';
+       unsigned chan;
+
+       if (negate) {
+               printf("-");
+       }
+       if (abs) {
+               printf("|");
+       }
+       printf("%c%u", bank, num);
+
+       if (swiz != 0) {
+               printf(".");
+               /* each 2-bit field is added to the channel's own index, so an
+                * all-zero swizzle selects xyzw unchanged and is not printed
+                */
+               for (chan = 0; chan < 4; chan++, swiz >>= 2) {
+                       printf("%c", chan_names[(swiz + chan) & 0x3]);
+               }
+       }
+
+       if (abs) {
+               printf("|");
+       }
+}
+
+/*
+ * Print an ALU destination: "export<num>" or "R<num>", followed by a
+ * four-character write mask unless all four channels are written (0xf).
+ * Channels that are not written are shown as '_'.
+ */
+static void print_dstreg(uint32_t num, uint32_t mask, uint32_t dst_exp)
+{
+       const char *prefix = dst_exp ? "export" : "R";
+       unsigned chan;
+
+       printf("%s%u", prefix, num);
+       if (mask == 0xf)
+               return;
+
+       printf(".");
+       for (chan = 0; chan < 4; chan++) {
+               /* bit 'chan' of the mask enables channel 'chan' */
+               char c = ((mask >> chan) & 0x1) ? chan_names[chan] : '_';
+               printf("%c", c);
+       }
+}
+
+/*
+ * Append a "\t; <name>" comment for export registers with a well-known
+ * meaning: exports 62/63 in a vertex shader and export 0 in a fragment
+ * shader.  Prints nothing for any other combination.
+ */
+static void print_export_comment(uint32_t num, enum shader_t type)
+{
+       const char *name = NULL;
+
+       if (type == SHADER_VERTEX) {
+               if (num == 62)
+                       name = "gl_Position";
+               else if (num == 63)
+                       name = "gl_PointSize";
+       } else if (type == SHADER_FRAGMENT) {
+               if (num == 0)
+                       name = "gl_FragColor";
+       }
+
+       /* if we had a symbol table here, we could look
+        * up the name of the varying..
+        */
+       if (!name)
+               return;
+
+       printf("\t; %s", name);
+}
+
+/*
+ * Opcode description tables for the vector and scalar halves of an ALU
+ * instruction, indexed by opcode via designated initializers.  Opcodes that
+ * are not listed keep a NULL name and num_srcs of 0.
+ *
+ *   num_srcs - number of source operands; disasm_alu() reads it for the
+ *              vector op only
+ *   name     - mnemonic, taken from the opcode's enumerator name
+ *
+ * The tables are private to this file and never modified, hence static const.
+ */
+static const struct {
+       uint32_t num_srcs;
+       const char *name;
+} vector_instructions[0x20] = {
+#define INSTR(opc, num_srcs) [opc] = { num_srcs, #opc }
+               INSTR(ADDv, 2),
+               INSTR(MULv, 2),
+               INSTR(MAXv, 2),
+               INSTR(MINv, 2),
+               INSTR(SETEv, 2),
+               INSTR(SETGTv, 2),
+               INSTR(SETGTEv, 2),
+               INSTR(SETNEv, 2),
+               INSTR(FRACv, 1),
+               INSTR(TRUNCv, 1),
+               INSTR(FLOORv, 1),
+               INSTR(MULADDv, 3),
+               INSTR(CNDEv, 3),
+               INSTR(CNDGTEv, 3),
+               INSTR(CNDGTv, 3),
+               INSTR(DOT4v, 2),
+               INSTR(DOT3v, 2),
+               INSTR(DOT2ADDv, 3),  // ???
+               INSTR(CUBEv, 2),
+               INSTR(MAX4v, 1),
+               INSTR(PRED_SETE_PUSHv, 2),
+               INSTR(PRED_SETNE_PUSHv, 2),
+               INSTR(PRED_SETGT_PUSHv, 2),
+               INSTR(PRED_SETGTE_PUSHv, 2),
+               INSTR(KILLEv, 2),
+               INSTR(KILLGTv, 2),
+               INSTR(KILLGTEv, 2),
+               INSTR(KILLNEv, 2),
+               INSTR(DSTv, 2),
+               INSTR(MOVAv, 1),
+}, scalar_instructions[0x40] = {
+               INSTR(ADDs, 1),
+               INSTR(ADD_PREVs, 1),
+               INSTR(MULs, 1),
+               INSTR(MUL_PREVs, 1),
+               INSTR(MUL_PREV2s, 1),
+               INSTR(MAXs, 1),
+               INSTR(MINs, 1),
+               INSTR(SETEs, 1),
+               INSTR(SETGTs, 1),
+               INSTR(SETGTEs, 1),
+               INSTR(SETNEs, 1),
+               INSTR(FRACs, 1),
+               INSTR(TRUNCs, 1),
+               INSTR(FLOORs, 1),
+               INSTR(EXP_IEEE, 1),
+               INSTR(LOG_CLAMP, 1),
+               INSTR(LOG_IEEE, 1),
+               INSTR(RECIP_CLAMP, 1),
+               INSTR(RECIP_FF, 1),
+               INSTR(RECIP_IEEE, 1),
+               INSTR(RECIPSQ_CLAMP, 1),
+               INSTR(RECIPSQ_FF, 1),
+               INSTR(RECIPSQ_IEEE, 1),
+               INSTR(MOVAs, 1),
+               INSTR(MOVA_FLOORs, 1),
+               INSTR(SUBs, 1),
+               INSTR(SUB_PREVs, 1),
+               INSTR(PRED_SETEs, 1),
+               INSTR(PRED_SETNEs, 1),
+               INSTR(PRED_SETGTs, 1),
+               INSTR(PRED_SETGTEs, 1),
+               INSTR(PRED_SET_INVs, 1),
+               INSTR(PRED_SET_POPs, 1),
+               INSTR(PRED_SET_CLRs, 1),
+               INSTR(PRED_SET_RESTOREs, 1),
+               INSTR(KILLEs, 1),
+               INSTR(KILLGTs, 1),
+               INSTR(KILLGTEs, 1),
+               INSTR(KILLNEs, 1),
+               INSTR(KILLONEs, 1),
+               INSTR(SQRT_IEEE, 1),
+               INSTR(MUL_CONST_0, 1),
+               INSTR(MUL_CONST_1, 1),
+               INSTR(ADD_CONST_0, 1),
+               INSTR(ADD_CONST_1, 1),
+               INSTR(SUB_CONST_0, 1),
+               INSTR(SUB_CONST_1, 1),
+               INSTR(SIN, 1),
+               INSTR(COS, 1),
+               INSTR(RETAIN_PREV, 1),
+#undef INSTR
+};
+
+/*
+ * Print one ALU instruction, read from the three dwords at 'dwords'.
+ *
+ *   alu_off - instruction offset, shown only in the PRINT_RAW hexdump
+ *   level   - index into levels[] for the indentation prefix
+ *   sync    - non-zero prints the "(S)" marker
+ *   type    - shader stage, used to annotate well-known export registers
+ *
+ * The vector op is always printed.  A second line with the scalar op follows
+ * when the scalar write mask is non-zero or the vector write mask is zero.
+ * Opcodes without a table entry are printed as OP(<n>) instead of passing a
+ * NULL name to printf.  Always returns 0.
+ */
+static int disasm_alu(uint32_t *dwords, uint32_t alu_off,
+               int level, int sync, enum shader_t type)
+{
+       instr_alu_t *alu = (instr_alu_t *)dwords;
+       const char *vector_name = vector_instructions[alu->vector_opc].name;
+       uint32_t vector_srcs = vector_instructions[alu->vector_opc].num_srcs;
+
+       printf("%s", levels[level]);
+       if (debug & PRINT_RAW) {
+               printf("%02x: %08x %08x %08x\t", alu_off,
+                               dwords[0], dwords[1], dwords[2]);
+       }
+
+       printf("   %sALU:\t", sync ? "(S)" : "   ");
+
+       /* the table has unnamed gaps; fall back to the raw opcode number */
+       if (vector_name) {
+               printf("%s", vector_name);
+       } else {
+               printf("OP(%u)", alu->vector_opc);
+       }
+
+       if (alu->pred_select & 0x2) {
+               /* seems to work similar to conditional execution in ARM instruction
+                * set, so let's use a similar syntax for now:
+                */
+               printf("%s", (alu->pred_select & 0x1) ? "EQ" : "NE");
+       }
+
+       printf("\t");
+
+       print_dstreg(alu->vector_dest, alu->vector_write_mask, alu->export_data);
+       printf(" = ");
+       /* three-source ops print src3 first; src1 is always printed */
+       if (vector_srcs == 3) {
+               print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz,
+                               alu->src3_reg_negate, alu->src3_reg_abs);
+               printf(", ");
+       }
+       print_srcreg(alu->src1_reg, alu->src1_sel, alu->src1_swiz,
+                       alu->src1_reg_negate, alu->src1_reg_abs);
+       if (vector_srcs > 1) {
+               printf(", ");
+               print_srcreg(alu->src2_reg, alu->src2_sel, alu->src2_swiz,
+                               alu->src2_reg_negate, alu->src2_reg_abs);
+       }
+
+       if (alu->vector_clamp)
+               printf(" CLAMP");
+
+       if (alu->export_data)
+               print_export_comment(alu->vector_dest, type);
+
+       printf("\n");
+
+       if (alu->scalar_write_mask || !alu->vector_write_mask) {
+               /* 2nd optional scalar op: */
+
+               printf("%s", levels[level]);
+               if (debug & PRINT_RAW)
+                       printf("                          \t");
+
+               if (scalar_instructions[alu->scalar_opc].name) {
+                       printf("\t    \t%s\t", scalar_instructions[alu->scalar_opc].name);
+               } else {
+                       printf("\t    \tOP(%u)\t", alu->scalar_opc);
+               }
+
+               print_dstreg(alu->scalar_dest, alu->scalar_write_mask, alu->export_data);
+               printf(" = ");
+               print_srcreg(alu->src3_reg, alu->src3_sel, alu->src3_swiz,
+                               alu->src3_reg_negate, alu->src3_reg_abs);
+               // TODO ADD/MUL must have another src?!?
+               if (alu->scalar_clamp)
+                       printf(" CLAMP");
+               if (alu->export_data)
+                       print_export_comment(alu->scalar_dest, type);
+               printf("\n");
+       }
+
+       return 0;
+}
+
+
+/*
+ * FETCH instructions:
+ */
+
+/*
+ * Printable names for vertex fetch formats, indexed by format id via
+ * designated initializers.  Formats that are not listed keep a NULL name;
+ * print_fetch_vtx() prints those as TYPE(0x..).  The table is private to this
+ * file and never modified, hence static const.
+ */
+static const struct {
+       const char *name;
+} fetch_types[0xff] = {
+#define TYPE(id) [id] = { #id }
+               TYPE(FMT_1_REVERSE),
+               TYPE(FMT_32_FLOAT),
+               TYPE(FMT_32_32_FLOAT),
+               TYPE(FMT_32_32_32_FLOAT),
+               TYPE(FMT_32_32_32_32_FLOAT),
+               TYPE(FMT_16),
+               TYPE(FMT_16_16),
+               TYPE(FMT_16_16_16_16),
+               TYPE(FMT_8),
+               TYPE(FMT_8_8),
+               TYPE(FMT_8_8_8_8),
+               TYPE(FMT_32),
+               TYPE(FMT_32_32),
+               TYPE(FMT_32_32_32_32),
+#undef TYPE
+};
+
+/*
+ * Print a fetch destination as "\tR<n>." followed by four channel letters.
+ * 'dst_swiz' carries one 3-bit selector per channel, lowest bits first.
+ */
+static void print_fetch_dst(uint32_t dst_reg, uint32_t dst_swiz)
+{
+       unsigned chan;
+
+       printf("\tR%u.", dst_reg);
+       for (chan = 0; chan < 4; chan++) {
+               uint32_t sel = (dst_swiz >> (3 * chan)) & 0x7;
+               printf("%c", chan_names[sel]);
+       }
+}
+
+/*
+ * Print the operands of a vertex fetch: optional EQ/NE predicate suffix,
+ * destination register and swizzle, source register and channel, format
+ * name (or TYPE(0x..) when the format has no table entry), signedness,
+ * NORMALIZED flag, stride, optional offset and the fetch constant index.
+ */
+static void print_fetch_vtx(instr_fetch_t *fetch)
+{
+       instr_fetch_vtx_t *vtx = &fetch->vtx;
+       const char *format_name = fetch_types[vtx->format].name;
+       const char *sign_name = vtx->format_comp_all ? "SIGNED" : "UNSIGNED";
+
+       if (vtx->pred_select) {
+               /* seems to work similar to conditional execution in ARM instruction
+                * set, so let's use a similar syntax for now:
+                */
+               printf("%s", vtx->pred_condition ? "EQ" : "NE");
+       }
+
+       print_fetch_dst(vtx->dst_reg, vtx->dst_swiz);
+       printf(" = R%u.", vtx->src_reg);
+       printf("%c", chan_names[vtx->src_swiz & 0x3]);
+
+       if (format_name != NULL) {
+               printf(" %s", format_name);
+       } else {
+               printf(" TYPE(0x%x)", vtx->format);
+       }
+
+       printf(" %s", sign_name);
+       if (!vtx->num_format_all) {
+               printf(" NORMALIZED");
+       }
+       printf(" STRIDE(%u)", vtx->stride);
+       if (vtx->offset) {
+               printf(" OFFSET(%u)", vtx->offset);
+       }
+       printf(" CONST(%u, %u)", vtx->const_index, vtx->const_index_sel);
+
+       /* disabled dump of fields that are not decoded above */
+       if (0) {
+               // XXX
+               printf(" src_reg_am=%u", vtx->src_reg_am);
+               printf(" dst_reg_am=%u", vtx->dst_reg_am);
+               printf(" num_format_all=%u", vtx->num_format_all);
+               printf(" signed_rf_mode_all=%u", vtx->signed_rf_mode_all);
+               printf(" exp_adjust_all=%u", vtx->exp_adjust_all);
+       }
+}
+
+/*
+ * Return the printable name for an enum-valued instruction field.  Yields
+ * "?" when 'value' lies beyond the 'count' entries of 'names' or falls on an
+ * entry that was never initialised, so callers never index out of bounds or
+ * hand a NULL pointer to printf.
+ */
+static const char *fetch_enum_name(const char *const *names, size_t count,
+               uint32_t value)
+{
+       if (value < count && names[value])
+               return names[value];
+       return "?";
+}
+
+/*
+ * Print the operands of a texture fetch: optional EQ/NE predicate suffix,
+ * destination register and swizzle, source register with three swizzle
+ * channels, the fetch constant index, and every filter/LOD/offset field that
+ * differs from its "use fetch constant" or zero default.
+ */
+static void print_fetch_tex(instr_fetch_t *fetch)
+{
+       static const char *filter[] = {
+                       [TEX_FILTER_POINT] = "POINT",
+                       [TEX_FILTER_LINEAR] = "LINEAR",
+                       [TEX_FILTER_BASEMAP] = "BASEMAP",
+       };
+       static const char *aniso_filter[] = {
+                       [ANISO_FILTER_DISABLED] = "DISABLED",
+                       [ANISO_FILTER_MAX_1_1] = "MAX_1_1",
+                       [ANISO_FILTER_MAX_2_1] = "MAX_2_1",
+                       [ANISO_FILTER_MAX_4_1] = "MAX_4_1",
+                       [ANISO_FILTER_MAX_8_1] = "MAX_8_1",
+                       [ANISO_FILTER_MAX_16_1] = "MAX_16_1",
+       };
+       static const char *arbitrary_filter[] = {
+                       [ARBITRARY_FILTER_2X4_SYM] = "2x4_SYM",
+                       [ARBITRARY_FILTER_2X4_ASYM] = "2x4_ASYM",
+                       [ARBITRARY_FILTER_4X2_SYM] = "4x2_SYM",
+                       [ARBITRARY_FILTER_4X2_ASYM] = "4x2_ASYM",
+                       [ARBITRARY_FILTER_4X4_SYM] = "4x4_SYM",
+                       [ARBITRARY_FILTER_4X4_ASYM] = "4x4_ASYM",
+       };
+       static const char *sample_loc[] = {
+                       [SAMPLE_CENTROID] = "CENTROID",
+                       [SAMPLE_CENTER] = "CENTER",
+       };
+/* bounds-checked table lookup; the tables are sized by their highest entry */
+#define NAME(tbl, val) fetch_enum_name(tbl, sizeof(tbl) / sizeof((tbl)[0]), val)
+       instr_fetch_tex_t *tex = &fetch->tex;
+       uint32_t src_swiz = tex->src_swiz;
+       int i;
+
+       if (tex->pred_select) {
+               /* seems to work similar to conditional execution in ARM instruction
+                * set, so let's use a similar syntax for now:
+                */
+               printf("%s", tex->pred_condition ? "EQ" : "NE");
+       }
+
+       print_fetch_dst(tex->dst_reg, tex->dst_swiz);
+       printf(" = R%u.", tex->src_reg);
+       for (i = 0; i < 3; i++) {
+               printf("%c", chan_names[src_swiz & 0x3]);
+               src_swiz >>= 2;
+       }
+       printf(" CONST(%u)", tex->const_idx);
+       if (tex->fetch_valid_only)
+               printf(" VALID_ONLY");
+       if (tex->tx_coord_denorm)
+               printf(" DENORM");
+       if (tex->mag_filter != TEX_FILTER_USE_FETCH_CONST)
+               printf(" MAG(%s)", NAME(filter, tex->mag_filter));
+       if (tex->min_filter != TEX_FILTER_USE_FETCH_CONST)
+               printf(" MIN(%s)", NAME(filter, tex->min_filter));
+       if (tex->mip_filter != TEX_FILTER_USE_FETCH_CONST)
+               printf(" MIP(%s)", NAME(filter, tex->mip_filter));
+       if (tex->aniso_filter != ANISO_FILTER_USE_FETCH_CONST)
+               printf(" ANISO(%s)", NAME(aniso_filter, tex->aniso_filter));
+       if (tex->arbitrary_filter != ARBITRARY_FILTER_USE_FETCH_CONST)
+               printf(" ARBITRARY(%s)", NAME(arbitrary_filter, tex->arbitrary_filter));
+       if (tex->vol_mag_filter != TEX_FILTER_USE_FETCH_CONST)
+               printf(" VOL_MAG(%s)", NAME(filter, tex->vol_mag_filter));
+       if (tex->vol_min_filter != TEX_FILTER_USE_FETCH_CONST)
+               printf(" VOL_MIN(%s)", NAME(filter, tex->vol_min_filter));
+       if (!tex->use_comp_lod) {
+               /* NOTE(review): this prints use_comp_lod itself, which is always
+                * 0 on this path -- confirm which field LOD() was meant to show
+                */
+               printf(" LOD(%u)", tex->use_comp_lod);
+               printf(" LOD_BIAS(%u)", tex->lod_bias);
+       }
+       if (tex->use_reg_gradients)
+               printf(" USE_REG_GRADIENTS");
+       printf(" LOCATION(%s)", NAME(sample_loc, tex->sample_location));
+       if (tex->offset_x || tex->offset_y || tex->offset_z)
+               printf(" OFFSET(%u,%u,%u)", tex->offset_x, tex->offset_y, tex->offset_z);
+#undef NAME
+}
+
+/*
+ * Dispatch table for FETCH instructions, indexed by opcode via designated
+ * initializers: the mnemonic to print and the function that prints the
+ * operands.  The array is sized by its highest listed opcode, and opcodes
+ * that are not listed keep NULL name/fxn members.  The table is private to
+ * this file and never modified, hence static const.
+ */
+static const struct {
+       const char *name;
+       void (*fxn)(instr_fetch_t *cf);
+} fetch_instructions[] = {
+#define INSTR(opc, name, fxn) [opc] = { name, fxn }
+               INSTR(VTX_FETCH, "VERTEX", print_fetch_vtx),
+               INSTR(TEX_FETCH, "SAMPLE", print_fetch_tex),
+               INSTR(TEX_GET_BORDER_COLOR_FRAC, "?", print_fetch_tex),
+               INSTR(TEX_GET_COMP_TEX_LOD, "?", print_fetch_tex),
+               INSTR(TEX_GET_GRADIENTS, "?", print_fetch_tex),
+               INSTR(TEX_GET_WEIGHTS, "?", print_fetch_tex),
+               INSTR(TEX_SET_TEX_LOD, "SET_TEX_LOD", print_fetch_tex),
+               INSTR(TEX_SET_GRADIENTS_H, "?", print_fetch_tex),
+               INSTR(TEX_SET_GRADIENTS_V, "?", print_fetch_tex),
+               INSTR(TEX_RESERVED_4, "?", print_fetch_tex),
+#undef INSTR
+};
+
+/*
+ * Print one FETCH instruction, read from the three dwords at 'dwords'.
+ *
+ *   alu_off - instruction offset, shown only in the PRINT_RAW hexdump
+ *   level   - index into levels[] for the indentation prefix
+ *   sync    - non-zero prints the "(S)" marker
+ *
+ * The opcode selects an entry of fetch_instructions[].  That table is sized
+ * by its highest listed opcode and has unlisted gaps, so an opcode outside
+ * the table or without a handler is printed as OP(<n>) rather than indexing
+ * past the array or calling a NULL function pointer.  Always returns 0.
+ */
+static int disasm_fetch(uint32_t *dwords, uint32_t alu_off, int level, int sync)
+{
+       instr_fetch_t *fetch = (instr_fetch_t *)dwords;
+       const size_t num_known =
+                       sizeof(fetch_instructions) / sizeof(fetch_instructions[0]);
+       uint32_t opc = fetch->opc;
+
+       printf("%s", levels[level]);
+       if (debug & PRINT_RAW) {
+               printf("%02x: %08x %08x %08x\t", alu_off,
+                               dwords[0], dwords[1], dwords[2]);
+       }
+
+       printf("   %sFETCH:\t", sync ? "(S)" : "   ");
+       if (opc < num_known && fetch_instructions[opc].fxn) {
+               printf("%s", fetch_instructions[opc].name);
+               fetch_instructions[opc].fxn(fetch);
+       } else {
+               printf("OP(%u)", opc);
+       }
+       printf("\n");
+
+       return 0;
+}
+
+/*
+ * CF instructions:
+ */
+
+/*
+ * Return non-zero when 'cf' is any EXEC-family CF instruction
+ * (unconditional or conditional, with or without the _END suffix).
+ */
+static int cf_exec(instr_cf_t *cf)
+{
+       switch (cf->opc) {
+       case EXEC:
+       case EXEC_END:
+       case COND_EXEC:
+       case COND_EXEC_END:
+       case COND_PRED_EXEC:
+       case COND_PRED_EXEC_END:
+       case COND_EXEC_PRED_CLEAN:
+       case COND_EXEC_PRED_CLEAN_END:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Return non-zero when 'cf' is one of the conditional EXEC variants, i.e.
+ * every EXEC-family opcode except plain EXEC and EXEC_END.
+ */
+static int cf_cond_exec(instr_cf_t *cf)
+{
+       switch (cf->opc) {
+       case COND_EXEC:
+       case COND_EXEC_END:
+       case COND_PRED_EXEC:
+       case COND_PRED_EXEC_END:
+       case COND_EXEC_PRED_CLEAN:
+       case COND_EXEC_PRED_CLEAN_END:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * Operand printer for CF instructions that have nothing to print besides
+ * their mnemonic; intentionally empty so the dispatch table always has a
+ * callable entry.
+ */
+static void print_cf_nop(instr_cf_t *cf)
+{
+}
+
+/*
+ * Print the operands of an EXEC-family CF instruction: address and count
+ * are always shown, the remaining fields only when set.  COND() is shown
+ * only for the conditional variants.
+ */
+static void print_cf_exec(instr_cf_t *cf)
+{
+       const int conditional = cf_cond_exec(cf);
+
+       printf(" ADDR(0x%x) CNT(0x%x)", cf->exec.address, cf->exec.count);
+
+       if (cf->exec.yeild) {
+               printf(" YIELD");
+       }
+       if (cf->exec.vc) {
+               printf(" VC(0x%x)", cf->exec.vc);
+       }
+       if (cf->exec.bool_addr) {
+               printf(" BOOL_ADDR(0x%x)", cf->exec.bool_addr);
+       }
+       if (cf->exec.address_mode == ABSOLUTE_ADDR) {
+               printf(" ABSOLUTE_ADDR");
+       }
+       if (conditional) {
+               printf(" COND(%d)", cf->exec.condition);
+       }
+}
+
+/*
+ * Print the operands of a LOOP_START/LOOP_END CF instruction: target
+ * address, loop id, and a marker when the address is absolute.
+ */
+static void print_cf_loop(instr_cf_t *cf)
+{
+       const int absolute = (cf->loop.address_mode == ABSOLUTE_ADDR);
+
+       printf(" ADDR(0x%x) LOOP_ID(%d)", cf->loop.address, cf->loop.loop_id);
+       if (absolute) {
+               printf(" ABSOLUTE_ADDR");
+       }
+}
+
+/*
+ * Print the operands of a jump/call/return CF instruction: address and
+ * direction are always shown, the remaining fields only when set.
+ */
+static void print_cf_jmp_call(instr_cf_t *cf)
+{
+       printf(" ADDR(0x%x) DIR(%d)", cf->jmp_call.address, cf->jmp_call.direction);
+
+       if (cf->jmp_call.force_call) {
+               printf(" FORCE_CALL");
+       }
+       if (cf->jmp_call.predicated_jmp) {
+               printf(" COND(%d)", cf->jmp_call.condition);
+       }
+       if (cf->jmp_call.bool_addr) {
+               printf(" BOOL_ADDR(0x%x)", cf->jmp_call.bool_addr);
+       }
+       if (cf->jmp_call.address_mode == ABSOLUTE_ADDR) {
+               printf(" ABSOLUTE_ADDR");
+       }
+}
+
+/*
+ * Print the operands of an ALLOC CF instruction: the name of the selected
+ * buffer, the allocation size, and the NO_SERIAL / ALLOC_MODE flags when set.
+ */
+static void print_cf_alloc(instr_cf_t *cf)
+{
+       static const char *bufname[] = {
+                       [SQ_NO_ALLOC] = "NO ALLOC",
+                       [SQ_POSITION] = "POSITION",
+                       [SQ_PARAMETER_PIXEL] = "PARAM/PIXEL",
+                       [SQ_MEMORY] = "MEMORY",
+       };
+       const char *selected = bufname[cf->alloc.buffer_select];
+
+       printf(" %s SIZE(0x%x)", selected, cf->alloc.size);
+       if (cf->alloc.no_serial) {
+               printf(" NO_SERIAL");
+       }
+       if (cf->alloc.alloc_mode) { // ???
+               printf(" ALLOC_MODE");
+       }
+}
+
+/*
+ * Dispatch table for CF (control-flow) instructions, indexed by opcode via
+ * designated initializers: the mnemonic (the opcode's enumerator name) and
+ * the function that prints the operands.  The array is sized by its highest
+ * listed opcode.  The table is private to this file and never modified,
+ * hence static const.
+ */
+static const struct {
+       const char *name;
+       void (*fxn)(instr_cf_t *cf);
+} cf_instructions[] = {
+#define INSTR(opc, fxn) [opc] = { #opc, fxn }
+               INSTR(NOP, print_cf_nop),
+               INSTR(EXEC, print_cf_exec),
+               INSTR(EXEC_END, print_cf_exec),
+               INSTR(COND_EXEC, print_cf_exec),
+               INSTR(COND_EXEC_END, print_cf_exec),
+               INSTR(COND_PRED_EXEC, print_cf_exec),
+               INSTR(COND_PRED_EXEC_END, print_cf_exec),
+               INSTR(LOOP_START, print_cf_loop),
+               INSTR(LOOP_END, print_cf_loop),
+               INSTR(COND_CALL, print_cf_jmp_call),
+               INSTR(RETURN, print_cf_jmp_call),
+               INSTR(COND_JMP, print_cf_jmp_call),
+               INSTR(ALLOC, print_cf_alloc),
+               INSTR(COND_EXEC_PRED_CLEAN, print_cf_exec),
+               INSTR(COND_EXEC_PRED_CLEAN_END, print_cf_exec),
+               INSTR(MARK_VS_FETCH_DONE, print_cf_nop),  // ??
+#undef INSTR
+};
+
+/*
+ * Print one CF instruction on its own line: indentation for 'level', an
+ * optional raw dump of its three 16-bit words (PRINT_RAW), the mnemonic and
+ * the opcode-specific operands.
+ *
+ * cf_instructions[] is sized by its highest listed opcode, so an opcode that
+ * falls outside the table or has no handler is printed as OP(<n>) rather
+ * than indexing past the array or calling a NULL function pointer.
+ */
+static void print_cf(instr_cf_t *cf, int level)
+{
+       const size_t num_known =
+                       sizeof(cf_instructions) / sizeof(cf_instructions[0]);
+       uint32_t opc = cf->opc;
+
+       printf("%s", levels[level]);
+       if (debug & PRINT_RAW) {
+               uint16_t *words = (uint16_t *)cf;
+               printf("    %04x %04x %04x            \t",
+                               words[0], words[1], words[2]);
+       }
+       if (opc < num_known && cf_instructions[opc].fxn) {
+               printf("%s", cf_instructions[opc].name);
+               cf_instructions[opc].fxn(cf);
+       } else {
+               printf("OP(%u)", opc);
+       }
+       printf("\n");
+}
+
+/*
+ * The adreno shader microcode consists of two parts:
+ *   1) A CF (control-flow) program, at the header of the compiled shader,
+ *      which refers to ALU/FETCH instructions that follow it by address.
+ *   2) ALU and FETCH instructions
+ *
+ * disasm() prints every CF instruction and, below each EXEC-family
+ * instruction, the ALU/FETCH instructions it references.
+ *
+ *   dwords     - start of the shader binary
+ *   sizedwords - size of the binary in 32-bit words; bounds every read
+ *   level      - index into levels[] for the indentation prefix
+ *   type       - shader stage, used to annotate export registers
+ *
+ * Returns 0 on success, or -1 when the buffer is empty or holds no EXEC
+ * instruction from which the length of the CF program could be derived.
+ */
+int disasm(uint32_t *dwords, int sizedwords, int level, enum shader_t type)
+{
+       instr_cf_t *cfs = (instr_cf_t *)dwords;
+       uint32_t num_instrs;
+       int idx, num_cfs, max_idx = -1;
+
+       if (!dwords || sizedwords <= 0)
+               return -1;
+
+       /* whole CF instructions, and whole 3-dword ALU/FETCH slots, that fit
+        * in the buffer
+        */
+       num_cfs = (int)(((size_t)sizedwords * sizeof(uint32_t)) / sizeof(instr_cf_t));
+       num_instrs = (uint32_t)sizedwords / 3;
+
+       /* the address of the first EXEC marks where the CF program ends; its
+        * length in CF instructions is taken as twice that address
+        */
+       for (idx = 0; idx < num_cfs; idx++) {
+               instr_cf_t *cf = &cfs[idx];
+               if (cf_exec(cf)) {
+                       max_idx = 2 * cf->exec.address;
+                       break;
+               }
+       }
+
+       if (max_idx < 0)
+               return -1;
+       if (max_idx > num_cfs)
+               max_idx = num_cfs;
+
+       for (idx = 0; idx < max_idx; idx++) {
+               instr_cf_t *cf = &cfs[idx];
+
+               print_cf(cf, level);
+
+               if (cf_exec(cf)) {
+                       /* two bits per instruction: bit 0 selects FETCH over
+                        * ALU, bit 1 is passed on as the sync flag
+                        */
+                       uint32_t sequence = cf->exec.serialize;
+                       uint32_t i;
+                       for (i = 0; i < cf->exec.count; i++) {
+                               uint32_t alu_off = (cf->exec.address + i);
+                               /* stop rather than read past the buffer */
+                               if (alu_off >= num_instrs)
+                                       break;
+                               if (sequence & 0x1) {
+                                       disasm_fetch(dwords + alu_off * 3, alu_off, level, sequence & 0x2);
+                               } else {
+                                       disasm_alu(dwords + alu_off * 3, alu_off, level, sequence & 0x2, type);
+                               }
+                               sequence >>= 2;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/* Replace the file-wide debug flag mask (see enum debug_t) with 'd'. */
+void disasm_set_debug(enum debug_t d)
+{
+       debug= d;
+}
diff --git a/src/gallium/drivers/freedreno/disasm.h b/src/gallium/drivers/freedreno/disasm.h
new file mode 100644 (file)
index 0000000..92efd5a
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright © 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef DISASM_H_
+#define DISASM_H_
+
+#include <stdint.h>   /* uint32_t in the disasm() prototype */
+
+/* kind of shader handed to disasm() */
+enum shader_t {
+       SHADER_VERTEX,
+       SHADER_FRAGMENT,
+};
+
+/* bitmask of debug flags */
+enum debug_t {
+       PRINT_RAW      = 0x1,    /* dump raw hexdump */
+};
+
+/*
+ * Disassemble and print the shader held in 'dwords' ('sizedwords' 32-bit
+ * words long).  Returns 0 on success.
+ */
+int disasm(uint32_t *dwords, int sizedwords, int level, enum shader_t type);
+
+/* Set the debug flags (see enum debug_t) used by the disassembler. */
+void disasm_set_debug(enum debug_t debug);
+
+#endif /* DISASM_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_a2xx_reg.h b/src/gallium/drivers/freedreno/freedreno_a2xx_reg.h
new file mode 100644 (file)
index 0000000..7c5982c
--- /dev/null
@@ -0,0 +1,1172 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef FREEDRENO_A2XX_REG_H_
+#define FREEDRENO_A2XX_REG_H_
+
+#include <GLES2/gl2.h>
+
+/* reinterpret the bits of a float as a dword (no numeric conversion) */
+static inline uint32_t f2d(float f)
+{
+       union {
+               float as_float;
+               uint32_t as_dword;
+       } pun;
+
+       pun.as_float = f;
+       return pun.as_dword;
+}
+
+/*
+ * convert float to fixed point: scale by 8 and truncate.
+ * NOTE(review): the name says 12.4, which would imply a scale of 16; the
+ * factor of 8 may be deliberate (e.g. the registers taking a half-size) --
+ * confirm against the hardware documentation before changing it.
+ */
+static inline uint32_t f2d12_4(float f)
+{
+       double scaled = f * 8.0;
+       return (uint32_t)scaled;
+}
+
+/* pack x into bits 0..13 and y into bits 16..29 of a single dword */
+static inline uint32_t xy2d(uint16_t x, uint16_t y)
+{
+       uint32_t lo = x & 0x3fff;
+       uint32_t hi = (y & 0x3fff) << 16;
+       return hi | lo;
+}
+
+/*
+ * Values for CP_EVENT_WRITE:
+ */
+
+enum VGT_EVENT_TYPE {
+       VS_DEALLOC = 0,
+       PS_DEALLOC = 1,
+       VS_DONE_TS = 2,
+       PS_DONE_TS = 3,
+       CACHE_FLUSH_TS = 4,
+       CONTEXT_DONE = 5,
+       CACHE_FLUSH = 6,
+       VIZQUERY_START = 7,
+       VIZQUERY_END = 8,
+       SC_WAIT_WC = 9,
+       RST_PIX_CNT = 13,
+       RST_VTX_CNT = 14,
+       TILE_FLUSH = 15,
+       CACHE_FLUSH_AND_INV_TS_EVENT = 20,
+       ZPASS_DONE = 21,
+       CACHE_FLUSH_AND_INV_EVENT = 22,
+       PERFCOUNTER_START = 23,
+       PERFCOUNTER_STOP = 24,
+       VS_FETCH_DONE = 27,
+       FACENESS_FLUSH = 28,
+};
+
+/*
+ * Color/surface formats:
+ */
+
+enum rb_colorformatx {
+       COLORX_4_4_4_4 = 0,
+       COLORX_1_5_5_5 = 1,
+       COLORX_5_6_5 = 2,
+       COLORX_8 = 3,
+       COLORX_8_8 = 4,
+       COLORX_8_8_8_8 = 5,
+       COLORX_S8_8_8_8 = 6,
+       COLORX_16_FLOAT = 7,
+       COLORX_16_16_FLOAT = 8,
+       COLORX_16_16_16_16_FLOAT = 9,
+       COLORX_32_FLOAT = 10,
+       COLORX_32_32_FLOAT = 11,
+       COLORX_32_32_32_32_FLOAT = 12,
+       COLORX_2_3_3 = 13,
+       COLORX_8_8_8 = 14,
+       COLORX_INVALID,
+};
+
+enum sq_surfaceformat {
+       FMT_1_REVERSE                  = 0,
+       FMT_1                          = 1,
+       FMT_8                          = 2,
+       FMT_1_5_5_5                    = 3,
+       FMT_5_6_5                      = 4,
+       FMT_6_5_5                      = 5,
+       FMT_8_8_8_8                    = 6,
+       FMT_2_10_10_10                 = 7,
+       FMT_8_A                        = 8,
+       FMT_8_B                        = 9,
+       FMT_8_8                        = 10,
+       FMT_Cr_Y1_Cb_Y0                = 11,
+       FMT_Y1_Cr_Y0_Cb                = 12,
+       FMT_5_5_5_1                    = 13,
+       FMT_8_8_8_8_A                  = 14,
+       FMT_4_4_4_4                    = 15,
+       FMT_10_11_11                   = 16,
+       FMT_11_11_10                   = 17,
+       FMT_DXT1                       = 18,
+       FMT_DXT2_3                     = 19,
+       FMT_DXT4_5                     = 20,
+       FMT_24_8                       = 22,
+       FMT_24_8_FLOAT                 = 23,
+       FMT_16                         = 24,
+       FMT_16_16                      = 25,
+       FMT_16_16_16_16                = 26,
+       FMT_16_EXPAND                  = 27,
+       FMT_16_16_EXPAND               = 28,
+       FMT_16_16_16_16_EXPAND         = 29,
+       FMT_16_FLOAT                   = 30,
+       FMT_16_16_FLOAT                = 31,
+       FMT_16_16_16_16_FLOAT          = 32,
+       FMT_32                         = 33,
+       FMT_32_32                      = 34,
+       FMT_32_32_32_32                = 35,
+       FMT_32_FLOAT                   = 36,
+       FMT_32_32_FLOAT                = 37,
+       FMT_32_32_32_32_FLOAT          = 38,
+       FMT_32_AS_8                    = 39,
+       FMT_32_AS_8_8                  = 40,
+       FMT_16_MPEG                    = 41,
+       FMT_16_16_MPEG                 = 42,
+       FMT_8_INTERLACED               = 43,
+       FMT_32_AS_8_INTERLACED         = 44,
+       FMT_32_AS_8_8_INTERLACED       = 45,
+       FMT_16_INTERLACED              = 46,
+       FMT_16_MPEG_INTERLACED         = 47,
+       FMT_16_16_MPEG_INTERLACED      = 48,
+       FMT_DXN                        = 49,
+       FMT_8_8_8_8_AS_16_16_16_16     = 50,
+       FMT_DXT1_AS_16_16_16_16        = 51,
+       FMT_DXT2_3_AS_16_16_16_16      = 52,
+       FMT_DXT4_5_AS_16_16_16_16      = 53,
+       FMT_2_10_10_10_AS_16_16_16_16  = 54,
+       FMT_10_11_11_AS_16_16_16_16    = 55,
+       FMT_11_11_10_AS_16_16_16_16    = 56,
+       FMT_32_32_32_FLOAT             = 57,
+       FMT_DXT3A                      = 58,
+       FMT_DXT5A                      = 59,
+       FMT_CTX1                       = 60,
+       FMT_DXT3A_AS_1_1_1_1           = 61,
+       FMT_INVALID
+};
+
+/*
+ * Register addresses:
+ */
+
+#define REG_COHER_BASE_PM4                  0xa2a
+#define REG_COHER_DEST_BASE_0               0x2006
+#define REG_COHER_SIZE_PM4                  0xa29
+#define REG_COHER_STATUS_PM4                0xa2b
+#define REG_CP_CSQ_IB1_STAT                 0x01fe
+#define REG_CP_CSQ_IB2_STAT                 0x01ff
+#define REG_CP_CSQ_RB_STAT                  0x01fd
+#define REG_CP_DEBUG                        0x01fc
+#define REG_CP_IB1_BASE                     0x0458
+#define REG_CP_IB1_BUFSZ                    0x0459
+#define REG_CP_IB2_BASE                     0x045a
+#define REG_CP_IB2_BUFSZ                    0x045b
+#define REG_CP_INT_ACK                      0x01f4
+#define REG_CP_INT_CNTL                     0x01f2
+#define REG_CP_INT_STATUS                   0x01f3
+#define REG_CP_ME_CNTL                      0x01f6
+#define REG_CP_ME_RAM_DATA                  0x01fa
+#define REG_CP_ME_RAM_RADDR                 0x01f9
+#define REG_CP_ME_RAM_WADDR                 0x01f8
+#define REG_CP_ME_STATUS                    0x01f7
+#define REG_CP_PERFCOUNTER_HI               0x0447
+#define REG_CP_PERFCOUNTER_LO               0x0446
+#define REG_CP_PERFCOUNTER_SELECT           0x0445
+#define REG_CP_PERFMON_CNTL                 0x0444
+#define REG_CP_PFP_UCODE_ADDR               0x00c0
+#define REG_CP_PFP_UCODE_DATA               0x00c1
+#define REG_CP_QUEUE_THRESHOLDS             0x01d5
+#define REG_CP_RB_BASE                      0x01c0
+#define REG_CP_RB_CNTL                      0x01c1
+#define REG_CP_RB_RPTR                      0x01c4
+#define REG_CP_RB_RPTR_ADDR                 0x01c3
+#define REG_CP_RB_RPTR_WR                   0x01c7
+#define REG_CP_RB_WPTR                      0x01c5
+#define REG_CP_RB_WPTR_BASE                 0x01c8
+#define REG_CP_RB_WPTR_DELAY                0x01c6
+#define REG_CP_STAT                         0x047f
+#define REG_CP_STATE_DEBUG_DATA             0x01ed
+#define REG_CP_STATE_DEBUG_INDEX            0x01ec
+#define REG_CP_ST_BASE                      0x044d
+#define REG_CP_ST_BUFSZ                     0x044e
+#define REG_GRAS_DEBUG_CNTL                 0x0c80
+#define REG_GRAS_DEBUG_DATA                 0x0c81
+#define REG_MASTER_INT_SIGNAL               0x03b7
+#define REG_PA_CL_CLIP_CNTL                 0x2204
+#define REG_PA_CL_GB_HORZ_CLIP_ADJ          0x2305
+#define REG_PA_CL_GB_HORZ_DISC_ADJ          0x2306
+#define REG_PA_CL_GB_VERT_CLIP_ADJ          0x2303
+#define REG_PA_CL_GB_VERT_DISC_ADJ          0x2304
+#define REG_PA_CL_VPORT_XOFFSET             0x2110
+#define REG_PA_CL_VPORT_XSCALE              0x210f
+#define REG_PA_CL_VPORT_YOFFSET             0x2112
+#define REG_PA_CL_VPORT_YSCALE              0x2111
+#define REG_PA_CL_VPORT_ZOFFSET             0x2114
+#define REG_PA_CL_VPORT_ZSCALE              0x2113
+#define REG_PA_CL_VTE_CNTL                  0x2206
+#define REG_PA_SC_AA_CONFIG                 0x2301
+#define REG_PA_SC_AA_MASK                   0x2312
+#define REG_PA_SC_LINE_CNTL                 0x2300
+#define REG_PA_SC_LINE_STIPPLE              0x2283
+#define REG_PA_SC_SCREEN_SCISSOR_BR         0x200f
+#define REG_PA_SC_SCREEN_SCISSOR_TL         0x200e
+#define REG_PA_SC_VIZ_QUERY                 0x2293
+#define REG_PA_SC_VIZ_QUERY_STATUS          0x0c44
+#define REG_PA_SC_WINDOW_OFFSET             0x2080
+#define REG_PA_SC_WINDOW_SCISSOR_BR         0x2082
+#define REG_PA_SC_WINDOW_SCISSOR_TL         0x2081
+#define REG_PA_SU_DEBUG_CNTL                0x0c80
+#define REG_PA_SU_DEBUG_DATA                0x0c81
+#define REG_PA_SU_FACE_DATA                 0x0c86
+#define REG_PA_SU_LINE_CNTL                 0x2282
+#define REG_PA_SU_POINT_MINMAX              0x2281
+#define REG_PA_SU_POINT_SIZE                0x2280
+#define REG_PA_SU_POLY_OFFSET_BACK_OFFSET   0x2383
+#define REG_PA_SU_POLY_OFFSET_FRONT_SCALE   0x2380
+#define REG_PA_SU_SC_MODE_CNTL              0x2205
+#define REG_PA_SU_VTX_CNTL                  0x2302
+#define REG_PC_DEBUG_CNTL                   0x0c38
+#define REG_PC_DEBUG_DATA                   0x0c39
+#define REG_RB_ALPHA_REF                    0x210e
+#define REG_RB_BC_CONTROL                   0x0f01
+#define REG_RB_BLEND_ALPHA                  0x2108
+#define REG_RB_BLEND_BLUE                   0x2107
+#define REG_RB_BLEND_CONTROL                0x2201
+#define REG_RB_BLEND_GREEN                  0x2106
+#define REG_RB_BLEND_RED                    0x2105
+#define REG_RBBM_CNTL                       0x003b
+#define REG_RBBM_DEBUG                      0x039b
+#define REG_RBBM_DEBUG_CNTL                 0x03a1
+#define REG_RBBM_DEBUG_OUT                  0x03a0
+#define REG_RBBM_INT_ACK                    0x03b6
+#define REG_RBBM_INT_CNTL                   0x03b4
+#define REG_RBBM_INT_STATUS                 0x03b5
+#define REG_RBBM_PATCH_RELEASE              0x0001
+#define REG_RBBM_PERFCOUNTER1_HI            0x0398
+#define REG_RBBM_PERFCOUNTER1_LO            0x0397
+#define REG_RBBM_PERFCOUNTER1_SELECT        0x0395
+#define REG_RBBM_PERIPHID1                  0x03f9
+#define REG_RBBM_PERIPHID2                  0x03fa
+#define REG_RBBM_PM_OVERRIDE1               0x039c
+#define REG_RBBM_PM_OVERRIDE2               0x039d
+#define REG_RBBM_READ_ERROR                 0x03b3
+#define REG_RBBM_SOFT_RESET                 0x003c
+#define REG_RBBM_STATUS                     0x05d0
+#define REG_RB_COLORCONTROL                 0x2202
+#define REG_RB_COLOR_DEST_MASK              0x2326
+#define REG_RB_COLOR_INFO                   0x2001
+#define REG_RB_COLOR_MASK                   0x2104
+#define REG_RB_COPY_CONTROL                 0x2318
+#define REG_RB_COPY_DEST_BASE               0x2319
+#define REG_RB_COPY_DEST_INFO               0x231b
+#define REG_RB_COPY_DEST_OFFSET             0x231c
+#define REG_RB_COPY_DEST_PITCH              0x231a
+#define REG_RB_DEBUG_CNTL                   0x0f26
+#define REG_RB_DEBUG_DATA                   0x0f27
+#define REG_RB_DEPTH_CLEAR                  0x231d
+#define REG_RB_DEPTHCONTROL                 0x2200
+#define REG_RB_DEPTH_INFO                   0x2002
+#define REG_RB_EDRAM_INFO                   0x0f02
+#define REG_RB_FOG_COLOR                    0x2109
+#define REG_RB_MODECONTROL                  0x2208
+#define REG_RB_SAMPLE_COUNT_CTL             0x2324
+#define REG_RB_SAMPLE_POS                   0x220a
+#define REG_RB_STENCILREFMASK               0x210d
+#define REG_RB_STENCILREFMASK_BF            0x210c
+#define REG_RB_SURFACE_INFO                 0x2000
+#define REG_SCRATCH_ADDR                    0x01dd
+#define REG_SCRATCH_REG0                    0x0578
+#define REG_SCRATCH_REG2                    0x057a
+#define REG_SCRATCH_UMSK                    0x01dc
+#define REG_SQ_CF_BOOLEANS                  0x4900
+#define REG_SQ_CF_LOOP                      0x4908
+#define REG_SQ_CONSTANT_0                   0x4000
+#define REG_SQ_CONTEXT_MISC                 0x2181
+#define REG_SQ_DEBUG_CONST_MGR_FSM          0x0daf
+#define REG_SQ_DEBUG_EXP_ALLOC              0x0db3
+#define REG_SQ_DEBUG_FSM_ALU_0              0x0db1
+#define REG_SQ_DEBUG_FSM_ALU_1              0x0db2
+#define REG_SQ_DEBUG_GPR_PIX                0x0db6
+#define REG_SQ_DEBUG_GPR_VTX                0x0db5
+#define REG_SQ_DEBUG_INPUT_FSM              0x0dae
+#define REG_SQ_DEBUG_MISC_0                 0x2309
+#define REG_SQ_DEBUG_MISC                   0x0d05
+#define REG_SQ_DEBUG_MISC_1                 0x230a
+#define REG_SQ_DEBUG_PIX_TB_0               0x0dbc
+#define REG_SQ_DEBUG_PIX_TB_STATE_MEM       0x0dc1
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_0    0x0dbd
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_1    0x0dbe
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_2    0x0dbf
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_3    0x0dc0
+#define REG_SQ_DEBUG_PTR_BUFF               0x0db4
+#define REG_SQ_DEBUG_TB_STATUS_SEL          0x0db7
+#define REG_SQ_DEBUG_TP_FSM                 0x0db0
+#define REG_SQ_DEBUG_VTX_TB_0               0x0db8
+#define REG_SQ_DEBUG_VTX_TB_1               0x0db9
+#define REG_SQ_DEBUG_VTX_TB_STATE_MEM       0x0dbb
+#define REG_SQ_DEBUG_VTX_TB_STATUS_REG      0x0dba
+#define REG_SQ_FETCH_0                      0x4800
+#define REG_SQ_FLOW_CONTROL                 0x0d01
+#define REG_SQ_GPR_MANAGEMENT               0x0d00
+#define REG_SQ_INST_STORE_MANAGMENT         0x0d02
+#define REG_SQ_INT_ACK                      0x0d36
+#define REG_SQ_INT_CNTL                     0x0d34
+#define REG_SQ_INTERPOLATOR_CNTL            0x2182
+#define REG_SQ_INT_STATUS                   0x0d35
+#define REG_SQ_PROGRAM_CNTL                 0x2180
+#define REG_SQ_PS_CONST                     0x2308
+#define REG_SQ_PS_PROGRAM                   0x21f6
+#define REG_SQ_VS_CONST                     0x2307
+#define REG_SQ_VS_PROGRAM                   0x21f7
+#define REG_SQ_WRAPPING_0                   0x2183
+#define REG_SQ_WRAPPING_1                   0x2184
+#define REG_TC_CNTL_STATUS                  0x0e00
+#define REG_TP0_CHICKEN                     0x0e1e
+#define REG_VGT_CURRENT_BIN_ID_MAX          0x2203
+#define REG_VGT_CURRENT_BIN_ID_MIN          0x2207
+#define REG_VGT_ENHANCE                     0x2294
+#define REG_VGT_INDX_OFFSET                 0x2102
+#define REG_VGT_MAX_VTX_INDX                0x2100
+#define REG_VGT_MIN_VTX_INDX                0x2101
+#define REG_VGT_OUT_DEALLOC_CNTL            0x2317
+#define REG_VGT_VERTEX_REUSE_BLOCK_CNTL     0x2316
+
+/* Added in a220: */
+#define REG_A220_RB_LRZ_VSC_CONTROL         0x2209
+#define REG_A220_GRAS_CONTROL               0x2210
+#define REG_A220_VSC_BIN_SIZE               0x0c01
+#define REG_A220_VSC_PIPE_DATA_LENGTH_7     0x0c1d
+#define REG_VSC_PIPE_CONFIG_0               0x0c06
+#define REG_VSC_PIPE_DATA_ADDRESS_0         0x0c07
+#define REG_VSC_PIPE_DATA_LENGTH_0          0x0c08
+#define REG_VSC_PIPE_CONFIG_1               0x0c09
+#define REG_VSC_PIPE_DATA_ADDRESS_1         0x0c0a
+#define REG_VSC_PIPE_DATA_LENGTH_1          0x0c0b
+#define REG_VSC_PIPE_CONFIG_2               0x0c0c
+#define REG_VSC_PIPE_DATA_ADDRESS_2         0x0c0d
+#define REG_VSC_PIPE_DATA_LENGTH_2          0x0c0e
+#define REG_VSC_PIPE_CONFIG_3               0x0c0f
+#define REG_VSC_PIPE_DATA_ADDRESS_3         0x0c10
+#define REG_VSC_PIPE_DATA_LENGTH_3          0x0c11
+#define REG_VSC_PIPE_CONFIG_4               0x0c12
+#define REG_VSC_PIPE_DATA_ADDRESS_4         0x0c13
+#define REG_VSC_PIPE_DATA_LENGTH_4          0x0c14
+#define REG_VSC_PIPE_CONFIG_5               0x0c15
+#define REG_VSC_PIPE_DATA_ADDRESS_5         0x0c16
+#define REG_VSC_PIPE_DATA_LENGTH_5          0x0c17
+#define REG_VSC_PIPE_CONFIG_6               0x0c18
+#define REG_VSC_PIPE_DATA_ADDRESS_6         0x0c19
+#define REG_VSC_PIPE_DATA_LENGTH_6          0x0c1a
+#define REG_VSC_PIPE_CONFIG_7               0x0c1b
+#define REG_VSC_PIPE_DATA_ADDRESS_7         0x0c1c
+#define REG_VSC_PIPE_DATA_LENGTH_7          0x0c1d
+
+/* Added in a225: */
+#define REG_A225_RB_COLOR_INFO3             0x2005
+#define REG_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x2103
+#define REG_A225_GRAS_UCP0X                 0x2340
+#define REG_A225_GRAS_UCP5W                 0x2357
+#define REG_A225_GRAS_UCP_ENABLED           0x2360
+
+/* not sure, maybe RB_CLEAR_COLOR? */
+#define REG_CLEAR_COLOR                     0x220b
+
+/* unnamed registers: */
+#define REG_0c02                            0x0c02
+#define REG_0c04                            0x0c04
+#define REG_0c06                            0x0c06
+#define REG_2010                            0x2010
+
+
+/*
+ * Format for 2nd dword in CP_DRAW_INDX and friends:
+ */
+
+/* see VGT_PRIMITIVE_TYPE.PRIM_TYPE? */
+enum pc_di_primtype {
+       DI_PT_NONE = 0,
+       DI_PT_POINTLIST = 1,
+       DI_PT_LINELIST = 2,
+       DI_PT_LINESTRIP = 3,
+       DI_PT_TRILIST = 4,
+       DI_PT_TRIFAN = 5,
+       DI_PT_TRISTRIP = 6,
+       DI_PT_RECTLIST = 8,
+       DI_PT_QUADLIST = 13,
+       DI_PT_QUADSTRIP = 14,
+       DI_PT_POLYGON = 15,
+       DI_PT_2D_COPY_RECT_LIST_V0 = 16,
+       DI_PT_2D_COPY_RECT_LIST_V1 = 17,
+       DI_PT_2D_COPY_RECT_LIST_V2 = 18,
+       DI_PT_2D_COPY_RECT_LIST_V3 = 19,
+       DI_PT_2D_FILL_RECT_LIST = 20,
+       DI_PT_2D_LINE_STRIP = 21,
+       DI_PT_2D_TRI_STRIP = 22,
+};
+
+/* see VGT:VGT_DRAW_INITIATOR.SOURCE_SELECT? */
+enum pc_di_src_sel {
+       DI_SRC_SEL_DMA = 0,
+       DI_SRC_SEL_IMMEDIATE = 1,
+       DI_SRC_SEL_AUTO_INDEX = 2,
+       DI_SRC_SEL_RESERVED = 3,
+};
+
+/* see VGT_DMA_INDEX_TYPE.INDEX_TYPE? */
+enum pc_di_index_size {
+       INDEX_SIZE_IGN    = 0,
+       INDEX_SIZE_16_BIT = 0,
+       INDEX_SIZE_32_BIT = 1,
+       INDEX_SIZE_8_BIT  = 2,
+       INDEX_SIZE_INVALID
+};
+
+enum pc_di_vis_cull_mode {
+       IGNORE_VISIBILITY = 0,
+};
+
+/*
+ * Assemble the draw dword from its fields:
+ *   prim_type at bit 0, source_select at bit 6, vis_cull_mode at bit 9,
+ *   the low bit of index_size at bit 11 and its remaining bits at bit 13.
+ * Bit 14 is always set.
+ */
+static inline uint32_t DRAW(enum pc_di_primtype prim_type,
+               enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
+               enum pc_di_vis_cull_mode vis_cull_mode)
+{
+       uint32_t dword = 1 << 14;
+
+       dword |= prim_type << 0;
+       dword |= source_select << 6;
+       dword |= vis_cull_mode << 9;
+       /* index_size is split across two separate bit positions */
+       dword |= (index_size & 1) << 11;
+       dword |= (index_size >> 1) << 13;
+
+       return dword;
+}
+
+
+/*
+ * Bits for VGT_CURRENT_BIN_ID_MIN/MAX:
+ */
+
+#define VGT_CURRENT_BIN_ID_MIN_COLUMN(val)       (((val) & 0x7) << 0)
+#define VGT_CURRENT_BIN_ID_MIN_ROW(val)          (((val) & 0x7) << 3)
+#define VGT_CURRENT_BIN_ID_MIN_GUARD_BAND(val)   (((val) & 0x7) << 6)
+
+
+/*
+ * Bits for PA_CL_VTE_CNTL:
+ */
+
+#define PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA         0x00000001
+#define PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA        0x00000002
+#define PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA         0x00000004
+#define PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA        0x00000008
+#define PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA         0x00000010
+#define PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA        0x00000020
+#define PA_CL_VTE_CNTL_VTX_XY_FMT                0x00000100
+#define PA_CL_VTE_CNTL_VTX_Z_FMT                 0x00000200
+#define PA_CL_VTE_CNTL_VTX_W0_FMT                0x00000400
+#define PA_CL_VTE_CNTL_PERFCOUNTER_REF           0x00000800
+
+
+/*
+ * Bits for PA_CL_CLIP_CNTL:
+ */
+
+#define PA_CL_CLIP_CNTL_CLIP_DISABLE             0x00010000
+#define PA_CL_CLIP_CNTL_BOUNDARY_EDGE_FLAG_ENA   0x00040000
+/* values for the DX_CLIP_SPACE_DEF field */
+enum dx_clip_space {
+       DXCLIP_OPENGL = 0,
+       DXCLIP_DIRECTX = 1,
+};
+/* place the clip-space selection at bit 19 */
+static inline uint32_t PA_CL_CLIP_CNTL_DX_CLIP_SPACE_DEF(enum dx_clip_space val)
+{
+       uint32_t field = val << 19;
+       return field;
+}
+#define PA_CL_CLIP_CNTL_DIS_CLIP_ERR_DETECT      0x00100000
+#define PA_CL_CLIP_CNTL_VTX_KILL_OR              0x00200000
+#define PA_CL_CLIP_CNTL_XY_NAN_RETAIN            0x00400000
+#define PA_CL_CLIP_CNTL_Z_NAN_RETAIN             0x00800000
+#define PA_CL_CLIP_CNTL_W_NAN_RETAIN             0x01000000
+
+
+/*
+ * Bits for PA_SU_SC_MODE_CNTL:
+ */
+
+#define PA_SU_SC_MODE_CNTL_CULL_FRONT            0x00000001
+#define PA_SU_SC_MODE_CNTL_CULL_BACK             0x00000002
+#define PA_SU_SC_MODE_CNTL_FACE                  0x00000004
+/* values for the POLYMODE field */
+enum pa_su_sc_polymode {
+       POLY_DISABLED     = 0,
+       POLY_DUALMODE     = 1,
+};
+/* place the polymode value at bit 3 */
+static inline uint32_t PA_SU_SC_MODE_CNTL_POLYMODE(enum pa_su_sc_polymode val)
+{
+       uint32_t field = val << 3;
+       return field;
+}
+/* values shared by the front and back PTYPE fields */
+enum pa_su_sc_draw {
+       DRAW_POINTS       = 0,
+       DRAW_LINES        = 1,
+       DRAW_TRIANGLES    = 2,
+};
+/* front-face PTYPE goes at bit 5 */
+static inline uint32_t PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(enum pa_su_sc_draw val)
+{
+       uint32_t field = val << 5;
+       return field;
+}
+/* back-face PTYPE goes at bit 8 */
+static inline uint32_t PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(enum pa_su_sc_draw val)
+{
+       uint32_t field = val << 8;
+       return field;
+}
+#define PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE        0x00000800
+#define PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE         0x00001000
+#define PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE         0x00002000
+#define PA_SU_SC_MODE_CNTL_MSAA_ENABLE                     0x00008000
+#define PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE        0x00010000
+#define PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE             0x00040000
+#define PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST              0x00080000
+#define PA_SU_SC_MODE_CNTL_PERSP_CORR_DIS                  0x00100000
+#define PA_SU_SC_MODE_CNTL_MULTI_PRIM_IB_ENA               0x00200000
+#define PA_SU_SC_MODE_CNTL_QUAD_ORDER_ENABLE               0x00800000
+#define PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_ALL_TRI            0x02000000
+#define PA_SU_SC_MODE_CNTL_WAIT_RB_IDLE_FIRST_TRI_NEW_STATE 0x04000000
+#define PA_SU_SC_MODE_CNTL_CLAMPED_FACENESS                0x10000000
+#define PA_SU_SC_MODE_CNTL_ZERO_AREA_FACENESS              0x20000000
+#define PA_SU_SC_MODE_CNTL_FACE_KILL_ENABLE                0x40000000
+#define PA_SU_SC_MODE_CNTL_FACE_WRITE_ENABLE               0x80000000
+
+
+/*
+ * Bits for PA_SC_LINE_STIPPLE:
+ */
+
+#define PA_SC_LINE_STIPPLE_LINE_PATTERN(val)     ((val) & 0x0000ffff)
+#define PA_SC_LINE_STIPPLE_REPEAT_COUNT(val)     (((val) << 16) & 0x00ff0000)
+/* values for the PATTERN_BIT_ORDER field */
+enum pa_sc_pattern_bit_order {
+       PATTERN_BIT_ORDER_LITTLE = 0,
+       PATTERN_BIT_ORDER_BIG    = 1,
+};
+/* place the bit-order selection at bit 28 */
+static inline uint32_t PA_SC_LINE_STIPPLE_PATTERN_BIT_ORDER(enum pa_sc_pattern_bit_order val)
+{
+       uint32_t field = val << 28;
+       return field;
+}
+/* values for the AUTO_RESET_CNTL field */
+enum pa_sc_auto_reset_cntl {
+       AUTO_RESET_NEVER          = 0,
+       AUTO_RESET_EACH_PRIMITIVE = 1,
+       AUTO_RESET_EACH_PACKET    = 2,
+};
+/* place the auto-reset selection at bit 29 */
+static inline uint32_t PA_SC_LINE_STIPPLE_AUTO_RESET_CNTL(enum pa_sc_auto_reset_cntl val)
+{
+       uint32_t field = val << 29;
+       return field;
+}
+
+
+/*
+ * Bits for PA_SC_LINE_CNTL:
+ */
+
+#define PA_SC_LINE_CNTL_BRES_CNTL_MASK(val)      ((val) & 0x000000ff)
+#define PA_SC_LINE_CNTL_USE_BRES_CNTL            0x00000100
+#define PA_SC_LINE_CNTL_EXPAND_LINE_WIDTH        0x00000200
+#define PA_SC_LINE_CNTL_LAST_PIXEL               0x00000400
+
+
+/*
+ * Bits for PA_SU_VTX_CNTL:
+ */
+
+/* values for the PIX_CENTER field */
+enum pa_pixcenter {
+       PIXCENTER_D3D = 0,
+       PIXCENTER_OGL = 1,
+};
+/* PIX_CENTER sits at bit 0, so the value is used unshifted */
+static inline uint32_t PA_SU_VTX_CNTL_PIX_CENTER(enum pa_pixcenter val)
+{
+       uint32_t field = val;
+       return field;
+}
+
+/* values for the ROUND_MODE field */
+enum pa_roundmode {
+       TRUNCATE = 0,
+       ROUND = 1,
+       ROUNDTOEVEN = 2,
+       ROUNDTOODD = 3,
+};
+/* place the rounding mode at bit 1 */
+static inline uint32_t PA_SU_VTX_CNTL_ROUND_MODE_MASK(enum pa_roundmode val)
+{
+       uint32_t field = val << 1;
+       return field;
+}
+
+/* values for the QUANT_MODE field */
+enum pa_quantmode {
+       ONE_SIXTEENTH = 0,
+       ONE_EIGHTH = 1,
+       ONE_QUARTER = 2,
+       ONE_HALF = 3,
+       ONE = 4,
+};
+/* place the quantization mode at bit 3 */
+static inline uint32_t PA_SU_VTX_CNTL_QUANT_MODE(enum pa_quantmode val)
+{
+       uint32_t field = val << 3;
+       return field;
+}
+
+
+/*
+ * Bits for PA_SU_POINT_SIZE:
+ */
+
+/*
+ * HEIGHT is the low 16 bits, WIDTH the high 16 bits.  The value must be
+ * masked to 16 bits *before* it is shifted up: masking with 0xffff after
+ * a shift by 16 always yields 0.
+ */
+#define PA_SU_POINT_SIZE_HEIGHT(val)        (f2d12_4(val) & 0xffff)
+#define PA_SU_POINT_SIZE_WIDTH(val)         ((f2d12_4(val) & 0xffff) << 16)
+
+
+/*
+ * Bits for PA_SU_POINT_MINMAX:
+ */
+
+/*
+ * MIN_SIZE is the low 16 bits, MAX_SIZE the high 16 bits.  The value must
+ * be masked to 16 bits *before* it is shifted up: masking with 0xffff
+ * after a shift by 16 always yields 0.
+ */
+#define PA_SU_POINT_MINMAX_MIN_SIZE(val)    (f2d12_4(val) & 0xffff)
+#define PA_SU_POINT_MINMAX_MAX_SIZE(val)    ((f2d12_4(val) & 0xffff) << 16)
+
+
+/*
+ * Bits for PA_SU_LINE_CNTL:
+ */
+
+#define PA_SU_LINE_CNTL_WIDTH(val)          (f2d12_4(val) & 0xffff)
+
+
+/*
+ * Bits for PA_SC_WINDOW_OFFSET:
+ * (seems to be same as r600)
+ */
+#define PA_SC_WINDOW_OFFSET_X(val)          ((val) & 0x7fff)
+#define PA_SC_WINDOW_OFFSET_Y(val)          (((val) & 0x7fff) << 16)
+
+#define PA_SC_WINDOW_OFFSET_DISABLE         0x80000000
+
+
+/*
+ * Bits for SQ_CONTEXT_MISC:
+ */
+
+#define SQ_CONTEXT_MISC_INST_PRED_OPTIMIZE  0x00000001
+#define SQ_CONTEXT_MISC_SC_OUTPUT_SCREEN_XY 0x00000002
+/* values for the SC_SAMPLE_CNTL field */
+enum sq_sample_cntl {
+       CENTROIDS_ONLY = 0,
+       CENTERS_ONLY = 1,
+       CENTROIDS_AND_CENTERS = 2,
+};
+/* two-bit field placed at bit 2 */
+static inline uint32_t SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(enum sq_sample_cntl val)
+{
+       uint32_t field = val & 0x3;
+       return field << 2;
+}
+#define SQ_CONTEXT_MISC_PARAM_GEN_POS(val)  (((val) & 0xff) << 8)
+#define SQ_CONTEXT_MISC_PERFCOUNTER_REF     0x00010000
+#define SQ_CONTEXT_MISC_YEILD_OPTIMIZE      0x00020000
+#define SQ_CONTEXT_MISC_TX_CACHE_SEL        0x00040000
+
+
+/*
+ * Bits for SQ_PROGRAM_CNTL:
+ */
+/* note: only 0x3f worth of valid register values, but high bit is
+ * set to indicate '0 registers used':
+ */
+#define SQ_PROGRAM_CNTL_VS_REGS(val)        ((val) & 0xff)
+#define SQ_PROGRAM_CNTL_PS_REGS(val)        (((val) & 0xff) << 8)
+#define SQ_PROGRAM_CNTL_VS_RESOURCE         0x00010000
+#define SQ_PROGRAM_CNTL_PS_RESOURCE         0x00020000
+#define SQ_PROGRAM_CNTL_PARAM_GEN           0x00040000
+#define SQ_PROGRAM_CNTL_GEN_INDEX_PIX       0x00080000
+#define SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(val) (((val) & 0xf) << 20)
+#define SQ_PROGRAM_CNTL_VS_EXPORT_MODE(val)  (((val) & 0x7) << 24)
+/* values accepted by SQ_PROGRAM_CNTL_PS_EXPORT_MODE() */
+enum sq_ps_vtx_mode {
+       POSITION_1_VECTOR              = 0,
+       POSITION_2_VECTORS_UNUSED      = 1,
+       POSITION_2_VECTORS_SPRITE      = 2,
+       POSITION_2_VECTORS_EDGE        = 3,
+       POSITION_2_VECTORS_KILL        = 4,
+       POSITION_2_VECTORS_SPRITE_KILL = 5,
+       POSITION_2_VECTORS_EDGE_KILL   = 6,
+       MULTIPASS                      = 7,
+};
+/* place the export mode at bit 27 */
+static inline uint32_t SQ_PROGRAM_CNTL_PS_EXPORT_MODE(enum sq_ps_vtx_mode val)
+{
+       uint32_t field = val << 27;
+       return field;
+}
+#define SQ_PROGRAM_CNTL_GEN_INDEX_VTX  0x80000000
+
+
+/*
+ * Bits for SQ_VS_CONST
+ */
+
+#define SQ_VS_CONST_BASE(val)          ((val) & 0x1ff)
+#define SQ_VS_CONST_SIZE(val)          (((val) & 0x1ff) << 12)
+
+
+/*
+ * Bits for SQ_PS_CONST
+ */
+
+#define SQ_PS_CONST_BASE(val)          ((val) & 0x1ff)
+#define SQ_PS_CONST_SIZE(val)          (((val) & 0x1ff) << 12)
+
+
+/*
+ * Bits for tex sampler:
+ */
+
+/* dword0 */
+/* texture coordinate clamp modes, shared by the X/Y/Z clamp fields */
+enum sq_tex_clamp {
+       SQ_TEX_WRAP                    = 0,    /* GL_REPEAT */
+       SQ_TEX_MIRROR                  = 1,    /* GL_MIRRORED_REPEAT */
+       SQ_TEX_CLAMP_LAST_TEXEL        = 2,    /* GL_CLAMP_TO_EDGE */
+       /* TODO confirm these: */
+       SQ_TEX_MIRROR_ONCE_LAST_TEXEL  = 3,
+       SQ_TEX_CLAMP_HALF_BORDER       = 4,
+       SQ_TEX_MIRROR_ONCE_HALF_BORDER = 5,
+       SQ_TEX_CLAMP_BORDER            = 6,
+       SQ_TEX_MIRROR_ONCE_BORDER      = 7,
+};
+/* three-bit X clamp field at bit 10 */
+static inline uint32_t SQ_TEX0_CLAMP_X(enum sq_tex_clamp val)
+{
+       uint32_t mode = val & 0x7;
+       return mode << 10;
+}
+/* three-bit Y clamp field at bit 13 */
+static inline uint32_t SQ_TEX0_CLAMP_Y(enum sq_tex_clamp val)
+{
+       uint32_t mode = val & 0x7;
+       return mode << 13;
+}
+/* three-bit Z clamp field at bit 16 */
+static inline uint32_t SQ_TEX0_CLAMP_Z(enum sq_tex_clamp val)
+{
+       uint32_t mode = val & 0x7;
+       return mode << 16;
+}
+#define SQ_TEX0_PITCH(val)             (((val) >> 5) << 22)
+
+/* dword2 */
+#define SQ_TEX2_HEIGHT(val)            (((val) - 1) << 13)
+#define SQ_TEX2_WIDTH(val)             ((val) - 1)
+
+/* dword3 */
+/* swizzle selectors, shared by the X/Y/Z/W swizzle fields */
+enum sq_tex_swiz {
+       SQ_TEX_X    = 0,
+       SQ_TEX_Y    = 1,
+       SQ_TEX_Z    = 2,
+       SQ_TEX_W    = 3,
+       SQ_TEX_ZERO = 4,
+       SQ_TEX_ONE  = 5,
+};
+/* three-bit X swizzle field at bit 1 */
+static inline uint32_t SQ_TEX3_SWIZ_X(enum sq_tex_swiz val)
+{
+       uint32_t sel = val & 0x7;
+       return sel << 1;
+}
+/* three-bit Y swizzle field at bit 4 */
+static inline uint32_t SQ_TEX3_SWIZ_Y(enum sq_tex_swiz val)
+{
+       uint32_t sel = val & 0x7;
+       return sel << 4;
+}
+/* three-bit Z swizzle field at bit 7 */
+static inline uint32_t SQ_TEX3_SWIZ_Z(enum sq_tex_swiz val)
+{
+       uint32_t sel = val & 0x7;
+       return sel << 7;
+}
+/* three-bit W swizzle field at bit 10 */
+static inline uint32_t SQ_TEX3_SWIZ_W(enum sq_tex_swiz val)
+{
+       uint32_t sel = val & 0x7;
+       return sel << 10;
+}
+
+/* filter modes, shared by the XY mag and min filter fields */
+enum sq_tex_filter {
+       SQ_TEX_FILTER_POINT    = 0,
+       SQ_TEX_FILTER_BILINEAR = 1,
+       SQ_TEX_FILTER_BICUBIC  = 2,  /* presumed */
+};
+/* two-bit mag filter field at bit 19 */
+static inline uint32_t SQ_TEX3_XY_MAG_FILTER(enum sq_tex_filter val)
+{
+       uint32_t filter = val & 0x3;
+       return filter << 19;
+}
+/* two-bit min filter field at bit 21 */
+static inline uint32_t SQ_TEX3_XY_MIN_FILTER(enum sq_tex_filter val)
+{
+       uint32_t filter = val & 0x3;
+       return filter << 21;
+}
+
+
+/*
+ * Bits for RB_BLEND_CONTROL:
+ */
+
+/* blend factors accepted by the SRCBLEND/DESTBLEND fields */
+enum rb_blend_op {
+       RB_BLEND_ZERO = 0,
+       RB_BLEND_ONE = 1,
+       RB_BLEND_SRC_COLOR = 4,
+       RB_BLEND_ONE_MINUS_SRC_COLOR = 5,
+       RB_BLEND_SRC_ALPHA = 6,
+       RB_BLEND_ONE_MINUS_SRC_ALPHA = 7,
+       RB_BLEND_DST_COLOR = 8,
+       RB_BLEND_ONE_MINUS_DST_COLOR = 9,
+       RB_BLEND_DST_ALPHA = 10,
+       RB_BLEND_ONE_MINUS_DST_ALPHA = 11,
+       RB_BLEND_CONSTANT_COLOR = 12,
+       RB_BLEND_ONE_MINUS_CONSTANT_COLOR = 13,
+       RB_BLEND_CONSTANT_ALPHA = 14,
+       RB_BLEND_ONE_MINUS_CONSTANT_ALPHA = 15,
+       RB_BLEND_SRC_ALPHA_SATURATE = 16,
+};
+
+/* combine functions accepted by the COMB_FCN fields */
+enum rb_comb_func {
+       COMB_DST_PLUS_SRC = 0,
+       COMB_SRC_MINUS_DST = 1,
+       COMB_MIN_DST_SRC = 2,
+       COMB_MAX_DST_SRC = 3,
+       COMB_DST_MINUS_SRC = 4,
+       COMB_DST_PLUS_SRC_BIAS = 5,
+};
+
+/*
+ * Each accessor below shifts its value to the field's position and then
+ * applies the field's mask, so out-of-range bits are dropped.
+ */
+#define RB_BLENDCONTROL_COLOR_SRCBLEND_MASK      0x0000001f
+static inline uint32_t RB_BLENDCONTROL_COLOR_SRCBLEND(enum rb_blend_op val)
+{
+       uint32_t field = val;   /* field starts at bit 0 */
+       return field & RB_BLENDCONTROL_COLOR_SRCBLEND_MASK;
+}
+#define RB_BLENDCONTROL_COLOR_COMB_FCN_MASK      0x000000e0
+static inline uint32_t RB_BLENDCONTROL_COLOR_COMB_FCN(enum rb_comb_func val)
+{
+       uint32_t field = val << 5;
+       return field & RB_BLENDCONTROL_COLOR_COMB_FCN_MASK;
+}
+#define RB_BLENDCONTROL_COLOR_DESTBLEND_MASK     0x00001f00
+static inline uint32_t RB_BLENDCONTROL_COLOR_DESTBLEND(enum rb_blend_op val)
+{
+       uint32_t field = val << 8;
+       return field & RB_BLENDCONTROL_COLOR_DESTBLEND_MASK;
+}
+#define RB_BLENDCONTROL_ALPHA_SRCBLEND_MASK      0x001f0000
+static inline uint32_t RB_BLENDCONTROL_ALPHA_SRCBLEND(enum rb_blend_op val)
+{
+       uint32_t field = val << 16;
+       return field & RB_BLENDCONTROL_ALPHA_SRCBLEND_MASK;
+}
+#define RB_BLENDCONTROL_ALPHA_COMB_FCN_MASK      0x00e00000
+static inline uint32_t RB_BLENDCONTROL_ALPHA_COMB_FCN(enum rb_comb_func val)
+{
+       uint32_t field = val << 21;
+       return field & RB_BLENDCONTROL_ALPHA_COMB_FCN_MASK;
+}
+#define RB_BLENDCONTROL_ALPHA_DESTBLEND_MASK     0x1f000000
+static inline uint32_t RB_BLENDCONTROL_ALPHA_DESTBLEND(enum rb_blend_op val)
+{
+       uint32_t field = val << 24;
+       return field & RB_BLENDCONTROL_ALPHA_DESTBLEND_MASK;
+}
+#define RB_BLENDCONTROL_BLEND_FORCE_ENABLE       0x20000000
+#define RB_BLENDCONTROL_BLEND_FORCE              0x40000000
+
+
+/*
+ * Bits for RB_COLOR_MASK:
+ */
+#define RB_COLOR_MASK_WRITE_RED                  0x00000001   /* bit 0 */
+#define RB_COLOR_MASK_WRITE_GREEN                0x00000002   /* bit 1 */
+#define RB_COLOR_MASK_WRITE_BLUE                 0x00000004   /* bit 2 */
+#define RB_COLOR_MASK_WRITE_ALPHA                0x00000008   /* bit 3 */
+
+
+/*
+ * Bits for RB_COLOR_INFO:
+ */
+
+#define RB_COLOR_INFO_COLOR_FORMAT_MASK          0x0000000f   /* bits 3:0 */
+static inline uint32_t RB_COLOR_INFO_COLOR_FORMAT(enum rb_colorformatx val)
+{
+       return val & RB_COLOR_INFO_COLOR_FORMAT_MASK;
+}
+
+#define RB_COLOR_INFO_COLOR_ROUND_MODE(val)      (((val) & 0x3) << 4)   /* bits 5:4 */
+#define RB_COLOR_INFO_COLOR_LINEAR               0x00000040             /* bit 6 */
+#define RB_COLOR_INFO_COLOR_ENDIAN(val)          (((val) & 0x3) << 7)   /* bits 8:7 */
+#define RB_COLOR_INFO_COLOR_SWAP(val)            (((val) & 0x3) << 9)   /* bits 10:9 */
+#define RB_COLOR_INFO_COLOR_BASE(val)            (((uint32_t)(val) & 0xfffff) << 12) /* bits 31:12; cast so an int argument >= 0x80000 cannot overflow a signed shift */
+
+
+/*
+ * Bits for RB_MODECONTROL:
+ */
+
+enum rb_edram_mode {
+       EDRAM_NOP = 0,
+       COLOR_DEPTH = 4,
+       DEPTH_ONLY = 5,
+       EDRAM_COPY = 6,
+};
+static inline uint32_t RB_MODECONTROL_EDRAM_MODE(enum rb_edram_mode val)
+{
+       return 0x7 & (uint32_t)val;   /* mode occupies bits 2:0 */
+}
+
+
+/*
+ * Bits for RB_DEPTHCONTROL:
+ */
+
+#define RB_DEPTHCONTROL_STENCIL_ENABLE      0x00000001   /* bit 0 */
+#define RB_DEPTHCONTROL_Z_ENABLE            0x00000002   /* bit 1 */
+#define RB_DEPTHCONTROL_Z_WRITE_ENABLE      0x00000004   /* bit 2 */
+#define RB_DEPTHCONTROL_EARLY_Z_ENABLE      0x00000008   /* bit 3 */
+#define RB_DEPTHCONTROL_ZFUNC_MASK          0x00000070   /* bits 6:4 */
+#define RB_DEPTHCONTROL_ZFUNC(depth_func) \
+       (((depth_func) << 4) & RB_DEPTHCONTROL_ZFUNC_MASK)   /* only the low 3 bits of depth_func survive the mask */
+#define RB_DEPTHCONTROL_BACKFACE_ENABLE     0x00000080   /* bit 7 */
+#define RB_DEPTHCONTROL_STENCILFUNC_MASK    0x00000700   /* bits 10:8 */
+#define RB_DEPTHCONTROL_STENCILFUNC(depth_func) \
+       (((depth_func) << 8) & RB_DEPTHCONTROL_STENCILFUNC_MASK)   /* NOTE(review): parameter is named depth_func but presumably carries the stencil compare func */
+enum rb_stencil_op {      /* 3-bit stencil op codes, packed by the RB_DEPTHCONTROL_STENCIL*() helpers */
+       STENCIL_KEEP = 0,
+       STENCIL_ZERO = 1,
+       STENCIL_REPLACE = 2,
+       STENCIL_INCR_CLAMP = 3,
+       STENCIL_DECR_CLAMP = 4,
+       STENCIL_INVERT = 5,
+       STENCIL_INCR_WRAP = 6,
+       STENCIL_DECR_WRAP = 7     /* largest value; 7 << 29 needs an unsigned shift */
+};
+#define RB_DEPTHCONTROL_STENCILFAIL_MASK         0x00003800   /* bits 13:11 */
+static inline uint32_t RB_DEPTHCONTROL_STENCILFAIL(enum rb_stencil_op val)
+{
+       return ((uint32_t)val << 11) & RB_DEPTHCONTROL_STENCILFAIL_MASK;
+}
+#define RB_DEPTHCONTROL_STENCILZPASS_MASK        0x0001c000   /* bits 16:14 */
+static inline uint32_t RB_DEPTHCONTROL_STENCILZPASS(enum rb_stencil_op val)
+{
+       return ((uint32_t)val << 14) & RB_DEPTHCONTROL_STENCILZPASS_MASK;
+}
+#define RB_DEPTHCONTROL_STENCILZFAIL_MASK        0x000e0000   /* bits 19:17 */
+static inline uint32_t RB_DEPTHCONTROL_STENCILZFAIL(enum rb_stencil_op val)
+{
+       return ((uint32_t)val << 17) & RB_DEPTHCONTROL_STENCILZFAIL_MASK;
+}
+#define RB_DEPTHCONTROL_STENCILFUNC_BF_MASK      0x00700000   /* bits 22:20 */
+#define RB_DEPTHCONTROL_STENCILFUNC_BF(depth_func) \
+       (((depth_func) << 20) & RB_DEPTHCONTROL_STENCILFUNC_BF_MASK)
+#define RB_DEPTHCONTROL_STENCILFAIL_BF_MASK      0x03800000   /* bits 25:23 */
+static inline uint32_t RB_DEPTHCONTROL_STENCILFAIL_BF(enum rb_stencil_op val)
+{
+       return ((uint32_t)val << 23) & RB_DEPTHCONTROL_STENCILFAIL_BF_MASK;
+}
+#define RB_DEPTHCONTROL_STENCILZPASS_BF_MASK     0x1c000000   /* bits 28:26 */
+static inline uint32_t RB_DEPTHCONTROL_STENCILZPASS_BF(enum rb_stencil_op val)
+{
+       return ((uint32_t)val << 26) & RB_DEPTHCONTROL_STENCILZPASS_BF_MASK;
+}
+#define RB_DEPTHCONTROL_STENCILZFAIL_BF_MASK     0xe0000000   /* bits 31:29 */
+static inline uint32_t RB_DEPTHCONTROL_STENCILZFAIL_BF(enum rb_stencil_op val)
+{
+       return ((uint32_t)val << 29) & RB_DEPTHCONTROL_STENCILZFAIL_BF_MASK;   /* shift in uint32_t: val >= 4 would overflow a signed int */
+}
+
+
+/*
+ * Bits for RB_COPY_DEST_INFO:
+ */
+
+enum rb_surface_endian {
+       ENDIAN_NONE = 0,
+       ENDIAN_8IN16 = 1,
+       ENDIAN_8IN32 = 2,
+       ENDIAN_16IN32 = 3,
+       ENDIAN_8IN64 = 4,
+       ENDIAN_8IN128 = 5,
+};
+static inline uint32_t RB_COPY_DEST_INFO_DEST_ENDIAN(enum rb_surface_endian val)
+{
+       return (val & 0x7) << 0;
+}
+#define RB_COPY_DEST_INFO_LINEAR       0x00000008
+static inline uint32_t RB_COPY_DEST_INFO_FORMAT(enum rb_colorformatx val)
+{
+       return val << 4;
+}
+#define RB_COPY_DEST_INFO_SWAP(val)    (((val) & 0x3) << 8) /* maybe VGT_DMA_SWAP_MODE? */
+enum rb_dither_mode {
+       DITHER_DISABLE = 0,
+       DITHER_ALWAYS = 1,
+       DITHER_IF_ALPHA_OFF = 2,
+};
+static inline uint32_t RB_COPY_DEST_INFO_DITHER_MODE(enum rb_dither_mode val)
+{
+       return val << 10;
+}
+enum rb_dither_type {
+       DITHER_PIXEL = 0,
+       DITHER_SUBPIXEL = 1,
+};
+static inline uint32_t RB_COPY_DEST_INFO_DITHER_TYPE(enum rb_dither_type val)
+{
+       return val << 12;
+}
+#define RB_COPY_DEST_INFO_WRITE_RED    0x00004000
+#define RB_COPY_DEST_INFO_WRITE_GREEN  0x00008000
+#define RB_COPY_DEST_INFO_WRITE_BLUE   0x00010000
+#define RB_COPY_DEST_INFO_WRITE_ALPHA  0x00020000
+
+
+/*
+ * Bits for RB_COPY_DEST_OFFSET:
+ */
+
+#define RB_COPY_DEST_OFFSET_X(val)     ((val) & 0x1fff)           /* bits 12:0; 13-bit mask so X cannot spill into Y's bit 13 */
+#define RB_COPY_DEST_OFFSET_Y(val)     (((val) & 0x1fff) << 13)   /* bits 25:13 */
+
+
+/*
+ * Bits for RB_COPY_CONTROL:
+ */
+
+#define RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE  0x00000008L              /* bit 3 */
+#define RB_COPY_CONTROL_CLEAR_MASK(val)     (((val) & 0xf) << 4)     /* bits 7:4; argument parenthesized so expressions like (a | b) mask correctly */
+
+
+/*
+ * Bits for RB_COLORCONTROL:
+ */
+
+#define RB_COLORCONTROL_ALPHA_FUNC(val)          ((val) & 0x7)          /* bits 2:0 */
+#define RB_COLORCONTROL_ALPHA_TEST_ENABLE        0x00000008             /* bit 3 */
+#define RB_COLORCONTROL_ALPHA_TO_MASK_ENABLE     0x00000010             /* bit 4 */
+#define RB_COLORCONTROL_BLEND_DISABLE            0x00000020             /* bit 5 */
+#define RB_COLORCONTROL_FOG_ENABLE               0x00000040             /* bit 6 */
+#define RB_COLORCONTROL_VS_EXPORTS_FOG           0x00000080             /* bit 7 */
+#define RB_COLORCONTROL_ROP_CODE(val)            (((val) & 0xf) << 8)   /* bits 11:8 */
+static inline uint32_t RB_COLORCONTROL_DITHER_MODE(enum rb_dither_mode val)
+{
+       return (val & 0x3) << 12;              /* bits 13:12 */
+}
+static inline uint32_t RB_COLORCONTROL_DITHER_TYPE(enum rb_dither_type val)
+{
+       return (val & 0x3) << 14;              /* bits 15:14 */
+}
+#define RB_COLORCONTROL_PIXEL_FOG                0x00010000             /* bit 16 */
+#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET0(val) (((uint32_t)(val) & 0x3) << 24)   /* bits 25:24 */
+#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET1(val) (((uint32_t)(val) & 0x3) << 26)   /* bits 27:26 */
+#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET2(val) (((uint32_t)(val) & 0x3) << 28)   /* bits 29:28 */
+#define RB_COLORCONTROL_ALPHA_TO_MASK_OFFSET3(val) (((uint32_t)(val) & 0x3) << 30)   /* bits 31:30; unsigned shift, since 2 << 30 overflows a signed int */
+
+
+/*
+ * Bits for RB_DEPTH_INFO:
+ */
+
+enum rb_depth_format {
+       DEPTHX_16 = 0,
+       DEPTHX_24_8 = 1,
+       DEPTHX_INVALID,           /* implicitly 2; not representable in the 1-bit field */
+};
+
+static inline uint32_t RB_DEPTH_INFO_DEPTH_FORMAT(enum rb_depth_format val)
+{
+       return 0x1 & (uint32_t)val;   /* bit 0 only, so DEPTHX_INVALID encodes as 0 */
+}
+#define RB_DEPTH_INFO_DEPTH_BASE(val)            ((val) << 12)   /* starts at bit 12; the value is not masked */
+
+
+/*
+ * Bits for RB_STENCILREFMASK (RB_STENCILREFMASK_BF is same):
+ */
+
+#define RB_STENCILREFMASK_STENCILREF_MASK        0x000000ff   /* bits 7:0 */
+#define RB_STENCILREFMASK_STENCILREF(val)        ((val) & RB_STENCILREFMASK_STENCILREF_MASK)
+#define RB_STENCILREFMASK_STENCILMASK_MASK       0x0000ff00   /* bits 15:8 */
+#define RB_STENCILREFMASK_STENCILMASK(val)       (((val) << 8) & RB_STENCILREFMASK_STENCILMASK_MASK)
+#define RB_STENCILREFMASK_STENCILWRITEMASK_MASK  0x00ff0000   /* bits 23:16 */
+#define RB_STENCILREFMASK_STENCILWRITEMASK(val)  (((val) << 16) & RB_STENCILREFMASK_STENCILWRITEMASK_MASK)
+
+
+/*
+ * Bits for RB_BC_CONTROL:
+ */
+
+#define RB_BC_CONTROL_ACCUM_LINEAR_MODE_ENABLE            0x00000001
+#define RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(val)           (((val) & 0x3) << 1)    /* bits 2:1 */
+#define RB_BC_CONTROL_DISABLE_EDRAM_CAM                   0x00000008
+#define RB_BC_CONTROL_DISABLE_EZ_FAST_CONTEXT_SWITCH      0x00000010
+#define RB_BC_CONTROL_DISABLE_EZ_NULL_ZCMD_DROP           0x00000020
+#define RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP           0x00000040
+#define RB_BC_CONTROL_ENABLE_AZ_THROTTLE                  0x00000080
+#define RB_BC_CONTROL_AZ_THROTTLE_COUNT(val)              (((val) & 0x1f) << 8)   /* bits 12:8 */
+#define RB_BC_CONTROL_ENABLE_CRC_UPDATE                   0x00004000
+#define RB_BC_CONTROL_CRC_MODE                            0x00008000
+#define RB_BC_CONTROL_DISABLE_SAMPLE_COUNTERS             0x00010000
+#define RB_BC_CONTROL_DISABLE_ACCUM                       0x00020000
+#define RB_BC_CONTROL_ACCUM_ALLOC_MASK(val)               (((val) & 0xf) << 18)   /* bits 21:18 */
+#define RB_BC_CONTROL_LINEAR_PERFORMANCE_ENABLE           0x00400000
+#define RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(val)          (((val) & 0xf) << 23)   /* bits 26:23 */
+#define RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(val)      (((val) & 0x3) << 27)   /* bits 28:27 */
+#define RB_BC_CONTROL_MEM_EXPORT_LINEAR_MODE_ENABLE       0x20000000
+#define RB_BC_CONTROL_CRC_SYSTEM                          0x40000000
+#define RB_BC_CONTROL_RESERVED6                           0x80000000   /* bit 31 */
+
+
+/*
+ * Bits for RBBM_PM_OVERRIDE1:
+ */
+
+#define RBBM_PM_OVERRIDE1_RBBM_AHBCLK_PM_OVERRIDE         0x00000001   /* bit 0; one single-bit flag per define, ascending */
+#define RBBM_PM_OVERRIDE1_SC_REG_SCLK_PM_OVERRIDE         0x00000002
+#define RBBM_PM_OVERRIDE1_SC_SCLK_PM_OVERRIDE             0x00000004
+#define RBBM_PM_OVERRIDE1_SP_TOP_SCLK_PM_OVERRIDE         0x00000008
+#define RBBM_PM_OVERRIDE1_SP_V0_SCLK_PM_OVERRIDE          0x00000010
+#define RBBM_PM_OVERRIDE1_SQ_REG_SCLK_PM_OVERRIDE         0x00000020
+#define RBBM_PM_OVERRIDE1_SQ_REG_FIFOS_SCLK_PM_OVERRIDE   0x00000040
+#define RBBM_PM_OVERRIDE1_SQ_CONST_MEM_SCLK_PM_OVERRIDE   0x00000080
+#define RBBM_PM_OVERRIDE1_SQ_SQ_SCLK_PM_OVERRIDE          0x00000100
+#define RBBM_PM_OVERRIDE1_SX_SCLK_PM_OVERRIDE             0x00000200
+#define RBBM_PM_OVERRIDE1_SX_REG_SCLK_PM_OVERRIDE         0x00000400
+#define RBBM_PM_OVERRIDE1_TCM_TCO_SCLK_PM_OVERRIDE        0x00000800
+#define RBBM_PM_OVERRIDE1_TCM_TCM_SCLK_PM_OVERRIDE        0x00001000
+#define RBBM_PM_OVERRIDE1_TCM_TCD_SCLK_PM_OVERRIDE        0x00002000
+#define RBBM_PM_OVERRIDE1_TCM_REG_SCLK_PM_OVERRIDE        0x00004000
+#define RBBM_PM_OVERRIDE1_TPC_TPC_SCLK_PM_OVERRIDE        0x00008000
+#define RBBM_PM_OVERRIDE1_TPC_REG_SCLK_PM_OVERRIDE        0x00010000
+#define RBBM_PM_OVERRIDE1_TCF_TCA_SCLK_PM_OVERRIDE        0x00020000
+#define RBBM_PM_OVERRIDE1_TCF_TCB_SCLK_PM_OVERRIDE        0x00040000
+#define RBBM_PM_OVERRIDE1_TCF_TCB_READ_SCLK_PM_OVERRIDE   0x00080000
+#define RBBM_PM_OVERRIDE1_TP_TP_SCLK_PM_OVERRIDE          0x00100000
+#define RBBM_PM_OVERRIDE1_TP_REG_SCLK_PM_OVERRIDE         0x00200000
+#define RBBM_PM_OVERRIDE1_CP_G_SCLK_PM_OVERRIDE           0x00400000
+#define RBBM_PM_OVERRIDE1_CP_REG_SCLK_PM_OVERRIDE         0x00800000
+#define RBBM_PM_OVERRIDE1_CP_G_REG_SCLK_PM_OVERRIDE       0x01000000
+#define RBBM_PM_OVERRIDE1_SPI_SCLK_PM_OVERRIDE            0x02000000
+#define RBBM_PM_OVERRIDE1_RB_REG_SCLK_PM_OVERRIDE         0x04000000
+#define RBBM_PM_OVERRIDE1_RB_SCLK_PM_OVERRIDE             0x08000000
+#define RBBM_PM_OVERRIDE1_MH_MH_SCLK_PM_OVERRIDE          0x10000000
+#define RBBM_PM_OVERRIDE1_MH_REG_SCLK_PM_OVERRIDE         0x20000000
+#define RBBM_PM_OVERRIDE1_MH_MMU_SCLK_PM_OVERRIDE         0x40000000
+#define RBBM_PM_OVERRIDE1_MH_TCROQ_SCLK_PM_OVERRIDE       0x80000000   /* bit 31; all 32 bits of this register are named */
+
+
+/*
+ * Bits for RBBM_PM_OVERRIDE2:
+ */
+
+#define RBBM_PM_OVERRIDE2_PA_REG_SCLK_PM_OVERRIDE         0x00000001   /* bit 0; one single-bit flag per define, ascending */
+#define RBBM_PM_OVERRIDE2_PA_PA_SCLK_PM_OVERRIDE          0x00000002
+#define RBBM_PM_OVERRIDE2_PA_AG_SCLK_PM_OVERRIDE          0x00000004
+#define RBBM_PM_OVERRIDE2_VGT_REG_SCLK_PM_OVERRIDE        0x00000008
+#define RBBM_PM_OVERRIDE2_VGT_FIFOS_SCLK_PM_OVERRIDE      0x00000010
+#define RBBM_PM_OVERRIDE2_VGT_VGT_SCLK_PM_OVERRIDE        0x00000020
+#define RBBM_PM_OVERRIDE2_DEBUG_PERF_SCLK_PM_OVERRIDE     0x00000040
+#define RBBM_PM_OVERRIDE2_PERM_SCLK_PM_OVERRIDE           0x00000080
+#define RBBM_PM_OVERRIDE2_GC_GA_GMEM0_PM_OVERRIDE         0x00000100
+#define RBBM_PM_OVERRIDE2_GC_GA_GMEM1_PM_OVERRIDE         0x00000200
+#define RBBM_PM_OVERRIDE2_GC_GA_GMEM2_PM_OVERRIDE         0x00000400
+#define RBBM_PM_OVERRIDE2_GC_GA_GMEM3_PM_OVERRIDE         0x00000800   /* bit 11; highest bit defined here */
+
+
+/*
+ * Bits for TC_CNTL_STATUS:
+ */
+
+#define TC_CNTL_STATUS_L2_INVALIDATE             0x00000001   /* bit 0; fd_clear() writes this to REG_TC_CNTL_STATUS */
+
+
+#endif /* FREEDRENO_A2XX_REG_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_blend.c b/src/gallium/drivers/freedreno/freedreno_blend.c
new file mode 100644 (file)
index 0000000..c965a73
--- /dev/null
@@ -0,0 +1,175 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "freedreno_blend.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+static enum rb_blend_op
+blend_factor(unsigned factor)   /* PIPE_BLENDFACTOR_* -> RB_BLEND_* */
+{
+       switch (factor) {
+       case 0:                 /* a literal zero is accepted and mapped like ZERO */
+       case PIPE_BLENDFACTOR_ZERO:
+               return RB_BLEND_ZERO;
+       case PIPE_BLENDFACTOR_ONE:
+               return RB_BLEND_ONE;
+       case PIPE_BLENDFACTOR_SRC_COLOR:
+               return RB_BLEND_SRC_COLOR;
+       case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+               return RB_BLEND_ONE_MINUS_SRC_COLOR;
+       case PIPE_BLENDFACTOR_SRC_ALPHA:
+               return RB_BLEND_SRC_ALPHA;
+       case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+               return RB_BLEND_ONE_MINUS_SRC_ALPHA;
+       case PIPE_BLENDFACTOR_DST_COLOR:
+               return RB_BLEND_DST_COLOR;
+       case PIPE_BLENDFACTOR_INV_DST_COLOR:
+               return RB_BLEND_ONE_MINUS_DST_COLOR;
+       case PIPE_BLENDFACTOR_DST_ALPHA:
+               return RB_BLEND_DST_ALPHA;
+       case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+               return RB_BLEND_ONE_MINUS_DST_ALPHA;
+       case PIPE_BLENDFACTOR_CONST_COLOR:
+               return RB_BLEND_CONSTANT_COLOR;
+       case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+               return RB_BLEND_ONE_MINUS_CONSTANT_COLOR;
+       case PIPE_BLENDFACTOR_CONST_ALPHA:
+               return RB_BLEND_CONSTANT_ALPHA;
+       case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+               return RB_BLEND_ONE_MINUS_CONSTANT_ALPHA;
+       case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+               return RB_BLEND_SRC_ALPHA_SATURATE;
+       case PIPE_BLENDFACTOR_SRC1_COLOR:
+       case PIPE_BLENDFACTOR_SRC1_ALPHA:
+       case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+       case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+               /* SRC1 factors: probably not supported, so reported as invalid */
+       default:
+               DBG("invalid blend factor: %x", factor);
+               return RB_BLEND_ZERO;   /* == 0, the fallback value */
+       }
+}
+
+static enum rb_comb_func
+blend_func(unsigned func)       /* PIPE_BLEND_* -> COMB_* */
+{
+       switch (func) {
+       case PIPE_BLEND_ADD:
+               return COMB_DST_PLUS_SRC;
+       case PIPE_BLEND_SUBTRACT:
+               return COMB_SRC_MINUS_DST;
+       case PIPE_BLEND_REVERSE_SUBTRACT:
+               return COMB_DST_MINUS_SRC;
+       case PIPE_BLEND_MIN:
+               return COMB_MIN_DST_SRC;
+       case PIPE_BLEND_MAX:
+               return COMB_MAX_DST_SRC;
+       default:
+               DBG("invalid blend func: %x", func);
+               return COMB_DST_PLUS_SRC;   /* == 0, the fallback value */
+       }
+}
+
+static void *
+fd_blend_state_create(struct pipe_context *pctx,
+               const struct pipe_blend_state *cso)
+{
+       const struct pipe_rt_blend_state *rt0 = &cso->rt[0];
+       struct fd_blend_stateobj *blend;
+       uint32_t colorcontrol, colormask;
+
+       /* reject the features that are not handled here (returns NULL): */
+       if (cso->logicop_enable) {
+               DBG("Unsupported! logicop");
+               return NULL;
+       }
+       if (cso->independent_blend_enable) {
+               DBG("Unsupported! independent blend state");
+               return NULL;
+       }
+
+       blend = CALLOC_STRUCT(fd_blend_stateobj);
+       if (!blend)
+               return NULL;
+
+       /* keep a copy of the gallium state next to the packed registers: */
+       blend->base = *cso;
+
+       /* only render target 0 is consulted: */
+       blend->rb_blendcontrol =
+               RB_BLENDCONTROL_COLOR_SRCBLEND(blend_factor(rt0->rgb_src_factor)) |
+               RB_BLENDCONTROL_COLOR_COMB_FCN(blend_func(rt0->rgb_func)) |
+               RB_BLENDCONTROL_COLOR_DESTBLEND(blend_factor(rt0->rgb_dst_factor)) |
+               RB_BLENDCONTROL_ALPHA_SRCBLEND(blend_factor(rt0->alpha_src_factor)) |
+               RB_BLENDCONTROL_ALPHA_COMB_FCN(blend_func(rt0->alpha_func)) |
+               RB_BLENDCONTROL_ALPHA_DESTBLEND(blend_factor(rt0->alpha_dst_factor));
+
+       /* translate the PIPE_MASK_* channel bits one by one: */
+       colormask  = (rt0->colormask & PIPE_MASK_R) ? RB_COLOR_MASK_WRITE_RED : 0;
+       colormask |= (rt0->colormask & PIPE_MASK_G) ? RB_COLOR_MASK_WRITE_GREEN : 0;
+       colormask |= (rt0->colormask & PIPE_MASK_B) ? RB_COLOR_MASK_WRITE_BLUE : 0;
+       colormask |= (rt0->colormask & PIPE_MASK_A) ? RB_COLOR_MASK_WRITE_ALPHA : 0;
+       blend->rb_colormask = colormask;
+
+       colorcontrol = RB_COLORCONTROL_ROP_CODE(12);
+       if (!rt0->blend_enable)
+               colorcontrol |= RB_COLORCONTROL_BLEND_DISABLE;
+       if (cso->dither)
+               colorcontrol |= RB_COLORCONTROL_DITHER_MODE(DITHER_ALWAYS);
+       blend->rb_colorcontrol = colorcontrol;
+
+       return blend;
+}
+
+static void
+fd_blend_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+       fdctx->dirty |= FD_DIRTY_BLEND;   /* flag the blend state as changed */
+       fdctx->blend = hwcso;             /* remember the bound state object */
+}
+
+static void
+fd_blend_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       FREE(hwcso);   /* presumably the object from fd_blend_state_create(); pctx is not used */
+}
+
+void
+fd_blend_init(struct pipe_context *pctx)   /* install the blend state hooks on pctx */
+{
+       pctx->delete_blend_state = fd_blend_state_delete;
+       pctx->bind_blend_state = fd_blend_state_bind;
+       pctx->create_blend_state = fd_blend_state_create;
+}
+
diff --git a/src/gallium/drivers/freedreno/freedreno_blend.h b/src/gallium/drivers/freedreno/freedreno_blend.h
new file mode 100644 (file)
index 0000000..70950df
--- /dev/null
@@ -0,0 +1,44 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_BLEND_H_
+#define FREEDRENO_BLEND_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+struct fd_blend_stateobj {        /* blend CSO as built by fd_blend_state_create() */
+       struct pipe_blend_state base;   /* copy of the gallium state passed to create */
+       uint32_t rb_blendcontrol;       /* packed RB_BLENDCONTROL_* fields for render target 0 */
+       uint32_t rb_colorcontrol;   /* must be OR'd w/ zsa->rb_colorcontrol */
+       uint32_t rb_colormask;          /* RB_COLOR_MASK_WRITE_* bits */
+};
+
+void fd_blend_init(struct pipe_context *pctx);   /* installs the create/bind/delete blend hooks */
+
+#endif /* FREEDRENO_BLEND_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_clear.c b/src/gallium/drivers/freedreno/freedreno_clear.c
new file mode 100644 (file)
index 0000000..04d85ad
--- /dev/null
@@ -0,0 +1,224 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+
+#include "freedreno_clear.h"
+#include "freedreno_context.h"
+#include "freedreno_resource.h"
+#include "freedreno_state.h"
+#include "freedreno_program.h"
+#include "freedreno_zsa.h"
+#include "freedreno_util.h"
+
+static uint32_t
+pack_rgba(enum pipe_format format, const float *rgba)
+{
+       union util_color packed;   /* filled in by util_pack_color() */
+       util_pack_color(rgba, format, &packed);
+       return packed.ui;          /* the packed value as one 32-bit word */
+}
+
+static void
+fd_clear(struct pipe_context *pctx, unsigned buffers,   /* pipe_context::clear hook: queues a RECTLIST draw with the solid program */
+               const union pipe_color_union *color, double depth, unsigned stencil)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct pipe_framebuffer_state *fb = &ctx->framebuffer.base;
+       uint32_t reg, colr = 0;
+
+       ctx->cleared |= buffers;
+       ctx->resolve |= buffers;
+       ctx->needs_flush = true;
+
+       if (buffers & PIPE_CLEAR_COLOR)   /* NOTE(review): no fb->nr_cbufs check here, unlike the pack_rgba() call below */
+               fd_resource(fb->cbufs[0]->texture)->dirty = true;
+
+       if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))   /* NOTE(review): fb->zsbuf is not NULL-checked here, but is further down */
+               fd_resource(fb->zsbuf->texture)->dirty = true;
+
+       DBG("depth=%f, stencil=%u", depth, stencil);
+
+       if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
+               colr  = pack_rgba(fb->cbufs[0]->format, color->f);   /* stays 0 when color is not being cleared */
+
+       /* emit generic state now: */
+       fd_state_emit(pctx, ctx->dirty &
+                       (FD_DIRTY_BLEND | FD_DIRTY_VIEWPORT |
+                                       FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR));
+
+       fd_emit_vertex_bufs(ring, 0x9c, (struct fd_vertex_buf[]) {
+                       { .prsc = ctx->solid_vertexbuf, .size = 48 },
+               }, 1);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET));
+       OUT_RING(ring, 0);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL));
+       OUT_RING(ring, 0x0000028f);
+
+       fd_program_emit(ring, &ctx->solid_prog);
+
+       OUT_PKT0(ring, REG_TC_CNTL_STATUS, 1);
+       OUT_RING(ring, TC_CNTL_STATUS_L2_INVALIDATE);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_CLEAR_COLOR));
+       OUT_RING(ring, colr);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_A220_RB_LRZ_VSC_CONTROL));
+       OUT_RING(ring, 0x00000084);   /* written back to 0 after the draw */
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COPY_CONTROL));
+       reg = 0;
+       if (buffers & PIPE_CLEAR_DEPTH) {   /* only PIPE_CLEAR_DEPTH sets these bits; PIPE_CLEAR_STENCIL alone leaves reg at 0 */
+               reg |= RB_COPY_CONTROL_CLEAR_MASK(0xf) |
+                               RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE;
+       }
+       OUT_RING(ring, reg);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_DEPTH_CLEAR));
+       reg = 0;
+       if (fb->zsbuf) {
+               switch (fd_pipe2depth(fb->zsbuf->format)) {   /* no default: any other result leaves reg at 0 */
+               case DEPTHX_24_8:
+                       reg = (((uint32_t)(0xffffff * depth)) << 8) |   /* depth scaled to 24 bits in bits 31:8, stencil in bits 7:0 */
+                               (stencil & 0xff);
+                       break;
+               case DEPTHX_16:
+                       reg = (uint32_t)(0xffffffff * depth);   /* depth scaled across the full 32-bit word */
+                       break;
+               }
+       }
+       OUT_RING(ring, reg);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL));
+       reg = 0;
+       if (buffers & PIPE_CLEAR_DEPTH) {
+               reg |= RB_DEPTHCONTROL_ZFUNC(GL_ALWAYS) |   /* the ZFUNC() mask keeps only 3 bits of the GL enum */
+                               RB_DEPTHCONTROL_Z_ENABLE |
+                               RB_DEPTHCONTROL_Z_WRITE_ENABLE |
+                               RB_DEPTHCONTROL_EARLY_Z_ENABLE;
+       }
+       if (buffers & PIPE_CLEAR_STENCIL) {
+               reg |= RB_DEPTHCONTROL_STENCILFUNC(GL_ALWAYS) |
+                               RB_DEPTHCONTROL_STENCIL_ENABLE |
+                               RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE);
+       }
+       OUT_RING(ring, reg);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+       OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL));
+       OUT_RING(ring, 0x00000000);        /* PA_CL_CLIP_CNTL */
+       OUT_RING(ring, PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |  /* PA_SU_SC_MODE_CNTL */
+                       PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(DRAW_TRIANGLES) |
+                       PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(DRAW_TRIANGLES));
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK));
+       OUT_RING(ring, 0x0000ffff);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+       OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL));
+       OUT_RING(ring, xy2d(0,0));              /* PA_SC_WINDOW_SCISSOR_TL */
+       OUT_RING(ring, xy2d(fb->width,      /* PA_SC_WINDOW_SCISSOR_BR */
+                       fb->height));
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COLOR_INFO));
+       OUT_RING(ring, RB_COLOR_INFO_COLOR_SWAP(1) |
+                       RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(fb->cbufs[0]->format)));   /* NOTE(review): cbufs[0] is dereferenced whatever 'buffers' holds */
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COLOR_MASK));
+       if (buffers & PIPE_CLEAR_COLOR) {
+               OUT_RING(ring, RB_COLOR_MASK_WRITE_RED |
+                               RB_COLOR_MASK_WRITE_GREEN |
+                               RB_COLOR_MASK_WRITE_BLUE |
+                               RB_COLOR_MASK_WRITE_ALPHA);
+       } else {
+               OUT_RING(ring, 0x0);   /* all color channel writes masked off */
+       }
+
+       OUT_PKT3(ring, CP_DRAW_INDX, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+                       INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+       OUT_RING(ring, 3);                                      /* NumIndices */
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_A220_RB_LRZ_VSC_CONTROL));
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COPY_CONTROL));
+       OUT_RING(ring, 0x00000000);
+
+       ctx->dirty |= FD_DIRTY_ZSA |   /* presumably so the state overwritten above is re-emitted later */
+                       FD_DIRTY_RASTERIZER |
+                       FD_DIRTY_SAMPLE_MASK |
+                       FD_DIRTY_PROG |
+                       FD_DIRTY_CONSTBUF |
+                       FD_DIRTY_BLEND;
+}
+
+static void
+fd_clear_render_target(struct pipe_context *pctx, struct pipe_surface *ps,
+               const union pipe_color_union *color,
+               unsigned x, unsigned y, unsigned w, unsigned h)
+{
+       DBG("TODO: x=%u, y=%u, w=%u, h=%u", x, y, w, h);   /* not implemented: only logs the rectangle */
+}
+
+static void
+fd_clear_depth_stencil(struct pipe_context *pctx, struct pipe_surface *ps,
+               unsigned buffers, double depth, unsigned stencil,
+               unsigned x, unsigned y, unsigned w, unsigned h)
+{
+       DBG("TODO: buffers=%u, depth=%f, stencil=%u, x=%u, y=%u, w=%u, h=%u",   /* not implemented: only logs its arguments */
+                       buffers, depth, stencil, x, y, w, h);
+}
+
+void
+fd_clear_init(struct pipe_context *pctx)   /* install the clear hooks on pctx */
+{
+       pctx->clear_depth_stencil = fd_clear_depth_stencil;
+       pctx->clear_render_target = fd_clear_render_target;
+       pctx->clear = fd_clear;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_clear.h b/src/gallium/drivers/freedreno/freedreno_clear.h
new file mode 100644 (file)
index 0000000..31bb037
--- /dev/null
@@ -0,0 +1,37 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_CLEAR_H_
+#define FREEDRENO_CLEAR_H_
+
+#include "pipe/p_context.h"
+
+void fd_clear_init(struct pipe_context *pctx);   /* sets pctx->clear, clear_render_target and clear_depth_stencil */
+
+
+#endif /* FREEDRENO_CLEAR_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_compiler.c b/src/gallium/drivers/freedreno/freedreno_compiler.c
new file mode 100644 (file)
index 0000000..0610902
--- /dev/null
@@ -0,0 +1,1186 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_strings.h"
+#include "tgsi/tgsi_dump.h"
+
+#include "freedreno_program.h"
+#include "freedreno_compiler.h"
+#include "freedreno_util.h"
+
+#include "instr.h"
+#include "ir.h"
+
+/* State carried while translating one TGSI shader into the driver IR. */
+struct fd_compile_context {
+       struct fd_program_stateobj *prog;
+       struct fd_shader_stateobj *so;
+
+       struct tgsi_parse_context parser;
+       /* TGSI processor type, taken from the parsed token header: */
+       unsigned type;
+
+       /* predicate stack: */
+       int pred_depth;
+       enum ir_pred pred_stack[8];
+
+       /* Internal-Temporary and Predicate register assignment:
+        *
+        * Some TGSI instructions which translate into multiple actual
+        * instructions need one or more temporary registers which are not
+        * assigned from the TGSI perspective (ie. not TGSI_FILE_TEMPORARY).
+        * Whenever possible, the dst register is used as the first temporary,
+        * but this is not possible when the dst register is in an export (ie.
+        * in TGSI_FILE_OUTPUT).
+        *
+        * The predicate register must be valid across multiple TGSI
+        * instructions, but internal temporaries need not be.  For this
+        * reason, once the predicate register is requested, until it is no
+        * longer needed, it gets the first register slot after the TGSI
+        * assigned temporaries (ie. num_regs[TGSI_FILE_TEMPORARY]), and the
+        * internal temporaries get the register slots above this.
+        */
+
+       /* temp index of the predicate register, or -1 while none is assigned: */
+       int pred_reg;
+       int num_internal_temps;
+
+       /* number of registers declared per TGSI register file: */
+       uint8_t num_regs[TGSI_FILE_COUNT];
+
+       /* maps input register idx to prog->export_linkage idx: */
+       uint8_t input_export_idx[64];
+
+       /* maps output register idx to prog->export_linkage idx: */
+       uint8_t output_export_idx[64];
+
+       /* idx/slot for last compiler generated immediate */
+       unsigned immediate_idx;
+
+       // TODO we can skip emit exports in the VS that the FS doesn't need..
+       // and get rid perhaps of num_param..
+       unsigned num_position, num_param;
+       /* TGSI output index of the position / point-size output, or ~0 if
+        * the shader does not declare one:
+        */
+       unsigned position, psize;
+
+       /* bitmask, indexed by register number, of registers written by a
+        * fetch whose first reader still has to be flagged 'sync':
+        */
+       uint64_t need_sync;
+
+       /* current exec CF instruction */
+       struct ir_cf *cf;
+};
+
+/* Map a TGSI semantic to a flat index: non-generic semantics use their
+ * Name directly, generic ones are placed after TGSI_SEMANTIC_COUNT,
+ * offset by their semantic Index.
+ */
+static int
+semantic_idx(struct tgsi_declaration_semantic *semantic)
+{
+       if (semantic->Name == TGSI_SEMANTIC_GENERIC)
+               return TGSI_SEMANTIC_COUNT + semantic->Index;
+
+       return semantic->Name;
+}
+
+/* Look up the input/export register # for a semantic idx (as returned
+ * by semantic_idx()).  A slot still holding 0xff has not been seen yet
+ * and is given the next free export number.
+ */
+static int
+export_linkage(struct fd_compile_context *ctx, int idx)
+{
+       struct fd_program_stateobj *prog = ctx->prog;
+
+       /* already assigned on an earlier lookup: */
+       if (prog->export_linkage[idx] != 0xff)
+               return prog->export_linkage[idx];
+
+       prog->export_linkage[idx] = prog->num_exports++;
+
+       return prog->export_linkage[idx];
+}
+
+/* Reset 'ctx' for compiling shader 'so' of program 'prog', then make a
+ * first pass over the TGSI tokens to collect declarations (register
+ * counts, export linkage, position/psize outputs) and immediates.  The
+ * parser is re-initialized afterwards so the caller can walk the tokens
+ * again from the start.
+ *
+ * Returns the tgsi_parse_init() status (TGSI_PARSE_OK on success).
+ */
+static unsigned
+compile_init(struct fd_compile_context *ctx, struct fd_program_stateobj *prog,
+               struct fd_shader_stateobj *so)
+{
+       unsigned ret;
+
+       ctx->prog = prog;
+       ctx->so = so;
+       ctx->cf = NULL;
+       ctx->pred_depth = 0;
+
+       ret = tgsi_parse_init(&ctx->parser, so->tokens);
+       if (ret != TGSI_PARSE_OK)
+               return ret;
+
+       ctx->type = ctx->parser.FullHeader.Processor.Processor;
+       ctx->position = ~0;
+       ctx->psize = ~0;
+       ctx->num_position = 0;
+       ctx->num_param = 0;
+       ctx->need_sync = 0;
+       ctx->immediate_idx = 0;
+       ctx->pred_reg = -1;
+       ctx->num_internal_temps = 0;
+
+       memset(ctx->num_regs, 0, sizeof(ctx->num_regs));
+       memset(ctx->input_export_idx, 0, sizeof(ctx->input_export_idx));
+       memset(ctx->output_export_idx, 0, sizeof(ctx->output_export_idx));
+
+       /* do first pass to extract declarations: */
+       while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
+               tgsi_parse_token(&ctx->parser);
+
+               switch (ctx->parser.FullToken.Token.Type) {
+               case TGSI_TOKEN_TYPE_DECLARATION: {
+                       struct tgsi_full_declaration *decl =
+                                       &ctx->parser.FullToken.FullDeclaration;
+                       if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+                               unsigned name = decl->Semantic.Name;
+
+                               assert(decl->Declaration.Semantic);  // TODO is this ever not true?
+
+                               ctx->output_export_idx[decl->Range.First] =
+                                               semantic_idx(&decl->Semantic);
+
+                               if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+                                       switch (name) {
+                                       case TGSI_SEMANTIC_POSITION:
+                                               ctx->position = ctx->num_regs[TGSI_FILE_OUTPUT];
+                                               ctx->num_position++;
+                                               break;
+                                       case TGSI_SEMANTIC_PSIZE:
+                                               ctx->psize = ctx->num_regs[TGSI_FILE_OUTPUT];
+                                               ctx->num_position++;
+                                               /* psize goes to export63, it is not a
+                                                * param, so do not fall through into
+                                                * the num_param accounting below:
+                                                */
+                                               break;
+                                       case TGSI_SEMANTIC_COLOR:
+                                       case TGSI_SEMANTIC_GENERIC:
+                                               ctx->num_param++;
+                                               break;
+                                       default:
+                                               DBG("unknown VS semantic name: %s",
+                                                               tgsi_semantic_names[name]);
+                                               assert(0);
+                                       }
+                               } else {
+                                       switch (name) {
+                                       case TGSI_SEMANTIC_COLOR:
+                                       case TGSI_SEMANTIC_GENERIC:
+                                               ctx->num_param++;
+                                               break;
+                                       default:
+                                               DBG("unknown PS semantic name: %s",
+                                                               tgsi_semantic_names[name]);
+                                               assert(0);
+                                       }
+                               }
+                       } else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+                               ctx->input_export_idx[decl->Range.First] =
+                                               semantic_idx(&decl->Semantic);
+                       }
+                       /* a declaration may cover a range of registers: */
+                       ctx->num_regs[decl->Declaration.File] +=
+                                       1 + decl->Range.Last - decl->Range.First;
+                       break;
+               }
+               case TGSI_TOKEN_TYPE_IMMEDIATE: {
+                       struct tgsi_full_immediate *imm =
+                                       &ctx->parser.FullToken.FullImmediate;
+                       unsigned n = ctx->so->num_immediates++;
+                       /* copy the whole vec4 (4 x 32 bits): */
+                       memcpy(ctx->so->immediates[n].val, imm->u, 16);
+                       break;
+               }
+               default:
+                       break;
+               }
+       }
+
+       /* TGSI generated immediates are always entire vec4's, ones we
+        * generate internally are not:
+        */
+       ctx->immediate_idx = ctx->so->num_immediates * 4;
+
+       ctx->so->first_immediate = ctx->num_regs[TGSI_FILE_CONSTANT];
+
+       tgsi_parse_free(&ctx->parser);
+
+       return tgsi_parse_init(&ctx->parser, so->tokens);
+}
+
+/* Release the TGSI parser state set up by compile_init(). */
+static void
+compile_free(struct fd_compile_context *ctx)
+{
+       struct tgsi_parse_context *parser = &ctx->parser;
+
+       tgsi_parse_free(parser);
+}
+
+/* Return the EXEC CF that the next instruction should be added to,
+ * starting a new one if there is no current CF or the current one has
+ * no free instruction slot left.
+ */
+static struct ir_cf *
+next_exec_cf(struct fd_compile_context *ctx)
+{
+       /* current CF still has room, keep using it: */
+       if (ctx->cf &&
+                       (ctx->cf->exec.instrs_count < ARRAY_SIZE(ctx->cf->exec.instrs)))
+               return ctx->cf;
+
+       ctx->cf = ir_cf_create(ctx->so->ir, EXEC);
+
+       return ctx->cf;
+}
+
+/* Emit one VTX_FETCH instruction per declared input, fetching input i
+ * into R(i+1).  The instructions are remembered in so->vfetch_instrs,
+ * and each destination register is flagged in need_sync so that its
+ * first reader gets marked 'sync' (see add_src_reg()).
+ */
+static void
+compile_vtx_fetch(struct fd_compile_context *ctx)
+{
+       struct ir_instruction **vfetch_instrs = ctx->so->vfetch_instrs;
+       int i;
+       for (i = 0; i < ctx->num_regs[TGSI_FILE_INPUT]; i++) {
+               struct ir_instruction *instr = ir_instr_create(
+                               next_exec_cf(ctx), IR_FETCH);
+               instr->fetch.opc = VTX_FETCH;
+
+               /* need_sync is a 64-bit mask, so the shift must be done on
+                * a 64-bit value (a plain int shift is undefined from bit
+                * 31 up) and must stay below the mask width:
+                */
+               if ((i + 1) < 64)
+                       ctx->need_sync |= (uint64_t)1 << (i+1);
+
+               ir_reg_create(instr, i+1, "xyzw", 0);
+               ir_reg_create(instr, 0, "x", 0);
+
+               if (i == 0)
+                       instr->sync = true;
+
+               vfetch_instrs[i] = instr;
+       }
+       ctx->so->num_vfetch_instrs = i;
+       /* following instructions start in a fresh CF: */
+       ctx->cf = NULL;
+}
+
+/*
+ * For vertex shaders (VS):
+ * --- ------ -------------
+ *
+ *   Inputs:     R1-R(num_input)
+ *   Constants:  C0-C(num_const-1)
+ *   Immediates: C(num_const)-C(num_const+num_imm-1)
+ *   Outputs:    export0-export(n) and export62, export63
+ *      n is # of outputs minus gl_Position (export62) and gl_PointSize (export63)
+ *   Temps:      R(num_input+1)-R(num_input+num_temps)
+ *
+ * R0 could be clobbered after the vertex fetch instructions.. so we
+ * could use it for one of the temporaries.
+ *
+ * TODO: maybe the vertex fetch part could fetch first input into R0 as
+ * the last vtx fetch instruction, which would let us use the same
+ * register layout in either case.. although this is not what the blob
+ * compiler does.
+ *
+ *
+ * For frag shaders (PS):
+ * --- ---- -------------
+ *
+ *   Inputs:     R0-R(num_input-1)
+ *   Constants:  same as VS
+ *   Immediates: same as VS
+ *   Outputs:    export0-export(num_outputs)
+ *   Temps:      R(num_input)-R(num_input+num_temps-1)
+ *
+ * In either case, immediates are appended to the constants
+ * (uniforms).
+ *
+ */
+
+/* Translate a TGSI temporary index into a GPR number: temporaries are
+ * placed after the inputs, and in a vertex shader one register further
+ * up.
+ */
+static unsigned
+get_temp_gpr(struct fd_compile_context *ctx, int idx)
+{
+       unsigned base = (ctx->type == TGSI_PROCESSOR_VERTEX) ? 1 : 0;
+
+       return base + ctx->num_regs[TGSI_FILE_INPUT] + idx;
+}
+
+/* Append the destination register described by 'dst' to 'alu'.
+ *
+ * Outputs become export registers: in a vertex shader position maps to
+ * export 62, psize to export 63 and everything else goes through
+ * export_linkage(); in other shaders the TGSI index is used as is.
+ * Temporaries are mapped with get_temp_gpr().  The TGSI writemask is
+ * turned into a swizzle string with '_' for unwritten channels.
+ */
+static struct ir_register *
+add_dst_reg(struct fd_compile_context *ctx, struct ir_instruction *alu,
+               const struct tgsi_dst_register *dst)
+{
+       char swiz[5] = "____";
+       unsigned flags = 0, num = 0;
+
+       if (dst->File == TGSI_FILE_TEMPORARY) {
+               num = get_temp_gpr(ctx, dst->Index);
+       } else if (dst->File == TGSI_FILE_OUTPUT) {
+               flags |= IR_REG_EXPORT;
+               if (ctx->type != TGSI_PROCESSOR_VERTEX) {
+                       num = dst->Index;
+               } else if (dst->Index == ctx->position) {
+                       num = 62;
+               } else if (dst->Index == ctx->psize) {
+                       num = 63;
+               } else {
+                       num = export_linkage(ctx,
+                                       ctx->output_export_idx[dst->Index]);
+               }
+       } else {
+               DBG("unsupported dst register file: %s",
+                               tgsi_file_names[dst->File]);
+               assert(0);
+       }
+
+       /* only the written channels get a letter: */
+       if (dst->WriteMask & TGSI_WRITEMASK_X)
+               swiz[0] = 'x';
+       if (dst->WriteMask & TGSI_WRITEMASK_Y)
+               swiz[1] = 'y';
+       if (dst->WriteMask & TGSI_WRITEMASK_Z)
+               swiz[2] = 'z';
+       if (dst->WriteMask & TGSI_WRITEMASK_W)
+               swiz[3] = 'w';
+
+       return ir_reg_create(alu, num, swiz, flags);
+}
+
+/* Append the source register described by 'src' to 'alu'.
+ *
+ * Constants keep their TGSI index and immediates are placed after the
+ * constants (both flagged IR_REG_CONST).  Vertex shader inputs live in
+ * R(index+1), other shaders' inputs are looked up via export_linkage(),
+ * and temporaries are mapped with get_temp_gpr().
+ *
+ * If the register is a GPR that a fetch wrote and nobody has read yet
+ * (its bit is set in ctx->need_sync), 'alu' is marked sync and the bit
+ * is cleared.
+ */
+static struct ir_register *
+add_src_reg(struct fd_compile_context *ctx, struct ir_instruction *alu,
+               const struct tgsi_src_register *src)
+{
+       static const char swiz_vals[] = {
+                       'x', 'y', 'z', 'w',
+       };
+       char swiz[5];
+       unsigned flags = 0, num = 0;
+
+       switch (src->File) {
+       case TGSI_FILE_CONSTANT:
+               num = src->Index;
+               flags |= IR_REG_CONST;
+               break;
+       case TGSI_FILE_INPUT:
+               if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+                       num = src->Index + 1;
+               } else {
+                       num = export_linkage(ctx,
+                                       ctx->input_export_idx[src->Index]);
+               }
+               break;
+       case TGSI_FILE_TEMPORARY:
+               num = get_temp_gpr(ctx, src->Index);
+               break;
+       case TGSI_FILE_IMMEDIATE:
+               num = src->Index + ctx->num_regs[TGSI_FILE_CONSTANT];
+               flags |= IR_REG_CONST;
+               break;
+       default:
+               DBG("unsupported src register file: %s",
+                               tgsi_file_names[src->File]);
+               assert(0);
+               break;
+       }
+
+       if (src->Absolute)
+               flags |= IR_REG_ABS;
+       if (src->Negate)
+               flags |= IR_REG_NEGATE;
+
+       swiz[0] = swiz_vals[src->SwizzleX];
+       swiz[1] = swiz_vals[src->SwizzleY];
+       swiz[2] = swiz_vals[src->SwizzleZ];
+       swiz[3] = swiz_vals[src->SwizzleW];
+       swiz[4] = '\0';
+
+       /* need_sync only tracks GPRs, so skip constants (whose index may
+        * well exceed the mask width) before touching the mask, and do the
+        * shift on a 64-bit value: shifting a plain int is undefined from
+        * bit 31 up and would sign-extend into the upper half of the mask.
+        */
+       if (!(flags & IR_REG_CONST) && (num < 64) &&
+                       (ctx->need_sync & ((uint64_t)1 << num))) {
+               alu->sync = true;
+               ctx->need_sync &= ~((uint64_t)1 << num);
+       }
+
+       return ir_reg_create(alu, num, swiz, flags);
+}
+
+/* Apply the TGSI saturate mode of 'inst' to the vector half of 'alu':
+ * [0,1] saturation sets vector_clamp, [-1,1] saturation is not
+ * supported.
+ */
+static void
+add_vector_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+       unsigned sat = inst->Instruction.Saturate;
+
+       if (sat == TGSI_SAT_ZERO_ONE) {
+               alu->alu.vector_clamp = true;
+       } else if (sat == TGSI_SAT_MINUS_PLUS_ONE) {
+               DBG("unsupported saturate");
+               assert(0);
+       }
+       /* TGSI_SAT_NONE: nothing to do */
+}
+
+/* Apply the TGSI saturate mode of 'inst' to the scalar half of 'alu':
+ * [0,1] saturation sets scalar_clamp, [-1,1] saturation is not
+ * supported.
+ */
+static void
+add_scalar_clamp(struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+       unsigned sat = inst->Instruction.Saturate;
+
+       if (sat == TGSI_SAT_ZERO_ONE) {
+               alu->alu.scalar_clamp = true;
+       } else if (sat == TGSI_SAT_MINUS_PLUS_ONE) {
+               DBG("unsupported saturate");
+               assert(0);
+       }
+       /* TGSI_SAT_NONE: nothing to do */
+}
+
+/* Add dst/src registers for a vector op with a single TGSI source; the
+ * one source is added twice, filling both vector source slots.
+ */
+static void
+add_regs_vector_1(struct fd_compile_context *ctx,
+               struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+       const struct tgsi_src_register *src0;
+
+       assert(inst->Instruction.NumSrcRegs == 1);
+       assert(inst->Instruction.NumDstRegs == 1);
+
+       src0 = &inst->Src[0].Register;
+
+       add_dst_reg(ctx, alu, &inst->Dst[0].Register);
+       add_src_reg(ctx, alu, src0);
+       add_src_reg(ctx, alu, src0);
+       add_vector_clamp(inst, alu);
+}
+
+/* Add dst/src registers for a vector op with two TGSI sources. */
+static void
+add_regs_vector_2(struct fd_compile_context *ctx,
+               struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+       int i;
+
+       assert(inst->Instruction.NumSrcRegs == 2);
+       assert(inst->Instruction.NumDstRegs == 1);
+
+       add_dst_reg(ctx, alu, &inst->Dst[0].Register);
+       /* sources in TGSI order: */
+       for (i = 0; i < 2; i++)
+               add_src_reg(ctx, alu, &inst->Src[i].Register);
+       add_vector_clamp(inst, alu);
+}
+
+/* Add dst/src registers for a vector op with three TGSI sources.  The
+ * third TGSI source is emitted first, followed by the first and second.
+ */
+static void
+add_regs_vector_3(struct fd_compile_context *ctx,
+               struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+       /* maybe should re-arrange the syntax some day, but
+        * in assembler/disassembler and what ir.c expects
+        * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
+        */
+       static const int src_order[3] = { 2, 0, 1 };
+       int i;
+
+       assert(inst->Instruction.NumSrcRegs == 3);
+       assert(inst->Instruction.NumDstRegs == 1);
+
+       add_dst_reg(ctx, alu, &inst->Dst[0].Register);
+       for (i = 0; i < 3; i++)
+               add_src_reg(ctx, alu, &inst->Src[src_order[i]].Register);
+       add_vector_clamp(inst, alu);
+}
+
+/* Fill the vector slot of a scalar-only instruction with placeholder
+ * registers: a dst which writes no channel, plus two sources.
+ */
+static void
+add_regs_dummy_vector(struct ir_instruction *alu)
+{
+       int n;
+
+       /* vector dst, nothing written: */
+       ir_reg_create(alu, 0, "____", 0);
+
+       /* vector src1 and src2: */
+       for (n = 0; n < 2; n++)
+               ir_reg_create(alu, 0, NULL, 0);
+}
+
+/* Add registers for a scalar op with a single TGSI source: the unused
+ * vector slot gets dummy registers first, then the scalar dst and src.
+ */
+static void
+add_regs_scalar_1(struct fd_compile_context *ctx,
+               struct tgsi_full_instruction *inst, struct ir_instruction *alu)
+{
+       const struct tgsi_dst_register *dst;
+       const struct tgsi_src_register *src;
+
+       assert(inst->Instruction.NumSrcRegs == 1);
+       assert(inst->Instruction.NumDstRegs == 1);
+
+       dst = &inst->Dst[0].Register;
+       src = &inst->Src[0].Register;
+
+       add_regs_dummy_vector(alu);
+
+       add_dst_reg(ctx, alu, dst);
+       add_src_reg(ctx, alu, src);
+       add_scalar_clamp(inst, alu);
+}
+
+/*
+ * Helpers for TGSI instructions that don't map to a single shader instr:
+ */
+
+/* Set up an internal temporary, as matching dst (tmp_dst) and src
+ * (tmp_src) registers, for the instruction sequence generated from a
+ * single TGSI op.
+ *
+ * When orig_dst is given and is not in TGSI_FILE_OUTPUT, its register
+ * is reused as the temporary so no extra register is needed; otherwise
+ * a new internal temp is allocated.  An op which needs several internal
+ * temps should pass NULL for orig_dst for all but the first one, or
+ * they would all end up on the same register.
+ *
+ * The dst always gets a full xyzw writemask and the src an identity
+ * swizzle.  Returns true if a new internal temp was allocated.
+ */
+static bool
+get_internal_temp(struct fd_compile_context *ctx,
+               struct tgsi_dst_register *orig_dst,
+               struct tgsi_dst_register *tmp_dst,
+               struct tgsi_src_register *tmp_src)
+{
+       bool using_temp = false;
+
+       tmp_dst->File      = TGSI_FILE_TEMPORARY;
+       tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
+       tmp_dst->Indirect  = 0;
+       tmp_dst->Dimension = 0;
+
+       if (!orig_dst || (orig_dst->File == TGSI_FILE_OUTPUT)) {
+               /* can't use the orig dst, so assign a new slot, skipping the
+                * one taken by the predicate register if it is in use:
+                */
+               int slot = ctx->num_internal_temps++;
+               if (ctx->pred_reg != -1)
+                       slot++;
+               tmp_dst->Index = get_temp_gpr(ctx,
+                               ctx->num_regs[TGSI_FILE_TEMPORARY] + slot);
+               using_temp = true;
+       } else {
+               /* re-use the orig dst register as the temporary: */
+               tmp_dst->Index = orig_dst->Index;
+       }
+
+       /* src mirrors dst, reading all four channels unmodified: */
+       tmp_src->SwizzleX  = TGSI_SWIZZLE_X;
+       tmp_src->SwizzleY  = TGSI_SWIZZLE_Y;
+       tmp_src->SwizzleZ  = TGSI_SWIZZLE_Z;
+       tmp_src->SwizzleW  = TGSI_SWIZZLE_W;
+       tmp_src->Negate    = 0;
+       tmp_src->Absolute  = 0;
+       tmp_src->File      = tmp_dst->File;
+       tmp_src->Index     = tmp_dst->Index;
+       tmp_src->Indirect  = tmp_dst->Indirect;
+       tmp_src->Dimension = tmp_dst->Dimension;
+
+       return using_temp;
+}
+
+/* Describe the currently assigned predicate register as a dst which
+ * writes only the w channel and, if 'src' is not NULL, as a src which
+ * reads w on all four channels.  A predicate register must have been
+ * assigned (ctx->pred_reg != -1).
+ */
+static void
+get_predicate(struct fd_compile_context *ctx, struct tgsi_dst_register *dst,
+               struct tgsi_src_register *src)
+{
+       assert(ctx->pred_reg != -1);
+
+       dst->Index     = get_temp_gpr(ctx, ctx->pred_reg);
+       dst->File      = TGSI_FILE_TEMPORARY;
+       dst->WriteMask = TGSI_WRITEMASK_W;
+       dst->Indirect  = 0;
+       dst->Dimension = 0;
+
+       /* caller only wants the dst: */
+       if (!src)
+               return;
+
+       src->File      = dst->File;
+       src->Index     = dst->Index;
+       src->Indirect  = dst->Indirect;
+       src->Dimension = dst->Dimension;
+       src->Absolute  = 0;
+       src->Negate    = 0;
+       src->SwizzleX  = src->SwizzleY = TGSI_SWIZZLE_W;
+       src->SwizzleZ  = src->SwizzleW = TGSI_SWIZZLE_W;
+}
+
+/* Enter a predicated region controlled by 'src'.
+ *
+ * At the outermost level a predicate register is assigned (the first
+ * slot after the TGSI temporaries) and set with PRED_SETNEs from 'src'.
+ * For a nested region the existing predicate value is multiplied by
+ * 'src'.  The previous ir->pred state is saved on the predicate stack
+ * so pop_predicate() can restore it.
+ */
+static void
+push_predicate(struct fd_compile_context *ctx, struct tgsi_src_register *src)
+{
+       struct ir_instruction *alu;
+       struct tgsi_dst_register pred_dst;
+
+       /* the predicate stack has a fixed size, catch nesting deeper than
+        * it can hold instead of writing past its end:
+        */
+       assert(ctx->pred_depth >= 0);
+       assert(ctx->pred_depth < (int)ARRAY_SIZE(ctx->pred_stack));
+
+       /* NOTE blob compiler seems to always put PRED_* instrs in a CF by
+        * themselves:
+        */
+       ctx->cf = NULL;
+
+       if (ctx->pred_depth == 0) {
+               /* assign predicate register: */
+               ctx->pred_reg = ctx->num_regs[TGSI_FILE_TEMPORARY];
+
+               get_predicate(ctx, &pred_dst, NULL);
+
+               alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SETNEs);
+               add_regs_dummy_vector(alu);
+               add_dst_reg(ctx, alu, &pred_dst);
+               add_src_reg(ctx, alu, src);
+       } else {
+               struct tgsi_src_register pred_src;
+
+               get_predicate(ctx, &pred_dst, &pred_src);
+
+               alu = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
+               add_dst_reg(ctx, alu, &pred_dst);
+               add_src_reg(ctx, alu, &pred_src);
+               add_src_reg(ctx, alu, src);
+
+               // XXX need to make PRED_SETE_PUSHv IR_PRED_NONE.. but need to make
+               // sure src reg is valid if it was calculated with a predicate
+               // condition..
+               alu->pred = IR_PRED_NONE;
+       }
+
+       /* save previous pred state to restore in pop_predicate(): */
+       ctx->pred_stack[ctx->pred_depth++] = ctx->so->ir->pred;
+
+       ctx->cf = NULL;
+}
+
+/* Leave the innermost predicated region: restore the ir->pred state
+ * saved by the matching push_predicate().  If an enclosing predicated
+ * region remains, a PRED_SET_POPs is emitted on the predicate register;
+ * otherwise the predicate register is released.
+ */
+static void
+pop_predicate(struct fd_compile_context *ctx)
+{
+       /* a pop without a matching push would index below the stack: */
+       assert(ctx->pred_depth > 0);
+
+       /* NOTE blob compiler seems to always put PRED_* instrs in a CF by
+        * themselves:
+        */
+       ctx->cf = NULL;
+
+       /* restore previous predicate state: */
+       ctx->so->ir->pred = ctx->pred_stack[--ctx->pred_depth];
+
+       if (ctx->pred_depth != 0) {
+               struct ir_instruction *alu;
+               struct tgsi_dst_register pred_dst;
+               struct tgsi_src_register pred_src;
+
+               get_predicate(ctx, &pred_dst, &pred_src);
+
+               alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, PRED_SET_POPs);
+               add_regs_dummy_vector(alu);
+               add_dst_reg(ctx, alu, &pred_dst);
+               add_src_reg(ctx, alu, &pred_src);
+               alu->pred = IR_PRED_NONE;
+       } else {
+               /* predicate register no longer needed: */
+               ctx->pred_reg = -1;
+       }
+
+       ctx->cf = NULL;
+}
+
+/* Set up 'reg' as a TGSI_FILE_IMMEDIATE source which reads the 32-bit
+ * value 'val', replicated on all four channels.
+ *
+ * The shader's immediates are searched for a component already holding
+ * 'val', or holding the negated value (which is then read with the
+ * Negate modifier).  If neither is found, 'val' is stored in the next
+ * free immediate component.
+ *
+ * NOTE(review): 'val' is treated as an IEEE-754 single precision bit
+ * pattern (the callers in this file pass f2d(<float>)) -- confirm that
+ * no caller passes integer data.
+ */
+static void
+get_immediate(struct fd_compile_context *ctx,
+               struct tgsi_src_register *reg, uint32_t val)
+{
+       /* bit pattern of the negated float: negation flips only the sign
+        * bit, whereas integer negation of the bits (-val) gives the bits
+        * of an unrelated number (eg. 1.0 -> -4.0):
+        */
+       const uint32_t neg_val = val ^ 0x80000000;
+       unsigned neg = 0, swiz = 0, idx = 0, i;
+       /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
+       static const unsigned swiz2tgsi[] = {
+                       TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+       };
+
+       for (i = 0; i < ctx->immediate_idx; i++) {
+               swiz = i % 4;
+               idx  = i / 4;
+
+               if (ctx->so->immediates[idx].val[swiz] == val) {
+                       neg = 0;
+                       break;
+               }
+
+               if (ctx->so->immediates[idx].val[swiz] == neg_val) {
+                       neg = 1;
+                       break;
+               }
+       }
+
+       if (i == ctx->immediate_idx) {
+               /* need to generate a new immediate: */
+               swiz = i % 4;
+               idx  = i / 4;
+               neg  = 0;
+               ctx->so->immediates[idx].val[swiz] = val;
+               ctx->so->num_immediates = idx + 1;
+               ctx->immediate_idx++;
+       }
+
+       reg->File      = TGSI_FILE_IMMEDIATE;
+       reg->Indirect  = 0;
+       reg->Dimension = 0;
+       reg->Index     = idx;
+       reg->Absolute  = 0;
+       reg->Negate    = neg;
+       reg->SwizzleX  = swiz2tgsi[swiz];
+       reg->SwizzleY  = swiz2tgsi[swiz];
+       reg->SwizzleZ  = swiz2tgsi[swiz];
+       reg->SwizzleW  = swiz2tgsi[swiz];
+}
+
+/* POW(a,b) = EXP2(b * LOG2(a))
+ *
+ * Emitted as three instructions: a scalar LOG_CLAMP of src0 into a
+ * temporary, a vector MUL of the temporary by src1, and a scalar
+ * EXP_IEEE of the temporary into the real dst.  Only a single-channel
+ * dst writemask is handled.
+ */
+static void
+translate_pow(struct fd_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct tgsi_dst_register tmp_dst;
+       struct tgsi_src_register tmp_src;
+       struct ir_instruction *alu;
+
+       get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+
+       /* tmp = LOG_CLAMP(a) */
+       alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, LOG_CLAMP);
+       add_regs_dummy_vector(alu);
+       add_dst_reg(ctx, alu, &tmp_dst);
+       add_src_reg(ctx, alu, &inst->Src[0].Register);
+
+       /* tmp = tmp * b */
+       alu = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
+       add_dst_reg(ctx, alu, &tmp_dst);
+       add_src_reg(ctx, alu, &tmp_src);
+       add_src_reg(ctx, alu, &inst->Src[1].Register);
+
+       /* NOTE: some of the instructions, like EXP_IEEE, seem hard-
+        * coded to take their input from the w component.
+        */
+       /* so route the channel selected by the dst writemask into the w
+        * slot of the temporary's swizzle:
+        */
+       switch(inst->Dst[0].Register.WriteMask) {
+       case TGSI_WRITEMASK_X:
+               tmp_src.SwizzleW = TGSI_SWIZZLE_X;
+               break;
+       case TGSI_WRITEMASK_Y:
+               tmp_src.SwizzleW = TGSI_SWIZZLE_Y;
+               break;
+       case TGSI_WRITEMASK_Z:
+               tmp_src.SwizzleW = TGSI_SWIZZLE_Z;
+               break;
+       case TGSI_WRITEMASK_W:
+               tmp_src.SwizzleW = TGSI_SWIZZLE_W;
+               break;
+       default:
+               /* more than one (or no) channel written: */
+               DBG("invalid writemask!");
+               assert(0);
+               break;
+       }
+
+       /* dst = EXP_IEEE(tmp) */
+       alu = ir_instr_create_alu(next_exec_cf(ctx), ~0, EXP_IEEE);
+       add_regs_dummy_vector(alu);
+       add_dst_reg(ctx, alu, &inst->Dst[0].Register);
+       add_src_reg(ctx, alu, &tmp_src);
+       add_scalar_clamp(inst, alu);
+}
+
+/* Translate a TGSI texture sample instruction (opc is the TGSI opcode).
+ *
+ * For TXP the coordinate is first divided by its w component into a
+ * temporary.  The TEX_FETCH itself is recorded in so->tfetch_instrs so
+ * that its const_idx can be patched later.  If the TGSI dst is an
+ * output, the fetch goes to an internal temp and an extra ALU
+ * instruction moves the result to the export.
+ */
+static void
+translate_tex(struct fd_compile_context *ctx,
+               struct tgsi_full_instruction *inst, unsigned opc)
+{
+       struct ir_instruction *instr;
+       struct tgsi_dst_register tmp_dst;
+       struct tgsi_src_register tmp_src;
+       const struct tgsi_src_register *coord;
+       bool using_temp;
+       int idx;
+
+       using_temp = get_internal_temp(ctx,
+                       &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+
+       if (opc == TGSI_OPCODE_TXP) {
+               /* TXP - Projective Texture Lookup:
+                *
+                *  coord.x = src0.x / src0.w
+                *  coord.y = src0.y / src0.w
+                *  coord.z = src0.z / src0.w
+                *  coord.w = src0.w
+                *  bias = 0.0
+                *
+                *  dst = texture_sample(unit, coord, bias)
+                */
+               instr = ir_instr_create_alu(next_exec_cf(ctx), MAXv, RECIP_IEEE);
+
+               /* MAXv: */
+               add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "___w";
+               add_src_reg(ctx, instr, &inst->Src[0].Register);
+               add_src_reg(ctx, instr, &inst->Src[0].Register);
+
+               /* RECIP_IEEE: */
+               add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "x___";
+               add_src_reg(ctx, instr, &inst->Src[0].Register)->swizzle = "wwww";
+
+               instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
+               add_dst_reg(ctx, instr, &tmp_dst)->swizzle = "xyz_";
+               add_src_reg(ctx, instr, &tmp_src)->swizzle = "xxxx";
+               add_src_reg(ctx, instr, &inst->Src[0].Register);
+
+               coord = &tmp_src;
+       } else {
+               coord = &inst->Src[0].Register;
+       }
+
+       instr = ir_instr_create(next_exec_cf(ctx), IR_FETCH);
+       instr->fetch.opc = TEX_FETCH;
+       assert(inst->Texture.NumOffsets <= 1); // TODO what to do in other cases?
+
+       /* save off the tex fetch to be patched later with correct const_idx: */
+       idx = ctx->so->num_tfetch_instrs++;
+       ctx->so->tfetch_instrs[idx].samp_id = inst->Src[1].Register.Index;
+       ctx->so->tfetch_instrs[idx].instr = instr;
+
+       add_dst_reg(ctx, instr, &tmp_dst);
+       add_src_reg(ctx, instr, coord);
+
+       /* dst register needs to be marked for sync.  need_sync is a 64-bit
+        * mask, so shift a 64-bit one (an int shift is undefined from bit
+        * 31 up) and stay below the mask width:
+        */
+       if (instr->regs[0]->num < 64)
+               ctx->need_sync |= (uint64_t)1 << instr->regs[0]->num;
+
+       /* TODO we need some way to know if the tex fetch needs to sync on alu pipe.. */
+       instr->sync = true;
+
+       if (using_temp) {
+               /* texture fetch can't write directly to export, so if tgsi
+                * is telling us the dst register is in output file, we load
+                * the texture to a temp and then use an ALU instruction to
+                * move it to the output
+                */
+               instr = ir_instr_create_alu(next_exec_cf(ctx), MAXv, ~0);
+
+               add_dst_reg(ctx, instr, &inst->Dst[0].Register);
+               add_src_reg(ctx, instr, &tmp_src);
+               add_src_reg(ctx, instr, &tmp_src);
+               add_vector_clamp(inst, instr);
+       }
+}
+
+/* SGE(a,b) = GTE((b - a), 1.0, 0.0) */
+/* SLT(a,b) = GTE((b - a), 0.0, 1.0) */
+/* Emitted as an ADDv computing (b - a) into a temporary, followed by a
+ * CNDGTEv on the temporary with the two constants c0/c1 as the other
+ * operands.
+ */
+static void
+translate_sge_slt(struct fd_compile_context *ctx,
+               struct tgsi_full_instruction *inst, unsigned opc)
+{
+       struct ir_instruction *instr;
+       struct tgsi_dst_register tmp_dst;
+       struct tgsi_src_register tmp_src;
+       struct tgsi_src_register tmp_const;
+       float c0, c1;
+
+       switch (opc) {
+       default:
+               assert(0);
+               /* falls through to the SGE constants if asserts are disabled */
+       case TGSI_OPCODE_SGE:
+               c0 = 1.0;
+               c1 = 0.0;
+               break;
+       case TGSI_OPCODE_SLT:
+               c0 = 0.0;
+               c1 = 1.0;
+               break;
+       }
+
+       get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+
+       /* tmp = (-a) + b */
+       instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
+       add_dst_reg(ctx, instr, &tmp_dst);
+       add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR_REG_NEGATE;
+       add_src_reg(ctx, instr, &inst->Src[1].Register);
+
+       instr = ir_instr_create_alu(next_exec_cf(ctx), CNDGTEv, ~0);
+       add_dst_reg(ctx, instr, &inst->Dst[0].Register);
+       /* sources are added in the order: c0, tmp, c1.
+        * NOTE(review): presumably this follows the same operand ordering
+        * that ir.c expects for three-source vector ops (see
+        * add_regs_vector_3()) -- confirm for CNDGTEv.
+        */
+       get_immediate(ctx, &tmp_const, f2d(c0));
+       add_src_reg(ctx, instr, &tmp_const);
+       add_src_reg(ctx, instr, &tmp_src);
+       get_immediate(ctx, &tmp_const, f2d(c1));
+       add_src_reg(ctx, instr, &tmp_const);
+}
+
+/* LRP(a,b,c) = (a * b) + ((1 - a) * c)
+ *
+ * Emitted as four vector instructions using two temporaries; the first
+ * temporary re-uses the dst register when that is not an output.  The
+ * 'opc' argument is not used.
+ *
+ * NOTE(review): when tmp1 aliases the dst register and the TGSI dst is
+ * also one of the sources, the first MUL appears to overwrite a source
+ * that is read again by the later instructions -- confirm whether such
+ * input can reach here.
+ */
+static void
+translate_lrp(struct fd_compile_context *ctx,
+               struct tgsi_full_instruction *inst,
+               unsigned opc)
+{
+       struct ir_instruction *instr;
+       struct tgsi_dst_register tmp_dst1, tmp_dst2;
+       struct tgsi_src_register tmp_src1, tmp_src2;
+       struct tgsi_src_register tmp_const;
+
+       /* NULL orig_dst for the second temp so it gets its own register: */
+       get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst1, &tmp_src1);
+       get_internal_temp(ctx, NULL, &tmp_dst2, &tmp_src2);
+
+       get_immediate(ctx, &tmp_const, f2d(1.0));
+
+       /* tmp1 = (a * b) */
+       instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
+       add_dst_reg(ctx, instr, &tmp_dst1);
+       add_src_reg(ctx, instr, &inst->Src[0].Register);
+       add_src_reg(ctx, instr, &inst->Src[1].Register);
+
+       /* tmp2 = (1 - a) */
+       instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
+       add_dst_reg(ctx, instr, &tmp_dst2);
+       add_src_reg(ctx, instr, &tmp_const);
+       add_src_reg(ctx, instr, &inst->Src[0].Register)->flags |= IR_REG_NEGATE;
+
+       /* tmp2 = tmp2 * c */
+       instr = ir_instr_create_alu(next_exec_cf(ctx), MULv, ~0);
+       add_dst_reg(ctx, instr, &tmp_dst2);
+       add_src_reg(ctx, instr, &tmp_src2);
+       add_src_reg(ctx, instr, &inst->Src[2].Register);
+
+       /* dst = tmp1 + tmp2 */
+       instr = ir_instr_create_alu(next_exec_cf(ctx), ADDv, ~0);
+       add_dst_reg(ctx, instr, &inst->Dst[0].Register);
+       add_src_reg(ctx, instr, &tmp_src1);
+       add_src_reg(ctx, instr, &tmp_src2);
+}
+
+/*
+ * Lower TGSI SIN/COS.  The argument is first pre-processed in the .x
+ * channel of an internal temp using MULADDv, FRACv, MULADDv, and the
+ * scalar SIN/COS op is then applied to that value.
+ *
+ * With the operand order described in the comments below, the sequence
+ * presumably computes
+ *     tmp.x = 0.5 + src * 0.159155       (0.159155 ~= 1/(2*pi))
+ *     tmp.x = frac(tmp.x)
+ *     tmp.x = -3.141593 + tmp.x * 6.283185
+ * i.e. wraps the angle into [-pi, pi) -- TODO confirm against ir.c.
+ *
+ * Any opcode other than SIN/COS hits assert(0); when asserts are
+ * compiled out it falls through to the SIN case.
+ */
+static void
+translate_trig(struct fd_compile_context *ctx,
+               struct tgsi_full_instruction *inst,
+               unsigned opc)
+{
+       struct ir_instruction *instr;
+       struct tgsi_dst_register tmp_dst;
+       struct tgsi_src_register tmp_src;
+       struct tgsi_src_register tmp_const;
+       instr_scalar_opc_t op;
+
+       switch (opc) {
+       default:
+               assert(0);
+       case TGSI_OPCODE_SIN:
+               op = SIN;
+               break;
+       case TGSI_OPCODE_COS:
+               op = COS;
+               break;
+       }
+
+       get_internal_temp(ctx, &inst->Dst[0].Register, &tmp_dst, &tmp_src);
+
+       /* only the .x channel of the temp is written, and every source
+        * channel reads that .x value back:
+        */
+       tmp_dst.WriteMask = TGSI_WRITEMASK_X;
+       tmp_src.SwizzleX = tmp_src.SwizzleY =
+                       tmp_src.SwizzleZ = tmp_src.SwizzleW = TGSI_SWIZZLE_X;
+
+       /* maybe should re-arrange the syntax some day, but
+        * in assembler/disassembler and what ir.c expects
+        * is: MULADDv Rdst = Rsrc2 + Rsrc0 * Rsrc1
+        */
+       instr = ir_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
+       add_dst_reg(ctx, instr, &tmp_dst);
+       get_immediate(ctx, &tmp_const, f2d(0.5));
+       add_src_reg(ctx, instr, &tmp_const);
+       add_src_reg(ctx, instr, &inst->Src[0].Register);
+       get_immediate(ctx, &tmp_const, f2d(0.159155));
+       add_src_reg(ctx, instr, &tmp_const);
+
+       /* the same temp is passed as both source operands of FRACv */
+       instr = ir_instr_create_alu(next_exec_cf(ctx), FRACv, ~0);
+       add_dst_reg(ctx, instr, &tmp_dst);
+       add_src_reg(ctx, instr, &tmp_src);
+       add_src_reg(ctx, instr, &tmp_src);
+
+       instr = ir_instr_create_alu(next_exec_cf(ctx), MULADDv, ~0);
+       add_dst_reg(ctx, instr, &tmp_dst);
+       get_immediate(ctx, &tmp_const, f2d(-3.141593));
+       add_src_reg(ctx, instr, &tmp_const);
+       add_src_reg(ctx, instr, &tmp_src);
+       get_immediate(ctx, &tmp_const, f2d(6.283185));
+       add_src_reg(ctx, instr, &tmp_const);
+
+       /* scalar SIN/COS on the pre-processed value; the vector half of
+        * the instruction gets dummy registers
+        */
+       instr = ir_instr_create_alu(next_exec_cf(ctx), ~0, op);
+       add_regs_dummy_vector(instr);
+       add_dst_reg(ctx, instr, &inst->Dst[0].Register);
+       add_src_reg(ctx, instr, &tmp_src);
+}
+
+/*
+ * Main part of compiler/translator:
+ */
+
+/*
+ * Translate a single TGSI instruction into IR instructions appended to
+ * the current EXEC clause.
+ *
+ * TGSI_OPCODE_END is ignored.  Instructions that write an OUTPUT
+ * register may first force a new EXEC clause and emit an ALLOC (see
+ * below).  Simple opcodes map 1:1 onto a vector or scalar ALU opcode;
+ * the more complex ones are delegated to the translate_*() helpers.
+ * Unknown opcodes are logged, the shader is dumped and assert(0) fires.
+ *
+ * On return the internal temporaries are released, since they are only
+ * valid for the duration of one TGSI instruction.
+ */
+static void
+translate_instruction(struct fd_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       unsigned opc = inst->Instruction.Opcode;
+       struct ir_instruction *instr;
+       /* plain automatic variable: it is (re)assigned below before any
+        * use, so there is no state to carry between calls
+        */
+       struct ir_cf *cf;
+
+       if (opc == TGSI_OPCODE_END)
+               return;
+
+       if (inst->Dst[0].Register.File == TGSI_FILE_OUTPUT) {
+               unsigned num = inst->Dst[0].Register.Index;
+               /* seems like we need to ensure that position vs param/pixel
+                * exports don't end up in the same EXEC clause..  easy way
+                * to do this is force a new EXEC clause on first appearance
+                * of a position or param/pixel export.
+                */
+               if ((num == ctx->position) || (num == ctx->psize)) {
+                       if (ctx->num_position > 0) {
+                               ctx->cf = NULL;
+                               ir_cf_create_alloc(ctx->so->ir, SQ_POSITION,
+                                               ctx->num_position - 1);
+                               ctx->num_position = 0;
+                       }
+               } else {
+                       if (ctx->num_param > 0) {
+                               ctx->cf = NULL;
+                               ir_cf_create_alloc(ctx->so->ir, SQ_PARAMETER_PIXEL,
+                                               ctx->num_param - 1);
+                               ctx->num_param = 0;
+                       }
+               }
+       }
+
+       cf = next_exec_cf(ctx);
+
+       /* TODO turn this into a table: */
+       switch (opc) {
+       case TGSI_OPCODE_MOV:
+               instr = ir_instr_create_alu(cf, MAXv, ~0);
+               add_regs_vector_1(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_RCP:
+               instr = ir_instr_create_alu(cf, ~0, RECIP_IEEE);
+               add_regs_scalar_1(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_RSQ:
+               instr = ir_instr_create_alu(cf, ~0, RECIPSQ_IEEE);
+               add_regs_scalar_1(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_MUL:
+               instr = ir_instr_create_alu(cf, MULv, ~0);
+               add_regs_vector_2(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_ADD:
+               instr = ir_instr_create_alu(cf, ADDv, ~0);
+               add_regs_vector_2(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_DP3:
+               instr = ir_instr_create_alu(cf, DOT3v, ~0);
+               add_regs_vector_2(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_DP4:
+               instr = ir_instr_create_alu(cf, DOT4v, ~0);
+               add_regs_vector_2(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_MIN:
+               instr = ir_instr_create_alu(cf, MINv, ~0);
+               add_regs_vector_2(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_MAX:
+               instr = ir_instr_create_alu(cf, MAXv, ~0);
+               add_regs_vector_2(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_SLT:
+       case TGSI_OPCODE_SGE:
+               translate_sge_slt(ctx, inst, opc);
+               break;
+       case TGSI_OPCODE_MAD:
+               instr = ir_instr_create_alu(cf, MULADDv, ~0);
+               add_regs_vector_3(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_LRP:
+               translate_lrp(ctx, inst, opc);
+               break;
+       case TGSI_OPCODE_FRC:
+               instr = ir_instr_create_alu(cf, FRACv, ~0);
+               add_regs_vector_1(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_FLR:
+               instr = ir_instr_create_alu(cf, FLOORv, ~0);
+               add_regs_vector_1(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_EX2:
+               instr = ir_instr_create_alu(cf, ~0, EXP_IEEE);
+               add_regs_scalar_1(ctx, inst, instr);
+               break;
+       case TGSI_OPCODE_POW:
+               translate_pow(ctx, inst);
+               break;
+       case TGSI_OPCODE_ABS:
+               /* MAXv with one of its source operands negated */
+               instr = ir_instr_create_alu(cf, MAXv, ~0);
+               add_regs_vector_1(ctx, inst, instr);
+               instr->regs[1]->flags |= IR_REG_NEGATE; /* src0 */
+               break;
+       case TGSI_OPCODE_COS:
+       case TGSI_OPCODE_SIN:
+               translate_trig(ctx, inst, opc);
+               break;
+       case TGSI_OPCODE_TEX:
+       case TGSI_OPCODE_TXP:
+               translate_tex(ctx, inst, opc);
+               break;
+       case TGSI_OPCODE_CMP:
+               instr = ir_instr_create_alu(cf, CNDGTEv, ~0);
+               add_regs_vector_3(ctx, inst, instr);
+               // TODO this should be src0 if regs were in sane order..
+               instr->regs[2]->flags ^= IR_REG_NEGATE; /* src1 */
+               break;
+       case TGSI_OPCODE_IF:
+               push_predicate(ctx, &inst->Src[0].Register);
+               ctx->so->ir->pred = IR_PRED_EQ;
+               break;
+       case TGSI_OPCODE_ELSE:
+               ctx->so->ir->pred = IR_PRED_NE;
+               /* not sure if this is required in all cases, but blob compiler
+                * won't combine EQ and NE in same CF:
+                */
+               ctx->cf = NULL;
+               break;
+       case TGSI_OPCODE_ENDIF:
+               pop_predicate(ctx);
+               break;
+       case TGSI_OPCODE_F2I:
+               instr = ir_instr_create_alu(cf, TRUNCv, ~0);
+               add_regs_vector_1(ctx, inst, instr);
+               break;
+       default:
+               DBG("unknown TGSI opc: %s", tgsi_get_opcode_name(opc));
+               tgsi_dump(ctx->so->tokens, 0);
+               assert(0);
+               break;
+       }
+
+       /* internal temporaries are only valid for the duration of a single
+        * TGSI instruction:
+        */
+       ctx->num_internal_temps = 0;
+}
+
+/*
+ * Walk the TGSI token stream: every instruction token is handed to
+ * translate_instruction(), all other token types are ignored.  Once the
+ * stream is exhausted the current CF clause is flagged as EXEC_END.
+ *
+ * NOTE(review): ctx->cf is dereferenced unconditionally at the end, so
+ * this assumes a current clause exists at that point -- confirm that
+ * this holds for every shader that reaches here.
+ */
+static void
+compile_instructions(struct fd_compile_context *ctx)
+{
+       for (;;) {
+               if (tgsi_parse_end_of_tokens(&ctx->parser))
+                       break;
+
+               tgsi_parse_token(&ctx->parser);
+
+               /* declarations, immediates, etc. are not handled here */
+               if (ctx->parser.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
+                       continue;
+
+               translate_instruction(ctx,
+                               &ctx->parser.FullToken.FullInstruction);
+       }
+
+       ctx->cf->cf_type = EXEC_END;
+}
+
+/*
+ * Compile shader 'so' into a freshly created IR; any IR previously
+ * attached to 'so' is destroyed first and its fetch/immediate counters
+ * are reset.  Vertex shaders additionally get their vertex fetches
+ * compiled; for fragment shaders the export bookkeeping in 'prog' is
+ * reset.
+ *
+ * Returns 0 on success, or -1 if compile_init() does not report
+ * TGSI_PARSE_OK.
+ */
+int
+fd_compile_shader(struct fd_program_stateobj *prog,
+               struct fd_shader_stateobj *so)
+{
+       struct fd_compile_context ctx;
+
+       ir_shader_destroy(so->ir);
+       so->ir = ir_shader_create();
+       so->num_immediates = 0;
+       so->num_tfetch_instrs = 0;
+       so->num_vfetch_instrs = 0;
+
+       if (compile_init(&ctx, prog, so) != TGSI_PARSE_OK)
+               return -1;
+
+       switch (ctx.type) {
+       case TGSI_PROCESSOR_VERTEX:
+               compile_vtx_fetch(&ctx);
+               break;
+       case TGSI_PROCESSOR_FRAGMENT:
+               /* every linkage slot starts out as 0xff */
+               prog->num_exports = 0;
+               memset(prog->export_linkage, 0xff,
+                               sizeof(prog->export_linkage));
+               break;
+       default:
+               break;
+       }
+
+       compile_instructions(&ctx);
+
+       compile_free(&ctx);
+
+       return 0;
+}
+
diff --git a/src/gallium/drivers/freedreno/freedreno_compiler.h b/src/gallium/drivers/freedreno/freedreno_compiler.h
new file mode 100644 (file)
index 0000000..ce09788
--- /dev/null
@@ -0,0 +1,38 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_COMPILER_H_
+#define FREEDRENO_COMPILER_H_
+
+#include "freedreno_program.h"
+#include "freedreno_util.h"
+
+/* Compile shader 'so' into a new IR, replacing any IR already attached
+ * to it.  When a fragment shader is compiled, the export count and
+ * export linkage table in 'prog' are reset.  Returns 0 on success or -1
+ * if the compiler could not be initialized.
+ */
+int fd_compile_shader(struct fd_program_stateobj *prog,
+               struct fd_shader_stateobj *so);
+
+#endif /* FREEDRENO_COMPILER_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_context.c b/src/gallium/drivers/freedreno/freedreno_context.c
new file mode 100644 (file)
index 0000000..cac10b7
--- /dev/null
@@ -0,0 +1,205 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "freedreno_context.h"
+#include "freedreno_vbo.h"
+#include "freedreno_blend.h"
+#include "freedreno_rasterizer.h"
+#include "freedreno_zsa.h"
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+#include "freedreno_clear.h"
+#include "freedreno_program.h"
+#include "freedreno_texture.h"
+#include "freedreno_gmem.h"
+#include "freedreno_util.h"
+
+/* there are two cases where we currently need to wait for render complete:
+ * 1) pctx->flush() .. since at the moment we have no way for DDX to sync
+ *    the presentation blit with the 3d core
+ * 2) wrap-around for ringbuffer.. possibly we can do something more
+ *    intelligent here.  Right now we need to ensure there is enough room
+ *    at the end of the drawcmds in the cmdstream buffer for all the per-
+ *    tile cmds.  We do this the lamest way possible, by making the ringbuffer
+ *    big, and flushing and resetting back to the beginning if we get too
+ *    close to the end.
+ */
+static void
+fd_context_wait(struct pipe_context *pctx)
+{
+       struct fd_context *fctx = fd_context(pctx);
+       struct fd_ringbuffer *ring = fctx->ring;
+       uint32_t timestamp = fd_ringbuffer_timestamp(ring);
+
+       DBG("wait: %u", timestamp);
+
+       /* wait on the ring's timestamp, then start over at the beginning
+        * of the ringbuffer and re-mark where the draw cmds start:
+        */
+       fd_pipe_wait(fctx->screen->pipe, timestamp);
+       fd_ringbuffer_reset(ring);
+       fd_ringmarker_mark(fctx->draw_start);
+}
+
+/* emit accumulated render cmds, needed for example if render target has
+ * changed, or for flush()
+ *
+ * Does nothing unless ctx->needs_flush is set.  Otherwise the tiles are
+ * rendered, the ringbuffer is waited on and reset if it has filled up
+ * too far, and the per-frame bookkeeping (needs_flush, cleared/restore/
+ * resolve masks, and the dirty flag of the bound render targets) is
+ * cleared.
+ */
+void
+fd_context_render(struct pipe_context *pctx)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct pipe_framebuffer_state *fb = &ctx->framebuffer.base;
+
+       DBG("needs_flush: %d", ctx->needs_flush);
+
+       if (!ctx->needs_flush)
+               return;
+
+       fd_gmem_render_tiles(pctx);
+
+       DBG("%p/%p/%p", ctx->ring->start, ctx->ring->cur, ctx->ring->end);
+
+       /* if size in dwords is more than half the buffer size, then wait and
+        * wrap around:
+        *
+        * NOTE(review): size/8 is "half the buffer" only if ring->size is
+        * in bytes and cur/start are dword pointers -- confirm.
+        */
+       if ((ctx->ring->cur - ctx->ring->start) > ctx->ring->size/8)
+               fd_context_wait(pctx);
+
+       ctx->needs_flush = false;
+       ctx->cleared = ctx->restore = ctx->resolve = 0;
+
+       /* a color buffer is not guaranteed to be bound, so guard it the
+        * same way as the depth/stencil buffer instead of dereferencing
+        * a NULL surface:
+        */
+       if (fb->cbufs[0])
+               fd_resource(fb->cbufs[0]->texture)->dirty = false;
+       if (fb->zsbuf)
+               fd_resource(fb->zsbuf->texture)->dirty = false;
+}
+
+/*
+ * pipe_context::flush hook (installed in fd_context_create()): emit any
+ * pending rendering and then wait on the ringbuffer.
+ *
+ * The 'fence' out-parameter is only logged and 'flags' is unused; the
+ * fence hand-off below is compiled out (and references a 'ctx' variable
+ * that is not declared in this function, so it would need fixing before
+ * being enabled).
+ */
+static void
+fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
+               enum pipe_flush_flags flags)
+{
+       DBG("fence=%p", fence);
+
+#if 0
+       if (fence) {
+               fd_fence_ref(ctx->screen->fence.current,
+                               (struct fd_fence **)fence);
+       }
+#endif
+
+       fd_context_render(pctx);
+       fd_context_wait(pctx);
+}
+
+/*
+ * pipe_context::destroy hook: release everything owned by the context
+ * and free the context itself.  Also used by fd_context_create() to
+ * unwind when a late construction step fails.
+ */
+static void
+fd_context_destroy(struct pipe_context *pctx)
+{
+       struct fd_context *ctx = fd_context(pctx);
+
+       DBG("");
+
+       if (ctx->blitter)
+               util_blitter_destroy(ctx->blitter);
+
+       /* drop our reference on the vertex buffer used for solid ops;
+        * this is a no-op if the buffer was never created:
+        */
+       pipe_resource_reference(&ctx->solid_vertexbuf, NULL);
+
+       /* release the transfer slab set up by util_slab_create() when the
+        * context was created:
+        */
+       util_slab_destroy(&ctx->transfer_pool);
+
+       fd_ringmarker_del(ctx->draw_start);
+       fd_ringmarker_del(ctx->draw_end);
+       fd_ringbuffer_del(ctx->ring);
+
+       fd_prog_fini(pctx);
+
+       FREE(ctx);
+}
+
+/*
+ * Create the immutable buffer holding the constant vertex data used by
+ * the driver's internal "solid" operations (clear, gmem2mem, mem2gmem)
+ * and upload its contents.
+ *
+ * Returns the new resource, or NULL if the buffer could not be
+ * allocated.
+ */
+static struct pipe_resource *
+create_solid_vertexbuf(struct pipe_context *pctx)
+{
+       static const float init_shader_const[] = {
+                       /* for clear/gmem2mem: */
+                       -1.000000, +1.000000, +1.000000, +1.100000,
+                       +1.000000, +1.000000, -1.000000, -1.100000,
+                       +1.000000, +1.100000, -1.100000, +1.000000,
+                       /* for mem2gmem: (vertices) */
+                       -1.000000, +1.000000, +1.000000, +1.000000,
+                       +1.000000, +1.000000, -1.000000, -1.000000,
+                       +1.000000, +1.000000, -1.000000, +1.000000,
+                       /* for mem2gmem: (tex coords) */
+                       +0.000000, +0.000000, +1.000000, +0.000000,
+                       +0.000000, +1.000000, +1.000000, +1.000000,
+       };
+       struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
+                       PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
+
+       /* don't write into a buffer that failed to allocate */
+       if (!prsc)
+               return NULL;
+
+       pipe_buffer_write(pctx, prsc, 0,
+                       sizeof(init_shader_const), init_shader_const);
+       return prsc;
+}
+
+/*
+ * Create a new context on 'pscreen'.  Allocates the context and its
+ * ringbuffer/markers, installs the flush/destroy hooks, runs the init
+ * function of each state module, and creates the blitter and the vertex
+ * buffer used for solid ops before emitting the initial state setup.
+ *
+ * 'priv' is stored in the base context untouched.
+ *
+ * Returns the new pipe_context, or NULL if the context struct, the
+ * blitter or the solid-ops vertex buffer could not be created.
+ */
+struct pipe_context *
+fd_context_create(struct pipe_screen *pscreen, void *priv)
+{
+       struct fd_screen *screen = fd_screen(pscreen);
+       struct fd_context *ctx = CALLOC_STRUCT(fd_context);
+       struct pipe_context *pctx;
+
+       if (!ctx)
+               return NULL;
+
+       DBG("");
+
+       ctx->screen = screen;
+
+       ctx->ring = fd_ringbuffer_new(screen->pipe, 0x100000);
+       ctx->draw_start = fd_ringmarker_new(ctx->ring);
+       ctx->draw_end = fd_ringmarker_new(ctx->ring);
+
+       pctx = &ctx->base;
+       pctx->screen = pscreen;
+       pctx->priv = priv;
+       pctx->flush = fd_context_flush;
+       pctx->destroy = fd_context_destroy;
+
+       util_slab_create(&ctx->transfer_pool, sizeof(struct pipe_transfer),
+                       16, UTIL_SLAB_SINGLETHREADED);
+
+       fd_vbo_init(pctx);
+       fd_blend_init(pctx);
+       fd_rasterizer_init(pctx);
+       fd_zsa_init(pctx);
+       fd_state_init(pctx);
+       fd_resource_context_init(pctx);
+       fd_clear_init(pctx);
+       fd_prog_init(pctx);
+       fd_texture_init(pctx);
+
+       ctx->blitter = util_blitter_create(pctx);
+       if (!ctx->blitter) {
+               fd_context_destroy(pctx);
+               return NULL;
+       }
+
+       /* construct vertex state used for solid ops (clear, and gmem<->mem) */
+       ctx->solid_vertexbuf = create_solid_vertexbuf(pctx);
+       if (!ctx->solid_vertexbuf) {
+               /* unwind the same way as for a blitter failure */
+               fd_context_destroy(pctx);
+               return NULL;
+       }
+
+       fd_state_emit_setup(pctx);
+
+       return pctx;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_context.h b/src/gallium/drivers/freedreno/freedreno_context.h
new file mode 100644 (file)
index 0000000..6fff8f6
--- /dev/null
@@ -0,0 +1,184 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_CONTEXT_H_
+#define FREEDRENO_CONTEXT_H_
+
+#include "draw/draw_context.h"
+#include "pipe/p_context.h"
+#include "util/u_blitter.h"
+#include "util/u_slab.h"
+#include "util/u_string.h"
+
+#include "freedreno_screen.h"
+
+struct fd_blend_stateobj;
+struct fd_rasterizer_stateobj;
+struct fd_zsa_stateobj;
+struct fd_sampler_stateobj;
+struct fd_vertex_stateobj;
+struct fd_shader_stateobj;
+
+/* Bound sampler views and sampler state objects.  struct fd_context
+ * holds two instances of this: 'verttex' and 'fragtex'.
+ */
+struct fd_texture_stateobj {
+       struct pipe_sampler_view *textures[PIPE_MAX_SAMPLERS];
+       unsigned num_textures;
+       struct fd_sampler_stateobj *samplers[PIPE_MAX_SAMPLERS];
+       unsigned num_samplers;
+       /* NOTE(review): presumably a bitmask of samplers[] slots that need
+        * to be re-emitted -- confirm against freedreno_texture.c
+        */
+       unsigned dirty_samplers;
+};
+
+/* A vertex/fragment shader pair plus the export linkage between them.
+ * fd_compile_shader() resets num_exports to 0 and fills export_linkage
+ * with 0xff whenever it compiles a fragment shader.
+ */
+struct fd_program_stateobj {
+       struct fd_shader_stateobj *vp, *fp;
+       /* which of the two shaders is dirty: */
+       enum {
+               FD_SHADER_DIRTY_VP = (1 << 0),
+               FD_SHADER_DIRTY_FP = (1 << 1),
+       } dirty;
+       uint8_t num_exports;
+       /* Indexed by semantic name or TGSI_SEMANTIC_COUNT + semantic index
+        * for TGSI_SEMANTIC_GENERIC.  Special vs exports (position and point-
+        * size) are not included in this
+        */
+       uint8_t export_linkage[63];
+};
+
+/* Constant buffer bindings; struct fd_context keeps one instance per
+ * shader type (constbuf[PIPE_SHADER_TYPES]).
+ */
+struct fd_constbuf_stateobj {
+       struct pipe_constant_buffer cb[PIPE_MAX_CONSTANT_BUFFERS];
+       /* NOTE(review): both masks presumably carry one bit per cb[]
+        * slot -- confirm against the set_constant_buffer implementation
+        */
+       uint32_t enabled_mask;
+       uint32_t dirty_mask;
+};
+
+/* Vertex buffer bindings of a context (struct fd_context::vertexbuf). */
+struct fd_vertexbuf_stateobj {
+       struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+       /* NOTE(review): presumably the number of valid vb[] entries */
+       unsigned count;
+       /* NOTE(review): both masks presumably carry one bit per vb[]
+        * slot -- confirm against the set_vertex_buffers implementation
+        */
+       uint32_t enabled_mask;
+       uint32_t dirty_mask;
+};
+
+/* The gallium framebuffer state ('base') plus driver-private data.
+ *
+ * NOTE(review): bin_w/bin_h and nbins_x/nbins_y look like the GMEM tile
+ * ("bin") size and the number of tiles in each direction, and
+ * pa_su_sc_mode_cntl like a cached register value -- confirm against
+ * freedreno_gmem.c / freedreno_state.c.
+ */
+struct fd_framebuffer_stateobj {
+       struct pipe_framebuffer_state base;
+       uint16_t bin_h, nbins_y;
+       uint16_t bin_w, nbins_x;
+       uint32_t pa_su_sc_mode_cntl;
+};
+
+/* Driver context.  'base' must stay the first member: fd_context()
+ * converts a pipe_context pointer to an fd_context pointer with a plain
+ * cast.
+ */
+struct fd_context {
+       struct pipe_context base;
+
+       struct fd_screen *screen;
+       struct blitter_context *blitter;
+
+       struct util_slab_mempool transfer_pool;
+
+       /* shaders used by clear, and gmem->mem blits: */
+       struct fd_program_stateobj solid_prog; // TODO move to screen?
+
+       /* shaders used by mem->gmem blits: */
+       struct fd_program_stateobj blit_prog; // TODO move to screen?
+
+       /* vertex buff used for clear/gmem->mem vertices, and mem->gmem
+        * vertices and tex coords:
+        */
+       struct pipe_resource *solid_vertexbuf;
+
+       /* do we need to mem2gmem before rendering.  We don't, if for example,
+        * there was a glClear() that invalidated the entire previous buffer
+        * contents.  Keep track of which buffer(s) are cleared, or needs
+        * restore.  Masks of PIPE_CLEAR_*
+        */
+       enum {
+               /* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
+               FD_BUFFER_COLOR   = PIPE_CLEAR_COLOR,
+               FD_BUFFER_DEPTH   = PIPE_CLEAR_DEPTH,
+               FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
+               FD_BUFFER_ALL     = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
+       } cleared, restore, resolve;
+
+       /* set while there is rendering that fd_context_render() still has
+        * to emit; cleared again by fd_context_render():
+        */
+       bool needs_flush;
+
+       struct fd_ringbuffer *ring;
+       struct fd_ringmarker *draw_start, *draw_end;
+
+       /* scissor can't really be changed mid-render.. we probably need
+        * to flush out all pending draws and then start a new tile pass
+        * w/ new stencil state..
+        */
+       struct pipe_scissor_state scissor;
+
+       /* which state objects need to be re-emit'd: */
+       enum {
+               FD_DIRTY_BLEND       = (1 << 0),
+               FD_DIRTY_RASTERIZER  = (1 << 1),
+               FD_DIRTY_ZSA         = (1 << 2),
+               FD_DIRTY_FRAGTEX     = (1 << 3),
+               FD_DIRTY_VERTTEX     = (1 << 4),
+               FD_DIRTY_PROG        = (1 << 5),
+               FD_DIRTY_VTX         = (1 << 6),
+               FD_DIRTY_BLEND_COLOR = (1 << 7),
+               FD_DIRTY_STENCIL_REF = (1 << 8),
+               FD_DIRTY_SAMPLE_MASK = (1 << 9),
+               FD_DIRTY_FRAMEBUFFER = (1 << 10),
+               /* each flag needs its own bit: stipple takes bit 11 so it
+                * no longer aliases FD_DIRTY_VIEWPORT
+                */
+               FD_DIRTY_STIPPLE     = (1 << 11),
+               FD_DIRTY_VIEWPORT    = (1 << 12),
+               FD_DIRTY_CONSTBUF    = (1 << 13),
+               FD_DIRTY_VERTEXBUF   = (1 << 14),
+               FD_DIRTY_INDEXBUF    = (1 << 15),
+               FD_DIRTY_SCISSOR     = (1 << 16),
+       } dirty;
+
+       struct fd_blend_stateobj *blend;
+       struct fd_rasterizer_stateobj *rasterizer;
+       struct fd_zsa_stateobj *zsa;
+
+       struct fd_texture_stateobj verttex, fragtex;
+
+       struct fd_program_stateobj prog;
+
+       struct fd_vertex_stateobj *vtx;
+
+       struct pipe_blend_color blend_color;
+       struct pipe_stencil_ref stencil_ref;
+       unsigned sample_mask;
+       struct fd_framebuffer_stateobj framebuffer;
+       struct pipe_poly_stipple stipple;
+       struct pipe_viewport_state viewport;
+       struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
+       struct fd_vertexbuf_stateobj vertexbuf;
+       struct pipe_index_buffer indexbuf;
+};
+
+/* Downcast from the gallium base context to the driver context; valid
+ * because 'base' is the first member of struct fd_context.
+ */
+static INLINE struct fd_context *
+fd_context(struct pipe_context *pctx)
+{
+       struct fd_context *ctx = (struct fd_context *)pctx;
+
+       return ctx;
+}
+
+/* Create a new context on 'pscreen'; 'priv' is stored in the base
+ * context.  Returns NULL on failure.
+ */
+struct pipe_context * fd_context_create(struct pipe_screen *pscreen, void *priv);
+
+/* Emit the accumulated render cmds; does nothing unless the context's
+ * needs_flush flag is set.
+ */
+void fd_context_render(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_CONTEXT_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.c b/src/gallium/drivers/freedreno/freedreno_fence.c
new file mode 100644 (file)
index 0000000..e637465
--- /dev/null
@@ -0,0 +1,52 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "freedreno_fence.h"
+#include "freedreno_util.h"
+
+/* Placeholder: fence waiting is not implemented yet.  Only logs a TODO
+ * message and always returns false; 'fence' is not used.
+ */
+boolean
+fd_fence_wait(struct fd_fence *fence)
+{
+       DBG("TODO: ");
+       return false;
+}
+
+/* Placeholder: fence status query is not implemented yet.  Only logs a
+ * TODO message and always returns false; 'fence' is not used.
+ */
+boolean
+fd_fence_signalled(struct fd_fence *fence)
+{
+       DBG("TODO: ");
+       return false;
+}
+
+/* Called by fd_fence_ref() when a fence's reference count drops to
+ * zero.  Currently an empty stub: nothing is released.
+ */
+void
+fd_fence_del(struct fd_fence *fence)
+{
+
+}
+
+
diff --git a/src/gallium/drivers/freedreno/freedreno_fence.h b/src/gallium/drivers/freedreno/freedreno_fence.h
new file mode 100644 (file)
index 0000000..7e8bee3
--- /dev/null
@@ -0,0 +1,65 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_FENCE_H_
+#define FREEDRENO_FENCE_H_
+
+#include "util/u_inlines.h"
+#include "util/u_double_list.h"
+
+
+/* Reference-counted fence object; see fd_fence_ref(). */
+struct fd_fence {
+       /* reference count, adjusted by fd_fence_ref() */
+       int ref;
+};
+
+/* NOTE: in freedreno_fence.c, fd_fence_wait() and fd_fence_signalled()
+ * are currently stubs that always return false, and fd_fence_del() is
+ * an empty stub.
+ */
+boolean fd_fence_wait(struct fd_fence *fence);
+boolean fd_fence_signalled(struct fd_fence *fence);
+void fd_fence_del(struct fd_fence *fence);
+
+/* Make *ref point at 'fence' (which may be NULL), adjusting reference
+ * counts: the new fence gains a reference, the fence previously held in
+ * *ref loses one and is passed to fd_fence_del() when its count reaches
+ * zero.
+ */
+static INLINE void
+fd_fence_ref(struct fd_fence *fence, struct fd_fence **ref)
+{
+       struct fd_fence *old;
+
+       /* take the new reference before dropping the old one, so that
+        * re-assigning the same fence never sees its count hit zero
+        */
+       if (fence)
+               fence->ref++;
+
+       old = *ref;
+       if (old) {
+               old->ref--;
+               if (old->ref == 0)
+                       fd_fence_del(old);
+       }
+
+       *ref = fence;
+}
+
+/* Cast a gallium fence handle to the driver's fence type. */
+static INLINE struct fd_fence *
+fd_fence(struct pipe_fence_handle *fence)
+{
+       struct fd_fence *fd = (struct fd_fence *)fence;
+
+       return fd;
+}
+
+
+#endif /* FREEDRENO_FENCE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.c b/src/gallium/drivers/freedreno/freedreno_gmem.c
new file mode 100644 (file)
index 0000000..dae60c6
--- /dev/null
@@ -0,0 +1,491 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+
+#include "freedreno_gmem.h"
+#include "freedreno_context.h"
+#include "freedreno_state.h"
+#include "freedreno_program.h"
+#include "freedreno_resource.h"
+#include "freedreno_zsa.h"
+#include "freedreno_util.h"
+
+/*
+ * GMEM is the small (ie. 256KiB for a200, 512KiB for a220, etc) tile buffer
+ * inside the GPU.  All rendering happens to GMEM.  Larger render targets
+ * are split into tiles that are small enough for the color (and depth and/or
+ * stencil, if enabled) buffers to fit within GMEM.  Before rendering a tile,
+ * if there was not a clear invalidating the previous tile contents, we need
+ * to restore the previous tile's contents (system mem -> GMEM), and after all
+ * the draw calls, before moving to the next tile, we need to save the tile
+ * contents (GMEM -> system mem).
+ *
+ * The code in this file handles dealing with GMEM and tiling.
+ *
+ * The structure of the ringbuffer ends up being:
+ *
+ *     +--<---<-- IB ---<---+---<---+---<---<---<--+
+ *     |                    |       |              |
+ *     v                    ^       ^              ^
+ *   ------------------------------------------------------
+ *     | clear/draw cmds | Tile0 | Tile1 | .... | TileN |
+ *   ------------------------------------------------------
+ *                       ^
+ *                       |
+ *                       address submitted in issueibcmds
+ *
+ * Where the per-tile section handles scissor setup, mem2gmem restore (if
+ * needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem
+ * resolve.
+ */
+
+/* transfer from gmem to system memory (ie. normal RAM) */
+
+/*
+ * Emit the commands that copy a single surface out of GMEM into the bo
+ * backing psurf->texture.
+ *
+ *   ring  - ringbuffer to emit into
+ *   swap  - value for the COLOR_SWAP / COPY_DEST_INFO_SWAP fields
+ *   base  - location of the surface in GMEM (programmed as base / 1024)
+ *   psurf - surface to resolve
+ *
+ * The copy itself is triggered by drawing a 3-index RECTLIST.
+ */
+static void
+emit_gmem2mem_surf(struct fd_ringbuffer *ring, uint32_t swap, uint32_t base,
+               struct pipe_surface *psurf)
+{
+       struct fd_resource *dst = fd_resource(psurf->texture);
+       uint32_t color_info, dest_info, draw_init;
+
+       /* source: where/how the surface lives in GMEM */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COLOR_INFO));
+       color_info = RB_COLOR_INFO_COLOR_SWAP(swap) |
+                       RB_COLOR_INFO_COLOR_BASE(base / 1024) |
+                       RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(psurf->format));
+       OUT_RING(ring, color_info);
+
+       /* destination: RB_COPY_CONTROL and the three registers following it */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+       OUT_RING(ring, CP_REG(REG_RB_COPY_CONTROL));
+       OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */
+       OUT_RELOC(ring, dst->bo, 0, 0);         /* RB_COPY_DEST_BASE */
+       OUT_RING(ring, dst->pitch >> 5);        /* RB_COPY_DEST_PITCH */
+       dest_info = RB_COPY_DEST_INFO_FORMAT(fd_pipe2color(psurf->format)) |
+                       RB_COPY_DEST_INFO_LINEAR |
+                       RB_COPY_DEST_INFO_SWAP(swap) |
+                       RB_COPY_DEST_INFO_WRITE_RED |
+                       RB_COPY_DEST_INFO_WRITE_GREEN |
+                       RB_COPY_DEST_INFO_WRITE_BLUE |
+                       RB_COPY_DEST_INFO_WRITE_ALPHA;
+       OUT_RING(ring, dest_info);              /* RB_COPY_DEST_INFO */
+
+       OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+       OUT_RING(ring, 0x0000000);
+
+       /* kick the copy: */
+       OUT_PKT3(ring, CP_DRAW_INDX, 3);
+       OUT_RING(ring, 0x00000000);
+       draw_init = DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+                       INDEX_SIZE_IGN, IGNORE_VISIBILITY);
+       OUT_RING(ring, draw_init);
+       OUT_RING(ring, 3);                      /* NumIndices */
+}
+
+/*
+ * Emit the per-tile resolve (GMEM -> system memory) for the buffers flagged
+ * in ctx->resolve.
+ *
+ *   ctx          - context providing the framebuffer, the solid vertex
+ *                  buffer and the solid program
+ *   ring         - ringbuffer to emit into
+ *   xoff, yoff   - position of the tile within the framebuffer, programmed
+ *                  into RB_COPY_DEST_OFFSET
+ *   bin_w, bin_h - tile size; bin_w * bin_h is passed as the GMEM base of
+ *                  the depth/stencil surface
+ *
+ * The RB is put into EDRAM_COPY mode for the copies and switched back to
+ * COLOR_DEPTH before returning.
+ */
+static void
+emit_gmem2mem(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               uint32_t xoff, uint32_t yoff, uint32_t bin_w, uint32_t bin_h)
+{
+       struct fd_framebuffer_stateobj *fb = &ctx->framebuffer;
+       struct pipe_framebuffer_state *pfb = &fb->base;
+
+       /* the first 48 bytes of the solid vertex buffer are the only input: */
+       fd_emit_vertex_bufs(ring, 0x9c, (struct fd_vertex_buf[]) {
+                       { .prsc = ctx->solid_vertexbuf, .size = 48 },
+               }, 1);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET));
+       OUT_RING(ring, 0);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL));
+       OUT_RING(ring, 0x0000028f);
+
+       fd_program_emit(ring, &ctx->solid_prog);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK));
+       OUT_RING(ring, 0x0000ffff);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL));
+       OUT_RING(ring, RB_DEPTHCONTROL_EARLY_Z_ENABLE);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_SU_SC_MODE_CNTL));
+       OUT_RING(ring, PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |  /* PA_SU_SC_MODE_CNTL */
+                       PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(DRAW_TRIANGLES) |
+                       PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(DRAW_TRIANGLES));
+
+       /* window scissor covers the whole framebuffer, not just the tile: */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+       OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL));
+       OUT_RING(ring, xy2d(0, 0));                       /* PA_SC_WINDOW_SCISSOR_TL */
+       OUT_RING(ring, xy2d(pfb->width, pfb->height));    /* PA_SC_WINDOW_SCISSOR_BR */
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_CL_VTE_CNTL));
+       OUT_RING(ring, PA_CL_VTE_CNTL_VTX_W0_FMT |
+                       PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
+                       PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
+                       PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
+                       PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL));
+       OUT_RING(ring, 0x00000000);
+
+       /* switch the RB into copy mode for the resolve draws below: */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_MODECONTROL));
+       OUT_RING(ring, RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
+
+       /* where in the destination surface this tile lands: */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COPY_DEST_OFFSET));
+       OUT_RING(ring, RB_COPY_DEST_OFFSET_X(xoff) | RB_COPY_DEST_OFFSET_Y(yoff));
+
+       /* NOTE(review): pfb->zsbuf is dereferenced by the helper without a
+        * NULL check; presumably ctx->resolve only has depth/stencil bits set
+        * when a zsbuf is bound -- confirm against the code that sets it.
+        */
+       if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
+               emit_gmem2mem_surf(ring, 0, bin_w * bin_h, pfb->zsbuf);
+
+       if (ctx->resolve & FD_BUFFER_COLOR)
+               emit_gmem2mem_surf(ring, 1, 0, pfb->cbufs[0]);
+
+       /* back to normal rendering mode: */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_MODECONTROL));
+       OUT_RING(ring, RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
+}
+
+/* transfer from system memory to gmem */
+
+/*
+ * Emit the commands that load a single surface from system memory into
+ * GMEM: the surface's bo is bound as a point-sampled texture and a
+ * 3-index RECTLIST is drawn.
+ *
+ *   ring  - ringbuffer to emit into
+ *   swap  - value for the RB_COLOR_INFO COLOR_SWAP field
+ *   base  - location of the surface in GMEM (programmed as base / 1024)
+ *   psurf - surface to restore
+ */
+static void
+emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t swap, uint32_t base,
+               struct pipe_surface *psurf)
+{
+       struct fd_resource *src = fd_resource(psurf->texture);
+       uint32_t color_info, swizzle, tex0, tex2, tex3, draw_init;
+
+       /* destination in GMEM: */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COLOR_INFO));
+       color_info = RB_COLOR_INFO_COLOR_SWAP(swap) |
+                       RB_COLOR_INFO_COLOR_BASE(base / 1024) |
+                       RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(psurf->format));
+       OUT_RING(ring, color_info);
+
+       /* identity swizzle for the surface format: */
+       swizzle = fd_tex_swiz(psurf->format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
+                       PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA);
+
+       /* emit fb as a texture: */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 7);
+       OUT_RING(ring, 0x00010000);
+       tex0 = SQ_TEX0_CLAMP_X(SQ_TEX_WRAP) |
+                       SQ_TEX0_CLAMP_Y(SQ_TEX_WRAP) |
+                       SQ_TEX0_CLAMP_Z(SQ_TEX_WRAP) |
+                       SQ_TEX0_PITCH(src->pitch);
+       OUT_RING(ring, tex0);
+       OUT_RELOC(ring, src->bo, 0,
+                       fd_pipe2surface(psurf->format) | 0x800);
+       tex2 = SQ_TEX2_WIDTH(psurf->width) |
+                       SQ_TEX2_HEIGHT(psurf->height);
+       OUT_RING(ring, tex2);
+       tex3 = 0x01000000 | // XXX
+                       swizzle |
+                       SQ_TEX3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
+                       SQ_TEX3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT);
+       OUT_RING(ring, tex3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000200);
+
+       /* draw the rectangle that samples the texture into GMEM: */
+       OUT_PKT3(ring, CP_DRAW_INDX, 3);
+       OUT_RING(ring, 0x00000000);
+       draw_init = DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+                       INDEX_SIZE_IGN, IGNORE_VISIBILITY);
+       OUT_RING(ring, draw_init);
+       OUT_RING(ring, 3);                      /* NumIndices */
+}
+
+/*
+ * Emit the per-tile restore (system memory -> GMEM) for the buffers flagged
+ * in ctx->restore.
+ *
+ *   ctx          - context providing the framebuffer, the solid vertex
+ *                  buffer and the blit program
+ *   ring         - ringbuffer to emit into
+ *   xoff, yoff   - position of the tile within the framebuffer
+ *   bin_w, bin_h - size of the tile
+ *
+ * The tile is restored by drawing a rectangle that samples the render
+ * target as a texture; the texture coordinates selecting the tile's region
+ * are written into the vertex buffer by the GPU (CP_MEM_WRITE) before the
+ * draw.
+ */
+static void
+emit_mem2gmem(struct fd_context *ctx, struct fd_ringbuffer *ring,
+               uint32_t xoff, uint32_t yoff, uint32_t bin_w, uint32_t bin_h)
+{
+       struct fd_framebuffer_stateobj *fb = &ctx->framebuffer;
+       struct pipe_framebuffer_state *pfb = &fb->base;
+       float x0, y0, x1, y1;
+
+       /* positions at offset 0x30, texture coordinates at offset 0x60: */
+       fd_emit_vertex_bufs(ring, 0x9c, (struct fd_vertex_buf[]) {
+                       { .prsc = ctx->solid_vertexbuf, .size = 48, .offset = 0x30 },
+                       { .prsc = ctx->solid_vertexbuf, .size = 32, .offset = 0x60 },
+               }, 2);
+
+       /* write texture coordinates to vertexbuf (tile rectangle normalized
+        * to the framebuffer size, 4 x (s,t) = 32 bytes at offset 0x60):
+        */
+       x0 = ((float)xoff) / ((float)pfb->width);
+       x1 = ((float)xoff + bin_w) / ((float)pfb->width);
+       y0 = ((float)yoff) / ((float)pfb->height);
+       y1 = ((float)yoff + bin_h) / ((float)pfb->height);
+       OUT_PKT3(ring, CP_MEM_WRITE, 9);
+       OUT_RELOC(ring, fd_resource(ctx->solid_vertexbuf)->bo, 0x60, 0);
+       OUT_RING(ring, f2d(x0));
+       OUT_RING(ring, f2d(y0));
+       OUT_RING(ring, f2d(x1));
+       OUT_RING(ring, f2d(y0));
+       OUT_RING(ring, f2d(x0));
+       OUT_RING(ring, f2d(y1));
+       OUT_RING(ring, f2d(x1));
+       OUT_RING(ring, f2d(y1));
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET));
+       OUT_RING(ring, 0);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL));
+       OUT_RING(ring, 0x0000003b);
+
+       fd_program_emit(ring, &ctx->blit_prog);
+
+       OUT_PKT0(ring, REG_TC_CNTL_STATUS, 1);
+       OUT_RING(ring, TC_CNTL_STATUS_L2_INVALIDATE);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL));
+       OUT_RING(ring, RB_DEPTHCONTROL_EARLY_Z_ENABLE);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_SU_SC_MODE_CNTL));
+       OUT_RING(ring, PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
+                       PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(DRAW_TRIANGLES) |
+                       PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(DRAW_TRIANGLES));
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK));
+       OUT_RING(ring, 0x0000ffff);
+
+       /* blending disabled for the restore: */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COLORCONTROL));
+       OUT_RING(ring, RB_COLORCONTROL_ALPHA_FUNC(PIPE_FUNC_ALWAYS) |
+                       RB_COLORCONTROL_BLEND_DISABLE |
+                       RB_COLORCONTROL_ROP_CODE(12) |
+                       RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
+                       RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_BLEND_CONTROL));
+       OUT_RING(ring, RB_BLENDCONTROL_COLOR_SRCBLEND(RB_BLEND_ONE) |
+                       RB_BLENDCONTROL_COLOR_COMB_FCN(COMB_DST_PLUS_SRC) |
+                       RB_BLENDCONTROL_COLOR_DESTBLEND(RB_BLEND_ZERO) |
+                       RB_BLENDCONTROL_ALPHA_SRCBLEND(RB_BLEND_ONE) |
+                       RB_BLENDCONTROL_ALPHA_COMB_FCN(COMB_DST_PLUS_SRC) |
+                       RB_BLENDCONTROL_ALPHA_DESTBLEND(RB_BLEND_ZERO));
+
+       /* window scissor is the tile itself, with the window offset disabled: */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+       OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL));
+       OUT_RING(ring, PA_SC_WINDOW_OFFSET_DISABLE |
+                       xy2d(0,0));                     /* PA_SC_WINDOW_SCISSOR_TL */
+       OUT_RING(ring, xy2d(bin_w, bin_h));     /* PA_SC_WINDOW_SCISSOR_BR */
+
+       /* viewport covering the tile (negative y scale): */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+       OUT_RING(ring, CP_REG(REG_PA_CL_VPORT_XSCALE));
+       OUT_RING(ring, f2d((float)bin_w/2.0));  /* PA_CL_VPORT_XSCALE */
+       OUT_RING(ring, f2d((float)bin_w/2.0));  /* PA_CL_VPORT_XOFFSET */
+       OUT_RING(ring, f2d(-(float)bin_h/2.0)); /* PA_CL_VPORT_YSCALE */
+       OUT_RING(ring, f2d((float)bin_h/2.0));  /* PA_CL_VPORT_YOFFSET */
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_CL_VTE_CNTL));
+       OUT_RING(ring, PA_CL_VTE_CNTL_VTX_XY_FMT |
+                       PA_CL_VTE_CNTL_VTX_Z_FMT |       // XXX check this???
+                       PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
+                       PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
+                       PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
+                       PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL));
+       OUT_RING(ring, 0x00000000);
+
+       if (ctx->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
+               emit_mem2gmem_surf(ring, 0, bin_w * bin_h, pfb->zsbuf);
+
+       /* which buffers get restored is decided by ctx->restore; ctx->resolve
+        * only describes what is written back after rendering:
+        */
+       if (ctx->restore & FD_BUFFER_COLOR)
+               emit_mem2gmem_surf(ring, 1, 0, pfb->cbufs[0]);
+
+       /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
+}
+
+/*
+ * Emit the per-tile command stream for everything queued since the last
+ * call and flush it.
+ *
+ * For each bin of the grid stored in ctx->framebuffer this emits:
+ * framebuffer state, the screen scissor, an optional mem2gmem restore
+ * (when ctx->restore is non-zero), the window offset, an IB to the
+ * draw/clear cmds between ctx->draw_start and ctx->draw_end, and a gmem2mem
+ * resolve.  Afterwards the render targets get the new timestamp and the
+ * state clobbered by the per-tile cmds is flagged dirty.
+ *
+ * NOTE(review): pfb->cbufs[0] is dereferenced unconditionally, so a color
+ * buffer is presumably always bound when this is called -- confirm.
+ */
+void
+fd_gmem_render_tiles(struct pipe_context *pctx)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd_framebuffer_stateobj *fb = &ctx->framebuffer;
+       struct pipe_framebuffer_state *pfb = &fb->base;
+       struct fd_ringbuffer *ring;
+       uint32_t i, yoff = 0;
+       uint32_t timestamp;
+       ring = ctx->ring;
+
+       DBG("rendering %dx%d tiles (%s/%s)", fb->nbins_x, fb->nbins_y,
+                       util_format_name(pfb->cbufs[0]->format),
+                       pfb->zsbuf ? util_format_name(pfb->zsbuf->format) : "none");
+
+       /* mark the end of the clear/draw cmds before emitting per-tile cmds: */
+       fd_ringmarker_mark(ctx->draw_end);
+
+       /* walk the bins row by row; xoff/yoff track the top-left corner of
+        * the current bin within the framebuffer:
+        */
+       for (i = 0; i < fb->nbins_y; i++) {
+               uint32_t j, xoff = 0;
+               uint32_t bin_h = fb->bin_h;
+
+               /* clip bin height: */
+               bin_h = min(bin_h, pfb->height - yoff);
+
+               for (j = 0; j < fb->nbins_x; j++) {
+                       uint32_t bin_w = fb->bin_w;
+
+                       /* clip bin width: */
+                       bin_w = min(bin_w, pfb->width - xoff);
+
+                       DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d",
+                                       bin_h, yoff, bin_w, xoff);
+
+                       fd_emit_framebuffer_state(ring, &ctx->framebuffer);
+
+                       /* setup screen scissor for current tile (same for mem2gmem): */
+                       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+                       OUT_RING(ring, CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL));
+                       OUT_RING(ring, xy2d(0,0));           /* PA_SC_SCREEN_SCISSOR_TL */
+                       OUT_RING(ring, xy2d(bin_w, bin_h));  /* PA_SC_SCREEN_SCISSOR_BR */
+
+                       if (ctx->restore)
+                               emit_mem2gmem(ctx, ring, xoff, yoff, bin_w, bin_h);
+
+                       /* setup window scissor and offset for current tile (different
+                        * from mem2gmem):
+                        */
+                       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+                       OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_OFFSET));
+                       OUT_RING(ring, PA_SC_WINDOW_OFFSET_X(-xoff) |
+                                       PA_SC_WINDOW_OFFSET_Y(-yoff));/* PA_SC_WINDOW_OFFSET */
+
+                       /* emit IB to drawcmds: */
+                       OUT_IB  (ring, ctx->draw_start, ctx->draw_end);
+
+                       /* window offset back to zero before the resolve: */
+                       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+                       OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_OFFSET));
+                       OUT_RING(ring, 0x00000000);          /* PA_SC_WINDOW_OFFSET */
+
+                       /* emit gmem2mem to transfer tile back to system memory: */
+                       emit_gmem2mem(ctx, ring, xoff, yoff, bin_w, bin_h);
+
+                       xoff += bin_w;
+               }
+
+               yoff += bin_h;
+       }
+
+       /* GPU executes starting from tile cmds, which IB back to draw cmds: */
+       fd_ringmarker_flush(ctx->draw_end);
+
+       /* mark start for next draw cmds: */
+       fd_ringmarker_mark(ctx->draw_start);
+
+       /* update timestamps on render targets: */
+       fd_pipe_timestamp(ctx->screen->pipe, &timestamp);
+       fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp;
+       if (pfb->zsbuf)
+               fd_resource(pfb->zsbuf->texture)->timestamp = timestamp;
+
+       /* Note that because the per-tile setup and mem2gmem/gmem2mem are emitted
+        * after the draw/clear calls, but executed before, we need to preemptively
+        * flag some state as dirty before the first draw/clear call.
+        *
+        * TODO maybe we need to mark all state as dirty to not worry about state
+        * being clobbered by other contexts?
+        */
+       ctx->dirty |= FD_DIRTY_ZSA |
+                       FD_DIRTY_RASTERIZER |
+                       FD_DIRTY_FRAMEBUFFER |
+                       FD_DIRTY_SAMPLE_MASK |
+                       FD_DIRTY_VIEWPORT |
+                       FD_DIRTY_CONSTBUF |
+                       FD_DIRTY_PROG |
+                       FD_DIRTY_SCISSOR |
+                       /* probably only needed if we need to mem2gmem on the next
+                        * draw..  but not sure if there is a good way to know?
+                        */
+                       FD_DIRTY_VERTTEX |
+                       FD_DIRTY_FRAGTEX |
+                       FD_DIRTY_BLEND;
+}
+
+/*
+ * Choose the tile ("bin") grid for the current framebuffer and store it in
+ * ctx->framebuffer (nbins_x, nbins_y, bin_w, bin_h).
+ *
+ * Bin dimensions are multiples of 32.  The bin width is reduced until it
+ * fits max_width, then the bin height is reduced until one bin of the color
+ * buffer fits in the GMEM budget.  PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE
+ * is set in fb->pa_su_sc_mode_cntl only when more than one bin is needed.
+ */
+void
+fd_gmem_calculate_tiles(struct pipe_context *pctx)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd_framebuffer_stateobj *fb = &ctx->framebuffer;
+       struct pipe_framebuffer_state *pfb = &fb->base;
+       uint32_t nbins_x = 1, nbins_y = 1;
+       uint32_t bin_w, bin_h;
+       uint32_t cpp = util_format_get_blocksize(pfb->cbufs[0]->format);
+       uint32_t gmem_size = ctx->screen->gmemsize_bytes;
+       uint32_t max_width = 992;
+
+       /* TODO we probably could optimize this a bit if we know that
+        * Z or stencil is not enabled for any of the draw calls, ie. only
+        * apply the limits below when
+        * fd_stencil_enabled(ctx->zsa) || fd_depth_enabled(ctx->zsa).
+        * For now always assume the worst case:
+        */
+       gmem_size /= 2;
+       max_width = 256;
+
+       bin_w = ALIGN(pfb->width, 32);
+       bin_h = ALIGN(pfb->height, 32);
+
+       /* first, find a bin width that satisfies the maximum width
+        * restrictions.  The division must round up: with a truncating
+        * division nbins_x * bin_w can end up smaller than the framebuffer
+        * width (eg. 1121 / 5 -> 224, 5 * 224 = 1120), leaving the last
+        * column of pixels outside of every bin.
+        */
+       while (bin_w > max_width) {
+               nbins_x++;
+               bin_w = ALIGN((pfb->width + nbins_x - 1) / nbins_x, 32);
+       }
+
+       /* then find a bin height that satisfies the memory constraints
+        * (rounding up for the same reason as above):
+        */
+       while ((bin_w * bin_h * cpp) > gmem_size) {
+               nbins_y++;
+               bin_h = ALIGN((pfb->height + nbins_y - 1) / nbins_y, 32);
+       }
+
+       if ((nbins_x > 1) || (nbins_y > 1)) {
+               fb->pa_su_sc_mode_cntl |= PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE;
+       } else {
+               fb->pa_su_sc_mode_cntl &= ~PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE;
+       }
+
+       DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h);
+
+       /* if we use hw binning, tile sizes (in multiple of 32) need to
+        * fit in 5 bits.. for now don't care because we aren't using
+        * that:
+        *     assert(!(bin_h/32 & ~0x1f));
+        *     assert(!(bin_w/32 & ~0x1f));
+        */
+
+       fb->nbins_x = nbins_x;
+       fb->nbins_y = nbins_y;
+       fb->bin_w = bin_w;
+       fb->bin_h = bin_h;
+
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_gmem.h b/src/gallium/drivers/freedreno/freedreno_gmem.h
new file mode 100644 (file)
index 0000000..7b46f6b
--- /dev/null
@@ -0,0 +1,37 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_GMEM_H_
+#define FREEDRENO_GMEM_H_
+
+#include "pipe/p_context.h"
+
+void fd_gmem_render_tiles(struct pipe_context *pctx);
+void fd_gmem_calculate_tiles(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_GMEM_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_pm4.h b/src/gallium/drivers/freedreno/freedreno_pm4.h
new file mode 100644 (file)
index 0000000..a536f9c
--- /dev/null
@@ -0,0 +1,86 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_PM4_H_
+#define FREEDRENO_PM4_H_
+
+/* Packet type, stored in the top two bits (31:30) of a packet header dword.
+ * The operands are unsigned: shifting a signed int so that the result does
+ * not fit (2 << 30, 3 << 30) is undefined behaviour in C.
+ */
+#define CP_TYPE0_PKT              (0u << 30)
+#define CP_TYPE1_PKT              (1u << 30)
+#define CP_TYPE2_PKT              (2u << 30)
+#define CP_TYPE3_PKT              (3u << 30)
+
+
+/* Command opcodes.  Several of these (eg. CP_SET_CONSTANT, CP_DRAW_INDX,
+ * CP_MEM_WRITE, CP_WAIT_FOR_IDLE) are the opcode argument of OUT_PKT3() in
+ * the rest of the driver.
+ */
+#define CP_ME_INIT                0x48
+#define CP_NOP                    0x10
+#define CP_INDIRECT_BUFFER        0x3f
+#define CP_INDIRECT_BUFFER_PFD    0x37
+#define CP_WAIT_FOR_IDLE          0x26
+#define CP_WAIT_REG_MEM           0x3c
+#define CP_WAIT_REG_EQ            0x52
+/* NOTE(review): "WAT" looks like a typo for CP_WAIT_REG_GTE; the name is
+ * left alone here since renaming it would change the header's interface.
+ */
+#define CP_WAT_REG_GTE            0x53
+#define CP_WAIT_UNTIL_READ        0x5c
+#define CP_WAIT_IB_PFD_COMPLETE   0x5d
+#define CP_REG_RMW                0x21
+#define CP_REG_TO_MEM             0x3e
+#define CP_MEM_WRITE              0x3d
+#define CP_MEM_WRITE_CNTR         0x4f
+#define CP_COND_EXEC              0x44
+#define CP_COND_WRITE             0x45
+#define CP_EVENT_WRITE            0x46
+#define CP_EVENT_WRITE_SHD        0x58
+#define CP_EVENT_WRITE_CFL        0x59
+#define CP_EVENT_WRITE_ZPD        0x5b
+#define CP_DRAW_INDX              0x22
+#define CP_DRAW_INDX_2            0x36
+#define CP_DRAW_INDX_BIN          0x34
+#define CP_DRAW_INDX_2_BIN        0x35
+#define CP_VIZ_QUERY              0x23
+#define CP_SET_STATE              0x25
+#define CP_SET_CONSTANT           0x2d
+#define CP_IM_LOAD                0x27
+#define CP_IM_LOAD_IMMEDIATE      0x2b
+#define CP_LOAD_CONSTANT_CONTEXT  0x2e
+#define CP_INVALIDATE_STATE       0x3b
+#define CP_SET_SHADER_BASES       0x4a
+#define CP_SET_BIN_MASK           0x50
+#define CP_SET_BIN_SELECT         0x51
+#define CP_CONTEXT_UPDATE         0x5e
+#define CP_INTERRUPT              0x40
+#define CP_IM_STORE               0x2c
+/* the next two intentionally share opcode 0x4b; which one applies depends
+ * on the GPU generation noted on each line:
+ */
+#define CP_SET_BIN_BASE_OFFSET    0x4b      /* for a20x */
+#define CP_SET_DRAW_INIT_FLAGS    0x4b      /* for a22x */
+#define CP_SET_PROTECTED_MODE     0x5f
+#define CP_LOAD_STATE             0x30
+#define CP_COND_INDIRECT_BUFFER_PFE 0x3a
+#define CP_COND_INDIRECT_BUFFER_PFD 0x32
+
+
+/* Build the dword that follows a CP_SET_CONSTANT packet header and selects
+ * the register to write: the register number relative to 0x2000 in the low
+ * bits, combined with 0x4 shifted to bit 16.
+ * NOTE(review): the 0x4 presumably selects the register-constant block, as
+ * opposed to other constant types -- confirm against hardware docs.
+ */
+#define CP_REG(reg) ((0x4 << 16) | ((reg) - 0x2000))
+
+
+#endif /* FREEDRENO_PM4_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_program.c b/src/gallium/drivers/freedreno/freedreno_program.c
new file mode 100644 (file)
index 0000000..b7fec6d
--- /dev/null
@@ -0,0 +1,506 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "freedreno_program.h"
+#include "freedreno_compiler.h"
+#include "freedreno_vbo.h"
+#include "freedreno_texture.h"
+#include "freedreno_util.h"
+
+/* Allocate a zero-initialized shader state object of the given type.
+ * Returns NULL if the allocation fails.
+ */
+static struct fd_shader_stateobj *
+create_shader(enum shader_t type)
+{
+       struct fd_shader_stateobj *shader = CALLOC_STRUCT(fd_shader_stateobj);
+
+       if (shader)
+               shader->type = type;
+
+       return shader;
+}
+
+/* Destroy a shader state object together with everything it owns: the IR,
+ * the duplicated TGSI tokens and the assembled binary.
+ */
+static void
+delete_shader(struct fd_shader_stateobj *so)
+{
+       ir_shader_destroy(so->ir);
+       FREE(so->tokens);
+       /* the binary is produced in assemble(), which releases it with plain
+        * free(); it used to be leaked here.  Still NULL (and free(NULL) is
+        * a no-op) if the shader was never assembled:
+        */
+       free(so->bin);
+       FREE(so);
+}
+
+/* (Re)assemble the shader's IR into so->bin, filling in so->info.
+ *
+ * Returns 'so' on success.  On failure the shader object is destroyed and
+ * NULL is returned, so the caller must not touch 'so' afterwards.
+ */
+static struct fd_shader_stateobj *
+assemble(struct fd_shader_stateobj *so)
+{
+       /* drop the binary from any previous assemble: */
+       free(so->bin);
+       so->bin = ir_shader_assemble(so->ir, &so->info);
+
+       if (so->bin == NULL) {
+               debug_error("assemble failed!");
+               delete_shader(so);
+               return NULL;
+       }
+
+       if (fd_mesa_debug & FD_DBG_DISASM) {
+               DBG("disassemble: type=%d", so->type);
+               disasm(so->bin, so->info.sizedwords, 0, so->type);
+       }
+
+       return so;
+}
+
+/* Compile the shader's TGSI tokens with fd_compile_shader().
+ *
+ * Returns 'so' on success.  On failure the shader object is destroyed and
+ * NULL is returned.
+ */
+static struct fd_shader_stateobj *
+compile(struct fd_program_stateobj *prog, struct fd_shader_stateobj *so)
+{
+       if (fd_mesa_debug & FD_DBG_DISASM) {
+               DBG("dump tgsi: type=%d", so->type);
+               tgsi_dump(so->tokens, 0);
+       }
+
+       if (fd_compile_shader(prog, so) != 0) {
+               debug_error("compile failed!");
+               delete_shader(so);
+               return NULL;
+       }
+
+       /* NOTE: we don't assemble yet because for VS we don't know the
+        * type information for vertex fetch yet.. so those need to be
+        * patched up later before assembling.  A size of zero is what
+        * tells emit() that the shader still has to be assembled.
+        */
+       so->info.sizedwords = 0;
+
+       return so;
+}
+
+/* Emit the shader binary into the ring with CP_IM_LOAD_IMMEDIATE,
+ * assembling it first if that has not happened yet.  Nothing is emitted if
+ * assembling fails.
+ */
+static void
+emit(struct fd_ringbuffer *ring, struct fd_shader_stateobj *so)
+{
+       unsigned i;
+
+       /* sizedwords == 0 means the shader has been compiled but not yet
+        * assembled.  assemble() destroys 'so' when it fails, so bail out
+        * rather than reading from freed memory below.
+        * NOTE(review): the caller's pointer to the shader is dangling in
+        * that case too; a complete fix needs assemble() to stop freeing
+        * the object or the failure to be propagated.
+        */
+       if ((so->info.sizedwords == 0) && !assemble(so))
+               return;
+
+       OUT_PKT3(ring, CP_IM_LOAD_IMMEDIATE, 2 + so->info.sizedwords);
+       OUT_RING(ring, (so->type == SHADER_VERTEX) ? 0 : 1);
+       OUT_RING(ring, so->info.sizedwords);
+       for (i = 0; i < so->info.sizedwords; i++)
+               OUT_RING(ring, so->bin[i]);
+}
+
+/* Create fragment shader state holding a private copy of the TGSI tokens
+ * from 'cso'.  Returns NULL if the state object cannot be allocated.
+ */
+static void *
+fd_fp_state_create(struct pipe_context *pctx,
+               const struct pipe_shader_state *cso)
+{
+       struct fd_shader_stateobj *shader;
+
+       shader = create_shader(SHADER_FRAGMENT);
+       if (shader != NULL)
+               shader->tokens = tgsi_dup_tokens(cso->tokens);
+
+       return shader;
+}
+
+/* Destroy fragment shader state created by fd_fp_state_create(). */
+static void
+fd_fp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       delete_shader((struct fd_shader_stateobj *)hwcso);
+}
+
+/* Make 'hwcso' the current fragment shader and flag the program state as
+ * dirty.
+ */
+static void
+fd_fp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->prog.fp = hwcso;
+
+       /* both the per-shader and the context-wide dirty bits are raised: */
+       fdctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+       fdctx->dirty |= FD_DIRTY_PROG;
+}
+
+/* Create vertex shader state holding a private copy of the TGSI tokens
+ * from 'cso'.  Returns NULL if the state object cannot be allocated.
+ */
+static void *
+fd_vp_state_create(struct pipe_context *pctx,
+               const struct pipe_shader_state *cso)
+{
+       struct fd_shader_stateobj *shader;
+
+       shader = create_shader(SHADER_VERTEX);
+       if (shader != NULL)
+               shader->tokens = tgsi_dup_tokens(cso->tokens);
+
+       return shader;
+}
+
+/* Destroy vertex shader state created by fd_vp_state_create(). */
+static void
+fd_vp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       delete_shader((struct fd_shader_stateobj *)hwcso);
+}
+
+/* Make 'hwcso' the current vertex shader and flag the program state as
+ * dirty.
+ */
+static void
+fd_vp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->prog.vp = hwcso;
+
+       /* both the per-shader and the context-wide dirty bits are raised: */
+       fdctx->prog.dirty |= FD_SHADER_DIRTY_VP;
+       fdctx->dirty |= FD_DIRTY_PROG;
+}
+
+/* Rewrite the vertex fetch instructions of vertex shader so to match the
+ * currently bound vertex elements and vertex buffers, then force the
+ * shader to be re-assembled.
+ *
+ *   ctx - context; ctx->vertexbuf.vb[] supplies the per-buffer stride
+ *   so  - vertex shader whose vfetch_instrs[] are patched in place
+ *   vtx - vertex element state, one element per fetch instruction
+ */
+static void
+patch_vtx_fetches(struct fd_context *ctx, struct fd_shader_stateobj *so,
+               struct fd_vertex_stateobj *vtx)
+{
+       unsigned i;
+
+       assert(so->num_vfetch_instrs == vtx->num_elements);
+
+       /* update vtx fetch instructions: */
+       for (i = 0; i < so->num_vfetch_instrs; i++) {
+               struct ir_instruction *instr = so->vfetch_instrs[i];
+               struct pipe_vertex_element *elem = &vtx->pipe[i];
+               struct pipe_vertex_buffer *vb =
+                               &ctx->vertexbuf.vb[elem->vertex_buffer_index];
+               enum pipe_format format = elem->src_format;
+               const struct util_format_description *desc =
+                               util_format_description(format);
+               unsigned chan, j;
+
+               /* Find the first non-VOID channel. */
+               for (chan = 0; chan < 4; chan++)
+                       if (desc->channel[chan].type != UTIL_FORMAT_TYPE_VOID)
+                               break;
+
+               /* A format made only of VOID channels would leave chan == 4,
+                * one past the four channels examined above; fall back to the
+                * first channel instead of reading out of bounds.
+                */
+               if (chan == 4)
+                       chan = 0;
+
+               /* CI/CIS can probably be set in compiler instead: */
+               instr->fetch.const_idx = 20 + (i / 3);
+               instr->fetch.const_idx_sel = i % 3;
+
+               instr->fetch.fmt = fd_pipe2surface(format);
+               instr->fetch.is_normalized = desc->channel[chan].normalized;
+               instr->fetch.is_signed =
+                               desc->channel[chan].type == UTIL_FORMAT_TYPE_SIGNED;
+               /* a zero stride is replaced by 1 (spelled out rather than
+                * relying on the GNU "?:" extension)
+                */
+               instr->fetch.stride = vb->stride ? vb->stride : 1;
+               instr->fetch.offset = elem->src_offset;
+
+               /* translate the format's swizzle into the IR's swizzle chars */
+               for (j = 0; j < 4; j++)
+                       instr->regs[0]->swizzle[j] = "xyzw01__"[desc->swizzle[j]];
+
+               assert(instr->fetch.fmt != FMT_INVALID);
+
+               DBG("vtx[%d]: %s (%d), ci=%d, cis=%d, id=%d, swizzle=%s, "
+                               "stride=%d, offset=%d",
+                               i, util_format_name(format),
+                               instr->fetch.fmt,
+                               instr->fetch.const_idx,
+                               instr->fetch.const_idx_sel,
+                               elem->instance_divisor,
+                               instr->regs[0]->swizzle,
+                               instr->fetch.stride,
+                               instr->fetch.offset);
+       }
+
+       /* trigger re-assemble: */
+       so->info.sizedwords = 0;
+}
+
+/* Point every texture fetch instruction of shader so at the constant
+ * index that fd_get_const_idx() reports for its sampler in tex.  The
+ * shader is flagged for re-assembly only if an index actually changed.
+ */
+static void
+patch_tex_fetches(struct fd_context *ctx, struct fd_shader_stateobj *so,
+               struct fd_texture_stateobj *tex)
+{
+       unsigned n;
+
+       for (n = 0; n < so->num_tfetch_instrs; n++) {
+               struct ir_instruction *fetch = so->tfetch_instrs[n].instr;
+               unsigned sampler = so->tfetch_instrs[n].samp_id;
+               unsigned wanted = fd_get_const_idx(ctx, tex, sampler);
+
+               /* already up to date, nothing to patch */
+               if (wanted == fetch->fetch.const_idx)
+                       continue;
+
+               fetch->fetch.const_idx = wanted;
+               /* trigger re-assemble: */
+               so->info.sizedwords = 0;
+       }
+}
+
+/* Bring the bound program up to date: recompile shaders whose state is
+ * dirty, then patch vertex and texture fetch instructions when any of
+ * the state they depend on has changed.
+ */
+void
+fd_program_validate(struct fd_context *ctx)
+{
+       struct fd_program_stateobj *prog = &ctx->prog;
+
+       /* The fragment shader goes first because compiling it assigns the
+        * register slots for the vertex shader's exports.  A changed
+        * fragment shader therefore forces a vertex shader recompile too.
+        */
+       if (prog->dirty & FD_SHADER_DIRTY_FP) {
+               compile(prog, prog->fp);
+       }
+
+       if (prog->dirty & (FD_SHADER_DIRTY_VP | FD_SHADER_DIRTY_FP)) {
+               compile(prog, prog->vp);
+       }
+
+       /* propagate shader dirtiness to the context, then clear it */
+       if (prog->dirty != 0) {
+               ctx->dirty |= FD_DIRTY_PROG;
+               prog->dirty = 0;
+       }
+
+       /* vertex fetches depend on vertex elements, buffers and program */
+       if (ctx->dirty & (FD_DIRTY_PROG | FD_DIRTY_VERTEXBUF | FD_DIRTY_VTX)) {
+               patch_vtx_fetches(ctx, prog->vp, ctx->vtx);
+       }
+
+       /* texture fetches depend on both texture stages and the program */
+       if (!(ctx->dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAGTEX | FD_DIRTY_VERTTEX)))
+               return;
+
+       patch_tex_fetches(ctx, prog->vp, &ctx->verttex);
+       patch_tex_fetches(ctx, prog->fp, &ctx->fragtex);
+}
+
+/* Emit both shaders of prog into ring, followed by the SQ_PROGRAM_CNTL
+ * value describing their register usage and the vertex export count.
+ */
+void
+fd_program_emit(struct fd_ringbuffer *ring,
+               struct fd_program_stateobj *prog)
+{
+       struct ir_shader_info *vs_info = &prog->vp->info;
+       struct ir_shader_info *fs_info = &prog->fp->info;
+       uint8_t vs_gprs, fs_gprs, vs_export;
+
+       /* NOTE(review): the shader info is read only after both emit()
+        * calls, as in the original ordering; presumably emit() may
+        * re-assemble the shader and refresh it -- confirm before moving.
+        */
+       emit(ring, prog->vp);
+       emit(ring, prog->fp);
+
+       /* a negative max_reg is encoded as 0x80 */
+       if (vs_info->max_reg < 0)
+               vs_gprs = 0x80;
+       else
+               vs_gprs = vs_info->max_reg;
+
+       if (fs_info->max_reg < 0)
+               fs_gprs = 0x80;
+       else
+               fs_gprs = fs_info->max_reg;
+
+       /* export count is stored minus one, never below zero */
+       vs_export = max(1, prog->num_exports) - 1;
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_SQ_PROGRAM_CNTL));
+       OUT_RING(ring, SQ_PROGRAM_CNTL_PS_EXPORT_MODE(POSITION_2_VECTORS_SPRITE) |
+                       SQ_PROGRAM_CNTL_VS_RESOURCE |
+                       SQ_PROGRAM_CNTL_PS_RESOURCE |
+                       SQ_PROGRAM_CNTL_VS_EXPORT_COUNT(vs_export) |
+                       SQ_PROGRAM_CNTL_PS_REGS(fs_gprs) |
+                       SQ_PROGRAM_CNTL_VS_REGS(vs_gprs));
+}
+
+/* Hand-build the fragment shader of the blit program: sample a texture
+ * and export the result as the fragment color.  Returns NULL when the
+ * shader object cannot be created.
+ *
+ * Creates shader:
+ *    EXEC ADDR(0x2) CNT(0x1)
+ *       (S)FETCH:     SAMPLE  R0.xyzw = R0.xyx CONST(0) LOCATION(CENTER)
+ *    ALLOC PARAM/PIXEL SIZE(0x0)
+ *    EXEC_END ADDR(0x3) CNT(0x1)
+ *          ALU:       MAXv    export0 = R0, R0        ; gl_FragColor
+ *    NOP
+ */
+static struct fd_shader_stateobj *
+create_blit_fp(void)
+{
+       struct fd_shader_stateobj *shader = create_shader(SHADER_FRAGMENT);
+       struct ir_cf *clause;
+       struct ir_instruction *op;
+
+       if (shader == NULL)
+               return NULL;
+
+       shader->ir = ir_shader_create();
+
+       /* texture sample into R0 */
+       clause = ir_cf_create(shader->ir, EXEC);
+       op = ir_instr_create_tex_fetch(clause, 0);
+       ir_reg_create(op, 0, "xyzw", 0);
+       ir_reg_create(op, 0, "xyx", 0);
+       op->sync = true;
+
+       /* export R0 as the color */
+       ir_cf_create_alloc(shader->ir, SQ_PARAMETER_PIXEL, 0);
+       clause = ir_cf_create(shader->ir, EXEC_END);
+       op = ir_instr_create_alu(clause, MAXv, ~0);
+       ir_reg_create(op, 0, NULL, IR_REG_EXPORT);
+       ir_reg_create(op, 0, NULL, 0);
+       ir_reg_create(op, 0, NULL, 0);
+
+       return assemble(shader);
+}
+
+/* Hand-build the vertex shader of the blit program: fetch a 2-component
+ * attribute into R1 and a 3-component position into R2, export R2 as
+ * the position and R1 as export 0.  Returns NULL when the shader object
+ * cannot be created.
+ *
+ * Creates shader:
+ *     EXEC ADDR(0x3) CNT(0x2)
+ *           FETCH:     VERTEX  R1.xy01 = R0.x FMT_32_32_FLOAT UNSIGNED STRIDE(8) CONST(26, 1)
+ *           FETCH:     VERTEX  R2.xyz1 = R0.x FMT_32_32_32_FLOAT UNSIGNED STRIDE(12) CONST(26, 0)
+ *     ALLOC POSITION SIZE(0x0)
+ *     EXEC ADDR(0x5) CNT(0x1)
+ *           ALU:       MAXv    export62 = R2, R2       ; gl_Position
+ *     ALLOC PARAM/PIXEL SIZE(0x0)
+ *     EXEC_END ADDR(0x6) CNT(0x1)
+ *           ALU:       MAXv    export0 = R1, R1
+ *     NOP
+ */
+static struct fd_shader_stateobj *
+create_blit_vp(void)
+{
+       struct fd_shader_stateobj *shader = create_shader(SHADER_VERTEX);
+       struct ir_cf *clause;
+       struct ir_instruction *op;
+
+       if (shader == NULL)
+               return NULL;
+
+       shader->ir = ir_shader_create();
+
+       /* both vertex fetches live in the first EXEC clause */
+       clause = ir_cf_create(shader->ir, EXEC);
+
+       op = ir_instr_create_vtx_fetch(clause, 26, 1, FMT_32_32_FLOAT, false, 8);
+       op->fetch.is_normalized = true;
+       ir_reg_create(op, 1, "xy01", 0);
+       ir_reg_create(op, 0, "x", 0);
+
+       op = ir_instr_create_vtx_fetch(clause, 26, 0, FMT_32_32_32_FLOAT, false, 12);
+       op->fetch.is_normalized = true;
+       ir_reg_create(op, 2, "xyz1", 0);
+       ir_reg_create(op, 0, "x", 0);
+
+       /* export R2 as the position */
+       ir_cf_create_alloc(shader->ir, SQ_POSITION, 0);
+       clause = ir_cf_create(shader->ir, EXEC);
+       op = ir_instr_create_alu(clause, MAXv, ~0);
+       ir_reg_create(op, 62, NULL, IR_REG_EXPORT);
+       ir_reg_create(op, 2, NULL, 0);
+       ir_reg_create(op, 2, NULL, 0);
+
+       /* export R1 as export 0 */
+       ir_cf_create_alloc(shader->ir, SQ_PARAMETER_PIXEL, 0);
+       clause = ir_cf_create(shader->ir, EXEC_END);
+       op = ir_instr_create_alu(clause, MAXv, ~0);
+       ir_reg_create(op, 0, NULL, IR_REG_EXPORT);
+       ir_reg_create(op, 1, NULL, 0);
+       ir_reg_create(op, 1, NULL, 0);
+
+       return assemble(shader);
+}
+
+/* Hand-build the fragment shader of the solid program: export constant
+ * C0 as the fragment color.  Returns NULL when the shader object cannot
+ * be created.
+ *
+ * Creates shader:
+ *    ALLOC PARAM/PIXEL SIZE(0x0)
+ *    EXEC_END ADDR(0x1) CNT(0x1)
+ *          ALU:       MAXv    export0 = C0, C0        ; gl_FragColor
+ */
+static struct fd_shader_stateobj *
+create_solid_fp(void)
+{
+       struct fd_shader_stateobj *shader = create_shader(SHADER_FRAGMENT);
+       struct ir_cf *clause;
+       struct ir_instruction *op;
+
+       if (shader == NULL)
+               return NULL;
+
+       shader->ir = ir_shader_create();
+
+       ir_cf_create_alloc(shader->ir, SQ_PARAMETER_PIXEL, 0);
+       clause = ir_cf_create(shader->ir, EXEC_END);
+
+       /* export0 = max(C0, C0) */
+       op = ir_instr_create_alu(clause, MAXv, ~0);
+       ir_reg_create(op, 0, NULL, IR_REG_EXPORT);
+       ir_reg_create(op, 0, NULL, IR_REG_CONST);
+       ir_reg_create(op, 0, NULL, IR_REG_CONST);
+
+       return assemble(shader);
+}
+
+/* Hand-build the vertex shader of the solid program: fetch a 3-component
+ * position into R1 and export it as the position.  Returns NULL when
+ * the shader object cannot be created.
+ *
+ * Creates shader:
+ *    EXEC ADDR(0x3) CNT(0x1)
+ *       (S)FETCH:     VERTEX  R1.xyz1 = R0.x FMT_32_32_32_FLOAT
+ *                           UNSIGNED STRIDE(12) CONST(26, 0)
+ *    ALLOC POSITION SIZE(0x0)
+ *    EXEC ADDR(0x4) CNT(0x1)
+ *          ALU:       MAXv    export62 = R1, R1       ; gl_Position
+ *    ALLOC PARAM/PIXEL SIZE(0x0)
+ *    EXEC_END ADDR(0x5) CNT(0x0)
+ */
+static struct fd_shader_stateobj *
+create_solid_vp(void)
+{
+       struct fd_shader_stateobj *shader = create_shader(SHADER_VERTEX);
+       struct ir_cf *clause;
+       struct ir_instruction *op;
+
+       if (shader == NULL)
+               return NULL;
+
+       shader->ir = ir_shader_create();
+
+       /* position fetch into R1 */
+       clause = ir_cf_create(shader->ir, EXEC);
+       op = ir_instr_create_vtx_fetch(clause, 26, 0, FMT_32_32_32_FLOAT, false, 12);
+       ir_reg_create(op, 1, "xyz1", 0);
+       ir_reg_create(op, 0, "x", 0);
+
+       /* export R1 as the position */
+       ir_cf_create_alloc(shader->ir, SQ_POSITION, 0);
+       clause = ir_cf_create(shader->ir, EXEC);
+       op = ir_instr_create_alu(clause, MAXv, ~0);
+       ir_reg_create(op, 62, NULL, IR_REG_EXPORT);
+       ir_reg_create(op, 1, NULL, 0);
+       ir_reg_create(op, 1, NULL, 0);
+
+       /* closing clause with no instructions */
+       ir_cf_create_alloc(shader->ir, SQ_PARAMETER_PIXEL, 0);
+       ir_cf_create(shader->ir, EXEC_END);
+
+       return assemble(shader);
+}
+
+/* Install the shader state hooks on pctx and build the context's
+ * built-in solid and blit programs.
+ */
+void
+fd_prog_init(struct pipe_context *pctx)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       /* state object creation */
+       pctx->create_fs_state = fd_fp_state_create;
+       pctx->create_vs_state = fd_vp_state_create;
+
+       /* binding */
+       pctx->bind_fs_state = fd_fp_state_bind;
+       pctx->bind_vs_state = fd_vp_state_bind;
+
+       /* destruction */
+       pctx->delete_fs_state = fd_fp_state_delete;
+       pctx->delete_vs_state = fd_vp_state_delete;
+
+       /* hand-built internal programs */
+       fdctx->solid_prog.fp = create_solid_fp();
+       fdctx->solid_prog.vp = create_solid_vp();
+       fdctx->blit_prog.fp = create_blit_fp();
+       fdctx->blit_prog.vp = create_blit_vp();
+}
+
+/* Release the built-in solid and blit shaders created by fd_prog_init(). */
+void
+fd_prog_fini(struct pipe_context *pctx)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+       struct fd_shader_stateobj *builtin[] = {
+               fdctx->solid_prog.vp,
+               fdctx->solid_prog.fp,
+               fdctx->blit_prog.vp,
+               fdctx->blit_prog.fp,
+       };
+       unsigned n;
+
+       for (n = 0; n < sizeof(builtin) / sizeof(builtin[0]); n++)
+               delete_shader(builtin[n]);
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_program.h b/src/gallium/drivers/freedreno/freedreno_program.h
new file mode 100644 (file)
index 0000000..e73cf1b
--- /dev/null
@@ -0,0 +1,82 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_PROGRAM_H_
+#define FREEDRENO_PROGRAM_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+
+#include "ir.h"
+#include "disasm.h"
+
+struct fd_shader_stateobj {   /* driver state for one vertex or fragment shader */
+       enum shader_t type;           /* compared against SHADER_VERTEX when the shader is emitted */
+
+       uint32_t *bin;                /* shader dwords; info.sizedwords of them are written to the ring */
+
+       struct tgsi_token *tokens;    /* copy of the TGSI tokens made by tgsi_dup_tokens() at create time */
+
+       /* note that we defer compiling shader until we know both vs and ps..
+        * and if one changes, we potentially need to recompile in order to
+        * get varying linkages correct:
+        *
+        * info.sizedwords is reset to 0 by the fetch patching code to
+        * trigger re-assembly.
+        */
+       struct ir_shader_info info;
+       struct ir_shader *ir;
+
+       /* for vertex shaders, the fetch instructions which need to be
+        * patched up before assembly:
+        *
+        * (one per vertex element; patch_vtx_fetches() asserts that the
+        * count matches the bound vertex element state)
+        */
+       unsigned num_vfetch_instrs;
+       struct ir_instruction *vfetch_instrs[64];
+
+       /* for all shaders, any tex fetch instructions which need to be
+        * patched before assembly:
+        *
+        * (samp_id is handed to fd_get_const_idx() to find the constant
+        * index written into instr)
+        */
+       unsigned num_tfetch_instrs;
+       struct {
+               unsigned samp_id;
+               struct ir_instruction *instr;
+       } tfetch_instrs[64];
+
+       unsigned first_immediate;     /* const reg # of first immediate */
+       unsigned num_immediates;      /* number of entries used in immediates[] -- TODO confirm */
+       struct {
+               uint32_t val[4];      /* four 32-bit words per immediate */
+       } immediates[64];
+};
+
+void fd_program_emit(struct fd_ringbuffer *ring,
+               struct fd_program_stateobj *prog);
+void fd_program_validate(struct fd_context *ctx);
+
+void fd_prog_init(struct pipe_context *pctx);
+void fd_prog_fini(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_rasterizer.c b/src/gallium/drivers/freedreno/freedreno_rasterizer.c
new file mode 100644 (file)
index 0000000..2d69133
--- /dev/null
@@ -0,0 +1,151 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "freedreno_rasterizer.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+
+/* Map a PIPE_POLYGON_MODE_* value to the matching pa_su_sc_draw value.
+ * Unknown modes are logged and yield 0.
+ */
+static enum pa_su_sc_draw
+polygon_mode(unsigned mode)
+{
+       if (mode == PIPE_POLYGON_MODE_POINT)
+               return DRAW_POINTS;
+
+       if (mode == PIPE_POLYGON_MODE_LINE)
+               return DRAW_LINES;
+
+       if (mode == PIPE_POLYGON_MODE_FILL)
+               return DRAW_TRIANGLES;
+
+       DBG("invalid polygon mode: %u", mode);
+       return 0;
+}
+
+/* Build a rasterizer state object from cso: keep a copy of the gallium
+ * state and pre-compute the pa_* values derived from it.  Returns NULL
+ * on allocation failure.
+ */
+static void *
+fd_rasterizer_state_create(struct pipe_context *pctx,
+               const struct pipe_rasterizer_state *cso)
+{
+       struct fd_rasterizer_stateobj *so;
+       float psize_min, psize_max;
+       uint32_t mode_cntl;
+
+       so = CALLOC_STRUCT(fd_rasterizer_stateobj);
+       if (so == NULL)
+               return NULL;
+
+       so->base = *cso;
+
+       /* Without per-vertex point size, pin min and max to the fixed
+        * size so the vertex output has no effect.
+        */
+       if (!cso->point_size_per_vertex) {
+               psize_min = cso->point_size;
+               psize_max = cso->point_size;
+       } else {
+               psize_min = util_get_min_point_size(cso);
+               psize_max = 8192;
+       }
+
+       if (cso->line_stipple_enable) {
+               so->pa_sc_line_stipple =
+                       PA_SC_LINE_STIPPLE_LINE_PATTERN(cso->line_stipple_pattern) |
+                       PA_SC_LINE_STIPPLE_REPEAT_COUNT(cso->line_stipple_factor);
+       } else {
+               so->pa_sc_line_stipple = 0;
+       }
+
+       so->pa_cl_clip_cntl = 0; // TODO
+
+       so->pa_su_vtx_cntl =
+               PA_SU_VTX_CNTL_PIX_CENTER(cso->gl_rasterization_rules ? PIXCENTER_OGL : PIXCENTER_D3D) |
+               PA_SU_VTX_CNTL_QUANT_MODE(ONE_SIXTEENTH);
+
+       /* sizes and widths are programmed as half values */
+       so->pa_su_point_size =
+               PA_SU_POINT_SIZE_HEIGHT(cso->point_size/2) |
+               PA_SU_POINT_SIZE_WIDTH(cso->point_size/2);
+
+       so->pa_su_point_minmax =
+               PA_SU_POINT_MINMAX_MIN_SIZE(psize_min/2) |
+               PA_SU_POINT_MINMAX_MAX_SIZE(psize_max/2);
+
+       so->pa_su_line_cntl =
+               PA_SU_LINE_CNTL_WIDTH(cso->line_width/2);
+
+       /* gather the mode control bits locally, store them once at the end */
+       mode_cntl =
+               PA_SU_SC_MODE_CNTL_VTX_WINDOW_OFFSET_ENABLE |
+               PA_SU_SC_MODE_CNTL_POLYMODE_FRONT_PTYPE(polygon_mode(cso->fill_front)) |
+               PA_SU_SC_MODE_CNTL_POLYMODE_BACK_PTYPE(polygon_mode(cso->fill_back));
+
+       if (cso->cull_face & PIPE_FACE_FRONT)
+               mode_cntl |= PA_SU_SC_MODE_CNTL_CULL_FRONT;
+       if (cso->cull_face & PIPE_FACE_BACK)
+               mode_cntl |= PA_SU_SC_MODE_CNTL_CULL_BACK;
+       if (!cso->flatshade_first)
+               mode_cntl |= PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST;
+       if (!cso->front_ccw)
+               mode_cntl |= PA_SU_SC_MODE_CNTL_FACE;
+       if (cso->line_stipple_enable)
+               mode_cntl |= PA_SU_SC_MODE_CNTL_LINE_STIPPLE_ENABLE;
+       if (cso->multisample)
+               mode_cntl |= PA_SU_SC_MODE_CNTL_MSAA_ENABLE;
+
+       /* dual mode is needed unless both faces are filled */
+       if (cso->fill_front == PIPE_POLYGON_MODE_FILL &&
+                       cso->fill_back == PIPE_POLYGON_MODE_FILL)
+               mode_cntl |= PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DISABLED);
+       else
+               mode_cntl |= PA_SU_SC_MODE_CNTL_POLYMODE(POLY_DUALMODE);
+
+       if (cso->offset_tri) {
+               mode_cntl |=
+                       PA_SU_SC_MODE_CNTL_POLY_OFFSET_FRONT_ENABLE |
+                       PA_SU_SC_MODE_CNTL_POLY_OFFSET_BACK_ENABLE |
+                       PA_SU_SC_MODE_CNTL_POLY_OFFSET_PARA_ENABLE;
+       }
+
+       so->pa_su_sc_mode_cntl = mode_cntl;
+
+       return so;
+}
+
+/* Make hwcso the current rasterizer state and mark it dirty. */
+static void
+fd_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->dirty |= FD_DIRTY_RASTERIZER;
+       fdctx->rasterizer = hwcso;
+}
+
+/* Free a rasterizer state object made by fd_rasterizer_state_create(). */
+static void
+fd_rasterizer_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd_rasterizer_stateobj *so = hwcso;
+
+       FREE(so);
+}
+
+/* Install the rasterizer state hooks on pctx. */
+void
+fd_rasterizer_init(struct pipe_context *pctx)
+{
+       pctx->delete_rasterizer_state = fd_rasterizer_state_delete;
+       pctx->bind_rasterizer_state = fd_rasterizer_state_bind;
+       pctx->create_rasterizer_state = fd_rasterizer_state_create;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_rasterizer.h b/src/gallium/drivers/freedreno/freedreno_rasterizer.h
new file mode 100644 (file)
index 0000000..519a05e
--- /dev/null
@@ -0,0 +1,48 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_RASTERIZER_H_
+#define FREEDRENO_RASTERIZER_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+struct fd_rasterizer_stateobj {   /* rasterizer CSO plus values pre-computed from it at create time */
+       struct pipe_rasterizer_state base;   /* copy of the gallium state the object was created from */
+       uint32_t pa_sc_line_stipple;         /* stipple pattern and repeat count; 0 unless line_stipple_enable */
+       uint32_t pa_cl_clip_cntl;            /* always 0 for now (marked TODO in the create function) */
+       uint32_t pa_su_vtx_cntl;             /* pixel center (OGL or D3D) and quantization mode */
+       uint32_t pa_su_point_size;           /* point height and width, each from point_size/2 */
+       uint32_t pa_su_point_minmax;         /* min and max point size, each halved */
+       uint32_t pa_su_line_cntl;            /* line width, from line_width/2 */
+       uint32_t pa_su_sc_mode_cntl;         /* polygon mode, culling, facing, stipple, MSAA and offset enables */
+};
+
+void fd_rasterizer_init(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_RASTERIZER_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
new file mode 100644 (file)
index 0000000..4458ba9
--- /dev/null
@@ -0,0 +1,248 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "util/u_format.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+#include "util/u_string.h"
+
+#include "freedreno_resource.h"
+#include "freedreno_screen.h"
+#include "freedreno_surface.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+/* Map a region of a resource for CPU access.
+ *
+ *   pctx    - context whose transfer pool supplies the pipe_transfer
+ *   prsc    - resource to map
+ *   level   - recorded in the transfer
+ *   usage   - recorded in the transfer
+ *   box     - region of interest; only x and y offset the returned pointer
+ *   pptrans - receives the new transfer object on success
+ *
+ * Returns a pointer to the first requested byte inside the mapped buffer
+ * object, or NULL if the transfer cannot be allocated or the buffer
+ * cannot be mapped.  On failure *pptrans is left untouched.
+ */
+static void *
+fd_resource_transfer_map(struct pipe_context *pctx,
+               struct pipe_resource *prsc,
+               unsigned level, unsigned usage,
+               const struct pipe_box *box,
+               struct pipe_transfer **pptrans)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd_resource *rsc = fd_resource(prsc);
+       struct pipe_transfer *ptrans = util_slab_alloc(&ctx->transfer_pool);
+       enum pipe_format format = prsc->format;
+       char *buf;
+
+       if (!ptrans)
+               return NULL;
+
+       ptrans->resource = prsc;
+       ptrans->level = level;
+       ptrans->usage = usage;
+       ptrans->box = *box;
+       ptrans->stride = rsc->pitch * rsc->cpp;
+       ptrans->layer_stride = ptrans->stride;
+
+       buf = fd_bo_map(rsc->bo);
+       if (!buf) {
+               /* mapping failed: hand the transfer back to the pool rather
+                * than returning an offset from a NULL base
+                */
+               util_slab_free(&ctx->transfer_pool, ptrans);
+               return NULL;
+       }
+
+       *pptrans = ptrans;
+
+       /* offset to the requested block row and column */
+       return buf +
+               box->y / util_format_get_blockheight(format) * ptrans->stride +
+               box->x / util_format_get_blockwidth(format) * rsc->cpp;
+}
+
+/* Flush callback for a mapped transfer: render the context if the
+ * resource is dirty, then wait on and clear a pending timestamp.  The
+ * box argument is not used.
+ */
+static void fd_resource_transfer_flush_region(struct pipe_context *pctx,
+               struct pipe_transfer *ptrans,
+               const struct pipe_box *box)
+{
+       struct fd_resource *rsc = fd_resource(ptrans->resource);
+       struct fd_context *fdctx = fd_context(pctx);
+
+       if (rsc->dirty) {
+               fd_context_render(pctx);
+       }
+
+       /* nothing outstanding to wait for */
+       if (rsc->timestamp == 0)
+               return;
+
+       fd_pipe_wait(fdctx->screen->pipe, rsc->timestamp);
+       rsc->timestamp = 0;
+}
+
+/* End a transfer: return the transfer object to the context's pool. */
+static void
+fd_resource_transfer_unmap(struct pipe_context *pctx,
+               struct pipe_transfer *ptrans)
+{
+       util_slab_free(&fd_context(pctx)->transfer_pool, ptrans);
+}
+
+/* Destroy a resource: delete its buffer object, then free the wrapper. */
+static void
+fd_resource_destroy(struct pipe_screen *pscreen,
+               struct pipe_resource *prsc)
+{
+       struct fd_resource *res = fd_resource(prsc);
+
+       fd_bo_del(res->bo);
+       FREE(res);
+}
+
+/* Export the resource's buffer object through handle, passing the
+ * resource pitch along.  Returns what fd_screen_bo_get_handle() returns.
+ */
+static boolean
+fd_resource_get_handle(struct pipe_screen *pscreen,
+               struct pipe_resource *prsc,
+               struct winsys_handle *handle)
+{
+       struct fd_resource *res = fd_resource(prsc);
+       boolean ok;
+
+       ok = fd_screen_bo_get_handle(pscreen, res->bo, res->pitch, handle);
+
+       return ok;
+}
+
+
+const struct u_resource_vtbl fd_resource_vtbl = {   /* callback table stored in rsc->base.vtbl at resource creation */
+               .resource_get_handle      = fd_resource_get_handle,
+               .resource_destroy         = fd_resource_destroy,
+               .transfer_map             = fd_resource_transfer_map,
+               .transfer_flush_region    = fd_resource_transfer_flush_region,
+               .transfer_unmap           = fd_resource_transfer_unmap,
+               .transfer_inline_write    = u_default_transfer_inline_write,   /* the only entry not defined in this file */
+};
+
+/**
+ * Create a new texture object, using the given template info.
+ *
+ * The pitch is the template width aligned to 32, and the backing buffer
+ * object is sized as pitch * height0 * bytes-per-block.
+ *
+ * Returns the new resource with a reference count of 1, or NULL if
+ * either the resource or its buffer object cannot be allocated.
+ */
+static struct pipe_resource *
+fd_resource_create(struct pipe_screen *pscreen,
+               const struct pipe_resource *tmpl)
+{
+       struct fd_screen *screen = fd_screen(pscreen);
+       struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
+       struct pipe_resource *prsc;
+       uint32_t flags, size;
+
+       DBG("target=%d, format=%s, %ux%u@%u, array_size=%u, last_level=%u, "
+                       "nr_samples=%u, usage=%u, bind=%x, flags=%x",
+                       tmpl->target, util_format_name(tmpl->format),
+                       tmpl->width0, tmpl->height0, tmpl->depth0,
+                       tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
+                       tmpl->usage, tmpl->bind, tmpl->flags);
+
+       if (!rsc)
+               return NULL;
+
+       /* only form the member address once rsc is known to be valid */
+       prsc = &rsc->base.b;
+       *prsc = *tmpl;
+
+       pipe_reference_init(&prsc->reference, 1);
+       prsc->screen = pscreen;
+
+       rsc->base.vtbl = &fd_resource_vtbl;
+       rsc->pitch = ALIGN(tmpl->width0, 32);
+       rsc->cpp = util_format_get_blocksize(tmpl->format);
+
+       size = rsc->pitch * tmpl->height0 * rsc->cpp;
+       flags = DRM_FREEDRENO_GEM_TYPE_KMEM; /* TODO */
+
+       rsc->bo = fd_bo_new(screen->dev, size, flags);
+       if (!rsc->bo) {
+               /* never hand out a resource without backing storage */
+               FREE(rsc);
+               return NULL;
+       }
+
+       return prsc;
+}
+
+/**
+ * Create a texture from a winsys_handle. The handle is often created in
+ * another process by first creating a pipe texture and then calling
+ * resource_get_handle.
+ *
+ * Returns the new resource with a reference count of 1, or NULL if the
+ * resource cannot be allocated or the handle cannot be imported.
+ */
+static struct pipe_resource *
+fd_resource_from_handle(struct pipe_screen *pscreen,
+               const struct pipe_resource *tmpl,
+               struct winsys_handle *handle)
+{
+       struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
+       struct pipe_resource *prsc;
+
+       DBG("target=%d, format=%s, %ux%u@%u, array_size=%u, last_level=%u, "
+                       "nr_samples=%u, usage=%u, bind=%x, flags=%x",
+                       tmpl->target, util_format_name(tmpl->format),
+                       tmpl->width0, tmpl->height0, tmpl->depth0,
+                       tmpl->array_size, tmpl->last_level, tmpl->nr_samples,
+                       tmpl->usage, tmpl->bind, tmpl->flags);
+
+       if (!rsc)
+               return NULL;
+
+       /* only form the member address once rsc is known to be valid */
+       prsc = &rsc->base.b;
+       *prsc = *tmpl;
+
+       pipe_reference_init(&prsc->reference, 1);
+       prsc->screen = pscreen;
+
+       rsc->bo = fd_screen_bo_from_handle(pscreen, handle, &rsc->pitch);
+       if (!rsc->bo) {
+               /* import failed: do not return a resource without storage */
+               FREE(rsc);
+               return NULL;
+       }
+
+       rsc->base.vtbl = &fd_resource_vtbl;
+       /* bytes per block, as in fd_resource_create(); without it the
+        * transfer stride (pitch * cpp) of an imported resource is 0
+        */
+       rsc->cpp = util_format_get_blocksize(tmpl->format);
+       /* NOTE(review): this overrides the pitch that the import above
+        * stored through &rsc->pitch.  Kept as in the original because
+        * the unit of the imported value is not visible here -- confirm
+        * which of the two should win.
+        */
+       rsc->pitch = ALIGN(tmpl->width0, 32);
+
+       return prsc;
+}
+
+/**
+ * Copy a block of pixels from one resource to another.
+ * The resource must be of the same format.
+ * Resources with nr_samples > 1 are not allowed.
+ *
+ * NOTE: not implemented yet.  The function only emits a debug message;
+ * nothing is copied and dst is left untouched.
+ */
+static void
+fd_resource_copy_region(struct pipe_context *pctx,
+               struct pipe_resource *dst,
+               unsigned dst_level,
+               unsigned dstx, unsigned dsty, unsigned dstz,
+               struct pipe_resource *src,
+               unsigned src_level,
+               const struct pipe_box *src_box)
+{
+       DBG("TODO: ");
+       // TODO: implement the copy; every argument is currently ignored
+}
+
+/* Optimal hardware path for blitting pixels.
+ * Scaling, format conversion, up- and downsampling (resolve) are allowed.
+ *
+ * NOTE: not implemented yet.  The function only emits a debug message and
+ * performs no blit.
+ */
+static void
+fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
+{
+       DBG("TODO: ");
+       // TODO: implement the blit; info is currently ignored
+}
+
+/* Install the screen-level resource hooks.  Creation and import are the
+ * driver's own functions; get_handle and destroy use the u_resource
+ * vtbl wrappers.
+ */
+void
+fd_resource_screen_init(struct pipe_screen *pscreen)
+{
+       /* driver entry points */
+       pscreen->resource_from_handle = fd_resource_from_handle;
+       pscreen->resource_create = fd_resource_create;
+
+       /* vtbl wrappers */
+       pscreen->resource_destroy = u_resource_destroy_vtbl;
+       pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+}
+
+/* Install the context-level resource hooks: transfers use the
+ * u_transfer vtbl wrappers, while surfaces, region copies and blits use
+ * the driver's functions.
+ */
+void
+fd_resource_context_init(struct pipe_context *pctx)
+{
+       /* transfers */
+       pctx->transfer_map = u_transfer_map_vtbl;
+       pctx->transfer_unmap = u_transfer_unmap_vtbl;
+       pctx->transfer_flush_region = u_transfer_flush_region_vtbl;
+       pctx->transfer_inline_write = u_transfer_inline_write_vtbl;
+
+       /* surfaces */
+       pctx->create_surface = fd_create_surface;
+       pctx->surface_destroy = fd_surface_destroy;
+
+       /* copies and blits */
+       pctx->resource_copy_region = fd_resource_copy_region;
+       pctx->blit = fd_blit;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
new file mode 100644 (file)
index 0000000..575a143
--- /dev/null
@@ -0,0 +1,51 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_RESOURCE_H_
+#define FREEDRENO_RESOURCE_H_
+
+#include "util/u_transfer.h"
+
+struct fd_resource {
+       struct u_resource base;   /* keep first: fd_resource() casts pipe_resource pointers to this struct */
+       struct fd_bo *bo;         /* buffer object backing this resource */
+       uint32_t pitch, cpp;      /* pitch: row pitch (import path uses ALIGN(width0, 32)); cpp: presumably bytes per pixel -- TODO confirm */
+       uint32_t timestamp;       /* NOTE(review): usage not visible in this part of the patch */
+       bool dirty;               /* NOTE(review): usage not visible in this part of the patch */
+};
+
+/* Downcast a generic pipe_resource pointer to the driver's fd_resource.
+ * This is a plain pointer cast, no checking is done.
+ */
+static INLINE struct fd_resource *
+fd_resource(struct pipe_resource *ptex)
+{
+       struct fd_resource *rsc = (struct fd_resource *)ptex;
+       return rsc;
+}
+
+void fd_resource_screen_init(struct pipe_screen *pscreen);
+void fd_resource_context_init(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_RESOURCE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c b/src/gallium/drivers/freedreno/freedreno_screen.c
new file mode 100644 (file)
index 0000000..5310fc7
--- /dev/null
@@ -0,0 +1,471 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_defines.h"
+#include "pipe/p_screen.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "util/u_string.h"
+
+#include "os/os_time.h"
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+
+#include "freedreno_context.h"
+#include "freedreno_screen.h"
+#include "freedreno_resource.h"
+#include "freedreno_fence.h"
+#include "freedreno_util.h"
+
+/* XXX this should go away */
+#include "state_tracker/drm_driver.h"
+
+int fd_mesa_debug = 0;
+
+/* Return the renderer name, "FD" followed by the zero padded device id.
+ * The string lives in a static buffer which is overwritten by every call.
+ */
+static const char *
+fd_screen_get_name(struct pipe_screen *pscreen)
+{
+       static char name[128];
+       struct fd_screen *screen = fd_screen(pscreen);
+
+       util_snprintf(name, sizeof(name), "FD%03d", screen->device_id);
+
+       return name;
+}
+
+/* Return the (constant) vendor string for this driver. */
+static const char *
+fd_screen_get_vendor(struct pipe_screen *pscreen)
+{
+       static const char vendor[] = "freedreno";
+       return vendor;
+}
+
+/* Return a timestamp: the CPU time from os_time_get(), scaled by 1000
+ * (presumably usec -> nsec, TODO confirm), plus the screen's cpu/gpu
+ * time delta.
+ */
+static uint64_t
+fd_screen_get_timestamp(struct pipe_screen *pscreen)
+{
+       struct fd_screen *screen = fd_screen(pscreen);
+       int64_t now = os_time_get() * 1000;
+
+       now += screen->cpu_gpu_time_delta;
+
+       return now;
+}
+
+/* pipe_screen::fence_reference hook, forwards to fd_fence_ref() after
+ * casting the opaque fence handles to the driver's fence type.
+ */
+static void
+fd_screen_fence_ref(struct pipe_screen *pscreen,
+               struct pipe_fence_handle **ptr,
+               struct pipe_fence_handle *pfence)
+{
+       struct fd_fence *fence = fd_fence(pfence);
+       struct fd_fence **slot = (struct fd_fence **)ptr;
+
+       fd_fence_ref(fence, slot);
+}
+
+/* pipe_screen::fence_signalled hook, forwards to fd_fence_signalled(). */
+static boolean
+fd_screen_fence_signalled(struct pipe_screen *screen,
+               struct pipe_fence_handle *pfence)
+{
+       struct fd_fence *fence = fd_fence(pfence);
+
+       return fd_fence_signalled(fence);
+}
+
+/* pipe_screen::fence_finish hook, forwards to fd_fence_wait().  Note that
+ * the timeout argument is not used.
+ */
+static boolean
+fd_screen_fence_finish(struct pipe_screen *screen,
+               struct pipe_fence_handle *pfence,
+               uint64_t timeout)
+{
+       struct fd_fence *fence = fd_fence(pfence);
+
+       return fd_fence_wait(fence);
+}
+
+/* pipe_screen::destroy hook: release what fd_screen_create() allocated,
+ * ie. the 3d pipe and the screen struct itself.
+ *
+ * The fd_device is handed in by the caller of fd_screen_create() and is
+ * not released here.  NOTE(review): confirm who owns the device.
+ */
+static void
+fd_screen_destroy(struct pipe_screen *pscreen)
+{
+       struct fd_screen *screen = fd_screen(pscreen);
+
+       if (screen->pipe)
+               fd_pipe_del(screen->pipe);
+
+       /* allocated with CALLOC_STRUCT(), so pair it with FREE(): */
+       FREE(screen);
+}
+
+/*
+EGL Version 1.4
+EGL Vendor Qualcomm, Inc
+EGL Extensions EGL_QUALCOMM_shared_image EGL_KHR_image EGL_AMD_create_image EGL_KHR_lock_surface EGL_KHR_lock_surface2 EGL_KHR_fence_sync EGL_IMG_context_priorityEGL_ANDROID_image_native_buffer
+GL extensions: GL_AMD_compressed_ATC_texture GL_AMD_performance_monitor GL_AMD_program_binary_Z400 GL_EXT_texture_filter_anisotropic GL_EXT_texture_format_BGRA8888 GL_EXT_texture_type_2_10_10_10_REV GL_NV_fence GL_OES_compressed_ETC1_RGB8_texture GL_OES_depth_texture GL_OES_depth24 GL_OES_EGL_image GL_OES_EGL_image_external GL_OES_element_index_uint GL_OES_fbo_render_mipmap GL_OES_fragment_precision_high GL_OES_get_program_binary GL_OES_packed_depth_stencil GL_OES_rgb8_rgba8 GL_OES_standard_derivatives GL_OES_texture_3D GL_OES_texture_float GL_OES_texture_half_float GL_OES_texture_half_float_linear GL_OES_texture_npot GL_OES_vertex_half_float GL_OES_vertex_type_10_10_10_2 GL_QCOM_alpha_test GL_QCOM_binning_control GL_QCOM_driver_control GL_QCOM_perfmon_global_mode GL_QCOM_extended_get GL_QCOM_extended_get2 GL_QCOM_tiled_rendering GL_QCOM_writeonly_rendering GL_AMD_compressed_3DC_texture
+GL_MAX_3D_TEXTURE_SIZE_OES: 1024 0 0 0
+no GL_MAX_SAMPLES_ANGLE: GL_INVALID_ENUM
+no GL_MAX_SAMPLES_APPLE: GL_INVALID_ENUM
+GL_MAX_TEXTURE_MAX_ANISOTROPY_EXT: 16 0 0 0
+no GL_MAX_SAMPLES_IMG: GL_INVALID_ENUM
+GL_MAX_TEXTURE_SIZE: 4096 0 0 0
+GL_MAX_VIEWPORT_DIMS: 4096 4096 0 0
+GL_MAX_VERTEX_ATTRIBS: 16 0 0 0
+GL_MAX_VERTEX_UNIFORM_VECTORS: 251 0 0 0
+GL_MAX_VARYING_VECTORS: 8 0 0 0
+GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS: 20 0 0 0
+GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS: 4 0 0 0
+GL_MAX_TEXTURE_IMAGE_UNITS: 16 0 0 0
+GL_MAX_FRAGMENT_UNIFORM_VECTORS: 221 0 0 0
+GL_MAX_CUBE_MAP_TEXTURE_SIZE: 4096 0 0 0
+GL_MAX_RENDERBUFFER_SIZE: 4096 0 0 0
+no GL_TEXTURE_NUM_LEVELS_QCOM: GL_INVALID_ENUM
+ */
+static int
+fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+       /* Report an integer/boolean screen capability: caps listed as
+        * supported return 1, unsupported ones return 0, limit style caps
+        * return the numeric limit, and caps not handled here are logged
+        * and reported as 0.
+        *
+        * NOTE(review): the 9192 returned for MAX_TEXTURE_ARRAY_LAYERS
+        * looks like it could be a typo for 8192 -- confirm.
+        *
+        * this is probably not totally correct.. but it's a start:
+        */
+       switch (param) {
+       /* Supported features (boolean caps). */
+       case PIPE_CAP_NPOT_TEXTURES:
+       case PIPE_CAP_TWO_SIDED_STENCIL:
+       case PIPE_CAP_ANISOTROPIC_FILTER:
+       case PIPE_CAP_POINT_SPRITE:
+       case PIPE_CAP_TEXTURE_SHADOW_MAP:
+       case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+       case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+       case PIPE_CAP_TEXTURE_SWIZZLE:
+       case PIPE_CAP_SHADER_STENCIL_EXPORT:
+       case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+       case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+       case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+       case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+       case PIPE_CAP_SM3:
+       case PIPE_CAP_SEAMLESS_CUBE_MAP:
+       case PIPE_CAP_PRIMITIVE_RESTART:
+       case PIPE_CAP_CONDITIONAL_RENDER:
+       case PIPE_CAP_TEXTURE_BARRIER:
+       case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+       case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+       case PIPE_CAP_TGSI_INSTANCEID:
+       case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+       case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+       case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+       case PIPE_CAP_COMPUTE:
+       case PIPE_CAP_START_INSTANCE:
+       case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+       case PIPE_CAP_TEXTURE_MULTISAMPLE:
+       case PIPE_CAP_USER_CONSTANT_BUFFERS:
+               return 1;
+
+       case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+               return 256;
+
+       case PIPE_CAP_GLSL_FEATURE_LEVEL:
+               return 120;
+
+       /* Unsupported features. */
+       case PIPE_CAP_INDEP_BLEND_ENABLE:
+       case PIPE_CAP_INDEP_BLEND_FUNC:
+       case PIPE_CAP_DEPTH_CLIP_DISABLE:
+       case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+       case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+       case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+       case PIPE_CAP_SCALED_RESOLVE:
+       case PIPE_CAP_TGSI_CAN_COMPACT_VARYINGS:
+       case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+       case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+       case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+       case PIPE_CAP_USER_VERTEX_BUFFERS:
+       case PIPE_CAP_USER_INDEX_BUFFERS:
+               return 0;
+
+       /* Stream output. */
+       case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+       case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+       case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+       case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+               return 0;
+
+       /* Texturing. */
+       case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+       case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+       case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+               return 14;
+       case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+               return 9192;
+       case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+               return 20;
+
+       /* Render targets. */
+       case PIPE_CAP_MAX_RENDER_TARGETS:
+               return 1;
+
+       /* Timer queries. */
+       case PIPE_CAP_QUERY_TIME_ELAPSED:
+       case PIPE_CAP_OCCLUSION_QUERY:
+       case PIPE_CAP_QUERY_TIMESTAMP:
+               return 0;
+
+       case PIPE_CAP_MIN_TEXEL_OFFSET:
+               return -8;
+
+       case PIPE_CAP_MAX_TEXEL_OFFSET:
+               return 7;
+
+       default:
+               DBG("unknown param %d", param);
+               return 0;
+       }
+}
+
+/* Report a floating point screen capability.  Caps not handled here are
+ * logged and reported as 0.
+ */
+static float
+fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+       switch (param) {
+       /* no guard band: */
+       case PIPE_CAPF_GUARD_BAND_BOTTOM:
+       case PIPE_CAPF_GUARD_BAND_RIGHT:
+       case PIPE_CAPF_GUARD_BAND_TOP:
+       case PIPE_CAPF_GUARD_BAND_LEFT:
+               return 0.0f;
+
+       /* texture filtering limits: */
+       case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+       case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+               return 16.0f;
+
+       /* line and point sizes: */
+       case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+       case PIPE_CAPF_MAX_POINT_WIDTH:
+       case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+       case PIPE_CAPF_MAX_LINE_WIDTH:
+               return 8192.0f;
+
+       default:
+               break;
+       }
+
+       DBG("unknown paramf %d", param);
+       return 0;
+}
+
+/* Report a per shader stage capability.  Only the vertex and fragment
+ * stages report limits; compute and geometry report 0 for everything,
+ * and any other stage is logged and reports 0 as well.
+ */
+static int
+fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+               enum pipe_shader_cap param)
+{
+       if ((shader == PIPE_SHADER_COMPUTE) ||
+                       (shader == PIPE_SHADER_GEOMETRY)) {
+               /* maybe we could emulate.. */
+               return 0;
+       }
+
+       if ((shader != PIPE_SHADER_FRAGMENT) &&
+                       (shader != PIPE_SHADER_VERTEX)) {
+               DBG("unknown shader type %d", shader);
+               return 0;
+       }
+
+       /* this is probably not totally correct.. but it's a start: */
+       switch (param) {
+       case PIPE_SHADER_CAP_PREFERRED_IR:
+               return PIPE_SHADER_IR_TGSI;
+
+       case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+       case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+       case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+       case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+               return 16384;
+
+       case PIPE_SHADER_CAP_MAX_TEMPS:
+               /* Max native temporaries. */
+               return 256;
+
+       case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+       case PIPE_SHADER_CAP_MAX_CONSTS:
+               return 64;
+
+       case PIPE_SHADER_CAP_MAX_INPUTS:
+               return 32;
+
+       case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+               return 16;
+
+       case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+               /* XXX */
+               return 8;
+
+       /* Max native address registers (XXX Isn't this equal to TEMPS?),
+        * plus the supported boolean caps:
+        */
+       case PIPE_SHADER_CAP_MAX_ADDRS:
+       case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+       case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+       case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+       case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+       case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+               return 1;
+
+       /* nothing uses preds; the rest are unsupported: */
+       case PIPE_SHADER_CAP_MAX_PREDS:
+       case PIPE_SHADER_CAP_SUBROUTINES:
+       case PIPE_SHADER_CAP_INTEGERS:
+       case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+               return 0;
+
+       default:
+               break;
+       }
+
+       DBG("unknown shader param %d", param);
+       return 0;
+}
+
+/* Check whether a format can be used for all of the requested bindings.
+ *
+ * The bind flags that can be satisfied for this format are collected in
+ * a mask, and the format is reported as supported only if that mask is
+ * equal to the requested usage, ie. every requested bind flag must be
+ * one that is handled below.  Multisampled resources are always
+ * rejected, and only B8G8R8A8/B8G8R8X8 UNORM are accepted as render
+ * targets.
+ */
+static boolean
+fd_screen_is_format_supported(struct pipe_screen *pscreen,
+               enum pipe_format format,
+               enum pipe_texture_target target,
+               unsigned sample_count,
+               unsigned usage)
+{
+       const unsigned fetch_binds = PIPE_BIND_SAMPLER_VIEW |
+                       PIPE_BIND_VERTEX_BUFFER;
+       const unsigned color_binds = PIPE_BIND_RENDER_TARGET |
+                       PIPE_BIND_DISPLAY_TARGET |
+                       PIPE_BIND_SCANOUT |
+                       PIPE_BIND_SHARED;
+       unsigned supported = 0;
+
+       if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+                       (sample_count > 1) || /* TODO add MSAA */
+                       !util_format_is_supported(format, usage)) {
+               DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+                               util_format_name(format), target, sample_count, usage);
+               return FALSE;
+       }
+
+       /* TODO figure out how to render to other formats.. */
+       if (usage & PIPE_BIND_RENDER_TARGET) {
+               if ((format != PIPE_FORMAT_B8G8R8A8_UNORM) &&
+                               (format != PIPE_FORMAT_B8G8R8X8_UNORM)) {
+                       DBG("not supported render target: format=%s, target=%d, sample_count=%d, usage=%x",
+                                       util_format_name(format), target, sample_count, usage);
+                       return FALSE;
+               }
+       }
+
+       /* texture / vertex fetch: */
+       if ((usage & fetch_binds) &&
+                       (fd_pipe2surface(format) != FMT_INVALID))
+               supported |= usage & fetch_binds;
+
+       /* color buffers: */
+       if ((usage & color_binds) &&
+                       (fd_pipe2color(format) != COLORX_INVALID))
+               supported |= usage & color_binds;
+
+       /* depth/stencil buffers: */
+       if ((usage & PIPE_BIND_DEPTH_STENCIL) &&
+                       (fd_pipe2depth(format) != DEPTHX_INVALID))
+               supported |= PIPE_BIND_DEPTH_STENCIL;
+
+       /* index buffers: */
+       if ((usage & PIPE_BIND_INDEX_BUFFER) &&
+                       (fd_pipe2index(format) != INDEX_SIZE_INVALID))
+               supported |= PIPE_BIND_INDEX_BUFFER;
+
+       /* transfers are accepted for any format that got this far: */
+       if (usage & PIPE_BIND_TRANSFER_READ)
+               supported |= PIPE_BIND_TRANSFER_READ;
+       if (usage & PIPE_BIND_TRANSFER_WRITE)
+               supported |= PIPE_BIND_TRANSFER_WRITE;
+
+       if (supported == usage)
+               return TRUE;
+
+       DBG("not supported: format=%s, target=%d, sample_count=%d, "
+                       "usage=%x, retval=%x", util_format_name(format),
+                       target, sample_count, usage, supported);
+
+       return FALSE;
+}
+
+/* Export a buffer object through a winsys handle.
+ *
+ * The stride is always stored in the handle.  For SHARED handles the
+ * result of fd_bo_get_name() decides success, for KMS handles the value
+ * from fd_bo_handle() is stored, and any other handle type fails.
+ */
+boolean
+fd_screen_bo_get_handle(struct pipe_screen *pscreen,
+               struct fd_bo *bo,
+               unsigned stride,
+               struct winsys_handle *whandle)
+{
+       whandle->stride = stride;
+
+       switch (whandle->type) {
+       case DRM_API_HANDLE_TYPE_SHARED:
+               return fd_bo_get_name(bo, &whandle->handle) == 0;
+       case DRM_API_HANDLE_TYPE_KMS:
+               whandle->handle = fd_bo_handle(bo);
+               return TRUE;
+       default:
+               return FALSE;
+       }
+}
+
+/* Import a buffer object from a winsys handle, looking it up by name on
+ * the screen's device.
+ *
+ * On success the bo is returned and the stride from the handle is stored
+ * in *out_stride.  On failure NULL is returned and *out_stride is left
+ * untouched.
+ */
+struct fd_bo *
+fd_screen_bo_from_handle(struct pipe_screen *pscreen,
+               struct winsys_handle *whandle,
+               unsigned *out_stride)
+{
+       struct fd_bo *bo = fd_bo_from_name(fd_screen(pscreen)->dev,
+                       whandle->handle);
+
+       if (bo) {
+               *out_stride = whandle->stride;
+               return bo;
+       }
+
+       DBG("ref name 0x%08x failed", whandle->handle);
+       return NULL;
+}
+
+/* Create the pipe_screen for a freedreno device.
+ *
+ * Reads the FD_MESA_DEBUG environment variable into fd_mesa_debug,
+ * creates the 3d pipe, queries the GMEM size and device id, and installs
+ * the screen entry points.
+ *
+ * Returns NULL if the screen can't be allocated, the 3d pipe can't be
+ * created, or either parameter can't be queried; in those cases anything
+ * allocated here is released again.  The device itself is not released.
+ */
+struct pipe_screen *
+fd_screen_create(struct fd_device *dev)
+{
+       struct fd_screen *screen = CALLOC_STRUCT(fd_screen);
+       struct pipe_screen *pscreen;
+       uint64_t val = 0;
+
+       char *fd_dbg = getenv("FD_MESA_DEBUG");
+       if (fd_dbg)
+               fd_mesa_debug = atoi(fd_dbg);
+
+       if (!screen)
+               return NULL;
+
+       DBG("");
+
+       screen->dev = dev;
+
+       // maybe this should be in context?
+       screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D);
+       if (!screen->pipe) {
+               DBG("could not create 3d pipe");
+               goto fail;
+       }
+
+       /* don't use val unless the query succeeded: */
+       if (fd_pipe_get_param(screen->pipe, FD_GMEM_SIZE, &val)) {
+               DBG("could not get GMEM size");
+               goto fail;
+       }
+       screen->gmemsize_bytes = val;
+
+       if (fd_pipe_get_param(screen->pipe, FD_DEVICE_ID, &val)) {
+               DBG("could not get device-id");
+               goto fail;
+       }
+       screen->device_id = val;
+
+       pscreen = &screen->base;
+
+       pscreen->destroy = fd_screen_destroy;
+       pscreen->get_param = fd_screen_get_param;
+       pscreen->get_paramf = fd_screen_get_paramf;
+       pscreen->get_shader_param = fd_screen_get_shader_param;
+       pscreen->context_create = fd_context_create;
+       pscreen->is_format_supported = fd_screen_is_format_supported;
+
+       fd_resource_screen_init(pscreen);
+
+       pscreen->get_name = fd_screen_get_name;
+       pscreen->get_vendor = fd_screen_get_vendor;
+
+       pscreen->get_timestamp = fd_screen_get_timestamp;
+
+       pscreen->fence_reference = fd_screen_fence_ref;
+       pscreen->fence_signalled = fd_screen_fence_signalled;
+       pscreen->fence_finish = fd_screen_fence_finish;
+
+       util_format_s3tc_init();
+
+       return pscreen;
+
+fail:
+       /* undo what was set up above; the device stays with the caller: */
+       if (screen->pipe)
+               fd_pipe_del(screen->pipe);
+       FREE(screen);
+       return NULL;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
new file mode 100644 (file)
index 0000000..720ee05
--- /dev/null
@@ -0,0 +1,70 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_SCREEN_H_
+#define FREEDRENO_SCREEN_H_
+
+#include <freedreno_drmif.h>
+#include <freedreno_ringbuffer.h>
+
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+
+typedef uint32_t u32;
+
+struct fd_bo;
+
+struct fd_screen {
+       struct pipe_screen base;      /* keep first: fd_screen() casts pipe_screen pointers to this struct */
+
+       uint32_t gmemsize_bytes;      /* FD_GMEM_SIZE param, queried in fd_screen_create() */
+       uint32_t device_id;           /* FD_DEVICE_ID param, also used for the screen name */
+
+       struct fd_device *dev;        /* device passed to fd_screen_create() */
+       struct fd_pipe *pipe;         /* FD_PIPE_3D pipe created in fd_screen_create() */
+
+       int64_t cpu_gpu_time_delta;   /* added to the cpu time in the get_timestamp hook */
+};
+
+/* Downcast a generic pipe_screen pointer to the driver's fd_screen.
+ * This is a plain pointer cast, no checking is done.
+ */
+static INLINE struct fd_screen *
+fd_screen(struct pipe_screen *pscreen)
+{
+       struct fd_screen *screen = (struct fd_screen *)pscreen;
+       return screen;
+}
+
+boolean fd_screen_bo_get_handle(struct pipe_screen *pscreen,
+               struct fd_bo *bo,
+               unsigned stride,
+               struct winsys_handle *whandle);
+struct fd_bo * fd_screen_bo_from_handle(struct pipe_screen *pscreen,
+               struct winsys_handle *whandle,
+               unsigned *out_stride);
+
+struct pipe_screen * fd_screen_create(struct fd_device *dev);
+
+#endif /* FREEDRENO_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_state.c b/src/gallium/drivers/freedreno/freedreno_state.c
new file mode 100644 (file)
index 0000000..529e246
--- /dev/null
@@ -0,0 +1,641 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_helpers.h"
+
+#include "freedreno_state.h"
+#include "freedreno_context.h"
+#include "freedreno_zsa.h"
+#include "freedreno_rasterizer.h"
+#include "freedreno_blend.h"
+#include "freedreno_program.h"
+#include "freedreno_resource.h"
+#include "freedreno_texture.h"
+#include "freedreno_gmem.h"
+#include "freedreno_util.h"
+
+/* Flag the blend color as dirty and store a copy of it in the context. */
+static void
+fd_set_blend_color(struct pipe_context *pctx,
+               const struct pipe_blend_color *blend_color)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->dirty |= FD_DIRTY_BLEND_COLOR;
+       fdctx->blend_color = *blend_color;
+}
+
+/* Flag the stencil reference values as dirty and store a copy of them in
+ * the context.
+ */
+static void
+fd_set_stencil_ref(struct pipe_context *pctx,
+               const struct pipe_stencil_ref *stencil_ref)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->dirty |= FD_DIRTY_STENCIL_REF;
+       fdctx->stencil_ref = *stencil_ref;
+}
+
+/* pipe_context::set_clip_state hook.
+ *
+ * NOTE: not implemented yet.  The clip state is ignored and only a debug
+ * message is emitted.
+ */
+static void
+fd_set_clip_state(struct pipe_context *pctx,
+               const struct pipe_clip_state *clip)
+{
+       DBG("TODO: ");
+}
+
+/* Flag the sample mask as dirty and store it in the context.  The value
+ * is truncated to 16 bits.
+ */
+static void
+fd_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+       const uint16_t mask = (uint16_t)sample_mask;
+
+       fdctx->dirty |= FD_DIRTY_SAMPLE_MASK;
+       fdctx->sample_mask = mask;
+}
+
+/* Bind (or, with cb == NULL, unbind) the constant buffer in slot 'index'
+ * of the given shader stage.
+ *
+ * Binding takes a reference on cb->buffer, copies offset/size/user
+ * pointer, sets the slot in the enabled and dirty masks and flags the
+ * context's constbuf state as dirty.  Unbinding drops the buffer
+ * reference and clears the slot from both masks; it does not touch the
+ * context's dirty flags.
+ *
+ * notes from calim on #dri-devel:
+ * index==0 will be non-UBO (ie. glUniformXYZ()) all packed together padded
+ * out to vec4's
+ * I should be able to consider that I own the user_ptr until the next
+ * set_constant_buffer() call, at which point I don't really care about the
+ * previous values.
+ * index>0 will be UBO's.. well, I'll worry about that later
+ */
+static void
+fd_set_constant_buffer(struct pipe_context *pctx, uint shader, uint index,
+               struct pipe_constant_buffer *cb)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+       struct fd_constbuf_stateobj *state = &fdctx->constbuf[shader];
+       struct pipe_constant_buffer *slot = &state->cb[index];
+
+       /* Note that the state tracker can unbind constant buffers by
+        * passing NULL here.
+        */
+       if (unlikely(!cb)) {
+               state->enabled_mask &= ~(1 << index);
+               state->dirty_mask &= ~(1 << index);
+               pipe_resource_reference(&slot->buffer, NULL);
+               return;
+       }
+
+       pipe_resource_reference(&slot->buffer, cb->buffer);
+       slot->user_buffer   = cb->user_buffer;
+       slot->buffer_size   = cb->buffer_size;
+       slot->buffer_offset = cb->buffer_offset;
+
+       state->dirty_mask |= 1 << index;
+       state->enabled_mask |= 1 << index;
+       fdctx->dirty |= FD_DIRTY_CONSTBUF;
+}
+
+/* Switch the context over to a new framebuffer.
+ *
+ * fd_context_render() is called first, before the old surfaces are
+ * replaced.  Then the new color and depth/stencil surfaces are
+ * referenced, color slots no longer in use are released, and the tile
+ * layout is recalculated if there is at least one color buffer.
+ */
+static void
+fd_set_framebuffer_state(struct pipe_context *pctx,
+               const struct pipe_framebuffer_state *framebuffer)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+       struct pipe_framebuffer_state *cur = &fdctx->framebuffer.base;
+       unsigned n = 0;
+
+       /* logs the state that is about to be replaced: */
+       DBG("%d: cbufs[0]=%p, zsbuf=%p", fdctx->needs_flush,
+                       cur->cbufs[0], cur->zsbuf);
+
+       fd_context_render(pctx);
+
+       /* take the new color buffers.. */
+       while (n < framebuffer->nr_cbufs) {
+               pipe_surface_reference(&cur->cbufs[n], framebuffer->cbufs[n]);
+               n++;
+       }
+
+       /* ..and drop whatever is left over from the old state: */
+       while (n < cur->nr_cbufs) {
+               pipe_surface_reference(&cur->cbufs[n], NULL);
+               n++;
+       }
+
+       cur->nr_cbufs = framebuffer->nr_cbufs;
+       cur->width = framebuffer->width;
+       cur->height = framebuffer->height;
+
+       pipe_surface_reference(&cur->zsbuf, framebuffer->zsbuf);
+
+       if (cur->nr_cbufs > 0)
+               fd_gmem_calculate_tiles(pctx);
+
+       fdctx->dirty |= FD_DIRTY_FRAMEBUFFER;
+}
+
+/* Flag the polygon stipple as dirty and store a copy of it in the
+ * context.
+ */
+static void
+fd_set_polygon_stipple(struct pipe_context *pctx,
+               const struct pipe_poly_stipple *stipple)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->dirty |= FD_DIRTY_STIPPLE;
+       fdctx->stipple = *stipple;
+}
+
+/* Flag the scissor as dirty and store a copy of it in the context. */
+static void
+fd_set_scissor_state(struct pipe_context *pctx,
+               const struct pipe_scissor_state *scissor)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->dirty |= FD_DIRTY_SCISSOR;
+       fdctx->scissor = *scissor;
+}
+
+/* Flag the viewport as dirty and store a copy of it in the context. */
+static void
+fd_set_viewport_state(struct pipe_context *pctx,
+               const struct pipe_viewport_state *viewport)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->dirty |= FD_DIRTY_VIEWPORT;
+       fdctx->viewport = *viewport;
+}
+
+/* Update 'count' vertex buffer slots starting at 'start_slot' via
+ * util_set_vertex_buffers_mask(), then recompute the slot count from the
+ * highest bit set in the enabled mask and flag the vertex buffers as
+ * dirty.
+ */
+static void
+fd_set_vertex_buffers(struct pipe_context *pctx,
+               unsigned start_slot, unsigned count,
+               const struct pipe_vertex_buffer *vb)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+       struct fd_vertexbuf_stateobj *state = &fdctx->vertexbuf;
+
+       util_set_vertex_buffers_mask(state->vb, &state->enabled_mask,
+                       vb, start_slot, count);
+       state->count = util_last_bit(state->enabled_mask);
+
+       fdctx->dirty |= FD_DIRTY_VERTEXBUF;
+}
+
+/* Bind (or, with ib == NULL, unbind) the index buffer.
+ *
+ * Unbinding only drops the buffer reference; index_size, offset and
+ * user_buffer keep their previous values.  The index buffer state is
+ * flagged as dirty in both cases.
+ */
+static void
+fd_set_index_buffer(struct pipe_context *pctx,
+               const struct pipe_index_buffer *ib)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       if (!ib) {
+               pipe_resource_reference(&fdctx->indexbuf.buffer, NULL);
+       } else {
+               pipe_resource_reference(&fdctx->indexbuf.buffer, ib->buffer);
+               fdctx->indexbuf.user_buffer = ib->user_buffer;
+               fdctx->indexbuf.offset = ib->offset;
+               fdctx->indexbuf.index_size = ib->index_size;
+       }
+
+       fdctx->dirty |= FD_DIRTY_INDEXBUF;
+}
+
+/* Install the pipe_context set_*() state entry points implemented in
+ * this file.
+ */
+void
+fd_state_init(struct pipe_context *pctx)
+{
+       /* geometry inputs: */
+       pctx->set_index_buffer = fd_set_index_buffer;
+       pctx->set_vertex_buffers = fd_set_vertex_buffers;
+       pctx->set_constant_buffer = fd_set_constant_buffer;
+
+       /* rasterization: */
+       pctx->set_viewport_state = fd_set_viewport_state;
+       pctx->set_scissor_state = fd_set_scissor_state;
+       pctx->set_polygon_stipple = fd_set_polygon_stipple;
+       pctx->set_clip_state = fd_set_clip_state;
+
+       /* output merger: */
+       pctx->set_framebuffer_state = fd_set_framebuffer_state;
+       pctx->set_sample_mask = fd_set_sample_mask;
+       pctx->set_stencil_ref = fd_set_stencil_ref;
+       pctx->set_blend_color = fd_set_blend_color;
+}
+
+/* NOTE: just define the position for const regs statically.. the blob
+ * driver doesn't seem to change these dynamically, and I can't really
+ * think of a good reason to do so..
+ */
+#define VS_CONST_BASE 0x20
+#define PS_CONST_BASE 0x120
+
+/* Emit the constants for one shader stage into the ringbuffer.
+ *
+ * The enabled constant buffers are written first, lowest slot first,
+ * packed one after the other starting at const register 'base'.  The
+ * shader's immediates (if a shader is given) are then written right
+ * after wherever the user constants ended.
+ */
+static void
+emit_constants(struct fd_ringbuffer *ring, uint32_t base,
+               struct fd_constbuf_stateobj *constbuf,
+               struct fd_shader_stateobj *shader)
+{
+       const uint32_t first_reg = base;
+       uint32_t pending = constbuf->enabled_mask;
+       unsigned n, c;
+
+       // XXX TODO only emit dirty consts.. but we need to keep track if
+       // they are clobbered by a clear, gmem2mem, or mem2gmem..
+       constbuf->dirty_mask = pending;
+
+       /* emit user constants: */
+       for (; pending; ) {
+               const unsigned slot = ffs(pending) - 1;
+               struct pipe_constant_buffer *cb = &constbuf->cb[slot];
+               const unsigned ndwords = ALIGN(cb->buffer_size, 4) / 4; /* size in dwords */
+
+               // I expect that size should be a multiple of vec4's:
+               assert(ndwords == ALIGN(ndwords, 4));
+
+               /* hmm, sometimes we still seem to end up with consts bound,
+                * even if shader isn't using them, which ends up overwriting
+                * const reg's used for immediates.. this is a hack to work
+                * around that:
+                */
+               if (shader && ((base - first_reg) >= (shader->first_immediate * 4)))
+                       break;
+
+               if (constbuf->dirty_mask & (1 << slot)) {
+                       const uint8_t *bytes;
+                       const uint32_t *src;
+
+                       /* user pointer if there is one, otherwise map the bo: */
+                       if (cb->user_buffer)
+                               bytes = cb->user_buffer;
+                       else
+                               bytes = fd_bo_map(fd_resource(cb->buffer)->bo);
+
+                       src = (const uint32_t *)(bytes + cb->buffer_offset);
+
+                       OUT_PKT3(ring, CP_SET_CONSTANT, ndwords + 1);
+                       OUT_RING(ring, base);
+                       for (n = 0; n < ndwords; n++)
+                               OUT_RING(ring, src[n]);
+
+                       constbuf->dirty_mask &= ~(1 << slot);
+               }
+
+               base += ndwords;
+               pending &= ~(1 << slot);
+       }
+
+       if (!shader)
+               return;
+
+       /* emit shader immediates: */
+       for (n = 0; n < shader->num_immediates; n++, base += 4) {
+               OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+               OUT_RING(ring, base);
+               for (c = 0; c < 4; c++)
+                       OUT_RING(ring, shader->immediates[n].val[c]);
+       }
+}
+
+/* Bitmask with one bit per hw texture const-idx, used to track which
+ * texture state has already been emitted.  32 bits works at least for a220
+ * and earlier.. if later GPUs gain more than 32 texture units, might need
+ * to bump this up to uint64_t
+ */
+typedef uint32_t texmask;
+
+/* Emit the texture constant for one sampler slot, unless the hw const-idx
+ * that the slot maps to has already been emitted.
+ *
+ *   ring    - ringbuffer the CP_SET_CONSTANT packet is written to
+ *   ctx     - context, used to map samp_id to the flat hw const-idx
+ *   tex     - per-stage texture state (ctx->verttex or ctx->fragtex)
+ *   samp_id - sampler slot within 'tex'; the caller checks that a sampler
+ *             is bound in this slot
+ *   emitted - mask of the const-idx's emitted so far
+ *
+ * Returns the mask bit of the const-idx that was emitted, or 0 if nothing
+ * was emitted, so the caller can OR the result into its 'emitted' mask.
+ */
+static texmask
+emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
+               struct fd_texture_stateobj *tex, unsigned samp_id, texmask emitted)
+{
+       unsigned const_idx = fd_get_const_idx(ctx, tex, samp_id);
+       /* shift in the mask's own type rather than as a signed int, so that
+        * bit 31 is well defined and widening texmask keeps working:
+        */
+       texmask bit = ((texmask)1) << const_idx;
+       struct fd_sampler_stateobj *sampler;
+       struct fd_pipe_sampler_view *view;
+
+       if (emitted & bit)
+               return 0;
+
+       sampler = tex->samplers[samp_id];
+       view = fd_pipe_sampler_view(tex->textures[samp_id]);
+
+       /* a sampler state can be bound without a matching sampler view, in
+        * which case there is no texture to describe:
+        */
+       if (!view)
+               return 0;
+
+       /* six dwords of texture state per const-idx, each one the OR of the
+        * sampler's and the view's contribution where both have one:
+        */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 7);
+       OUT_RING(ring, 0x00010000 + (0x6 * const_idx));
+
+       OUT_RING(ring, sampler->tex0 | view->tex0);
+       OUT_RELOC(ring, view->tex_resource->bo, 0, view->fmt);
+       OUT_RING(ring, view->tex2);
+       OUT_RING(ring, sampler->tex3 | view->tex3);
+       OUT_RING(ring, sampler->tex4);
+       OUT_RING(ring, sampler->tex5);
+
+       return bit;
+}
+
+/* Emit texture state for every slot that has a sampler bound: vertex stage
+ * first, then fragment stage.  The mask of what has been emitted so far is
+ * fed back into emit_texture() so a const-idx shared between slots is only
+ * written once.
+ */
+static void
+emit_textures(struct fd_ringbuffer *ring, struct fd_context *ctx)
+{
+       struct fd_texture_stateobj *stages[2];
+       texmask done = 0;
+       unsigned s, slot;
+
+       stages[0] = &ctx->verttex;
+       stages[1] = &ctx->fragtex;
+
+       for (s = 0; s < 2; s++) {
+               struct fd_texture_stateobj *tex = stages[s];
+
+               for (slot = 0; slot < tex->num_samplers; slot++) {
+                       if (!tex->samplers[slot])
+                               continue;
+                       done |= emit_texture(ring, ctx, tex, slot, done);
+               }
+       }
+}
+
+/* Emit a single CP_SET_CONSTANT packet describing 'n' vertex buffers.  The
+ * header dword is built from the low 16 bits of 'val'; each buffer then
+ * contributes two dwords: a reloc to its bo at the buffer's offset,
+ * followed by its size.
+ */
+void
+fd_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
+               struct fd_vertex_buf *vbufs, uint32_t n)
+{
+       struct fd_vertex_buf *vb = vbufs;
+       uint32_t remaining;
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 1 + (2 * n));
+       OUT_RING(ring, (0x1 << 16) | (val & 0xffff));
+
+       for (remaining = n; remaining > 0; remaining--, vb++) {
+               struct fd_resource *rsc = fd_resource(vb->prsc);
+
+               OUT_RELOC(ring, rsc->bo, vb->offset, 3);
+               OUT_RING(ring, vb->size);
+       }
+}
+
+/* Emit RB_SURFACE_INFO, RB_COLOR_INFO and RB_DEPTH_INFO for the given
+ * framebuffer state, all in one CP_SET_CONSTANT packet.
+ */
+void
+fd_emit_framebuffer_state(struct fd_ringbuffer *ring,
+               struct fd_framebuffer_stateobj *fb)
+{
+       struct pipe_framebuffer_state *pfb = &fb->base;
+       uint32_t depth_info, depth_base;
+
+       /* bin_w/bin_h should be multiples of 32, so this should hold: */
+       assert(((fb->bin_w * fb->bin_h) % 1024) == 0);
+
+       /* in GMEM, depth/stencil is placed after the color buffer: */
+       depth_base = (fb->bin_w * fb->bin_h) / 1024;
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+       OUT_RING(ring, CP_REG(REG_RB_SURFACE_INFO));
+
+       /* RB_SURFACE_INFO: */
+       OUT_RING(ring, fb->bin_w);
+
+       /* RB_COLOR_INFO: */
+       OUT_RING(ring, RB_COLOR_INFO_COLOR_SWAP(1) |
+                       RB_COLOR_INFO_COLOR_FORMAT(fd_pipe2color(pfb->cbufs[0]->format)));
+
+       /* RB_DEPTH_INFO (the depth format is only set with a zsbuf bound): */
+       depth_info = RB_DEPTH_INFO_DEPTH_BASE(ALIGN(depth_base, 4));
+       if (pfb->zsbuf)
+               depth_info |= RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
+       OUT_RING(ring, depth_info);
+}
+
+/* Emit the hw state selected by 'dirty' (a mask of FD_DIRTY_* bits) to the
+ * context's ringbuffer, then clear those bits from ctx->dirty.
+ */
+void
+fd_state_emit(struct pipe_context *pctx, uint32_t dirty)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd_ringbuffer *ring = ctx->ring;
+
+       /* NOTE: we probably want to eventually refactor this so each state
+        * object handles emitting its own state..  although the mapping of
+        * state to registers is not always orthogonal, sometimes a single
+        * register contains bitfields coming from multiple state objects,
+        * so not sure the best way to deal with that yet.
+        */
+
+       if (dirty & FD_DIRTY_SAMPLE_MASK) {
+               OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+               OUT_RING(ring, CP_REG(REG_PA_SC_AA_MASK));
+               OUT_RING(ring, ctx->sample_mask);
+       }
+
+       if (dirty & FD_DIRTY_ZSA) {
+               struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+               OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+               OUT_RING(ring, CP_REG(REG_RB_DEPTHCONTROL));
+               OUT_RING(ring, ctx->zsa->rb_depthcontrol);
+
+               /* the stencil ref values are merged into the precomputed zsa
+                * register values at emit time:
+                */
+               OUT_PKT3(ring, CP_SET_CONSTANT, 4);
+               OUT_RING(ring, CP_REG(REG_RB_STENCILREFMASK_BF));
+               OUT_RING(ring, ctx->zsa->rb_stencilrefmask_bf |
+                               RB_STENCILREFMASK_STENCILREF(sr->ref_value[1]));
+               OUT_RING(ring, ctx->zsa->rb_stencilrefmask |
+                               RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+               OUT_RING(ring, ctx->zsa->rb_alpha_ref);
+       }
+
+       /* PA_SU_SC_MODE_CNTL combines rasterizer and framebuffer bits, so
+        * this group is emitted when either of the two is dirty:
+        */
+       if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
+               OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+               OUT_RING(ring, CP_REG(REG_PA_CL_CLIP_CNTL));
+               OUT_RING(ring, ctx->rasterizer->pa_cl_clip_cntl);
+               OUT_RING(ring, ctx->rasterizer->pa_su_sc_mode_cntl |
+                               ctx->framebuffer.pa_su_sc_mode_cntl);
+
+               OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+               OUT_RING(ring, CP_REG(REG_PA_SU_POINT_SIZE));
+               OUT_RING(ring, ctx->rasterizer->pa_su_point_size);
+               OUT_RING(ring, ctx->rasterizer->pa_su_point_minmax);
+               OUT_RING(ring, ctx->rasterizer->pa_su_line_cntl);
+               OUT_RING(ring, ctx->rasterizer->pa_sc_line_stipple);
+
+               OUT_PKT3(ring, CP_SET_CONSTANT, 6);
+               OUT_RING(ring, CP_REG(REG_PA_SU_VTX_CNTL));
+               OUT_RING(ring, ctx->rasterizer->pa_su_vtx_cntl);
+               OUT_RING(ring, f2d(1.0));                /* PA_CL_GB_VERT_CLIP_ADJ */
+               OUT_RING(ring, f2d(1.0));                /* PA_CL_GB_VERT_DISC_ADJ */
+               OUT_RING(ring, f2d(1.0));                /* PA_CL_GB_HORZ_CLIP_ADJ */
+               OUT_RING(ring, f2d(1.0));                /* PA_CL_GB_HORZ_DISC_ADJ */
+       }
+
+       if (dirty & FD_DIRTY_FRAMEBUFFER)
+               fd_emit_framebuffer_state(ring, &ctx->framebuffer);
+
+       if (dirty & FD_DIRTY_SCISSOR) {
+               OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+               OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_SCISSOR_TL));
+               OUT_RING(ring, xy2d(ctx->scissor.minx,   /* PA_SC_WINDOW_SCISSOR_TL */
+                               ctx->scissor.miny));
+               OUT_RING(ring, xy2d(ctx->scissor.maxx,   /* PA_SC_WINDOW_SCISSOR_BR */
+                               ctx->scissor.maxy));
+       }
+
+       if (dirty & FD_DIRTY_VIEWPORT) {
+               OUT_PKT3(ring, CP_SET_CONSTANT, 7);
+               OUT_RING(ring, CP_REG(REG_PA_CL_VPORT_XSCALE));
+               OUT_RING(ring, f2d(ctx->viewport.scale[0]));       /* PA_CL_VPORT_XSCALE */
+               OUT_RING(ring, f2d(ctx->viewport.translate[0]));   /* PA_CL_VPORT_XOFFSET */
+               OUT_RING(ring, f2d(ctx->viewport.scale[1]));       /* PA_CL_VPORT_YSCALE */
+               OUT_RING(ring, f2d(ctx->viewport.translate[1]));   /* PA_CL_VPORT_YOFFSET */
+               OUT_RING(ring, f2d(ctx->viewport.scale[2]));       /* PA_CL_VPORT_ZSCALE */
+               OUT_RING(ring, f2d(ctx->viewport.translate[2]));   /* PA_CL_VPORT_ZOFFSET */
+
+               OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+               OUT_RING(ring, CP_REG(REG_PA_CL_VTE_CNTL));
+               OUT_RING(ring, PA_CL_VTE_CNTL_VTX_W0_FMT |
+                               PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
+                               PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
+                               PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
+                               PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
+                               PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
+                               PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
+       }
+
+       if (dirty & (FD_DIRTY_PROG | FD_DIRTY_VTX | FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX)) {
+               fd_program_validate(ctx);
+               fd_program_emit(ring, &ctx->prog);
+       }
+
+       /* the shader is only passed to emit_constants() when the program is
+        * dirty, otherwise NULL is passed in its place:
+        */
+       if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
+               emit_constants(ring,  VS_CONST_BASE * 4,
+                               &ctx->constbuf[PIPE_SHADER_VERTEX],
+                               (dirty & FD_DIRTY_PROG) ? ctx->prog.vp : NULL);
+               emit_constants(ring, PS_CONST_BASE * 4,
+                               &ctx->constbuf[PIPE_SHADER_FRAGMENT],
+                               (dirty & FD_DIRTY_PROG) ? ctx->prog.fp : NULL);
+       }
+
+       /* RB_COLORCONTROL combines zsa and blend bits, so it is emitted when
+        * either of the two is dirty:
+        */
+       if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_ZSA)) {
+               OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+               OUT_RING(ring, CP_REG(REG_RB_COLORCONTROL));
+               OUT_RING(ring, ctx->zsa->rb_colorcontrol | ctx->blend->rb_colorcontrol);
+       }
+
+       if (dirty & FD_DIRTY_BLEND) {
+               OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+               OUT_RING(ring, CP_REG(REG_RB_BLEND_CONTROL));
+               OUT_RING(ring, ctx->blend->rb_blendcontrol);
+
+               OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+               OUT_RING(ring, CP_REG(REG_RB_COLOR_MASK));
+               OUT_RING(ring, ctx->blend->rb_colormask);
+       }
+
+       if (dirty & (FD_DIRTY_VERTTEX | FD_DIRTY_FRAGTEX | FD_DIRTY_PROG))
+               emit_textures(ring, ctx);
+
+       /* only the bits handled by this call are cleared: */
+       ctx->dirty &= ~dirty;
+}
+
+/* emit per-context initialization: a fixed sequence of register writes and
+ * CP packets, after which the ringbuffer is flushed and ctx->draw_start is
+ * marked.  Values written as raw hex are kept as-is; where the register
+ * name is known it is noted next to the value.
+ */
+void
+fd_state_emit_setup(struct pipe_context *pctx)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct fd_ringbuffer *ring = ctx->ring;
+
+       OUT_PKT0(ring, REG_TP0_CHICKEN, 1);
+       OUT_RING(ring, 0x00000002);
+
+       OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+       OUT_RING(ring, 0x00007fff);
+
+       /* base and size of the vertex and fragment shader constant ranges
+        * (VS_CONST_BASE/PS_CONST_BASE are also used when emitting constants):
+        */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_SQ_VS_CONST));
+       OUT_RING(ring, SQ_VS_CONST_BASE(VS_CONST_BASE) |
+                       SQ_VS_CONST_SIZE(0x100));
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_SQ_PS_CONST));
+       OUT_RING(ring, SQ_PS_CONST_BASE(PS_CONST_BASE) |
+                       SQ_PS_CONST_SIZE(0xe0));
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+       OUT_RING(ring, CP_REG(REG_VGT_MAX_VTX_INDX));
+       OUT_RING(ring, 0xffffffff);        /* VGT_MAX_VTX_INDX */
+       OUT_RING(ring, 0x00000000);        /* VGT_MIN_VTX_INDX */
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET));
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL));
+       OUT_RING(ring, 0x0000003b);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_SQ_CONTEXT_MISC));
+       OUT_RING(ring, SQ_CONTEXT_MISC_SC_SAMPLE_CNTL(CENTERS_ONLY));
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_SQ_INTERPOLATOR_CNTL));
+       OUT_RING(ring, 0xffffffff);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_SC_AA_CONFIG));
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_SC_LINE_CNTL));
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_PA_SC_WINDOW_OFFSET));
+       OUT_RING(ring, 0x00000000);
+
+       // XXX we change this dynamically for draw/clear.. vs gmem<->mem..
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_MODECONTROL));
+       OUT_RING(ring, RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_SAMPLE_POS));
+       OUT_RING(ring, 0x88888888);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COLOR_DEST_MASK));
+       OUT_RING(ring, 0xffffffff);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COPY_DEST_INFO));
+       OUT_RING(ring, RB_COPY_DEST_INFO_FORMAT(COLORX_4_4_4_4) |
+                       RB_COPY_DEST_INFO_WRITE_RED |
+                       RB_COPY_DEST_INFO_WRITE_GREEN |
+                       RB_COPY_DEST_INFO_WRITE_BLUE |
+                       RB_COPY_DEST_INFO_WRITE_ALPHA);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
+       OUT_RING(ring, CP_REG(REG_SQ_WRAPPING_0));
+       OUT_RING(ring, 0x00000000);        /* SQ_WRAPPING_0 */
+       OUT_RING(ring, 0x00000000);        /* SQ_WRAPPING_1 */
+
+       OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
+       OUT_RING(ring, 0x00000000);
+
+       /* NOTE(review): the operands below are opaque magic values; their
+        * meaning can't be told from this file.
+        */
+       OUT_PKT3(ring, CP_WAIT_REG_EQ, 4);
+       OUT_RING(ring, 0x000005d0);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x5f601000);
+       OUT_RING(ring, 0x00000001);
+
+       OUT_PKT0(ring, REG_SQ_INST_STORE_MANAGMENT, 1);
+       OUT_RING(ring, 0x00000180);
+
+       OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+       OUT_RING(ring, 0x00000300);
+
+       OUT_PKT3(ring, CP_SET_SHADER_BASES, 1);
+       OUT_RING(ring, 0x80000180);
+
+       /* not sure what this form of CP_SET_CONSTANT is..  read as IEEE-754
+        * single precision floats, the non-zero payload dwords below are
+        * 20000.0, 1.0, 0.5, 2.0, 0.75, 0.375 and 0.25.
+        */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 13);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x469c4000);
+       OUT_RING(ring, 0x3f800000);
+       OUT_RING(ring, 0x3f000000);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, 0x40000000);
+       OUT_RING(ring, 0x3f400000);
+       OUT_RING(ring, 0x3ec00000);
+       OUT_RING(ring, 0x3e800000);
+
+       /* default to all color channels enabled for writing: */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_RB_COLOR_MASK));
+       OUT_RING(ring, RB_COLOR_MASK_WRITE_RED |
+                       RB_COLOR_MASK_WRITE_GREEN |
+                       RB_COLOR_MASK_WRITE_BLUE |
+                       RB_COLOR_MASK_WRITE_ALPHA);
+
+       OUT_PKT3(ring, CP_SET_CONSTANT, 5);
+       OUT_RING(ring, CP_REG(REG_RB_BLEND_RED));
+       OUT_RING(ring, 0x00000000);        /* RB_BLEND_RED */
+       OUT_RING(ring, 0x00000000);        /* RB_BLEND_GREEN */
+       OUT_RING(ring, 0x00000000);        /* RB_BLEND_BLUE */
+       OUT_RING(ring, 0x000000ff);        /* RB_BLEND_ALPHA */
+
+       /* flush the setup commands, then mark draw_start at the ring
+        * position that follows them:
+        */
+       fd_ringbuffer_flush(ring);
+       fd_ringmarker_mark(ctx->draw_start);
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_state.h b/src/gallium/drivers/freedreno/freedreno_state.h
new file mode 100644 (file)
index 0000000..4e68448
--- /dev/null
@@ -0,0 +1,53 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_STATE_H_
+#define FREEDRENO_STATE_H_
+
+#include "pipe/p_context.h"
+
+struct fd_vertexbuf_stateobj;
+struct fd_zsa_stateobj;
+struct fd_framebuffer_stateobj;
+struct fd_ringbuffer;
+
+/* NOTE(review): defined outside this header; presumably hooks up the
+ * state-related pipe_context entrypoints -- confirm against the .c file.
+ */
+void fd_state_init(struct pipe_context *pctx);
+
+/* One vertex buffer as consumed by fd_emit_vertex_bufs(): */
+struct fd_vertex_buf {
+       /* 'offset' is applied to the reloc of the resource's bo; 'size' is
+        * emitted as the dword that follows it:
+        */
+       unsigned offset, size;
+       /* resource backing the vertex data: */
+       struct pipe_resource *prsc;
+};
+
+/* Emit one CP_SET_CONSTANT packet describing 'n' vertex buffers; the low
+ * 16 bits of 'val' go into the packet's first dword.
+ */
+void fd_emit_vertex_bufs(struct fd_ringbuffer *ring, uint32_t val,
+               struct fd_vertex_buf *vbufs, uint32_t n);
+/* Emit RB_SURFACE_INFO, RB_COLOR_INFO and RB_DEPTH_INFO for 'fb'. */
+void fd_emit_framebuffer_state(struct fd_ringbuffer *ring,
+               struct fd_framebuffer_stateobj *fb);
+/* Emit the state selected by the 'dirty' mask and clear those bits from
+ * the context's dirty mask.
+ */
+void fd_state_emit(struct pipe_context *pctx, uint32_t dirty);
+/* Emit the per-context initialization commands and flush the ringbuffer. */
+void fd_state_emit_setup(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_STATE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_surface.c b/src/gallium/drivers/freedreno/freedreno_surface.c
new file mode 100644 (file)
index 0000000..250fe4b
--- /dev/null
@@ -0,0 +1,73 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "freedreno_surface.h"
+#include "freedreno_resource.h"
+#include "freedreno_util.h"
+
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+/* Create a pipe_surface for one mipmap level of a resource.
+ *
+ *   pctx      - context recorded in the new surface
+ *   ptex      - resource to wrap; the surface takes its own reference on it
+ *   surf_tmpl - template supplying the format, level and layer; only single
+ *               layer surfaces are handled (first_layer == last_layer)
+ *
+ * Returns the new surface, with width/height minified for the requested
+ * level, or NULL if the allocation fails.
+ */
+struct pipe_surface *
+fd_create_surface(struct pipe_context *pctx,
+               struct pipe_resource *ptex,
+               const struct pipe_surface *surf_tmpl)
+{
+       struct fd_surface *surface;
+       struct pipe_surface *psurf;
+       unsigned level;
+
+       assert(surf_tmpl->u.tex.first_layer == surf_tmpl->u.tex.last_layer);
+
+       surface = CALLOC_STRUCT(fd_surface);
+       /* bail out explicitly, instead of forming &surface->base from a
+        * NULL pointer:
+        */
+       if (!surface)
+               return NULL;
+
+       psurf = &surface->base;
+       level = surf_tmpl->u.tex.level;
+
+       pipe_reference_init(&psurf->reference, 1);
+       pipe_resource_reference(&psurf->texture, ptex);
+
+       psurf->context = pctx;
+       psurf->format = surf_tmpl->format;
+       psurf->width = u_minify(ptex->width0, level);
+       psurf->height = u_minify(ptex->height0, level);
+       psurf->u.tex.level = level;
+       psurf->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
+       psurf->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
+
+       // TODO
+       DBG("TODO: %ux%u", psurf->width, psurf->height);
+
+       return psurf;
+}
+
+/* Destroy a surface made by fd_create_surface(): drop its reference on the
+ * underlying resource, then free the surface itself.
+ */
+void
+fd_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
+{
+       struct fd_surface *surface = fd_surface(psurf);
+
+       pipe_resource_reference(&surface->base.texture, NULL);
+       FREE(surface);
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_surface.h b/src/gallium/drivers/freedreno/freedreno_surface.h
new file mode 100644 (file)
index 0000000..3293f33
--- /dev/null
@@ -0,0 +1,54 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_SURFACE_H_
+#define FREEDRENO_SURFACE_H_
+
+#include "pipe/p_state.h"
+
+/* Driver surface object.  'base' must remain the first member, since
+ * fd_surface() converts between the two types with a plain pointer cast.
+ */
+struct fd_surface {
+       struct pipe_surface base;
+       /* NOTE(review): the users of the fields below are not visible from
+        * this header; presumably they describe where the surface lives
+        * within the resource and its dimensions -- confirm.
+        */
+       uint32_t offset;
+       uint32_t pitch;
+       uint32_t width;
+       uint16_t height;
+       uint16_t depth;
+};
+
+/* Downcast a pipe_surface pointer to the driver's fd_surface. */
+static INLINE struct fd_surface *
+fd_surface(struct pipe_surface *psurf)
+{
+       struct fd_surface *surface = (struct fd_surface *)psurf;
+
+       return surface;
+}
+
+/* Create a surface for the level/layer of 'ptex' that 'surf_tmpl' selects;
+ * the surface holds a reference on 'ptex'.
+ */
+struct pipe_surface* fd_create_surface(struct pipe_context *pctx,
+               struct pipe_resource *ptex,
+               const struct pipe_surface *surf_tmpl);
+/* Release the surface's resource reference and free the surface. */
+void fd_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf);
+
+#endif /* FREEDRENO_SURFACE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.c b/src/gallium/drivers/freedreno/freedreno_texture.c
new file mode 100644 (file)
index 0000000..07bfbd3
--- /dev/null
@@ -0,0 +1,286 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "freedreno_texture.h"
+#include "freedreno_util.h"
+
+/* Translate a gallium PIPE_TEX_WRAP_* mode into the hw clamp mode.  An
+ * unrecognized mode is logged and translates to 0.
+ */
+static enum sq_tex_clamp
+tex_clamp(unsigned wrap)
+{
+       enum sq_tex_clamp clamp = 0;
+
+       switch (wrap) {
+       case PIPE_TEX_WRAP_REPEAT:
+               clamp = SQ_TEX_WRAP;
+               break;
+       case PIPE_TEX_WRAP_CLAMP:
+               clamp = SQ_TEX_CLAMP_HALF_BORDER;
+               break;
+       case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+               clamp = SQ_TEX_CLAMP_LAST_TEXEL;
+               break;
+       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+               clamp = SQ_TEX_CLAMP_BORDER;
+               break;
+       case PIPE_TEX_WRAP_MIRROR_REPEAT:
+               clamp = SQ_TEX_MIRROR;
+               break;
+       case PIPE_TEX_WRAP_MIRROR_CLAMP:
+               clamp = SQ_TEX_MIRROR_ONCE_HALF_BORDER;
+               break;
+       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+               clamp = SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
+               break;
+       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+               clamp = SQ_TEX_MIRROR_ONCE_BORDER;
+               break;
+       default:
+               DBG("invalid wrap: %u", wrap);
+               break;
+       }
+
+       return clamp;
+}
+
+/* Translate a gallium PIPE_TEX_FILTER_* mode into the hw filter mode.  An
+ * unrecognized mode is logged and translates to 0.
+ */
+static enum sq_tex_filter
+tex_filter(unsigned filter)
+{
+       if (filter == PIPE_TEX_FILTER_NEAREST)
+               return SQ_TEX_FILTER_POINT;
+
+       if (filter == PIPE_TEX_FILTER_LINEAR)
+               return SQ_TEX_FILTER_BILINEAR;
+
+       DBG("invalid filter: %u", filter);
+       return 0;
+}
+
+/* Create a sampler state object: keeps a copy of the gallium state and
+ * precomputes the sampler's share of the texture constant dwords.
+ * Returns NULL if the allocation fails.
+ */
+static void *
+fd_sampler_state_create(struct pipe_context *pctx,
+               const struct pipe_sampler_state *cso)
+{
+       struct fd_sampler_stateobj *sampler = CALLOC_STRUCT(fd_sampler_stateobj);
+
+       if (sampler == NULL)
+               return NULL;
+
+       sampler->base = *cso;
+
+       /* the pitch (SQ_TEX0_PITCH()) is not known here, it must be OR'd in
+        * later when we know the bound texture:
+        */
+       sampler->tex0 = SQ_TEX0_CLAMP_X(tex_clamp(cso->wrap_s));
+       sampler->tex0 |= SQ_TEX0_CLAMP_Y(tex_clamp(cso->wrap_t));
+       sampler->tex0 |= SQ_TEX0_CLAMP_Z(tex_clamp(cso->wrap_r));
+
+       sampler->tex3 = SQ_TEX3_XY_MAG_FILTER(tex_filter(cso->mag_img_filter));
+       sampler->tex3 |= SQ_TEX3_XY_MIN_FILTER(tex_filter(cso->min_img_filter));
+
+       sampler->tex4 = 0x00000000; /* ??? */
+       sampler->tex5 = 0x00000200; /* ??? */
+
+       return sampler;
+}
+
+/* Free a sampler state object made by fd_sampler_state_create(). */
+static void
+fd_sampler_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd_sampler_stateobj *sampler = hwcso;
+
+       FREE(sampler);
+}
+
+/* Create a sampler view of 'prsc' from the template 'cso', precomputing
+ * the view's share of the texture constant dwords (pitch, dimensions and
+ * swizzle).  The view holds a reference on the resource.  Returns NULL if
+ * the allocation fails.
+ */
+static struct pipe_sampler_view *
+fd_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
+               const struct pipe_sampler_view *cso)
+{
+       struct fd_pipe_sampler_view *view = CALLOC_STRUCT(fd_pipe_sampler_view);
+       struct fd_resource *rsc;
+
+       if (view == NULL)
+               return NULL;
+
+       rsc = fd_resource(prsc);
+
+       /* start from the template, then fix up the fields the view owns: */
+       view->base = *cso;
+       view->base.reference.count = 1;
+       view->base.context = pctx;
+
+       /* take a reference on the resource for the view: */
+       pipe_reference(NULL, &prsc->reference);
+       view->base.texture = prsc;
+
+       view->tex_resource =  rsc;
+       view->fmt = fd_pipe2surface(cso->format);
+
+       view->tex0 = SQ_TEX0_PITCH(rsc->pitch);
+       view->tex2 = SQ_TEX2_HEIGHT(prsc->height0) | SQ_TEX2_WIDTH(prsc->width0);
+       view->tex3 = fd_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
+                       cso->swizzle_b, cso->swizzle_a);
+
+       return &view->base;
+}
+
+/* Destroy a sampler view: drop its reference on the texture resource and
+ * free the view.
+ */
+static void
+fd_sampler_view_destroy(struct pipe_context *pctx,
+               struct pipe_sampler_view *view)
+{
+       struct pipe_resource **texture = &view->texture;
+
+       pipe_resource_reference(texture, NULL);
+       FREE(view);
+}
+
+/* Bind 'nr' sampler states to the first 'nr' slots of 'prog'.  Slots past
+ * 'nr' that were bound before are reset to NULL.  Every slot touched is
+ * flagged in dirty_samplers.
+ */
+static void bind_sampler_states(struct fd_texture_stateobj *prog,
+               unsigned nr, void **hwcso)
+{
+       unsigned prev = prog->num_samplers;
+       unsigned count = (nr > prev) ? nr : prev;
+       unsigned slot;
+
+       for (slot = 0; slot < count; slot++) {
+               /* new binding, or unbind of a stale slot: */
+               prog->samplers[slot] = (slot < nr) ? hwcso[slot] : NULL;
+               prog->dirty_samplers |= (1 << slot);
+       }
+
+       prog->num_samplers = nr;
+}
+
+/* Set 'nr' sampler views on the first 'nr' slots of 'prog', taking a
+ * reference on each.  Slots past 'nr' that were set before are released.
+ * Every slot touched is flagged in dirty_samplers.
+ */
+static void set_sampler_views(struct fd_texture_stateobj *prog,
+               unsigned nr, struct pipe_sampler_view **views)
+{
+       unsigned prev = prog->num_textures;
+       unsigned count = (nr > prev) ? nr : prev;
+       unsigned slot;
+
+       for (slot = 0; slot < count; slot++) {
+               /* new view, or NULL to release a stale slot: */
+               struct pipe_sampler_view *view = (slot < nr) ? views[slot] : NULL;
+
+               pipe_sampler_view_reference(&prog->textures[slot], view);
+               prog->dirty_samplers |= (1 << slot);
+       }
+
+       prog->num_textures = nr;
+}
+
+/* pipe_context hook: bind fragment sampler states, flag them as dirty. */
+static void
+fd_fragtex_sampler_states_bind(struct pipe_context *pctx,
+               unsigned nr, void **hwcso)
+{
+       struct fd_context *fd = fd_context(pctx);
+
+       bind_sampler_states(&fd->fragtex, nr, hwcso);
+       fd->dirty |= FD_DIRTY_FRAGTEX;
+}
+
+
+/* pipe_context hook: set fragment sampler views, flag them as dirty. */
+static void
+fd_fragtex_set_sampler_views(struct pipe_context *pctx, unsigned nr,
+               struct pipe_sampler_view **views)
+{
+       struct fd_context *fd = fd_context(pctx);
+
+       set_sampler_views(&fd->fragtex, nr, views);
+       fd->dirty |= FD_DIRTY_FRAGTEX;
+}
+
+/* pipe_context hook: bind vertex sampler states, flag them as dirty. */
+static void
+fd_verttex_sampler_states_bind(struct pipe_context *pctx,
+               unsigned nr, void **hwcso)
+{
+       struct fd_context *fd = fd_context(pctx);
+
+       bind_sampler_states(&fd->verttex, nr, hwcso);
+       fd->dirty |= FD_DIRTY_VERTTEX;
+}
+
+
+/* pipe_context hook: set vertex sampler views, flag them as dirty. */
+static void
+fd_verttex_set_sampler_views(struct pipe_context *pctx, unsigned nr,
+               struct pipe_sampler_view **views)
+{
+       struct fd_context *fd = fd_context(pctx);
+
+       set_sampler_views(&fd->verttex, nr, views);
+       fd->dirty |= FD_DIRTY_VERTTEX;
+}
+
+/* Check whether two sampler slots refer to the same combination of texture
+ * resource and sampler state.
+ *
+ *   tex1/samp_id1 - first slot (per-stage texture state + slot index)
+ *   tex2/samp_id2 - second slot
+ *
+ * Returns false if either index is out of range for its stage.  A slot
+ * always matches itself; otherwise two slots match only when both have a
+ * sampler view and a sampler state bound, the views point at the same
+ * resource, and the gallium sampler states are bytewise identical.
+ */
+static bool
+tex_cmp(struct fd_texture_stateobj *tex1, unsigned samp_id1,
+               struct fd_texture_stateobj *tex2, unsigned samp_id2)
+{
+       struct pipe_sampler_view *view1, *view2;
+       struct fd_sampler_stateobj *samp1, *samp2;
+
+       if ((samp_id1 >= tex1->num_samplers) ||
+                       (samp_id2 >= tex2->num_samplers))
+               return false;
+
+       if ((tex1 == tex2) && (samp_id1 == samp_id2))
+               return true;
+
+       view1 = tex1->textures[samp_id1];
+       view2 = tex2->textures[samp_id2];
+       samp1 = tex1->samplers[samp_id1];
+       samp2 = tex2->samplers[samp_id2];
+
+       /* slots can be left unbound (NULL sampler and/or view); an unbound
+        * slot never matches a different slot:
+        */
+       if (!view1 || !view2 || !samp1 || !samp2)
+               return false;
+
+       if (view1->texture != view2->texture)
+               return false;
+
+       if (memcmp(&samp1->base, &samp2->base, sizeof(samp1->base)))
+               return false;
+
+       return true;
+}
+
+/* map gallium sampler-id to hw const-idx.. adreno uses a flat address
+ * space of samplers (const-idx), so we need to map the gallium sampler-id
+ * which is per-shader to a global const-idx space.
+ *
+ * The flat space is all vertex sampler slots followed by all fragment
+ * sampler slots; the result is the position of the first slot that
+ * tex_cmp() matches against (tex, samp_id), or the total number of slots
+ * if none matches.
+ */
+unsigned
+fd_get_const_idx(struct fd_context *ctx, struct fd_texture_stateobj *tex,
+               unsigned samp_id)
+{
+       unsigned num_vert = ctx->verttex.num_samplers;
+       unsigned num_frag = ctx->fragtex.num_samplers;
+       unsigned slot;
+
+       /* TODO maybe worth having some sort of cache, because we need to
+        * do this loop thru all the samplers both when patching shaders
+        * and also when emitting sampler state..
+        */
+
+       for (slot = 0; slot < num_vert; slot++)
+               if (tex_cmp(&ctx->verttex, slot, tex, samp_id))
+                       return slot;
+
+       /* fragment slots follow the vertex slots in the flat space: */
+       for (slot = 0; slot < num_frag; slot++)
+               if (tex_cmp(&ctx->fragtex, slot, tex, samp_id))
+                       return num_vert + slot;
+
+       return num_vert + num_frag;
+}
+
+/* Install the sampler state and sampler view hooks on the context. */
+void
+fd_texture_init(struct pipe_context *pctx)
+{
+       /* sampler state objects: */
+       pctx->create_sampler_state = fd_sampler_state_create;
+       pctx->delete_sampler_state = fd_sampler_state_delete;
+       pctx->bind_vertex_sampler_states = fd_verttex_sampler_states_bind;
+       pctx->bind_fragment_sampler_states = fd_fragtex_sampler_states_bind;
+
+       /* sampler views: */
+       pctx->create_sampler_view = fd_sampler_view_create;
+       pctx->sampler_view_destroy = fd_sampler_view_destroy;
+       pctx->set_vertex_sampler_views = fd_verttex_set_sampler_views;
+       pctx->set_fragment_sampler_views = fd_fragtex_set_sampler_views;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_texture.h b/src/gallium/drivers/freedreno/freedreno_texture.h
new file mode 100644 (file)
index 0000000..32bdb03
--- /dev/null
@@ -0,0 +1,61 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_TEXTURE_H_
+#define FREEDRENO_TEXTURE_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+#include "freedreno_resource.h"
+#include "freedreno_util.h"
+
+/* Driver-side sampler state: the gallium sampler state plus four 32-bit
+ * words.
+ * NOTE(review): tex0/tex3/tex4/tex5 presumably hold precomputed bits for
+ * the correspondingly numbered dwords of the hw texture constant --
+ * confirm against fd_sampler_state_create().
+ */
+struct fd_sampler_stateobj {
+       struct pipe_sampler_state base;
+       uint32_t tex0, tex3, tex4, tex5;
+};
+
+/* Driver-side sampler view.  'base' must stay the first member: the
+ * fd_pipe_sampler_view() helper converts a pipe_sampler_view pointer to
+ * this type with a plain cast.
+ * NOTE(review): tex0/tex2/tex3 presumably hold precomputed bits for the
+ * correspondingly numbered dwords of the hw texture constant -- confirm
+ * against fd_sampler_view_create().
+ */
+struct fd_pipe_sampler_view {
+       struct pipe_sampler_view base;
+       struct fd_resource *tex_resource;
+       enum sq_surfaceformat fmt;
+       uint32_t tex0, tex2, tex3;
+};
+
+/* Downcast a gallium sampler view to the driver's wrapper type.  This is
+ * a plain pointer cast, so pview must point at the 'base' member of a
+ * struct fd_pipe_sampler_view.
+ */
+static INLINE struct fd_pipe_sampler_view *
+fd_pipe_sampler_view(struct pipe_sampler_view *pview)
+{
+       return (struct fd_pipe_sampler_view *)pview;
+}
+
+unsigned fd_get_const_idx(struct fd_context *ctx,
+               struct fd_texture_stateobj *tex, unsigned samp_id);
+
+void fd_texture_init(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_TEXTURE_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_util.c b/src/gallium/drivers/freedreno/freedreno_util.c
new file mode 100644 (file)
index 0000000..3bc3e79
--- /dev/null
@@ -0,0 +1,351 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_format.h"
+
+#include "freedreno_util.h"
+
+/* Map a gallium pipe_format to an sq_surfaceformat (FMT_*) code.
+ *
+ * The table is grouped by texel size.  Formats that differ only in
+ * numeric interpretation (UNORM/SNORM/UINT/SINT/SRGB) or in channel
+ * naming/order share one FMT_* code; this function only picks the code
+ * and does not express those differences.
+ *
+ * Returns FMT_INVALID for any format not listed.
+ *
+ * NOTE(review): PIPE_FORMAT_R8G8B8_UNORM is listed with the 32-bit
+ * formats and mapped to FMT_8_8_8_8 -- confirm that is intended.
+ */
+enum sq_surfaceformat
+fd_pipe2surface(enum pipe_format format)
+{
+       switch (format) {
+       /* 8-bit buffers. */
+       case PIPE_FORMAT_A8_UNORM:
+       case PIPE_FORMAT_A8_SNORM:
+       case PIPE_FORMAT_A8_UINT:
+       case PIPE_FORMAT_A8_SINT:
+       case PIPE_FORMAT_I8_UNORM:
+       case PIPE_FORMAT_I8_SNORM:
+       case PIPE_FORMAT_I8_UINT:
+       case PIPE_FORMAT_I8_SINT:
+       case PIPE_FORMAT_L8_UNORM:
+       case PIPE_FORMAT_L8_SNORM:
+       case PIPE_FORMAT_L8_UINT:
+       case PIPE_FORMAT_L8_SINT:
+       case PIPE_FORMAT_L8_SRGB:
+       case PIPE_FORMAT_R8_UNORM:
+       case PIPE_FORMAT_R8_SNORM:
+       case PIPE_FORMAT_R8_UINT:
+       case PIPE_FORMAT_R8_SINT:
+               return FMT_8;
+
+       /* 16-bit buffers. */
+       case PIPE_FORMAT_B5G6R5_UNORM:
+               return FMT_5_6_5;
+       case PIPE_FORMAT_B5G5R5A1_UNORM:
+       case PIPE_FORMAT_B5G5R5X1_UNORM:
+               return FMT_1_5_5_5;
+       case PIPE_FORMAT_B4G4R4A4_UNORM:
+       case PIPE_FORMAT_B4G4R4X4_UNORM:
+               return FMT_4_4_4_4;
+       case PIPE_FORMAT_Z16_UNORM:
+               return FMT_16;
+       case PIPE_FORMAT_L8A8_UNORM:
+       case PIPE_FORMAT_L8A8_SNORM:
+       case PIPE_FORMAT_L8A8_UINT:
+       case PIPE_FORMAT_L8A8_SINT:
+       case PIPE_FORMAT_L8A8_SRGB:
+       case PIPE_FORMAT_R8G8_UNORM:
+       case PIPE_FORMAT_R8G8_SNORM:
+       case PIPE_FORMAT_R8G8_UINT:
+       case PIPE_FORMAT_R8G8_SINT:
+               return FMT_8_8;
+       case PIPE_FORMAT_R16_UNORM:
+       case PIPE_FORMAT_R16_SNORM:
+       case PIPE_FORMAT_R16_UINT:
+       case PIPE_FORMAT_R16_SINT:
+       case PIPE_FORMAT_A16_UNORM:
+       case PIPE_FORMAT_A16_SNORM:
+       case PIPE_FORMAT_A16_UINT:
+       case PIPE_FORMAT_A16_SINT:
+       case PIPE_FORMAT_L16_UNORM:
+       case PIPE_FORMAT_L16_SNORM:
+       case PIPE_FORMAT_L16_UINT:
+       case PIPE_FORMAT_L16_SINT:
+       case PIPE_FORMAT_I16_UNORM:
+       case PIPE_FORMAT_I16_SNORM:
+       case PIPE_FORMAT_I16_UINT:
+       case PIPE_FORMAT_I16_SINT:
+               return FMT_16;
+       case PIPE_FORMAT_R16_FLOAT:
+       case PIPE_FORMAT_A16_FLOAT:
+       case PIPE_FORMAT_L16_FLOAT:
+       case PIPE_FORMAT_I16_FLOAT:
+               return FMT_16_FLOAT;
+
+       /* 32-bit buffers. */
+       case PIPE_FORMAT_A8B8G8R8_SRGB:
+       case PIPE_FORMAT_A8B8G8R8_UNORM:
+       case PIPE_FORMAT_A8R8G8B8_UNORM:
+       case PIPE_FORMAT_B8G8R8A8_SRGB:
+       case PIPE_FORMAT_B8G8R8A8_UNORM:
+       case PIPE_FORMAT_B8G8R8X8_UNORM:
+       case PIPE_FORMAT_R8G8B8A8_SNORM:
+       case PIPE_FORMAT_R8G8B8A8_UNORM:
+       case PIPE_FORMAT_R8G8B8X8_UNORM:
+       case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
+       case PIPE_FORMAT_X8B8G8R8_UNORM:
+       case PIPE_FORMAT_X8R8G8B8_UNORM:
+       case PIPE_FORMAT_R8G8B8_UNORM:
+       case PIPE_FORMAT_R8G8B8A8_SINT:
+       case PIPE_FORMAT_R8G8B8A8_UINT:
+               return FMT_8_8_8_8;
+       case PIPE_FORMAT_R10G10B10A2_UNORM:
+       case PIPE_FORMAT_R10G10B10X2_SNORM:
+       case PIPE_FORMAT_B10G10R10A2_UNORM:
+       case PIPE_FORMAT_B10G10R10A2_UINT:
+       case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
+               return FMT_2_10_10_10;
+       case PIPE_FORMAT_Z24X8_UNORM:
+       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+               return FMT_24_8;
+       case PIPE_FORMAT_R32_UINT:
+       case PIPE_FORMAT_R32_SINT:
+       case PIPE_FORMAT_A32_UINT:
+       case PIPE_FORMAT_A32_SINT:
+       case PIPE_FORMAT_L32_UINT:
+       case PIPE_FORMAT_L32_SINT:
+       case PIPE_FORMAT_I32_UINT:
+       case PIPE_FORMAT_I32_SINT:
+               return FMT_32;
+       case PIPE_FORMAT_R32_FLOAT:
+       case PIPE_FORMAT_A32_FLOAT:
+       case PIPE_FORMAT_L32_FLOAT:
+       case PIPE_FORMAT_I32_FLOAT:
+       case PIPE_FORMAT_Z32_FLOAT:
+               return FMT_32_FLOAT;
+       case PIPE_FORMAT_R16G16_FLOAT:
+       case PIPE_FORMAT_L16A16_FLOAT:
+               return FMT_16_16_FLOAT;
+       case PIPE_FORMAT_R16G16_UNORM:
+       case PIPE_FORMAT_R16G16_SNORM:
+       case PIPE_FORMAT_R16G16_UINT:
+       case PIPE_FORMAT_R16G16_SINT:
+       case PIPE_FORMAT_L16A16_UNORM:
+       case PIPE_FORMAT_L16A16_SNORM:
+       case PIPE_FORMAT_L16A16_UINT:
+       case PIPE_FORMAT_L16A16_SINT:
+               return FMT_16_16;
+
+       /* 64-bit buffers. */
+       case PIPE_FORMAT_R16G16B16A16_UINT:
+       case PIPE_FORMAT_R16G16B16A16_SINT:
+       case PIPE_FORMAT_R16G16B16A16_UNORM:
+       case PIPE_FORMAT_R16G16B16A16_SNORM:
+               return FMT_16_16_16_16;
+       case PIPE_FORMAT_R16G16B16A16_FLOAT:
+               return FMT_16_16_16_16_FLOAT;
+       case PIPE_FORMAT_R32G32_FLOAT:
+       case PIPE_FORMAT_L32A32_FLOAT:
+               return FMT_32_32_FLOAT;
+       case PIPE_FORMAT_R32G32_SINT:
+       case PIPE_FORMAT_R32G32_UINT:
+       case PIPE_FORMAT_L32A32_UINT:
+       case PIPE_FORMAT_L32A32_SINT:
+               return FMT_32_32;
+
+       /* 96-bit buffers. */
+       case PIPE_FORMAT_R32G32B32_FLOAT:
+               return FMT_32_32_32_FLOAT;
+
+       /* 128-bit buffers. */
+       case PIPE_FORMAT_R32G32B32A32_SNORM:
+       case PIPE_FORMAT_R32G32B32A32_UNORM:
+       case PIPE_FORMAT_R32G32B32A32_SINT:
+       case PIPE_FORMAT_R32G32B32A32_UINT:
+               return FMT_32_32_32_32;
+       case PIPE_FORMAT_R32G32B32A32_FLOAT:
+               return FMT_32_32_32_32_FLOAT;
+
+       /* YUV buffers. */
+       case PIPE_FORMAT_UYVY:
+               return FMT_Cr_Y1_Cb_Y0;
+       case PIPE_FORMAT_YUYV:
+               return FMT_Y1_Cr_Y0_Cb;
+
+       default:
+               return FMT_INVALID;
+       }
+}
+
+/* Map a gallium pipe_format to an rb_colorformatx (COLORX_*) code.
+ *
+ * The table is grouped by texel size, and formats that differ only in
+ * numeric interpretation or channel naming/order share one COLORX_*
+ * code.  Depth/stencil formats are also listed here: Z16 maps to
+ * COLORX_8_8, Z24X8 and Z24S8 map to COLORX_8_8_8_8, and Z32_FLOAT maps
+ * to COLORX_32_FLOAT.
+ *
+ * Returns COLORX_INVALID for any format not listed.
+ */
+enum rb_colorformatx
+fd_pipe2color(enum pipe_format format)
+{
+       switch (format) {
+       /* 8-bit buffers. */
+       case PIPE_FORMAT_A8_UNORM:
+       case PIPE_FORMAT_A8_SNORM:
+       case PIPE_FORMAT_A8_UINT:
+       case PIPE_FORMAT_A8_SINT:
+       case PIPE_FORMAT_I8_UNORM:
+       case PIPE_FORMAT_I8_SNORM:
+       case PIPE_FORMAT_I8_UINT:
+       case PIPE_FORMAT_I8_SINT:
+       case PIPE_FORMAT_L8_UNORM:
+       case PIPE_FORMAT_L8_SNORM:
+       case PIPE_FORMAT_L8_UINT:
+       case PIPE_FORMAT_L8_SINT:
+       case PIPE_FORMAT_L8_SRGB:
+       case PIPE_FORMAT_R8_UNORM:
+       case PIPE_FORMAT_R8_SNORM:
+       case PIPE_FORMAT_R8_UINT:
+       case PIPE_FORMAT_R8_SINT:
+               return COLORX_8;
+
+       /* 16-bit buffers. */
+       case PIPE_FORMAT_B5G6R5_UNORM:
+               return COLORX_5_6_5;
+       case PIPE_FORMAT_B5G5R5A1_UNORM:
+       case PIPE_FORMAT_B5G5R5X1_UNORM:
+               return COLORX_1_5_5_5;
+       case PIPE_FORMAT_B4G4R4A4_UNORM:
+       case PIPE_FORMAT_B4G4R4X4_UNORM:
+               return COLORX_4_4_4_4;
+       case PIPE_FORMAT_L8A8_UNORM:
+       case PIPE_FORMAT_L8A8_SNORM:
+       case PIPE_FORMAT_L8A8_UINT:
+       case PIPE_FORMAT_L8A8_SINT:
+       case PIPE_FORMAT_L8A8_SRGB:
+       case PIPE_FORMAT_R8G8_UNORM:
+       case PIPE_FORMAT_R8G8_SNORM:
+       case PIPE_FORMAT_R8G8_UINT:
+       case PIPE_FORMAT_R8G8_SINT:
+       case PIPE_FORMAT_Z16_UNORM:
+               return COLORX_8_8;
+       case PIPE_FORMAT_R16_FLOAT:
+       case PIPE_FORMAT_A16_FLOAT:
+       case PIPE_FORMAT_L16_FLOAT:
+       case PIPE_FORMAT_I16_FLOAT:
+               return COLORX_16_FLOAT;
+
+       /* 32-bit buffers. */
+       case PIPE_FORMAT_A8B8G8R8_SRGB:
+       case PIPE_FORMAT_A8B8G8R8_UNORM:
+       case PIPE_FORMAT_A8R8G8B8_UNORM:
+       case PIPE_FORMAT_B8G8R8A8_SRGB:
+       case PIPE_FORMAT_B8G8R8A8_UNORM:
+       case PIPE_FORMAT_B8G8R8X8_UNORM:
+       case PIPE_FORMAT_R8G8B8A8_SNORM:
+       case PIPE_FORMAT_R8G8B8A8_UNORM:
+       case PIPE_FORMAT_R8G8B8X8_UNORM:
+       case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
+       case PIPE_FORMAT_X8B8G8R8_UNORM:
+       case PIPE_FORMAT_X8R8G8B8_UNORM:
+       case PIPE_FORMAT_R8G8B8_UNORM:
+       case PIPE_FORMAT_R8G8B8A8_SINT:
+       case PIPE_FORMAT_R8G8B8A8_UINT:
+       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+       case PIPE_FORMAT_Z24X8_UNORM:
+               return COLORX_8_8_8_8;
+       case PIPE_FORMAT_R32_FLOAT:
+       case PIPE_FORMAT_A32_FLOAT:
+       case PIPE_FORMAT_L32_FLOAT:
+       case PIPE_FORMAT_I32_FLOAT:
+       case PIPE_FORMAT_Z32_FLOAT:
+               return COLORX_32_FLOAT;
+       case PIPE_FORMAT_R16G16_FLOAT:
+       case PIPE_FORMAT_L16A16_FLOAT:
+               return COLORX_16_16_FLOAT;
+
+       /* 64-bit buffers. */
+       case PIPE_FORMAT_R16G16B16A16_FLOAT:
+               return COLORX_16_16_16_16_FLOAT;
+       case PIPE_FORMAT_R32G32_FLOAT:
+       case PIPE_FORMAT_L32A32_FLOAT:
+               return COLORX_32_32_FLOAT;
+
+       /* 128-bit buffers. */
+       case PIPE_FORMAT_R32G32B32A32_FLOAT:
+               return COLORX_32_32_32_32_FLOAT;
+
+       default:
+               return COLORX_INVALID;
+       }
+}
+
+/* Map a gallium depth/stencil pipe_format to an rb_depth_format code.
+ * Only Z16, Z24X8 and Z24S8 are recognized; every other format yields
+ * DEPTHX_INVALID.
+ */
+enum rb_depth_format
+fd_pipe2depth(enum pipe_format format)
+{
+       if (format == PIPE_FORMAT_Z16_UNORM)
+               return DEPTHX_16;
+
+       /* with or without stencil, the 24-bit formats share one code: */
+       if (format == PIPE_FORMAT_Z24X8_UNORM ||
+                       format == PIPE_FORMAT_Z24_UNORM_S8_UINT)
+               return DEPTHX_24_8;
+
+       return DEPTHX_INVALID;
+}
+
+/* Map an index-buffer pipe_format (I8/I16/I32 UINT) to the matching
+ * pc_di_index_size code; any other format yields INDEX_SIZE_INVALID.
+ */
+enum pc_di_index_size
+fd_pipe2index(enum pipe_format format)
+{
+       if (format == PIPE_FORMAT_I8_UINT)
+               return INDEX_SIZE_8_BIT;
+       if (format == PIPE_FORMAT_I16_UINT)
+               return INDEX_SIZE_16_BIT;
+       if (format == PIPE_FORMAT_I32_UINT)
+               return INDEX_SIZE_32_BIT;
+
+       return INDEX_SIZE_INVALID;
+}
+
+/* Convert a PIPE_SWIZZLE_* selector to the matching SQ_TEX_* selector.
+ * Unrecognized values are treated like PIPE_SWIZZLE_RED (SQ_TEX_X).
+ */
+static inline enum sq_tex_swiz
+tex_swiz(unsigned swiz)
+{
+       switch (swiz) {
+       case PIPE_SWIZZLE_GREEN:
+               return SQ_TEX_Y;
+       case PIPE_SWIZZLE_BLUE:
+               return SQ_TEX_Z;
+       case PIPE_SWIZZLE_ALPHA:
+               return SQ_TEX_W;
+       case PIPE_SWIZZLE_ZERO:
+               return SQ_TEX_ZERO;
+       case PIPE_SWIZZLE_ONE:
+               return SQ_TEX_ONE;
+       case PIPE_SWIZZLE_RED:
+       default:
+               return SQ_TEX_X;
+       }
+}
+
+/* Build the SQ_TEX3 swizzle bits for a sampler view by composing the
+ * format's own channel swizzle with the view swizzle.
+ *
+ * Each of the format description's four swizzle entries is used as an
+ * index into an 8-entry table: entries 0..3 are the view swizzles
+ * (r, g, b, a), entry 4 is ZERO and entries 5..7 are ONE.
+ * NOTE(review): assumes desc->swizzle[] values are always < 8 and that
+ * util_format_description() does not return NULL for 'format' -- confirm.
+ */
+uint32_t
+fd_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
+               unsigned swizzle_b, unsigned swizzle_a)
+{
+       const struct util_format_description *desc =
+                       util_format_description(format);
+       uint8_t lut[] = {
+                       swizzle_r, swizzle_g, swizzle_b, swizzle_a,
+                       PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE,
+                       PIPE_SWIZZLE_ONE, PIPE_SWIZZLE_ONE,
+       };
+       const enum sq_tex_swiz x = tex_swiz(lut[desc->swizzle[0]]);
+       const enum sq_tex_swiz y = tex_swiz(lut[desc->swizzle[1]]);
+       const enum sq_tex_swiz z = tex_swiz(lut[desc->swizzle[2]]);
+       const enum sq_tex_swiz w = tex_swiz(lut[desc->swizzle[3]]);
+
+       return SQ_TEX3_SWIZ_X(x) |
+                       SQ_TEX3_SWIZ_Y(y) |
+                       SQ_TEX3_SWIZ_Z(z) |
+                       SQ_TEX3_SWIZ_W(w);
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_util.h b/src/gallium/drivers/freedreno/freedreno_util.h
new file mode 100644 (file)
index 0000000..fb1e392
--- /dev/null
@@ -0,0 +1,124 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_UTIL_H_
+#define FREEDRENO_UTIL_H_
+
+#include <freedreno_drmif.h>
+#include <freedreno_ringbuffer.h>
+
+#include "pipe/p_format.h"
+#include "util/u_debug.h"
+
+#include "freedreno_pm4.h"
+#include "freedreno_a2xx_reg.h"
+
+enum sq_surfaceformat fd_pipe2surface(enum pipe_format format);
+enum rb_colorformatx fd_pipe2color(enum pipe_format format);
+enum rb_depth_format fd_pipe2depth(enum pipe_format format);
+enum pc_di_index_size fd_pipe2index(enum pipe_format format);
+uint32_t fd_tex_swiz(enum pipe_format format, unsigned swizzle_r,
+               unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+
+
+/* Bits tested against fd_mesa_debug.  FD_DBG_MSGS gates the DBG() macro
+ * below.
+ * NOTE(review): FD_DBG_DISASM presumably enables shader disassembly
+ * dumps -- its use is not visible in this header.
+ */
+#define FD_DBG_MSGS   0x1
+#define FD_DBG_DISASM 0x2
+extern int fd_mesa_debug;
+
+/* printf-style debug message, prefixed with the calling function name and
+ * line number and terminated with a newline.  Only printed when
+ * FD_DBG_MSGS is set in fd_mesa_debug.  'fmt' must be a string literal
+ * (it is pasted between two other literals).
+ */
+#define DBG(fmt, ...) \
+               do { if (fd_mesa_debug & FD_DBG_MSGS) \
+                       debug_printf("%s:%d: "fmt "\n", \
+                               __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0)
+
+/* Round v up to the next multiple of a.  The mask arithmetic is only
+ * correct when a is a power of two.
+ */
+#define ALIGN(v,a) (((v) + (a) - 1) & ~((a) - 1))
+/* Element count of a true array (not valid on a pointer). */
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+
+/* NOTE: both macros evaluate their arguments more than once, so avoid
+ * arguments with side effects.
+ */
+#define min(a, b) (((a) < (b)) ? (a) : (b))
+#define max(a, b) (((a) > (b)) ? (a) : (b))
+
+
+/* Set to non-zero to have OUT_RING()/OUT_RELOC() log each dword through
+ * DBG().
+ */
+#define LOG_DWORDS 0
+
+
+/* Append one dword to the ringbuffer at ring->cur and advance the cursor.
+ * When LOG_DWORDS is set, the dword and its offset from ring->last_start
+ * are logged first.
+ */
+static inline void
+OUT_RING(struct fd_ringbuffer *ring, uint32_t data)
+{
+       if (LOG_DWORDS) {
+               const uint32_t pos = (uint32_t)(ring->cur - ring->last_start);
+
+               DBG("ring[%p]: OUT_RING   %04x:  %08x", ring, pos, data);
+       }
+       *ring->cur = data;
+       ring->cur++;
+}
+
+/* Emit a relocation for 'bo' at 'offset' into the ringbuffer via
+ * fd_ringbuffer_emit_reloc(); 'or' is passed through unchanged.  When
+ * LOG_DWORDS is set, the current ring offset, bo and offset are logged
+ * first.
+ */
+static inline void
+OUT_RELOC(struct fd_ringbuffer *ring, struct fd_bo *bo,
+               uint32_t offset, uint32_t or)
+{
+       if (LOG_DWORDS) {
+               const uint32_t pos = (uint32_t)(ring->cur - ring->last_start);
+
+               DBG("ring[%p]: OUT_RELOC  %04x:  %p+%u", ring, pos, bo, offset);
+       }
+       fd_ringbuffer_emit_reloc(ring, bo, offset, or);
+}
+
+/* Check that 'ndwords' more dwords fit before ring->end.
+ *
+ * NOTE: on overflow this only logs a debug message; it neither flushes
+ * nor grows the ring, so the caller's subsequent OUT_RING() calls still
+ * write past ring->end.
+ */
+static inline void BEGIN_RING(struct fd_ringbuffer *ring, uint32_t ndwords)
+{
+       if ((ring->cur + ndwords) >= ring->end) {
+               /* this probably won't really work if we have multiple tiles..
+                * but it is ok for 2d..  we might need different behavior
+                * depending on 2d or 3d pipe.
+                */
+               DBG("uh oh..");
+       }
+}
+
+/* Emit a type-0 packet header: CP_TYPE0_PKT with (cnt - 1) placed at bit
+ * 16 and the low 15 bits of regindx in the bottom of the dword.  Space
+ * for the header plus cnt dwords is checked via BEGIN_RING(); the caller
+ * is expected to follow up with the cnt payload dwords.
+ */
+static inline void
+OUT_PKT0(struct fd_ringbuffer *ring, uint16_t regindx, uint16_t cnt)
+{
+       const uint32_t hdr =
+                       CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF);
+
+       BEGIN_RING(ring, cnt+1);
+       OUT_RING(ring, hdr);
+}
+
+/* Emit a type-3 packet header: CP_TYPE3_PKT with (cnt - 1) placed at bit
+ * 16 and the 8-bit opcode at bit 8.  Space for the header plus cnt dwords
+ * is checked via BEGIN_RING(); the caller is expected to follow up with
+ * the cnt payload dwords.
+ */
+static inline void
+OUT_PKT3(struct fd_ringbuffer *ring, uint8_t opcode, uint16_t cnt)
+{
+       const uint32_t hdr =
+                       CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8);
+
+       BEGIN_RING(ring, cnt+1);
+       OUT_RING(ring, hdr);
+}
+
+/* Emit a CP_INDIRECT_BUFFER_PFD packet with two payload entries: first
+ * whatever fd_ringbuffer_emit_reloc_ring() writes for the 'start' marker
+ * (presumably a reloc to that position -- confirm in libdrm_freedreno),
+ * then the dword count between 'start' and 'end'.
+ */
+static inline void
+OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
+               struct fd_ringmarker *end)
+{
+       OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
+       fd_ringbuffer_emit_reloc_ring(ring, start);
+       OUT_RING(ring, fd_ringmarker_dwords(start, end));
+}
+
+#endif /* FREEDRENO_UTIL_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_vbo.c b/src/gallium/drivers/freedreno/freedreno_vbo.c
new file mode 100644 (file)
index 0000000..fc33539
--- /dev/null
@@ -0,0 +1,232 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+#include "freedreno_vbo.h"
+#include "freedreno_context.h"
+#include "freedreno_state.h"
+#include "freedreno_zsa.h"
+#include "freedreno_resource.h"
+#include "freedreno_util.h"
+
+
+/* Create a vertex-elements state object holding a private copy of the
+ * caller's element array.
+ *
+ * Returns the new state object, or NULL if num_elements does not fit in
+ * the fixed-size so->pipe[] array (PIPE_MAX_ATTRIBS entries) or if the
+ * allocation fails.
+ */
+static void *
+fd_vertex_state_create(struct pipe_context *pctx, unsigned num_elements,
+               const struct pipe_vertex_element *elements)
+{
+       struct fd_vertex_stateobj *so;
+
+       /* so->pipe[] only has room for PIPE_MAX_ATTRIBS elements; copying
+        * more than that would write past the end of the allocation.
+        */
+       if (num_elements > PIPE_MAX_ATTRIBS)
+               return NULL;
+
+       so = CALLOC_STRUCT(fd_vertex_stateobj);
+       if (!so)
+               return NULL;
+
+       /* skip the copy for an empty element list, 'elements' may be NULL
+        * in that case:
+        */
+       if (num_elements)
+               memcpy(so->pipe, elements, sizeof(*elements) * num_elements);
+       so->num_elements = num_elements;
+
+       return so;
+}
+
+/* Free a state object returned by fd_vertex_state_create(). */
+static void
+fd_vertex_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       FREE(hwcso);
+}
+
+/* Make 'hwcso' the current vertex-elements state of the context and flag
+ * the vertex state as dirty.
+ */
+static void
+fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->dirty |= FD_DIRTY_VTX;
+       fdctx->vtx = hwcso;
+}
+
+/* Emit twelve back-to-back CP_EVENT_WRITE packets, each carrying a
+ * CACHE_FLUSH event.
+ */
+static void
+emit_cacheflush(struct fd_ringbuffer *ring)
+{
+       unsigned remaining = 12;
+
+       while (remaining-- > 0) {
+               OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+               OUT_RING(ring, CACHE_FLUSH);
+       }
+}
+
+/* Map a gallium PIPE_PRIM_* primitive mode to the hw pc_di_primtype.
+ * Unsupported modes are logged, trip an assert, and yield DI_PT_NONE.
+ */
+static enum pc_di_primtype
+mode2primtype(unsigned mode)
+{
+       switch (mode) {
+       case PIPE_PRIM_POINTS:
+               return DI_PT_POINTLIST;
+       case PIPE_PRIM_LINES:
+               return DI_PT_LINELIST;
+       case PIPE_PRIM_LINE_STRIP:
+               return DI_PT_LINESTRIP;
+       case PIPE_PRIM_TRIANGLES:
+               return DI_PT_TRILIST;
+       case PIPE_PRIM_TRIANGLE_STRIP:
+               return DI_PT_TRISTRIP;
+       case PIPE_PRIM_TRIANGLE_FAN:
+               return DI_PT_TRIFAN;
+       case PIPE_PRIM_QUADS:
+               return DI_PT_QUADLIST;
+       case PIPE_PRIM_QUAD_STRIP:
+               return DI_PT_QUADSTRIP;
+       case PIPE_PRIM_POLYGON:
+               return DI_PT_POLYGON;
+       default:
+               break;
+       }
+
+       DBG("unsupported mode: (%s) %d", u_prim_name(mode), mode);
+       assert(0);
+       return DI_PT_NONE;
+}
+
+/* Map an index size in bytes (1, 2 or 4) to the hw pc_di_index_size.
+ * Any other size is logged, trips an assert, and yields INDEX_SIZE_IGN.
+ */
+static enum pc_di_index_size
+size2indextype(unsigned index_size)
+{
+       if (index_size == 1)
+               return INDEX_SIZE_8_BIT;
+       if (index_size == 2)
+               return INDEX_SIZE_16_BIT;
+       if (index_size == 4)
+               return INDEX_SIZE_32_BIT;
+
+       DBG("unsupported index size: %d", index_size);
+       assert(0);
+       return INDEX_SIZE_IGN;
+}
+
+/* Gather one fd_vertex_buf entry per bound vertex element and hand the
+ * list to fd_emit_vertex_bufs().  Each entry takes its offset and
+ * resource from the vertex buffer the element refers to, and a size of
+ * count * stride.  Nothing is emitted when there are no vertex elements.
+ */
+static void
+emit_vertexbufs(struct fd_context *ctx, unsigned count)
+{
+       struct fd_vertex_stateobj *elems = ctx->vtx;
+       struct fd_vertexbuf_stateobj *vbufs = &ctx->vertexbuf;
+       struct fd_vertex_buf bufs[PIPE_MAX_ATTRIBS];
+       unsigned n;
+
+       if (elems->num_elements == 0)
+               return;
+
+       for (n = 0; n < elems->num_elements; n++) {
+               struct pipe_vertex_element *elem = &elems->pipe[n];
+               struct pipe_vertex_buffer *vb =
+                               &vbufs->vb[elem->vertex_buffer_index];
+               struct fd_vertex_buf *dst = &bufs[n];
+
+               dst->offset = vb->buffer_offset;
+               dst->size = count * vb->stride;
+               dst->prsc = vb->buffer;
+       }
+
+       /* NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the
+        * CONST(20,0) (or CONST(26,0) in solid_vp)
+        */
+       fd_emit_vertex_bufs(ctx->ring, 0x78, bufs, elems->num_elements);
+}
+
+/* pipe_context::draw_vbo implementation.
+ *
+ * Records which framebuffer attachments the draw touches (for later
+ * restore/resolve), emits all dirty state and the vertex buffers, then
+ * emits the CP_DRAW_INDX packet followed by a cache flush.
+ *
+ * NOTE(review): fb->cbufs[0] is dereferenced unconditionally, and
+ * fb->zsbuf is dereferenced whenever depth or stencil test is enabled;
+ * this assumes a color buffer (and, with depth/stencil enabled, a zs
+ * buffer) is always bound -- confirm against callers.
+ */
+static void
+fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       struct pipe_framebuffer_state *fb = &ctx->framebuffer.base;
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct fd_bo *idx_bo = NULL;
+       enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
+       enum pc_di_src_sel src_sel;
+       uint32_t idx_size, idx_offset;
+       unsigned buffers;
+
+       ctx->needs_flush = true;
+
+       if (info->indexed) {
+               struct pipe_index_buffer *idx = &ctx->indexbuf;
+
+               /* user-pointer index buffers are not handled here: */
+               assert(!idx->user_buffer);
+
+               idx_bo = fd_resource(idx->buffer)->bo;
+               idx_type = size2indextype(idx->index_size);
+               /* size in bytes of the indices consumed by this draw: */
+               idx_size = idx->index_size * info->count;
+               idx_offset = idx->offset;
+               src_sel = DI_SRC_SEL_DMA;
+       } else {
+               idx_bo = NULL;
+               idx_type = INDEX_SIZE_IGN;
+               idx_size = 0;
+               idx_offset = 0;
+               src_sel = DI_SRC_SEL_AUTO_INDEX;
+       }
+
+       fd_resource(fb->cbufs[0]->texture)->dirty = true;
+
+       /* figure out the buffers we need: */
+       buffers = FD_BUFFER_COLOR;
+       if (fd_depth_enabled(ctx->zsa)) {
+               buffers |= FD_BUFFER_DEPTH;
+               fd_resource(fb->zsbuf->texture)->dirty = true;
+       }
+       if (fd_stencil_enabled(ctx->zsa)) {
+               buffers |= FD_BUFFER_STENCIL;
+               fd_resource(fb->zsbuf->texture)->dirty = true;
+       }
+
+       /* any buffers that haven't been cleared, we need to restore: */
+       ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared);
+       /* and any buffers used, need to be resolved: */
+       ctx->resolve |= buffers;
+
+       fd_state_emit(pctx, ctx->dirty);
+
+       emit_vertexbufs(ctx, info->count);
+
+       /* index offset register is programmed with the draw's start: */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_VGT_INDX_OFFSET));
+       OUT_RING(ring, info->start);
+
+       /* NOTE(review): meaning of 0x3b is not documented here */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL));
+       OUT_RING(ring, 0x0000003b);
+
+       OUT_PKT0(ring, REG_TC_CNTL_STATUS, 1);
+       OUT_RING(ring, TC_CNTL_STATUS_L2_INVALIDATE);
+
+       OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+       OUT_RING(ring, 0x0000000);
+
+       /* indexed draws carry two extra dwords (index buffer address and
+        * size in bytes), hence 5 vs 3:
+        */
+       OUT_PKT3(ring, CP_DRAW_INDX, info->indexed ? 5 : 3);
+       OUT_RING(ring, 0x00000000);        /* viz query info. */
+       OUT_RING(ring, DRAW(mode2primtype(info->mode),
+                       src_sel, idx_type, IGNORE_VISIBILITY));
+       OUT_RING(ring, info->count);       /* NumIndices */
+       if (info->indexed) {
+               OUT_RELOC(ring, idx_bo, idx_offset, 0);
+               OUT_RING (ring, idx_size);
+       }
+
+       /* NOTE(review): purpose of REG_2010 is not documented here */
+       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
+       OUT_RING(ring, CP_REG(REG_2010));
+       OUT_RING(ring, 0x00000000);
+
+       emit_cacheflush(ring);
+}
+
+/* Install the vertex-elements state and draw entry points of this file
+ * into the pipe_context function table.
+ */
+void
+fd_vbo_init(struct pipe_context *pctx)
+{
+       /* drawing: */
+       pctx->draw_vbo = fd_draw_vbo;
+
+       /* vertex-elements state object lifetime and binding: */
+       pctx->bind_vertex_elements_state = fd_vertex_state_bind;
+       pctx->delete_vertex_elements_state = fd_vertex_state_delete;
+       pctx->create_vertex_elements_state = fd_vertex_state_create;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_vbo.h b/src/gallium/drivers/freedreno/freedreno_vbo.h
new file mode 100644 (file)
index 0000000..081edf5
--- /dev/null
@@ -0,0 +1,42 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_VBO_H_
+#define FREEDRENO_VBO_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+/* Vertex-elements state object: a fixed-capacity array of vertex element
+ * descriptions (at most PIPE_MAX_ATTRIBS) and the number of entries
+ * actually in use.
+ */
+struct fd_vertex_stateobj {
+       struct pipe_vertex_element pipe[PIPE_MAX_ATTRIBS];
+       unsigned num_elements;
+};
+
+void fd_vbo_init(struct pipe_context *pctx);
+
+#endif /* FREEDRENO_VBO_H_ */
diff --git a/src/gallium/drivers/freedreno/freedreno_zsa.c b/src/gallium/drivers/freedreno/freedreno_zsa.c
new file mode 100644 (file)
index 0000000..e8daa37
--- /dev/null
@@ -0,0 +1,144 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "freedreno_zsa.h"
+#include "freedreno_context.h"
+#include "freedreno_util.h"
+
+/* Map a gallium PIPE_STENCIL_OP_* value to the hw rb_stencil_op.  Note
+ * that the plain INCR/DECR ops map to the clamping hw variants.  An
+ * unknown op is logged and yields 0.
+ */
+static enum rb_stencil_op
+stencil_op(unsigned op)
+{
+       switch (op) {
+       case PIPE_STENCIL_OP_KEEP:      return STENCIL_KEEP;
+       case PIPE_STENCIL_OP_ZERO:      return STENCIL_ZERO;
+       case PIPE_STENCIL_OP_REPLACE:   return STENCIL_REPLACE;
+       case PIPE_STENCIL_OP_INCR:      return STENCIL_INCR_CLAMP;
+       case PIPE_STENCIL_OP_DECR:      return STENCIL_DECR_CLAMP;
+       case PIPE_STENCIL_OP_INCR_WRAP: return STENCIL_INCR_WRAP;
+       case PIPE_STENCIL_OP_DECR_WRAP: return STENCIL_DECR_WRAP;
+       case PIPE_STENCIL_OP_INVERT:    return STENCIL_INVERT;
+       default:
+               break;
+       }
+
+       DBG("invalid stencil op: %u", op);
+       return 0;
+}
+
+/* Create a depth/stencil/alpha state object: keeps a copy of the gallium
+ * state and precomputes the RB_DEPTHCONTROL, RB_STENCILREFMASK(_BF),
+ * RB_COLORCONTROL and RB_ALPHA_REF register values from it.
+ *
+ * Back-face stencil state is only taken into account when front-face
+ * stencil is enabled as well.  Returns NULL if the allocation fails.
+ */
+static void *
+fd_zsa_state_create(struct pipe_context *pctx,
+               const struct pipe_depth_stencil_alpha_state *cso)
+{
+       const struct pipe_stencil_state *front = &cso->stencil[0];
+       const struct pipe_stencil_state *back = &cso->stencil[1];
+       struct fd_zsa_stateobj *so = CALLOC_STRUCT(fd_zsa_stateobj);
+
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+
+       /* depth test: */
+       so->rb_depthcontrol |=
+               RB_DEPTHCONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */
+       if (cso->depth.enabled)
+               so->rb_depthcontrol |= RB_DEPTHCONTROL_Z_ENABLE;
+       if (cso->depth.writemask)
+               so->rb_depthcontrol |= RB_DEPTHCONTROL_Z_WRITE_ENABLE;
+
+       /* front-face stencil: */
+       if (front->enabled) {
+               so->rb_depthcontrol |=
+                       RB_DEPTHCONTROL_STENCIL_ENABLE |
+                       RB_DEPTHCONTROL_STENCILFUNC(front->func) | /* maps 1:1 */
+                       RB_DEPTHCONTROL_STENCILFAIL(stencil_op(front->fail_op)) |
+                       RB_DEPTHCONTROL_STENCILZPASS(stencil_op(front->zpass_op)) |
+                       RB_DEPTHCONTROL_STENCILZFAIL(stencil_op(front->zfail_op));
+               so->rb_stencilrefmask |=
+                       0xff000000 | /* ??? */
+                       RB_STENCILREFMASK_STENCILWRITEMASK(front->writemask) |
+                       RB_STENCILREFMASK_STENCILMASK(front->valuemask);
+       }
+
+       /* back-face stencil, only honoured together with front-face: */
+       if (front->enabled && back->enabled) {
+               so->rb_depthcontrol |=
+                       RB_DEPTHCONTROL_BACKFACE_ENABLE |
+                       RB_DEPTHCONTROL_STENCILFUNC_BF(back->func) | /* maps 1:1 */
+                       RB_DEPTHCONTROL_STENCILFAIL_BF(stencil_op(back->fail_op)) |
+                       RB_DEPTHCONTROL_STENCILZPASS_BF(stencil_op(back->zpass_op)) |
+                       RB_DEPTHCONTROL_STENCILZFAIL_BF(stencil_op(back->zfail_op));
+               so->rb_stencilrefmask_bf |=
+                       0xff000000 | /* ??? */
+                       RB_STENCILREFMASK_STENCILWRITEMASK(back->writemask) |
+                       RB_STENCILREFMASK_STENCILMASK(back->valuemask);
+       }
+
+       /* alpha test: */
+       if (cso->alpha.enabled) {
+               so->rb_colorcontrol =
+                       RB_COLORCONTROL_ALPHA_FUNC(cso->alpha.func) |
+                       RB_COLORCONTROL_ALPHA_TEST_ENABLE;
+               so->rb_alpha_ref = f2d(cso->alpha.ref_value);
+       }
+
+       return so;
+}
+
+/* Make 'hwcso' the current depth/stencil/alpha state of the context and
+ * flag the zsa state as dirty.
+ */
+static void
+fd_zsa_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->dirty |= FD_DIRTY_ZSA;
+       fdctx->zsa = hwcso;
+}
+
+/* Free a state object returned by fd_zsa_state_create(). */
+static void
+fd_zsa_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       FREE(hwcso);
+}
+
+/* Install the depth/stencil/alpha state entry points of this file into
+ * the pipe_context function table.
+ */
+void
+fd_zsa_init(struct pipe_context *pctx)
+{
+       pctx->delete_depth_stencil_alpha_state = fd_zsa_state_delete;
+       pctx->bind_depth_stencil_alpha_state = fd_zsa_state_bind;
+       pctx->create_depth_stencil_alpha_state = fd_zsa_state_create;
+}
diff --git a/src/gallium/drivers/freedreno/freedreno_zsa.h b/src/gallium/drivers/freedreno/freedreno_zsa.h
new file mode 100644 (file)
index 0000000..d1112f1
--- /dev/null
@@ -0,0 +1,60 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FREEDRENO_ZSA_H_
+#define FREEDRENO_ZSA_H_
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+#include "freedreno_util.h"
+
+/* Depth/stencil/alpha state object: a gallium state struct followed by
+ * pre-computed register values.
+ * NOTE(review): 'base' is presumably a copy of the state the object was
+ * created from -- confirm against the create function.
+ */
+struct fd_zsa_stateobj {
+       struct pipe_depth_stencil_alpha_state base;
+       uint32_t rb_depthcontrol;   /* RB_DEPTHCONTROL_* bits, tested by the helpers below */
+       uint32_t rb_colorcontrol;   /* must be OR'd w/ blend->rb_colorcontrol */
+       uint32_t rb_alpha_ref;      /* alpha test reference value */
+       uint32_t rb_stencilrefmask; /* RB_STENCILREFMASK_* bits */
+       uint32_t rb_stencilrefmask_bf; /* RB_STENCILREFMASK_* bits for stencil[1] */
+};
+
+void fd_zsa_init(struct pipe_context *pctx);
+
+/* True when RB_DEPTHCONTROL_Z_ENABLE is set in zsa->rb_depthcontrol. */
+static inline bool fd_depth_enabled(struct fd_zsa_stateobj *zsa)
+{
+       return (zsa->rb_depthcontrol & RB_DEPTHCONTROL_Z_ENABLE) != 0;
+}
+
+/* True when RB_DEPTHCONTROL_STENCIL_ENABLE is set in zsa->rb_depthcontrol. */
+static inline bool fd_stencil_enabled(struct fd_zsa_stateobj *zsa)
+{
+       return (zsa->rb_depthcontrol & RB_DEPTHCONTROL_STENCIL_ENABLE) != 0;
+}
+
+#endif /* FREEDRENO_ZSA_H_ */
diff --git a/src/gallium/drivers/freedreno/instr.h b/src/gallium/drivers/freedreno/instr.h
new file mode 100644 (file)
index 0000000..fd19234
--- /dev/null
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INSTR_H_
+#define INSTR_H_
+
+#include "freedreno_a2xx_reg.h"
+
+/* GCC-style attribute applied to the bitfield structs/unions below */
+#define PACKED __attribute__((__packed__))
+
+/*
+ * ALU instructions:
+ */
+
+/* Scalar ALU opcodes; the value is stored in the 6-bit scalar_opc field
+ * of instr_alu_t.  Note that 41 is not assigned in this table.
+ */
+typedef enum {
+       ADDs = 0,
+       ADD_PREVs = 1,
+       MULs = 2,
+       MUL_PREVs = 3,
+       MUL_PREV2s = 4,
+       MAXs = 5,
+       MINs = 6,
+       SETEs = 7,
+       SETGTs = 8,
+       SETGTEs = 9,
+       SETNEs = 10,
+       FRACs = 11,
+       TRUNCs = 12,
+       FLOORs = 13,
+       EXP_IEEE = 14,
+       LOG_CLAMP = 15,
+       LOG_IEEE = 16,
+       RECIP_CLAMP = 17,
+       RECIP_FF = 18,
+       RECIP_IEEE = 19,
+       RECIPSQ_CLAMP = 20,
+       RECIPSQ_FF = 21,
+       RECIPSQ_IEEE = 22,
+       MOVAs = 23,
+       MOVA_FLOORs = 24,
+       SUBs = 25,
+       SUB_PREVs = 26,
+       PRED_SETEs = 27,
+       PRED_SETNEs = 28,
+       PRED_SETGTs = 29,
+       PRED_SETGTEs = 30,
+       PRED_SET_INVs = 31,
+       PRED_SET_POPs = 32,
+       PRED_SET_CLRs = 33,
+       PRED_SET_RESTOREs = 34,
+       KILLEs = 35,
+       KILLGTs = 36,
+       KILLGTEs = 37,
+       KILLNEs = 38,
+       KILLONEs = 39,
+       SQRT_IEEE = 40,
+       MUL_CONST_0 = 42,
+       MUL_CONST_1 = 43,
+       ADD_CONST_0 = 44,
+       ADD_CONST_1 = 45,
+       SUB_CONST_0 = 46,
+       SUB_CONST_1 = 47,
+       SIN = 48,
+       COS = 49,
+       RETAIN_PREV = 50,
+} instr_scalar_opc_t;
+
+/* Vector ALU opcodes; the value is stored in the 5-bit vector_opc field
+ * of instr_alu_t.
+ */
+typedef enum {
+       ADDv = 0,
+       MULv = 1,
+       MAXv = 2,
+       MINv = 3,
+       SETEv = 4,
+       SETGTv = 5,
+       SETGTEv = 6,
+       SETNEv = 7,
+       FRACv = 8,
+       TRUNCv = 9,
+       FLOORv = 10,
+       MULADDv = 11,
+       CNDEv = 12,
+       CNDGTEv = 13,
+       CNDGTv = 14,
+       DOT4v = 15,
+       DOT3v = 16,
+       DOT2ADDv = 17,
+       CUBEv = 18,
+       MAX4v = 19,
+       PRED_SETE_PUSHv = 20,
+       PRED_SETNE_PUSHv = 21,
+       PRED_SETGT_PUSHv = 22,
+       PRED_SETGTE_PUSHv = 23,
+       KILLEv = 24,
+       KILLGTv = 25,
+       KILLGTEv = 26,
+       KILLNEv = 27,
+       DSTv = 28,
+       MOVAv = 29,
+} instr_vector_opc_t;
+
+/* Bitfield layout of one ALU instruction: three groups whose widths each
+ * sum to 32 bits (dword0..dword2).  It holds both a scalar_opc and a
+ * vector_opc, each with its own destination and write mask.
+ */
+typedef struct PACKED {
+       /* dword0: */
+       uint8_t             vector_dest              : 6;
+       uint8_t             vector_dest_rel          : 1;
+       uint8_t             low_precision_16b_fp     : 1;
+       uint8_t             scalar_dest              : 6;
+       uint8_t             scalar_dest_rel          : 1;
+       uint8_t             export_data              : 1;
+       uint8_t             vector_write_mask        : 4;
+       uint8_t             scalar_write_mask        : 4;
+       uint8_t             vector_clamp             : 1;
+       uint8_t             scalar_clamp             : 1;
+       instr_scalar_opc_t  scalar_opc               : 6;
+       /* dword1: */
+       uint8_t             src3_swiz                : 8;
+       uint8_t             src2_swiz                : 8;
+       uint8_t             src1_swiz                : 8;
+       uint8_t             src3_reg_negate          : 1;
+       uint8_t             src2_reg_negate          : 1;
+       uint8_t             src1_reg_negate          : 1;
+       uint8_t             pred_select              : 2;
+       uint8_t             relative_addr            : 1;
+       uint8_t             const_1_rel_abs          : 1;
+       uint8_t             const_0_rel_abs          : 1;
+       /* dword2: */
+       uint8_t             src3_reg                 : 6;
+       uint8_t             src3_reg_select          : 1;
+       uint8_t             src3_reg_abs             : 1;
+       uint8_t             src2_reg                 : 6;
+       uint8_t             src2_reg_select          : 1;
+       uint8_t             src2_reg_abs             : 1;
+       uint8_t             src1_reg                 : 6;
+       uint8_t             src1_reg_select          : 1;
+       uint8_t             src1_reg_abs             : 1;
+       instr_vector_opc_t  vector_opc               : 5;
+       uint8_t             src3_sel                 : 1;
+       uint8_t             src2_sel                 : 1;
+       uint8_t             src1_sel                 : 1;
+} instr_alu_t;
+
+
+
+/*
+ * CF instructions:
+ */
+
+/* Control-flow opcodes; the value is stored in the 4-bit opc field that
+ * ends every instr_cf_*_t layout.
+ */
+typedef enum {
+       NOP = 0,
+       EXEC = 1,
+       EXEC_END = 2,
+       COND_EXEC = 3,
+       COND_EXEC_END = 4,
+       COND_PRED_EXEC = 5,
+       COND_PRED_EXEC_END = 6,
+       LOOP_START = 7,
+       LOOP_END = 8,
+       COND_CALL = 9,
+       RETURN = 10,
+       COND_JMP = 11,
+       ALLOC = 12,
+       COND_EXEC_PRED_CLEAN = 13,
+       COND_EXEC_PRED_CLEAN_END = 14,
+       MARK_VS_FETCH_DONE = 15,
+} instr_cf_opc_t;
+
+/* Value of the 1-bit address_mode field of the CF instruction layouts. */
+typedef enum {
+       RELATIVE_ADDR = 0,
+       ABSOLUTE_ADDR = 1,
+} instr_addr_mode_t;
+
+/* Value of the 2-bit buffer_select field of instr_cf_alloc_t. */
+typedef enum {
+       SQ_NO_ALLOC = 0,
+       SQ_POSITION = 1,
+       SQ_PARAMETER_PIXEL = 2,
+       SQ_MEMORY = 3,
+} instr_alloc_type_t;
+
+/* CF layout used for the EXEC family of opcodes; the field widths sum to
+ * 48 bits.  The field name 'yeild' is spelled this way in the source;
+ * renaming it would require updating any users as well.
+ */
+typedef struct PACKED {
+       uint16_t            address                  : 9;
+       uint8_t             reserved0                : 3;
+       uint8_t             count                    : 3;
+       uint8_t             yeild                    : 1;
+       uint16_t            serialize                : 12;
+       uint8_t             vc                       : 6;   /* vertex cache? */
+       uint8_t             bool_addr                : 8;
+       uint8_t             condition                : 1;
+       instr_addr_mode_t   address_mode             : 1;
+       instr_cf_opc_t      opc                      : 4;
+} instr_cf_exec_t;
+
+/* CF layout with a loop_id field (presumably for LOOP_START/LOOP_END --
+ * no code in view uses it); the field widths sum to 48 bits.
+ */
+typedef struct PACKED {
+       uint16_t            address                  : 10;
+       uint8_t             reserved0                : 6;
+       uint8_t             loop_id                  : 5;
+       uint32_t            reserved1                : 22;
+       instr_addr_mode_t   address_mode             : 1;
+       instr_cf_opc_t      opc                      : 4;
+} instr_cf_loop_t;
+
+/* CF layout for jump/call style opcodes (presumably COND_JMP/COND_CALL --
+ * no code in view uses it); the field widths sum to 48 bits.
+ */
+typedef struct PACKED {
+       uint16_t            address                  : 10;
+       uint8_t             reserved0                : 3;
+       uint8_t             force_call               : 1;
+       uint8_t             predicated_jmp           : 1;
+       uint32_t            reserved1                : 18;
+       uint8_t             direction                : 1;
+       uint8_t             bool_addr                : 8;
+       uint8_t             condition                : 1;
+       instr_addr_mode_t   address_mode             : 1;
+       instr_cf_opc_t      opc                      : 4;
+} instr_cf_jmp_call_t;
+
+/* CF layout used for the ALLOC opcode; the field widths sum to 48 bits. */
+typedef struct PACKED {
+       uint8_t             size                     : 4;
+       uint64_t            reserved0                : 36;
+       uint8_t             no_serial                : 1;
+       instr_alloc_type_t  buffer_select            : 2;
+       uint8_t             alloc_mode               : 1;
+       instr_cf_opc_t      opc                      : 4;
+} instr_cf_alloc_t;
+
+/* One control-flow instruction, viewed through whichever layout matches
+ * its opcode.  The anonymous struct exposes 'opc' directly: it sits after
+ * 44 bits of padding, the same position as the opc field that ends each
+ * of the specific layouts.
+ */
+typedef union PACKED {
+       instr_cf_exec_t     exec;
+       instr_cf_loop_t     loop;
+       instr_cf_jmp_call_t jmp_call;
+       instr_cf_alloc_t    alloc;
+       struct PACKED {
+               uint64_t        dummy                    : 44;
+               instr_cf_opc_t  opc                      : 4;
+       };
+} instr_cf_t;
+
+
+
+/*
+ * FETCH instructions:
+ */
+
+/* Fetch opcodes; the value is stored in the 5-bit opc field that starts
+ * both fetch instruction layouts.  The numbering is not contiguous.
+ */
+typedef enum {
+       VTX_FETCH = 0,
+       TEX_FETCH = 1,
+       TEX_GET_BORDER_COLOR_FRAC = 16,
+       TEX_GET_COMP_TEX_LOD = 17,
+       TEX_GET_GRADIENTS = 18,
+       TEX_GET_WEIGHTS = 19,
+       TEX_SET_TEX_LOD = 24,
+       TEX_SET_GRADIENTS_H = 25,
+       TEX_SET_GRADIENTS_V = 26,
+       TEX_RESERVED_4 = 27,
+} instr_fetch_opc_t;
+
+/* Values for the 2-bit mag/min/mip/vol filter fields of instr_fetch_tex_t. */
+typedef enum {
+       TEX_FILTER_POINT = 0,
+       TEX_FILTER_LINEAR = 1,
+       TEX_FILTER_BASEMAP = 2,            /* only applicable for mip-filter */
+       TEX_FILTER_USE_FETCH_CONST = 3,
+} instr_tex_filter_t;
+
+/* Values for the 3-bit aniso_filter field of instr_fetch_tex_t; 6 is not
+ * assigned in this table.
+ */
+typedef enum {
+       ANISO_FILTER_DISABLED = 0,
+       ANISO_FILTER_MAX_1_1 = 1,
+       ANISO_FILTER_MAX_2_1 = 2,
+       ANISO_FILTER_MAX_4_1 = 3,
+       ANISO_FILTER_MAX_8_1 = 4,
+       ANISO_FILTER_MAX_16_1 = 5,
+       ANISO_FILTER_USE_FETCH_CONST = 7,
+} instr_aniso_filter_t;
+
+/* Values for the 3-bit arbitrary_filter field of instr_fetch_tex_t; 6 is
+ * not assigned in this table.
+ */
+typedef enum {
+       ARBITRARY_FILTER_2X4_SYM = 0,
+       ARBITRARY_FILTER_2X4_ASYM = 1,
+       ARBITRARY_FILTER_4X2_SYM = 2,
+       ARBITRARY_FILTER_4X2_ASYM = 3,
+       ARBITRARY_FILTER_4X4_SYM = 4,
+       ARBITRARY_FILTER_4X4_ASYM = 5,
+       ARBITRARY_FILTER_USE_FETCH_CONST = 7,
+} instr_arbitrary_filter_t;
+
+/* Value of the 1-bit sample_location field of instr_fetch_tex_t. */
+typedef enum {
+       SAMPLE_CENTROID = 0,
+       SAMPLE_CENTER = 1,
+} instr_sample_loc_t;
+
+/* Alias for enum sq_surfaceformat (declared outside this header, presumably
+ * in freedreno_a2xx_reg.h); type of the 6-bit format field of
+ * instr_fetch_vtx_t.
+ */
+typedef enum sq_surfaceformat instr_surf_fmt_t;
+
+/* Bitfield layout of a texture fetch instruction: three groups whose
+ * widths each sum to 32 bits (dword0..dword2).
+ */
+typedef struct PACKED {
+       /* dword0: */
+       instr_fetch_opc_t   opc                      : 5;
+       uint8_t             src_reg                  : 6;
+       uint8_t             src_reg_am               : 1;
+       uint8_t             dst_reg                  : 6;
+       uint8_t             dst_reg_am               : 1;
+       uint8_t             fetch_valid_only         : 1;
+       uint8_t             const_idx                : 5;
+       uint8_t             tx_coord_denorm          : 1;
+       uint8_t             src_swiz                 : 6;
+       /* dword1: */
+       uint16_t            dst_swiz                 : 12;
+       instr_tex_filter_t  mag_filter               : 2;
+       instr_tex_filter_t  min_filter               : 2;
+       instr_tex_filter_t  mip_filter               : 2;
+       instr_aniso_filter_t aniso_filter            : 3;
+       instr_arbitrary_filter_t arbitrary_filter    : 3;
+       instr_tex_filter_t  vol_mag_filter           : 2;
+       instr_tex_filter_t  vol_min_filter           : 2;
+       uint8_t             use_comp_lod             : 1;
+       uint8_t             use_reg_lod              : 2;
+       uint8_t             pred_select              : 1;
+       /* dword2: */
+       uint8_t             use_reg_gradients        : 1;
+       instr_sample_loc_t  sample_location          : 1;
+       uint8_t             lod_bias                 : 7;
+       uint8_t             unused                   : 7;
+       uint8_t             offset_x                 : 5;
+       uint8_t             offset_y                 : 5;
+       uint8_t             offset_z                 : 5;
+       uint8_t             pred_condition           : 1;
+} instr_fetch_tex_t;
+
+/* Bitfield layout of a vertex fetch instruction: three groups whose
+ * widths each sum to 32 bits (dword0..dword2).  Several fields are named
+ * reserved*/must_be_one; their hardware meaning is not documented here.
+ */
+typedef struct PACKED {
+       /* dword0: */
+       instr_fetch_opc_t   opc                      : 5;
+       uint8_t             src_reg                  : 6;
+       uint8_t             src_reg_am               : 1;
+       uint8_t             dst_reg                  : 6;
+       uint8_t             dst_reg_am               : 1;
+       uint8_t             must_be_one              : 1;
+       uint8_t             const_index              : 5;
+       uint8_t             const_index_sel          : 2;
+       uint8_t             reserved0                : 3;
+       uint8_t             src_swiz                 : 2;
+       /* dword1: */
+       uint16_t            dst_swiz                 : 12;
+       uint8_t             format_comp_all          : 1;   /* '1' for signed, '0' for unsigned? */
+       uint8_t             num_format_all           : 1;   /* '0' for normalized, '1' for unnormalized */
+       uint8_t             signed_rf_mode_all       : 1;
+       uint8_t             reserved1                : 1;
+       instr_surf_fmt_t    format                   : 6;
+       uint8_t             reserved2                : 1;
+       uint8_t             exp_adjust_all           : 7;
+       uint8_t             reserved3                : 1;
+       uint8_t             pred_select              : 1;
+       /* dword2: */
+       uint8_t             stride                   : 8;
+       /* possibly offset and reserved4 are swapped on a200? */
+       uint8_t             offset                   : 8;
+       uint8_t             reserved4                : 8;
+       uint8_t             reserved5                : 7;
+       uint8_t             pred_condition           : 1;
+} instr_fetch_vtx_t;
+
+/* One fetch instruction, viewed as a texture fetch or a vertex fetch.  The
+ * anonymous struct exposes the shared 5-bit 'opc' that starts both layouts
+ * and pads out the remaining bits of the three dwords.
+ */
+typedef union PACKED {
+       instr_fetch_tex_t   tex;
+       instr_fetch_vtx_t   vtx;
+       struct PACKED {
+               /* dword0: */
+               instr_fetch_opc_t opc                    : 5;
+               uint32_t        dummy0                   : 27;
+               /* dword1: */
+               uint32_t        dummy1                   : 32;
+               /* dword2: */
+               uint32_t        dummy2                   : 32;
+       };
+} instr_fetch_t;
+
+#endif /* INSTR_H_ */
diff --git a/src/gallium/drivers/freedreno/ir.c b/src/gallium/drivers/freedreno/ir.c
new file mode 100644 (file)
index 0000000..cbc1230
--- /dev/null
@@ -0,0 +1,701 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "freedreno_util.h"
+#include "instr.h"
+
+/* Logging helpers built on DBG().  DEBUG_MSG is wrapped in "if (0)", so its
+ * arguments are still compiled but the call is never executed; WARN_MSG and
+ * ERROR_MSG prepend a fixed prefix to the format string.
+ */
+#define DEBUG_MSG(f, ...)  do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
+#define WARN_MSG(f, ...)   DBG("WARN:  "f, ##__VA_ARGS__)
+#define ERROR_MSG(f, ...)  DBG("ERROR: "f, ##__VA_ARGS__)
+
+/* low six bits; NOTE(review): presumably masks a register number -- its
+ * users are not in this part of the file, confirm there
+ */
+#define REG_MASK 0x3f
+
+/* forward declarations of static helpers that are used before they are
+ * defined
+ */
+static int cf_emit(struct ir_cf *cf, instr_cf_t *instr);
+
+static int instr_emit(struct ir_instruction *instr, uint32_t *dwords,
+               uint32_t idx, struct ir_shader_info *info);
+
+static void reg_update_stats(struct ir_register *reg,
+               struct ir_shader_info *info, bool dest);
+static uint32_t reg_fetch_src_swiz(struct ir_register *reg, uint32_t n);
+static uint32_t reg_fetch_dst_swiz(struct ir_register *reg);
+static uint32_t reg_alu_dst_swiz(struct ir_register *reg);
+static uint32_t reg_alu_src_swiz(struct ir_register *reg);
+
+/* simple allocator to carve allocations out of an up-front allocated heap,
+ * so that we can free everything easily in one shot.
+ *
+ * Returns a pointer to the current heap position and advances heap_idx by
+ * ALIGN(sz, 4).  Nothing is ever released individually.  The request is
+ * asserted to fit (counted in heap elements, the unit heap_idx advances
+ * in) rather than silently running past the end of shader->heap; this
+ * mirrors the table-size asserts in the ir_*_create() functions.
+ */
+static void * ir_alloc(struct ir_shader *shader, int sz)
+{
+       /* assumes heap is an array member of struct ir_shader, which is what
+        * ir_shader_create()/ir_shader_destroy() imply (single calloc/free)
+        */
+       const size_t heap_size = sizeof(shader->heap) / sizeof(shader->heap[0]);
+       void *ptr;
+
+       assert(sz >= 0);
+       assert(shader->heap_idx <= heap_size);
+       assert((size_t)ALIGN(sz, 4) <= heap_size - shader->heap_idx);
+
+       ptr = &shader->heap[shader->heap_idx];
+       shader->heap_idx += ALIGN(sz, 4);
+       return ptr;
+}
+
+/* Copy a NUL-terminated string into memory obtained from ir_alloc().  A
+ * NULL input is passed through as NULL.
+ */
+static char * ir_strdup(struct ir_shader *shader, const char *str)
+{
+       char *copy;
+       int n;
+
+       if (!str)
+               return NULL;
+
+       n = strlen(str);
+       copy = ir_alloc(shader, n + 1);
+       memcpy(copy, str, n);
+       copy[n] = '\0';
+       return copy;
+}
+
+/* Allocate a zero-filled shader object.  The calloc() result is returned
+ * as is, so the caller gets NULL if the allocation fails.
+ */
+struct ir_shader * ir_shader_create(void)
+{
+       struct ir_shader *shader;
+
+       DEBUG_MSG("");
+       shader = calloc(1, sizeof(*shader));
+       return shader;
+}
+
+/* Release a shader object; the only cleanup done here is free() on the
+ * shader pointer itself.
+ */
+void ir_shader_destroy(struct ir_shader *shader)
+{
+       DEBUG_MSG("");
+       free(shader);
+}
+
+/* resolve addr/cnt/sequence fields in the individual CF's
+ *
+ * Addresses start at cfs_count / 2 and grow by instrs_count for every
+ * EXEC/EXEC_END node.  The sequence word holds two bits per instruction,
+ * instruction 0 in the lowest bits: 0x1 marks an IR_FETCH, 0x2 marks an
+ * instruction with sync set.  Non-zero addr/cnt values that disagree with
+ * the computed ones only trigger a warning and are then overwritten.
+ * info->sizedwords is set to three times the final address.  Always
+ * returns 0.
+ */
+static int shader_resolve(struct ir_shader *shader, struct ir_shader_info *info)
+{
+       uint32_t next_addr = shader->cfs_count / 2;
+       unsigned n;
+
+       for (n = 0; n < shader->cfs_count; n++) {
+               struct ir_cf *cf = shader->cfs[n];
+               uint32_t seq = 0;
+               int k;
+
+               if ((cf->cf_type != EXEC) && (cf->cf_type != EXEC_END))
+                       continue;
+
+               if (cf->exec.addr && (cf->exec.addr != next_addr)) {
+                       WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, n);
+               }
+               if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count)) {
+                       WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, n);
+               }
+
+               /* walk backwards so the first instruction lands in the low bits */
+               for (k = cf->exec.instrs_count - 1; k >= 0; k--) {
+                       struct ir_instruction *instr = cf->exec.instrs[k];
+                       uint32_t bits = 0;
+
+                       if (instr->instr_type == IR_FETCH)
+                               bits |= 0x1;
+                       if (instr->sync)
+                               bits |= 0x2;
+
+                       seq = (seq << 2) | bits;
+               }
+
+               cf->exec.sequence = seq;
+               cf->exec.cnt  = cf->exec.instrs_count;
+               cf->exec.addr = next_addr;
+
+               next_addr += cf->exec.instrs_count;
+       }
+
+       info->sizedwords = 3 * next_addr;
+
+       return 0;
+}
+
+/* Assemble the shader into its binary form.
+ *
+ * Resets the statistics in *info, pads the CF list with a NOP so that it
+ * has an even number of entries, resolves addresses/sizes, then emits the
+ * CF program (two CF instructions per three dwords) followed by the
+ * ALU/FETCH instructions (three dwords each).
+ *
+ * Returns a calloc()'d buffer of info->sizedwords dwords (presumably
+ * released by the caller with free()), or NULL if resolving, allocating or
+ * emitting fails.
+ */
+void * ir_shader_assemble(struct ir_shader *shader, struct ir_shader_info *info)
+{
+       uint32_t i, j;
+       uint32_t *ptr, *dwords = NULL;
+       uint32_t idx = 0;
+       int ret;
+
+       info->sizedwords    = 0;
+       info->max_reg       = -1;
+       info->max_input_reg = 0;
+       info->regs_written  = 0;
+
+       /* we need an even # of CF's.. insert a NOP if needed */
+       if (shader->cfs_count != ALIGN(shader->cfs_count, 2))
+               ir_cf_create(shader, NOP);
+
+       /* first pass, resolve sizes and addresses: */
+       ret = shader_resolve(shader, info);
+       if (ret) {
+               ERROR_MSG("resolve failed: %d", ret);
+               goto fail;
+       }
+
+       ptr = dwords = calloc(1, 4 * info->sizedwords);
+       if (!dwords) {
+               /* the emit passes below would otherwise write through NULL */
+               ERROR_MSG("out of memory");
+               goto fail;
+       }
+
+       /* second pass, emit CF program in pairs: */
+       for (i = 0; i < shader->cfs_count; i += 2) {
+               instr_cf_t *cfs = (instr_cf_t *)ptr;
+               for (j = 0; j < 2; j++) {
+                       ret = cf_emit(shader->cfs[i + j], &cfs[j]);
+                       if (ret) {
+                               ERROR_MSG("CF emit failed: %d\n", ret);
+                               goto fail;
+                       }
+               }
+               ptr += 3;
+               assert((ptr - dwords) <= info->sizedwords);
+       }
+
+       /* third pass, emit ALU/FETCH: */
+       for (i = 0; i < shader->cfs_count; i++) {
+               struct ir_cf *cf = shader->cfs[i];
+               if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
+                       for (j = 0; j < cf->exec.instrs_count; j++) {
+                               ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info);
+                               if (ret) {
+                                       ERROR_MSG("instruction emit failed: %d", ret);
+                                       goto fail;
+                               }
+                               ptr += 3;
+                               assert((ptr - dwords) <= info->sizedwords);
+                       }
+               }
+       }
+
+       return dwords;
+
+fail:
+       /* dwords is still NULL if we bail out before the allocation */
+       free(dwords);
+       return NULL;
+}
+
+
+/* Allocate an attribute record via ir_alloc(), fill in its register range
+ * and a copy of the name, and append it to shader->attributes.  A full
+ * table trips the assert.
+ */
+struct ir_attribute * ir_attribute_create(struct ir_shader *shader,
+               int rstart, int num, const char *name)
+{
+       struct ir_attribute *attr = ir_alloc(shader, sizeof(*attr));
+
+       DEBUG_MSG("R%d-R%d: %s", rstart, rstart + num - 1, name);
+
+       attr->name   = ir_strdup(shader, name);
+       attr->num    = num;
+       attr->rstart = rstart;
+
+       assert(shader->attributes_count < ARRAY_SIZE(shader->attributes));
+       shader->attributes[shader->attributes_count] = attr;
+       shader->attributes_count++;
+       return attr;
+}
+
+/* Allocate a constant record via ir_alloc(), store its four float values
+ * and its cstart index, and append it to shader->consts.  A full table
+ * trips the assert.
+ */
+struct ir_const * ir_const_create(struct ir_shader *shader,
+               int cstart, float v0, float v1, float v2, float v3)
+{
+       struct ir_const *konst = ir_alloc(shader, sizeof(*konst));
+
+       DEBUG_MSG("C%d: %f, %f, %f, %f", cstart, v0, v1, v2, v3);
+
+       konst->cstart = cstart;
+       konst->val[0] = v0;
+       konst->val[1] = v1;
+       konst->val[2] = v2;
+       konst->val[3] = v3;
+
+       assert(shader->consts_count < ARRAY_SIZE(shader->consts));
+       shader->consts[shader->consts_count] = konst;
+       shader->consts_count++;
+       return konst;
+}
+
+/* Allocate a sampler record via ir_alloc(), store its index and a copy of
+ * the name, and append it to shader->samplers.  A full table trips the
+ * assert.
+ */
+struct ir_sampler * ir_sampler_create(struct ir_shader *shader,
+               int idx, const char *name)
+{
+       struct ir_sampler *sampler = ir_alloc(shader, sizeof(*sampler));
+
+       DEBUG_MSG("CONST(%d): %s", idx, name);
+
+       sampler->name = ir_strdup(shader, name);
+       sampler->idx  = idx;
+
+       assert(shader->samplers_count < ARRAY_SIZE(shader->samplers));
+       shader->samplers[shader->samplers_count] = sampler;
+       shader->samplers_count++;
+       return sampler;
+}
+
+/* Allocate a uniform record via ir_alloc(), fill in its constant range
+ * and a copy of the name, and append it to shader->uniforms.  A full
+ * table trips the assert.
+ */
+struct ir_uniform * ir_uniform_create(struct ir_shader *shader,
+               int cstart, int num, const char *name)
+{
+       struct ir_uniform *uniform = ir_alloc(shader, sizeof(*uniform));
+
+       DEBUG_MSG("C%d-C%d: %s", cstart, cstart + num - 1, name);
+
+       uniform->name   = ir_strdup(shader, name);
+       uniform->num    = num;
+       uniform->cstart = cstart;
+
+       assert(shader->uniforms_count < ARRAY_SIZE(shader->uniforms));
+       shader->uniforms[shader->uniforms_count] = uniform;
+       shader->uniforms_count++;
+       return uniform;
+}
+
+/* Allocate a varying record via ir_alloc(), fill in its register range
+ * and a copy of the name, and append it to shader->varyings.  A full
+ * table trips the assert.
+ */
+struct ir_varying * ir_varying_create(struct ir_shader *shader,
+               int rstart, int num, const char *name)
+{
+       struct ir_varying *varying = ir_alloc(shader, sizeof(*varying));
+
+       DEBUG_MSG("R%d-R%d: %s", rstart, rstart + num - 1, name);
+
+       varying->name   = ir_strdup(shader, name);
+       varying->num    = num;
+       varying->rstart = rstart;
+
+       assert(shader->varyings_count < ARRAY_SIZE(shader->varyings));
+       shader->varyings[shader->varyings_count] = varying;
+       shader->varyings_count++;
+       return varying;
+}
+
+
+/* Allocate a control-flow node of the given type via ir_alloc(), link it
+ * back to its shader and append it to shader->cfs.  A full table trips
+ * the assert.
+ */
+struct ir_cf * ir_cf_create(struct ir_shader *shader, instr_cf_opc_t cf_type)
+{
+       struct ir_cf *node = ir_alloc(shader, sizeof(*node));
+
+       DEBUG_MSG("%d", cf_type);
+
+       node->cf_type = cf_type;
+       node->shader = shader;
+
+       assert(shader->cfs_count < ARRAY_SIZE(shader->cfs));
+       shader->cfs[shader->cfs_count] = node;
+       shader->cfs_count++;
+       return node;
+}
+
+
+/*
+ * CF instructions:
+ */
+
+/* Encode one IR control-flow node into *instr.
+ *
+ * The target is zeroed and the opcode stored first.  NOP needs nothing
+ * more; EXEC/EXEC_END add address, count and the serialize bits; ALLOC
+ * adds the size and, for SQ_POSITION or SQ_PARAMETER_PIXEL only, the
+ * buffer select.  Returns 0 on success and -1 for any other alloc type or
+ * for an opcode that is not implemented yet.
+ */
+static int cf_emit(struct ir_cf *cf, instr_cf_t *instr)
+{
+       memset(instr, 0, sizeof(*instr));
+       instr->opc = cf->cf_type;
+
+       switch (cf->cf_type) {
+       case COND_EXEC:
+       case COND_EXEC_END:
+       case COND_PRED_EXEC:
+       case COND_PRED_EXEC_END:
+       case LOOP_START:
+       case LOOP_END:
+       case COND_CALL:
+       case RETURN:
+       case COND_JMP:
+       case COND_EXEC_PRED_CLEAN:
+       case COND_EXEC_PRED_CLEAN_END:
+       case MARK_VS_FETCH_DONE:
+               /* not implemented yet */
+               ERROR_MSG("TODO");
+               return -1;
+       case ALLOC:
+               assert(cf->alloc.size <= 0xf);
+               instr->alloc.size = cf->alloc.size;
+               if ((cf->alloc.type != SQ_POSITION) &&
+                               (cf->alloc.type != SQ_PARAMETER_PIXEL)) {
+                       ERROR_MSG("invalid alloc type: %d", cf->alloc.type);
+                       return -1;
+               }
+               instr->alloc.buffer_select = cf->alloc.type;
+               return 0;
+       case EXEC:
+       case EXEC_END:
+               /* values must fit the 9/3/12-bit fields they are stored in */
+               assert(cf->exec.addr <= 0x1ff);
+               assert(cf->exec.cnt <= 0x6);
+               assert(cf->exec.sequence <= 0xfff);
+               instr->exec.serialize = cf->exec.sequence;
+               instr->exec.count = cf->exec.cnt;
+               instr->exec.address = cf->exec.addr;
+               return 0;
+       case NOP:
+               return 0;
+       }
+
+       return 0;
+}
+
+
+/* Allocate an instruction of the given type via ir_alloc() on the CF's
+ * shader, copy the shader's current pred value into it and append it to
+ * the CF's exec.instrs list.  A full list trips the assert.
+ */
+struct ir_instruction * ir_instr_create(struct ir_cf *cf, int instr_type)
+{
+       struct ir_shader *shader = cf->shader;
+       struct ir_instruction *instr = ir_alloc(shader, sizeof(*instr));
+
+       DEBUG_MSG("%d", instr_type);
+
+       instr->instr_type = instr_type;
+       instr->pred = shader->pred;
+       instr->shader = shader;
+
+       assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs));
+       cf->exec.instrs[cf->exec.instrs_count] = instr;
+       cf->exec.instrs_count++;
+       return instr;
+}
+
+
+/*
+ * FETCH instructions:
+ */
+
+/* Encode a FETCH instruction (vertex or texture fetch) into three dwords.
+ *
+ * regs[0] is the destination and regs[1] the source; both are passed to
+ * reg_update_stats() before encoding.  idx only influences two reserved
+ * vertex-fetch fields (it is presumably the running instruction index
+ * handed down from ir_shader_assemble() -- confirm in instr_emit()).
+ * Returns 0 on success, or -1 for an opcode other than
+ * VTX_FETCH/TEX_FETCH.
+ */
+static int instr_emit_fetch(struct ir_instruction *instr,
+               uint32_t *dwords, uint32_t idx,
+               struct ir_shader_info *info)
+{
+       instr_fetch_t *fetch = (instr_fetch_t *)dwords;
+       struct ir_register *dst = instr->regs[0];
+       struct ir_register *src = instr->regs[1];
+
+       memset(fetch, 0, sizeof(*fetch));
+
+       reg_update_stats(dst, info, true);
+       reg_update_stats(src, info, false);
+
+       fetch->opc = instr->fetch.opc;
+
+       switch (instr->fetch.opc) {
+       case VTX_FETCH: {
+               instr_fetch_vtx_t *vtx = &fetch->vtx;
+
+               /* values must fit the bitfields they are stored in */
+               assert(instr->fetch.stride <= 0xff);
+               assert(instr->fetch.fmt <= 0x3f);
+               assert(instr->fetch.const_idx <= 0x1f);
+               assert(instr->fetch.const_idx_sel <= 0x3);
+
+               vtx->src_swiz = reg_fetch_src_swiz(src, 1);
+               vtx->dst_swiz = reg_fetch_dst_swiz(dst);
+               vtx->src_reg = src->num;
+               vtx->dst_reg = dst->num;
+               vtx->must_be_one = 1;
+               vtx->const_index = instr->fetch.const_idx;
+               vtx->const_index_sel = instr->fetch.const_idx_sel;
+               vtx->format = instr->fetch.fmt;
+               vtx->format_comp_all = !!instr->fetch.is_signed;
+               vtx->num_format_all = !instr->fetch.is_normalized;
+               vtx->stride = instr->fetch.stride;
+               vtx->offset = instr->fetch.offset;
+
+               if (instr->pred != IR_PRED_NONE) {
+                       vtx->pred_select = 1;
+                       vtx->pred_condition = (instr->pred == IR_PRED_EQ);
+               }
+
+               /* XXX seems like every FETCH but the first has
+                * this bit set:
+                */
+               if (idx > 0) {
+                       vtx->reserved3 = 0x1;
+                       vtx->reserved0 = 0x2;
+               } else {
+                       vtx->reserved3 = 0x0;
+                       vtx->reserved0 = 0x3;
+               }
+               break;
+       }
+       case TEX_FETCH: {
+               instr_fetch_tex_t *tex = &fetch->tex;
+
+               assert(instr->fetch.const_idx <= 0x1f);
+
+               tex->src_swiz = reg_fetch_src_swiz(src, 3);
+               tex->dst_swiz = reg_fetch_dst_swiz(dst);
+               tex->src_reg = src->num;
+               tex->dst_reg = dst->num;
+               tex->const_idx = instr->fetch.const_idx;
+
+               /* every filter setting defers to the fetch constant */
+               tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
+               tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
+               tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
+               tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
+               tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
+               tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
+               tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
+
+               tex->use_comp_lod = 1;
+               tex->sample_location = SAMPLE_CENTER;
+
+               if (instr->pred != IR_PRED_NONE) {
+                       tex->pred_select = 1;
+                       tex->pred_condition = (instr->pred == IR_PRED_EQ);
+               }
+               break;
+       }
+       default:
+               ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * ALU instructions:
+ */
+
+/*
+ * Encode a single ALU instruction into the instr_alu_t at 'dwords',
+ * folding the registers it touches into 'info' along the way.
+ *
+ * Operands are consumed from instr->regs[] in this order:
+ *   vector dst, [src3 -- only for 3-src vector opcodes], src1, src2,
+ *   and, when a scalar op is present, scalar dst followed by src3.
+ *
+ * vector_opc == ~0 means "no vector op": MAXv is emitted with an empty
+ * write mask.  scalar_opc == ~0 means "no scalar op": MAXs is emitted.
+ *
+ * Always returns 0.
+ */
+static int instr_emit_alu(struct ir_instruction *instr, uint32_t *dwords,
+               struct ir_shader_info *info)
+{
+       int reg = 0;
+       instr_alu_t *alu = (instr_alu_t *)dwords;
+       struct ir_register *dst_reg  = instr->regs[reg++];
+       struct ir_register *src1_reg;
+       struct ir_register *src2_reg;
+       struct ir_register *src3_reg;
+
+       memset(alu, 0, sizeof(*alu));
+
+       /* handle instructions w/ 3 src operands: */
+       switch (instr->alu.vector_opc) {
+       case MULADDv:
+       case CNDEv:
+       case CNDGTEv:
+       case CNDGTv:
+       case DOT2ADDv:
+               /* note: disassembler lists 3rd src first, ie:
+                *   MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
+                * which is the reason for this strange ordering.
+                */
+               src3_reg = instr->regs[reg++];
+               break;
+       default:
+               src3_reg = NULL;
+               break;
+       }
+
+       src1_reg = instr->regs[reg++];
+       src2_reg = instr->regs[reg++];
+
+       /* the destination is recorded before the sources, so a source that
+        * names the same register as dst is not counted as a shader input:
+        */
+       reg_update_stats(dst_reg, info, true);
+       reg_update_stats(src1_reg, info, false);
+       reg_update_stats(src2_reg, info, false);
+
+       assert((dst_reg->flags & ~IR_REG_EXPORT) == 0);
+       assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4));
+       assert((src1_reg->flags & IR_REG_EXPORT) == 0);
+       assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4));
+       assert((src2_reg->flags & IR_REG_EXPORT) == 0);
+       assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
+
+       if (instr->alu.vector_opc == ~0) {
+               alu->vector_opc          = MAXv;
+               alu->vector_write_mask   = 0;
+       } else {
+               alu->vector_opc          = instr->alu.vector_opc;
+               alu->vector_write_mask   = reg_alu_dst_swiz(dst_reg);
+       }
+
+       alu->vector_dest         = dst_reg->num;
+       alu->export_data         = !!(dst_reg->flags & IR_REG_EXPORT);
+
+       // TODO predicate case/condition.. need to add to parser
+
+       alu->src2_reg            = src2_reg->num;
+       alu->src2_swiz           = reg_alu_src_swiz(src2_reg);
+       alu->src2_reg_negate     = !!(src2_reg->flags & IR_REG_NEGATE);
+       alu->src2_reg_abs        = !!(src2_reg->flags & IR_REG_ABS);
+       alu->src2_sel            = !(src2_reg->flags & IR_REG_CONST);
+
+       alu->src1_reg            = src1_reg->num;
+       alu->src1_swiz           = reg_alu_src_swiz(src1_reg);
+       alu->src1_reg_negate     = !!(src1_reg->flags & IR_REG_NEGATE);
+       alu->src1_reg_abs        = !!(src1_reg->flags & IR_REG_ABS);
+       alu->src1_sel            = !(src1_reg->flags & IR_REG_CONST);
+
+       alu->vector_clamp        = instr->alu.vector_clamp;
+       alu->scalar_clamp        = instr->alu.scalar_clamp;
+
+       if (instr->alu.scalar_opc != ~0) {
+               struct ir_register *sdst_reg = instr->regs[reg++];
+
+               reg_update_stats(sdst_reg, info, true);
+
+               assert(sdst_reg->flags == dst_reg->flags);
+
+               if (src3_reg) {
+                       /* a 3-src vector op already named the 3rd source, and
+                        * the scalar op must use that very same operand.  The
+                        * index is bounds-checked and advanced outside of the
+                        * comparison so that assert() has no side effects and
+                        * never reads past the operands that were created:
+                        */
+                       assert(reg < (int)instr->regs_count);
+                       assert(src3_reg == instr->regs[reg]);
+                       reg++;
+               } else {
+                       src3_reg = instr->regs[reg++];
+               }
+
+               alu->scalar_dest         = sdst_reg->num;
+               alu->scalar_write_mask   = reg_alu_dst_swiz(sdst_reg);
+               alu->scalar_opc          = instr->alu.scalar_opc;
+       } else {
+               /* not sure if this is required, but adreno compiler seems
+                * to always set scalar opc to MAXs if it is not used:
+                */
+               alu->scalar_opc = MAXs;
+       }
+
+       if (src3_reg) {
+               reg_update_stats(src3_reg, info, false);
+
+               alu->src3_reg            = src3_reg->num;
+               alu->src3_swiz           = reg_alu_src_swiz(src3_reg);
+               alu->src3_reg_negate     = !!(src3_reg->flags & IR_REG_NEGATE);
+               alu->src3_reg_abs        = !!(src3_reg->flags & IR_REG_ABS);
+               alu->src3_sel            = !(src3_reg->flags & IR_REG_CONST);
+       } else {
+               /* not sure if this is required, but adreno compiler seems
+                * to always set register bank for 3rd src if unused:
+                */
+               alu->src3_sel = 1;
+       }
+
+       /* predicated execution: 3 selects the IR_PRED_EQ form, 2 the other */
+       if (instr->pred != IR_PRED_NONE) {
+               alu->pred_select = (instr->pred == IR_PRED_EQ) ? 3 : 2;
+       }
+
+       return 0;
+}
+
+/*
+ * Dispatch on the instruction type to the matching encoder.  Returns the
+ * encoder's result, or -1 for an instruction type that is neither
+ * IR_FETCH nor IR_ALU.
+ */
+static int instr_emit(struct ir_instruction *instr, uint32_t *dwords,
+               uint32_t idx, struct ir_shader_info *info)
+{
+       if (instr->instr_type == IR_FETCH)
+               return instr_emit_fetch(instr, dwords, idx, info);
+
+       if (instr->instr_type == IR_ALU)
+               return instr_emit_alu(instr, dwords, info);
+
+       /* unknown instruction type */
+       return -1;
+}
+
+
+/*
+ * Create a register operand via ir_alloc() and append it to
+ * instr->regs[].
+ *
+ *   num      register number, asserted to be <= REG_MASK
+ *   swizzle  component string, copied with ir_strdup(); the encoders in
+ *            this file treat a NULL swizzle as "use the default"
+ *            (assumes ir_strdup() tolerates NULL -- TODO confirm)
+ *   flags    IR_REG_x bits
+ *
+ * Returns the new register.
+ */
+struct ir_register * ir_reg_create(struct ir_instruction *instr,
+               int num, const char *swizzle, int flags)
+{
+       struct ir_register *reg =
+                       ir_alloc(instr->shader, sizeof(struct ir_register));
+       /* the swizzle is optional, and NULL must not be handed to %s: */
+       DEBUG_MSG("%x, %d, %s", flags, num, swizzle ? swizzle : "(null)");
+       assert(num <= REG_MASK);
+       reg->flags = flags;
+       reg->num = num;
+       reg->swizzle = ir_strdup(instr->shader, swizzle);
+       /* regs[] is a fixed-size array; one more operand must still fit: */
+       assert(instr->regs_count < ARRAY_SIZE(instr->regs));
+       instr->regs[instr->regs_count++] = reg;
+       return reg;
+}
+
+/*
+ * Fold one register operand into the shader statistics.
+ *
+ * Constant and export registers are ignored.  For any other register,
+ * max_reg tracks the highest register number seen; a destination sets the
+ * register's bit in regs_written, while a source whose bit is not yet set
+ * is treated as an input and raises max_input_reg.
+ */
+static void reg_update_stats(struct ir_register *reg,
+               struct ir_shader_info *info, bool dest)
+{
+       if (!(reg->flags & (IR_REG_CONST|IR_REG_EXPORT))) {
+               /* regs_written is 64 bits wide, so the mask has to be built
+                * in 64 bits as well: shifting a plain int '1' by 31 or more
+                * positions is undefined.
+                */
+               uint64_t bit = (uint64_t)1 << reg->num;
+
+               info->max_reg = max(info->max_reg, reg->num);
+
+               if (dest) {
+                       info->regs_written |= bit;
+               } else if (!(info->regs_written & bit)) {
+                       /* for registers that haven't been written, they must be an
+                        * input register that the thread scheduler (presumably?)
+                        * needs to know about:
+                        */
+                       info->max_input_reg = max(info->max_input_reg, reg->num);
+               }
+       }
+}
+
+/*
+ * Pack the first 'n' components of a fetch source swizzle, two bits per
+ * component ('x'=0 'y'=1 'z'=2 'w'=3) with swizzle[0] in the lowest bits.
+ * The register must carry no flags and must have a swizzle string.  An
+ * unrecognized character is reported and then encoded like 'x'.
+ */
+static uint32_t reg_fetch_src_swiz(struct ir_register *reg, uint32_t n)
+{
+       uint32_t packed = 0;
+       int c;
+
+       assert(reg->flags == 0);
+       assert(reg->swizzle);
+
+       DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
+
+       /* walk from the last component down to the first so that
+        * component 0 ends up in the low two bits:
+        */
+       for (c = n-1; c >= 0; c--) {
+               uint32_t sel;
+
+               switch (reg->swizzle[c]) {
+               case 'x': sel = 0x0; break;
+               case 'y': sel = 0x1; break;
+               case 'z': sel = 0x2; break;
+               case 'w': sel = 0x3; break;
+               default:
+                       /* complain, then encode the component as 'x': */
+                       ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
+                       sel = 0x0;
+                       break;
+               }
+
+               packed = (packed << 2) | sel;
+       }
+
+       return packed;
+}
+
+/*
+ * Pack the destination swizzle of a fetch instruction: three bits per
+ * component, with swizzle[0] in the lowest bits, encoded as
+ *   'x'=0 'y'=1 'z'=2 'w'=3 '0'=4 '1'=5 '_'=7
+ * A NULL swizzle returns 0x688, i.e. the encoding of "xyzw".  Any other
+ * character is reported through ERROR_MSG() and then encoded like 'x'.
+ */
+static uint32_t reg_fetch_dst_swiz(struct ir_register *reg)
+{
+       uint32_t swiz = 0;
+       int i;
+
+       assert(reg->flags == 0);
+       assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
+
+       /* the swizzle is optional, and NULL must not be handed to %s: */
+       DEBUG_MSG("fetch dst R%d.%s", reg->num,
+                       reg->swizzle ? reg->swizzle : "(null)");
+
+       if (reg->swizzle) {
+               for (i = 3; i >= 0; i--) {
+                       swiz <<= 3;
+                       switch (reg->swizzle[i]) {
+                       default:
+                               ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
+                               /* fallthrough */
+                       case 'x': swiz |= 0x0; break;
+                       case 'y': swiz |= 0x1; break;
+                       case 'z': swiz |= 0x2; break;
+                       case 'w': swiz |= 0x3; break;
+                       case '0': swiz |= 0x4; break;
+                       case '1': swiz |= 0x5; break;
+                       case '_': swiz |= 0x7; break;
+                       }
+               }
+       } else {
+               /* identity: w,z,y,x = 3,2,1,0 packed three bits each */
+               swiz = 0x688;
+       }
+
+       return swiz;
+}
+
+/*
+ * Build the 4-bit write mask of an ALU destination (actually, a
+ * write-mask rather than a swizzle): bit i is set when swizzle[i] is the
+ * i'th letter of "xyzw" and left clear when it is '_'.  A NULL swizzle
+ * selects all four components (0xf).
+ *
+ * Any other character is reported; the loop stops at that point and
+ * whatever has been accumulated so far is returned.
+ */
+static uint32_t reg_alu_dst_swiz(struct ir_register *reg)
+{
+       uint32_t swiz = 0;
+       int i;
+
+       assert((reg->flags & ~IR_REG_EXPORT) == 0);
+       assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
+
+       /* the swizzle is optional, and NULL must not be handed to %s: */
+       DEBUG_MSG("alu dst R%d.%s", reg->num,
+                       reg->swizzle ? reg->swizzle : "(null)");
+
+       if (reg->swizzle) {
+               for (i = 3; i >= 0; i--) {
+                       swiz <<= 1;
+                       if (reg->swizzle[i] == "xyzw"[i]) {
+                               swiz |= 0x1;
+                       } else if (reg->swizzle[i] != '_') {
+                               ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
+                               break;
+                       }
+               }
+       } else {
+               swiz = 0xf;
+       }
+
+       return swiz;
+}
+
+/*
+ * Pack the swizzle of an ALU source: two bits per component, with
+ * swizzle[0] in the lowest bits.  Each field holds the selected component
+ * relative to its own position, (selected - position) & 3, so "xyzw"
+ * encodes as 0 -- the same value that is returned for a NULL swizzle.
+ * An unrecognized character is reported and then encoded like 'x'.
+ */
+static uint32_t reg_alu_src_swiz(struct ir_register *reg)
+{
+       uint32_t swiz = 0;
+       int i;
+
+       assert((reg->flags & IR_REG_EXPORT) == 0);
+       assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
+
+       /* the swizzle is optional, and NULL must not be handed to %s: */
+       DEBUG_MSG("vector src R%d.%s", reg->num,
+                       reg->swizzle ? reg->swizzle : "(null)");
+
+       if (reg->swizzle) {
+               for (i = 3; i >= 0; i--) {
+                       swiz <<= 2;
+                       switch (reg->swizzle[i]) {
+                       default:
+                               ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
+                               /* fallthrough */
+                       case 'x': swiz |= (0x0 - i) & 0x3; break;
+                       case 'y': swiz |= (0x1 - i) & 0x3; break;
+                       case 'z': swiz |= (0x2 - i) & 0x3; break;
+                       case 'w': swiz |= (0x3 - i) & 0x3; break;
+                       }
+               }
+       } else {
+               swiz = 0x0;
+       }
+
+       return swiz;
+}
diff --git a/src/gallium/drivers/freedreno/ir.h b/src/gallium/drivers/freedreno/ir.h
new file mode 100644 (file)
index 0000000..e802544
--- /dev/null
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IR_H_
+#define IR_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "instr.h"
+
+/* low level intermediate representation of an adreno shader program */
+
+struct ir_shader;
+
+struct ir_shader * fd_asm_parse(const char *src);
+
+/* statistics about an assembled shader; the register fields are updated
+ * by the assembler's reg_update_stats() as operands are emitted:
+ */
+struct ir_shader_info {
+       uint16_t sizedwords;    /* presumably the shader size in dwords; set outside this header -- TODO confirm */
+       int8_t   max_reg;   /* highest GPR # used by shader */
+       uint8_t  max_input_reg; /* highest register # read before it was written */
+       uint64_t regs_written;  /* bit n is set once register n has been a destination */
+};
+
+/* one register operand of an instruction: */
+struct ir_register {
+       enum {
+               IR_REG_CONST  = 0x1,    /* excluded from the GPR statistics; clears srcN_sel in the ALU encoding */
+               IR_REG_EXPORT = 0x2,    /* excluded from the GPR statistics; sets export_data on an ALU dst */
+               IR_REG_NEGATE = 0x4,    /* sets srcN_reg_negate on an ALU source */
+               IR_REG_ABS    = 0x8,    /* sets srcN_reg_abs on an ALU source */
+       } flags;
+       int num;                /* register number */
+       char *swizzle;          /* component string, or NULL for the encoder's default */
+};
+
+/* predication of an instruction; the encoders emit predicate bits only
+ * when the value is not IR_PRED_NONE, and distinguish IR_PRED_EQ from
+ * everything else:
+ */
+enum ir_pred {
+       IR_PRED_NONE = 0,
+       IR_PRED_EQ = 1,
+       IR_PRED_NE = 2,
+};
+
+/* a single FETCH or ALU instruction; instr_type says which member of the
+ * anonymous union below is meaningful:
+ */
+struct ir_instruction {
+       struct ir_shader *shader;       /* shader that ir_reg_create() allocates the operands from */
+       enum {
+               IR_FETCH,
+               IR_ALU,
+       } instr_type;
+       enum ir_pred pred;
+       int sync;
+       unsigned regs_count;            /* number of valid entries in regs[] */
+       /* operands, in the order ir_reg_create() appended them.
+        * NOTE(review): the ALU encoder looks at a sixth operand when a
+        * 3-src vector op is paired with a scalar op -- confirm whether
+        * five slots are enough.
+        */
+       struct ir_register *regs[5];
+       union {
+               /* FETCH specific: */
+               struct {
+                       instr_fetch_opc_t opc;
+                       unsigned const_idx;
+                       /* maybe vertex fetch specific: */
+                       unsigned const_idx_sel;
+                       enum sq_surfaceformat fmt;
+                       bool is_signed : 1;
+                       bool is_normalized : 1;
+                       uint32_t stride;
+                       uint32_t offset;
+               } fetch;
+               /* ALU specific: */
+               struct {
+                       /* an opcode of ~0 marks the vector resp. scalar op as unused */
+                       instr_vector_opc_t vector_opc;
+                       instr_scalar_opc_t scalar_opc;
+                       bool vector_clamp : 1;
+                       bool scalar_clamp : 1;
+               } alu;
+       };
+};
+
+/* one control-flow instruction; cf_type decides which member of the
+ * anonymous union is meaningful:
+ */
+struct ir_cf {
+       struct ir_shader *shader;
+       instr_cf_opc_t cf_type;
+
+       union {
+               /* EXEC/EXEC_END specific: */
+               struct {
+                       unsigned instrs_count;  /* presumably the number of valid entries in instrs[] -- TODO confirm */
+                       struct ir_instruction *instrs[6];
+                       uint32_t addr, cnt, sequence;
+               } exec;
+               /* ALLOC specific: */
+               struct {
+                       instr_alloc_type_t type;   /* SQ_POSITION or SQ_PARAMETER_PIXEL */
+                       int size;
+               } alloc;
+       };
+};
+
+/* somewhat arbitrary limits.. */
+#define MAX_ATTRIBUTES 32
+#define MAX_CONSTS     32
+#define MAX_SAMPLERS   32
+#define MAX_UNIFORMS   32
+#define MAX_VARYINGS   32
+
+/* The five structs below describe the entries that struct ir_shader
+ * collects under its "@ headers" section.
+ */
+
+/* a named attribute and the register range assigned to it: */
+struct ir_attribute {
+       const char *name;
+       int rstart;         /* first register */
+       int num;            /* number of registers */
+};
+
+/* four float values tied to a const register: */
+struct ir_const {
+       float val[4];
+       int cstart;         /* first const register */
+};
+
+/* a named sampler and its index: */
+struct ir_sampler {
+       const char *name;
+       int idx;
+};
+
+/* a named uniform and the const register range assigned to it: */
+struct ir_uniform {
+       const char *name;
+       int cstart;         /* first const register */
+       int num;            /* number of const registers */
+};
+
+/* a named varying and the register range assigned to it: */
+struct ir_varying {
+       const char *name;
+       int rstart;         /* first register */
+       int num;            /* number of registers */
+};
+
+/* a complete shader program: its control-flow instructions, an embedded
+ * allocation area, and the header entries describing its interface:
+ */
+struct ir_shader {
+       unsigned cfs_count;
+       struct ir_cf *cfs[0x56];
+       /* storage embedded in the shader itself (100 * 4096 dwords);
+        * presumably what ir_alloc()/ir_strdup() hand out memory from,
+        * with heap_idx as the fill level -- TODO confirm
+        */
+       uint32_t heap[100 * 4096];
+       unsigned heap_idx;
+
+       enum ir_pred pred;  /* pred inherited by newly created instrs */
+
+       /* @ headers: */
+       uint32_t attributes_count;
+       struct ir_attribute *attributes[MAX_ATTRIBUTES];
+
+       uint32_t consts_count;
+       struct ir_const *consts[MAX_CONSTS];
+
+       uint32_t samplers_count;
+       struct ir_sampler *samplers[MAX_SAMPLERS];
+
+       uint32_t uniforms_count;
+       struct ir_uniform *uniforms[MAX_UNIFORMS];
+
+       uint32_t varyings_count;
+       struct ir_varying *varyings[MAX_VARYINGS];
+
+};
+
+struct ir_shader * ir_shader_create(void);
+void ir_shader_destroy(struct ir_shader *shader);
+void * ir_shader_assemble(struct ir_shader *shader,
+               struct ir_shader_info *info);
+
+struct ir_attribute * ir_attribute_create(struct ir_shader *shader,
+               int rstart, int num, const char *name);
+struct ir_const * ir_const_create(struct ir_shader *shader,
+               int cstart, float v0, float v1, float v2, float v3);
+struct ir_sampler * ir_sampler_create(struct ir_shader *shader,
+               int idx, const char *name);
+struct ir_uniform * ir_uniform_create(struct ir_shader *shader,
+               int cstart, int num, const char *name);
+struct ir_varying * ir_varying_create(struct ir_shader *shader,
+               int rstart, int num, const char *name);
+
+struct ir_cf * ir_cf_create(struct ir_shader *shader, instr_cf_opc_t cf_type);
+
+struct ir_instruction * ir_instr_create(struct ir_cf *cf, int instr_type);
+
+struct ir_register * ir_reg_create(struct ir_instruction *instr,
+               int num, const char *swizzle, int flags);
+
+/* some helper fxns: */
+
+/* Create an ALLOC control-flow instruction of the given type and size.
+ * Returns whatever ir_cf_create() returned, which may be NULL.
+ */
+static inline struct ir_cf *
+ir_cf_create_alloc(struct ir_shader *shader, instr_alloc_type_t type, int size)
+{
+       struct ir_cf *alloc_cf = ir_cf_create(shader, ALLOC);
+
+       if (alloc_cf) {
+               alloc_cf->alloc.type = type;
+               alloc_cf->alloc.size = size;
+       }
+
+       return alloc_cf;
+}
+/* Create an ALU instruction in 'cf' with the given vector and scalar
+ * opcodes.  Returns whatever ir_instr_create() returned, which may be
+ * NULL.
+ */
+static inline struct ir_instruction *
+ir_instr_create_alu(struct ir_cf *cf, instr_vector_opc_t vop, instr_scalar_opc_t sop)
+{
+       struct ir_instruction *alu = ir_instr_create(cf, IR_ALU);
+
+       if (alu) {
+               alu->alu.vector_opc = vop;
+               alu->alu.scalar_opc = sop;
+       }
+
+       return alu;
+}
+/* Create a vertex-fetch instruction in 'cf'.
+ *
+ *   ci, cis    stored as fetch.const_idx and fetch.const_idx_sel
+ *   fmt        surface format of the fetched data
+ *   is_signed  whether the fetched data is signed
+ *   stride     stored as fetch.stride
+ *
+ * Returns the new instruction, or NULL if ir_instr_create() fails.
+ */
+static inline struct ir_instruction *
+ir_instr_create_vtx_fetch(struct ir_cf *cf, int ci, int cis,
+               enum sq_surfaceformat fmt, bool is_signed, int stride)
+{
+       struct ir_instruction *instr = ir_instr_create(cf, IR_FETCH);
+       /* same guard as ir_instr_create_alu(): never write through a
+        * failed creation
+        */
+       if (!instr)
+               return instr;
+       instr->fetch.opc = VTX_FETCH;
+       instr->fetch.const_idx = ci;
+       instr->fetch.const_idx_sel = cis;
+       instr->fetch.fmt = fmt;
+       instr->fetch.is_signed = is_signed;
+       instr->fetch.stride = stride;
+       return instr;
+}
+/* Create a texture-fetch instruction in 'cf'; 'ci' is stored as
+ * fetch.const_idx.  Returns the new instruction, or NULL if
+ * ir_instr_create() fails.
+ */
+static inline struct ir_instruction *
+ir_instr_create_tex_fetch(struct ir_cf *cf, int ci)
+{
+       struct ir_instruction *instr = ir_instr_create(cf, IR_FETCH);
+       /* same guard as ir_instr_create_alu(): never write through a
+        * failed creation
+        */
+       if (!instr)
+               return instr;
+       instr->fetch.opc = TEX_FETCH;
+       instr->fetch.const_idx = ci;
+       return instr;
+}
+
+
+#endif /* IR_H_ */
diff --git a/src/gallium/targets/dri-freedreno/Makefile.am b/src/gallium/targets/dri-freedreno/Makefile.am
new file mode 100644 (file)
index 0000000..59293a6
--- /dev/null
@@ -0,0 +1,71 @@
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+       $(GALLIUM_CFLAGS) \
+       $(PTHREAD_CFLAGS) \
+       $(LIBDRM_CFLAGS)
+AM_CPPFLAGS = \
+       -I$(top_srcdir)/src/gallium/drivers \
+       -I$(top_srcdir)/src/gallium/winsys \
+       -I$(top_srcdir)/src/mesa \
+       -I$(top_srcdir)/src/mapi \
+       -I$(top_builddir)/src/mesa/drivers/dri/common \
+       -DGALLIUM_RBUG \
+       -DGALLIUM_TRACE
+
+# The loadable DRI driver module, installed into $(DRI_DRIVER_INSTALL_DIR).
+dridir = $(DRI_DRIVER_INSTALL_DIR)
+dri_LTLIBRARIES = kgsl_dri.la
+
+# dummy.cpp is neither distributed nor compiled; naming a C++ source here
+# makes automake link the module with the C++ linker (presumably for the
+# optional LLVM libraries added further down).
+nodist_EXTRA_kgsl_dri_la_SOURCES = dummy.cpp
+kgsl_dri_la_SOURCES = \
+       target.c \
+       $(top_srcdir)/src/mesa/drivers/dri/common/utils.c \
+       $(top_srcdir)/src/mesa/drivers/dri/common/dri_util.c \
+       $(top_srcdir)/src/mesa/drivers/dri/common/xmlconfig.c
+
+kgsl_dri_la_LDFLAGS = -module -avoid-version -shared -no-undefined
+
+# trace and rbug match the -DGALLIUM_TRACE / -DGALLIUM_RBUG defines above.
+kgsl_dri_la_LIBADD = \
+       $(top_builddir)/src/mesa/libmesagallium.la \
+       $(top_builddir)/src/gallium/auxiliary/libgallium.la \
+       $(top_builddir)/src/gallium/state_trackers/dri/drm/libdridrm.la \
+       $(top_builddir)/src/gallium/winsys/freedreno/drm/libfreedrenodrm.la \
+       $(top_builddir)/src/gallium/drivers/freedreno/libfreedreno.la \
+       $(top_builddir)/src/gallium/drivers/trace/libtrace.la \
+       $(top_builddir)/src/gallium/drivers/rbug/librbug.la \
+       $(GALLIUM_DRI_LIB_DEPS) \
+       $(LIBDRM_LIBS) \
+       $(FREEDRENO_LIBS)
+
+if HAVE_MESA_LLVM
+kgsl_dri_la_LDFLAGS += $(LLVM_LDFLAGS)
+kgsl_dri_la_LIBADD += $(LLVM_LIBS)
+endif
+
+# Provide compatibility with scripts for the old Mesa build system for
+# a while by putting a link to the driver into /lib of the build tree.
+all-local: kgsl_dri.la
+       $(MKDIR_P) $(top_builddir)/$(LIB_DIR)/gallium
+       ln -f .libs/kgsl_dri.so $(top_builddir)/$(LIB_DIR)/gallium/kgsl_dri.so
diff --git a/src/gallium/targets/dri-freedreno/target.c b/src/gallium/targets/dri-freedreno/target.c
new file mode 100644 (file)
index 0000000..dcaf299
--- /dev/null
@@ -0,0 +1,20 @@
+
+#include "target-helpers/inline_debug_helper.h"
+#include "state_tracker/drm_driver.h"
+#include "freedreno/drm/freedreno_drm_public.h"
+
+/* Create the freedreno screen for a DRM fd and pass it through
+ * debug_screen_wrap(); returns NULL if the screen cannot be created.
+ */
+static struct pipe_screen *
+create_screen(int fd)
+{
+   struct pipe_screen *hw_screen = fd_drm_screen_create(fd);
+
+   /* only wrap a screen that actually exists: */
+   return hw_screen ? debug_screen_wrap(hw_screen) : NULL;
+}
+
+DRM_DRIVER_DESCRIPTOR("freedreno", "kgsl", create_screen, NULL)
index 5c40ae8e1458578e1625cd0653f9d5a8f64a14e8..31dbc791ddc695b62a126ba375c0472d32b935a0 100644 (file)
@@ -191,6 +191,15 @@ egl_gallium_la_LIBADD += \
        $(top_builddir)/src/gallium/drivers/svga/libsvga.la
 endif
 
+if HAVE_GALLIUM_FREEDRENO
+# _EGL_PIPE_FREEDRENO switches on the body of pipe_freedreno_create_screen()
+AM_CPPFLAGS += -D_EGL_PIPE_FREEDRENO=1
+egl_gallium_la_LIBADD += \
+       $(top_builddir)/src/gallium/winsys/freedreno/drm/libfreedrenodrm.la \
+       $(top_builddir)/src/gallium/drivers/freedreno/libfreedreno.la \
+       $(FREEDRENO_LIBS)
+
+endif
+
 if HAVE_GALLIUM_SOFTPIPE
 AM_CPPFLAGS += -DGALLIUM_SOFTPIPE -DGALLIUM_RBUG -DGALLIUM_TRACE
 egl_gallium_la_LIBADD += \
index 407c6a8f236c497fca605b106b4d634150403e30..e05490b5580b647a982451ed9d34b2161beb3aab 100644 (file)
@@ -45,6 +45,8 @@
 /* for vmwgfx */
 #include "svga/drm/svga_drm_public.h"
 #include "svga/svga_public.h"
+/* for freedreno */
+#include "freedreno/drm/freedreno_drm_public.h"
 
 static struct pipe_screen *
 pipe_i915_create_screen(int fd)
@@ -179,6 +181,24 @@ pipe_vmwgfx_create_screen(int fd)
 #endif
 }
 
+/* Create a debug-wrapped freedreno screen for a DRM fd.  Returns NULL
+ * when the screen cannot be created, or when freedreno support was not
+ * compiled in (_EGL_PIPE_FREEDRENO unset).
+ */
+static struct pipe_screen *
+pipe_freedreno_create_screen(int fd)
+{
+#if _EGL_PIPE_FREEDRENO
+   struct pipe_screen *hw_screen = fd_drm_screen_create(fd);
+
+   /* only wrap a screen that actually exists: */
+   return hw_screen ? debug_screen_wrap(hw_screen) : NULL;
+#else
+   return NULL;
+#endif
+}
+
 struct pipe_screen *
 egl_pipe_create_drm_screen(const char *name, int fd)
 {
@@ -194,6 +214,8 @@ egl_pipe_create_drm_screen(const char *name, int fd)
       return pipe_radeonsi_create_screen(fd);
    else if (strcmp(name, "vmwgfx") == 0)
       return pipe_vmwgfx_create_screen(fd);
+   else if (strcmp(name, "kgsl") == 0)
+      return pipe_freedreno_create_screen(fd);
    else
       return NULL;
 }
diff --git a/src/gallium/winsys/freedreno/drm/.gitignore b/src/gallium/winsys/freedreno/drm/.gitignore
new file mode 100644 (file)
index 0000000..f3c7a7c
--- /dev/null
@@ -0,0 +1 @@
+Makefile
diff --git a/src/gallium/winsys/freedreno/drm/Makefile.am b/src/gallium/winsys/freedreno/drm/Makefile.am
new file mode 100644 (file)
index 0000000..58f69d1
--- /dev/null
@@ -0,0 +1,32 @@
+# Copyright © 2012 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+# NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+include $(top_srcdir)/src/gallium/Automake.inc
+
+AM_CFLAGS = \
+       -I$(top_srcdir)/src/gallium/drivers \
+       $(GALLIUM_CFLAGS) \
+       $(FREEDRENO_CFLAGS)
+
+# Not installed on its own (noinst); the DRI and EGL targets list
+# libfreedrenodrm.la in their LIBADD.
+noinst_LTLIBRARIES = libfreedrenodrm.la
+
+libfreedrenodrm_la_SOURCES = freedreno_drm_winsys.c
diff --git a/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h b/src/gallium/winsys/freedreno/drm/freedreno_drm_public.h
new file mode 100644 (file)
index 0000000..a7ba207
--- /dev/null
@@ -0,0 +1,9 @@
+
+#ifndef __FREEDRENO_DRM_PUBLIC_H__
+#define __FREEDRENO_DRM_PUBLIC_H__
+
+struct pipe_screen;
+
+/* Create a freedreno pipe_screen on top of the given DRM file descriptor.
+ * Returns NULL if no device can be set up for the descriptor.
+ */
+struct pipe_screen *fd_drm_screen_create(int drmFD);
+
+#endif
diff --git a/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c b/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c
new file mode 100644 (file)
index 0000000..8afb9cd
--- /dev/null
@@ -0,0 +1,18 @@
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_format.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "freedreno_drm_public.h"
+
+#include "freedreno/freedreno_screen.h"
+
+/* Wrap the DRM fd in an fd_device and create the screen on top of it.
+ * Returns NULL if fd_device_new() fails, otherwise whatever
+ * fd_screen_create() returns.
+ */
+struct pipe_screen *
+fd_drm_screen_create(int fd)
+{
+       struct fd_device *device = fd_device_new(fd);
+
+       return device ? fd_screen_create(device) : NULL;
+}