freedreno: add a3xx support
author Rob Clark <robclark@freedesktop.org>
Sun, 26 May 2013 21:13:44 +0000 (17:13 -0400)
committer Rob Clark <robclark@freedesktop.org>
Sat, 8 Jun 2013 17:15:51 +0000 (13:15 -0400)
The adreno a3xx GPU is found in newer snapdragon devices, such as the
nexus4.  The a3xx is GLESv3 and OpenCL capable, although that is not
enabled yet in gallium.

Compared to a2xx, it introduces an entirely new unified shader ISA, and
re-shuffles all or nearly all of the registers.  The good news is that
(for the most part) the registers are more orthogonal, not combining
unrelated state in a single register.  There is also a lot more
flexibility, so we don't need to patch and re-emit the shader like we
did on a2xx.

The shader compiler is currently quite dumb; there is a lot of
room for improvement with an optimizing pass.  Despite that, with the
a320 in my nexus4 it seems to be ~2-3x faster compared to the a220 in my
HP touchpad.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
36 files changed:
configure.ac
src/gallium/drivers/freedreno/Makefile.am
src/gallium/drivers/freedreno/a3xx/Makefile.am [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/a3xx.xml.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_blend.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_blend.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_compiler.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_compiler.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_context.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_context.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_draw.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_draw.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_emit.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_emit.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_gmem.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_gmem.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_program.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_program.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_screen.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_screen.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_texture.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_texture.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_util.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_util.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_zsa.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/fd3_zsa.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/instr-a3xx.h [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/ir-a3xx.c [new file with mode: 0644]
src/gallium/drivers/freedreno/a3xx/ir-a3xx.h [new file with mode: 0644]
src/gallium/drivers/freedreno/adreno_common.xml.h
src/gallium/drivers/freedreno/adreno_pm4.xml.h
src/gallium/drivers/freedreno/freedreno_screen.c
src/gallium/drivers/freedreno/freedreno_util.c

index 8d4a3dc27b02a7b03fe70c9230964d9b1a175ddf..6832b0d8973fb61975ba3d9fead8dc17b0314492 100644 (file)
@@ -1985,6 +1985,7 @@ AC_CONFIG_FILES([Makefile
                src/gallium/drivers/Makefile
                src/gallium/drivers/freedreno/Makefile
                src/gallium/drivers/freedreno/a2xx/Makefile
+               src/gallium/drivers/freedreno/a3xx/Makefile
                src/gallium/drivers/i915/Makefile
                src/gallium/drivers/ilo/Makefile
                src/gallium/drivers/llvmpipe/Makefile
index 64dfda6239969b66c1189197dcb9628f145f263c..b6dbbd191833013f6ba92fbd09e0512a3ab69208 100644 (file)
@@ -5,12 +5,13 @@ noinst_LTLIBRARIES = libfreedreno.la
 AM_CFLAGS = \
        -Wno-packed-bitfield-compat \
        -I$(top_srcdir)/src/gallium/drivers \
+       -I$(top_srcdir)/src/gallium/drivers/freedreno/a3xx \
        -I$(top_srcdir)/src/gallium/drivers/freedreno/a2xx \
        $(GALLIUM_CFLAGS) \
        $(FREEDRENO_CFLAGS) \
        $(VISIBILITY_CFLAGS)
 
-SUBDIRS = a2xx
+SUBDIRS = a2xx a3xx
 
 libfreedreno_la_SOURCES = \
        freedreno_util.c \
@@ -25,5 +26,6 @@ libfreedreno_la_SOURCES = \
        freedreno_gmem.c
 
 libfreedreno_la_LIBADD = \
+       a3xx/libfd3xx.la \
        a2xx/libfd2xx.la
 
diff --git a/src/gallium/drivers/freedreno/a3xx/Makefile.am b/src/gallium/drivers/freedreno/a3xx/Makefile.am
new file mode 100644 (file)
index 0000000..a7e415f
--- /dev/null
@@ -0,0 +1,27 @@
+include $(top_srcdir)/src/gallium/Automake.inc
+
+noinst_LTLIBRARIES = libfd3xx.la
+
+AM_CFLAGS = \
+       -Wno-packed-bitfield-compat \
+       -I$(top_srcdir)/src/gallium/drivers \
+       -I$(top_srcdir)/src/gallium/drivers/freedreno \
+       $(GALLIUM_CFLAGS) \
+       $(FREEDRENO_CFLAGS) \
+       $(VISIBILITY_CFLAGS)
+
+libfd3xx_la_SOURCES = \
+       fd3_blend.c \
+       fd3_compiler.c \
+       fd3_context.c \
+       fd3_draw.c \
+       fd3_emit.c \
+       fd3_gmem.c \
+       fd3_program.c \
+       fd3_rasterizer.c \
+       fd3_screen.c \
+       fd3_texture.c \
+       fd3_util.c \
+       fd3_zsa.c \
+       disasm-a3xx.c \
+       ir-a3xx.c
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
new file mode 100644 (file)
index 0000000..c7f5085
--- /dev/null
@@ -0,0 +1,1838 @@
+#ifndef A3XX_XML
+#define A3XX_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml                (  42578 bytes, from 2013-06-02 13:10:46)
+- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   3094 bytes, from 2013-05-05 18:29:22)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
+
+Copyright (C) 2013 by the following authors:
+- Rob Clark <robdclark@gmail.com> (robclark)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+enum a3xx_render_mode { /* argument type of the A3XX_GRAS_SC_CONTROL_RENDER_MODE and A3XX_RB_MODE_CONTROL_RENDER_MODE field helpers */
+       RB_RENDERING_PASS = 0,
+       RB_TILING_PASS = 1,
+       RB_RESOLVE_PASS = 2,
+};
+
+enum a3xx_tile_mode { /* argument type of A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE (2-bit field); value 1 has no name here; names are unprefixed and share the global identifier namespace */
+       LINEAR = 0,
+       TILE_32X32 = 2,
+};
+
+enum a3xx_threadmode { /* enumerators MULTI/SINGLE are unprefixed, so they can clash with identifiers from other headers */
+       MULTI = 0,
+       SINGLE = 1,
+};
+
+enum a3xx_instrbuffermode { /* only one mode is named; value 0 has no enumerator here */
+       BUFFER = 1,
+};
+
+enum a3xx_threadsize { /* presumably the number of pixel quads per thread group, going by the names -- TODO confirm against the consuming register */
+       TWO_QUADS = 0,
+       FOUR_QUADS = 1,
+};
+
+enum a3xx_state_block_id { /* numbering is sparse: only 2, 3, 4 and 6 are named */
+       HLSQ_BLOCK_ID_TP_TEX = 2,
+       HLSQ_BLOCK_ID_TP_MIPMAP = 3,
+       HLSQ_BLOCK_ID_SP_VS = 4,
+       HLSQ_BLOCK_ID_SP_FS = 6,
+};
+
+enum a3xx_cache_opcode { /* a single opcode is named; INVALIDATE is unprefixed and lives in the global identifier namespace */
+       INVALIDATE = 1,
+};
+
+enum a3xx_vtx_fmt { /* vertex format codes in the range 0-63; numbering is sparse (12-15, 32-39 and 56-59 have no name here) */
+       VFMT_FLOAT_32 = 0,
+       VFMT_FLOAT_32_32 = 1,
+       VFMT_FLOAT_32_32_32 = 2,
+       VFMT_FLOAT_32_32_32_32 = 3,
+       VFMT_FLOAT_16 = 4,
+       VFMT_FLOAT_16_16 = 5,
+       VFMT_FLOAT_16_16_16 = 6,
+       VFMT_FLOAT_16_16_16_16 = 7,
+       VFMT_FIXED_32 = 8,
+       VFMT_FIXED_32_32 = 9,
+       VFMT_FIXED_32_32_32 = 10,
+       VFMT_FIXED_32_32_32_32 = 11,
+       VFMT_SHORT_16 = 16,
+       VFMT_SHORT_16_16 = 17,
+       VFMT_SHORT_16_16_16 = 18,
+       VFMT_SHORT_16_16_16_16 = 19,
+       VFMT_USHORT_16 = 20,
+       VFMT_USHORT_16_16 = 21,
+       VFMT_USHORT_16_16_16 = 22,
+       VFMT_USHORT_16_16_16_16 = 23,
+       VFMT_NORM_SHORT_16 = 24,
+       VFMT_NORM_SHORT_16_16 = 25,
+       VFMT_NORM_SHORT_16_16_16 = 26,
+       VFMT_NORM_SHORT_16_16_16_16 = 27,
+       VFMT_NORM_USHORT_16 = 28,
+       VFMT_NORM_USHORT_16_16 = 29,
+       VFMT_NORM_USHORT_16_16_16 = 30,
+       VFMT_NORM_USHORT_16_16_16_16 = 31,
+       VFMT_UBYTE_8 = 40,
+       VFMT_UBYTE_8_8 = 41,
+       VFMT_UBYTE_8_8_8 = 42,
+       VFMT_UBYTE_8_8_8_8 = 43,
+       VFMT_NORM_UBYTE_8 = 44,
+       VFMT_NORM_UBYTE_8_8 = 45,
+       VFMT_NORM_UBYTE_8_8_8 = 46,
+       VFMT_NORM_UBYTE_8_8_8_8 = 47,
+       VFMT_BYTE_8 = 48,
+       VFMT_BYTE_8_8 = 49,
+       VFMT_BYTE_8_8_8 = 50,
+       VFMT_BYTE_8_8_8_8 = 51,
+       VFMT_NORM_BYTE_8 = 52,
+       VFMT_NORM_BYTE_8_8 = 53,
+       VFMT_NORM_BYTE_8_8_8 = 54,
+       VFMT_NORM_BYTE_8_8_8_8 = 55,
+       VFMT_UINT_10_10_10_2 = 60,
+       VFMT_NORM_UINT_10_10_10_2 = 61,
+       VFMT_INT_10_10_10_2 = 62,
+       VFMT_NORM_INT_10_10_10_2 = 63,
+};
+
+enum a3xx_tex_fmt { /* texture format codes; numbering is sparse and does not line up with enum a3xx_vtx_fmt values */
+       TFMT_NORM_USHORT_565 = 4,
+       TFMT_NORM_USHORT_5551 = 6,
+       TFMT_NORM_USHORT_4444 = 7,
+       TFMT_NORM_UINT_X8Z24 = 10,
+       TFMT_NORM_UINT_2_10_10_10 = 41,
+       TFMT_NORM_UINT_A8 = 44,
+       TFMT_NORM_UINT_L8_A8 = 47,
+       TFMT_NORM_UINT_8 = 48,
+       TFMT_NORM_UINT_8_8 = 49,
+       TFMT_NORM_UINT_8_8_8 = 50,
+       TFMT_NORM_UINT_8_8_8_8 = 51,
+       TFMT_FLOAT_16 = 64,
+       TFMT_FLOAT_16_16 = 65,
+       TFMT_FLOAT_16_16_16_16 = 67,
+       TFMT_FLOAT_32 = 84,
+       TFMT_FLOAT_32_32 = 85,
+       TFMT_FLOAT_32_32_32_32 = 87,
+};
+
+enum a3xx_tex_fetchsize { /* going by the names, codes 1-5 select a fetch width of 1, 2, 4, 8 or 16 bytes and 0 disables fetching */
+       TFETCH_DISABLE = 0,
+       TFETCH_1_BYTE = 1,
+       TFETCH_2_BYTE = 2,
+       TFETCH_4_BYTE = 3,
+       TFETCH_8_BYTE = 4,
+       TFETCH_16_BYTE = 5,
+};
+
+enum a3xx_color_fmt { /* argument type of A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT (6-bit field); only four codes are named */
+       RB_R8G8B8_UNORM = 4,
+       RB_R8G8B8A8_UNORM = 8,
+       RB_Z16_UNORM = 12,
+       RB_A8_UNORM = 20,
+};
+
+enum a3xx_color_swap { /* argument type of A3XX_RB_MRT_BUF_INFO_COLOR_SWAP (2-bit field); enumerators are unprefixed */
+       WZYX = 0,
+       WXYZ = 1,
+       ZYXW = 2,
+       XYZW = 3,
+};
+
+enum a3xx_msaa_samples { /* argument type of A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES and A3XX_RB_MSAA_CONTROL_SAMPLES; the code is not the sample count itself (ONE=0, TWO=1, FOUR=2) */
+       MSAA_ONE = 0,
+       MSAA_TWO = 1,
+       MSAA_FOUR = 2,
+};
+
+enum a3xx_sp_perfcounter_select { /* sparse list: only selectors 12, 14, 26 and 29 are named */
+       SP_FS_CFLOW_INSTRUCTIONS = 12,
+       SP_FS_FULL_ALU_INSTRUCTIONS = 14,
+       SP0_ICL1_MISSES = 26,
+       SP_ALU_ACTIVE_CYCLES = 29,
+};
+
+enum adreno_rb_copy_control_mode { /* carries an adreno_ prefix although it is declared in this a3xx header; only modes 1 and 5 are named */
+       RB_COPY_RESOLVE = 1,
+       RB_COPY_DEPTH_STENCIL = 5,
+};
+
+enum a3xx_tex_filter { /* two filter codes; fits in a single bit */
+       A3XX_TEX_NEAREST = 0,
+       A3XX_TEX_LINEAR = 1,
+};
+
+enum a3xx_tex_clamp { /* four wrap/clamp codes, 0-3; fits in two bits */
+       A3XX_TEX_REPEAT = 0,
+       A3XX_TEX_CLAMP_TO_EDGE = 1,
+       A3XX_TEX_MIRROR_REPEAT = 2,
+       A3XX_TEX_CLAMP_NONE = 3,
+};
+
+enum a3xx_tex_swiz { /* going by the names: 0-3 select a source component, 4 and 5 select the constants zero and one */
+       A3XX_TEX_X = 0,
+       A3XX_TEX_Y = 1,
+       A3XX_TEX_Z = 2,
+       A3XX_TEX_W = 3,
+       A3XX_TEX_ZERO = 4,
+       A3XX_TEX_ONE = 5,
+};
+
+#define REG_A3XX_RBBM_HW_VERSION                               0x00000000
+
+#define REG_A3XX_RBBM_HW_RELEASE                               0x00000001
+
+#define REG_A3XX_RBBM_HW_CONFIGURATION                         0x00000002
+
+#define REG_A3XX_RBBM_CLOCK_CTL                                        0x00000010
+
+#define REG_A3XX_RBBM_SP_HYST_CNT                              0x00000012
+
+#define REG_A3XX_RBBM_SW_RESET_CMD                             0x00000018
+
+#define REG_A3XX_RBBM_AHB_CTL0                                 0x00000020
+
+#define REG_A3XX_RBBM_AHB_CTL1                                 0x00000021
+
+#define REG_A3XX_RBBM_AHB_CMD                                  0x00000022
+
+#define REG_A3XX_RBBM_AHB_ERROR_STATUS                         0x00000027
+
+#define REG_A3XX_RBBM_GPR0_CTL                                 0x0000002e
+
+#define REG_A3XX_RBBM_STATUS                                   0x00000030
+
+#define REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL                     0x00000033
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL                   0x00000050
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL0                 0x00000051
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL1                 0x00000054
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL2                 0x00000057
+
+#define REG_A3XX_RBBM_INTERFACE_HANG_MASK_CTL3                 0x0000005a
+
+#define REG_A3XX_RBBM_INT_CLEAR_CMD                            0x00000061
+
+#define REG_A3XX_RBBM_INT_0_MASK                               0x00000063
+
+#define REG_A3XX_RBBM_INT_0_STATUS                             0x00000064
+
+#define REG_A3XX_RBBM_PERFCTR_CTL                              0x00000080
+
+#define REG_A3XX_RBBM_GPU_BUSY_MASKED                          0x00000088
+
+#define REG_A3XX_RBBM_PERFCTR_SP_7_LO                          0x000000e0
+
+#define REG_A3XX_RBBM_PERFCTR_SP_7_HI                          0x000000e1
+
+#define REG_A3XX_RBBM_PERFCTR_PWR_1_LO                         0x000000ec
+
+#define REG_A3XX_RBBM_PERFCTR_PWR_1_HI                         0x000000ed
+
+#define REG_A3XX_RBBM_RBBM_CTL                                 0x00000100
+
+#define REG_A3XX_RBBM_RBBM_CTL                                 0x00000100 /* NOTE(review): repeats the definition just above with the same value; legal C (identical redefinition) but probably a duplicate entry in the generator input */
+
+#define REG_A3XX_RBBM_DEBUG_BUS_CTL                            0x00000111
+
+#define REG_A3XX_RBBM_DEBUG_BUS_DATA_STATUS                    0x00000112
+
+#define REG_A3XX_CP_PFP_UCODE_ADDR                             0x000001c9
+
+#define REG_A3XX_CP_PFP_UCODE_DATA                             0x000001ca
+
+#define REG_A3XX_CP_ROQ_ADDR                                   0x000001cc
+
+#define REG_A3XX_CP_ROQ_DATA                                   0x000001cd
+
+#define REG_A3XX_CP_MERCIU_ADDR                                        0x000001d1
+
+#define REG_A3XX_CP_MERCIU_DATA                                        0x000001d2
+
+#define REG_A3XX_CP_MERCIU_DATA2                               0x000001d3
+
+#define REG_A3XX_CP_MEQ_ADDR                                   0x000001da
+
+#define REG_A3XX_CP_MEQ_DATA                                   0x000001db
+
+#define REG_A3XX_CP_HW_FAULT                                   0x0000045c
+
+#define REG_A3XX_CP_PROTECT_CTRL                               0x0000045e
+
+#define REG_A3XX_CP_PROTECT_STATUS                             0x0000045f
+
+#define REG_A3XX_CP_PROTECT(i0)                                       (0x00000460 + 0x1*(i0)) /* indexed register: base 0x460, stride 1 per i0; no bound on i0 is enforced here */
+
+#define REG_A3XX_CP_PROTECT_REG(i0)                           (0x00000460 + 0x1*(i0)) /* expands to the same expression as REG_A3XX_CP_PROTECT(i0) */
+
+#define REG_A3XX_CP_AHB_FAULT                                  0x0000054d
+
+#define REG_A3XX_CP_SCRATCH_REG2                               0x0000057a
+
+#define REG_A3XX_CP_SCRATCH_REG3                               0x0000057b
+
+#define REG_A3XX_GRAS_CL_CLIP_CNTL                             0x00002040 /* register; the A3XX_GRAS_CL_CLIP_CNTL_* values below are single-bit flags (bits 12, 16, 17, 19, 20, 21) to be OR-ed together */
+#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER                 0x00001000
+#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE                    0x00010000
+#define A3XX_GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE               0x00020000
+#define A3XX_GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE             0x00080000
+#define A3XX_GRAS_CL_CLIP_CNTL_VP_XFORM_DISABLE                        0x00100000
+#define A3XX_GRAS_CL_CLIP_CNTL_PERSP_DIVISION_DISABLE          0x00200000
+
+#define REG_A3XX_GRAS_CL_GB_CLIP_ADJ                           0x00002044 /* register with two 10-bit fields: HORZ in bits 0-9, VERT in bits 10-19 */
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK                    0x000003ff
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT                   0
+static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(uint32_t val) /* returns val positioned in the HORZ field; bits of val beyond 10 are silently masked off */
+{
+       return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ__MASK;
+}
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK                    0x000ffc00
+#define A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT                   10
+static inline uint32_t A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(uint32_t val) /* returns val shifted left by 10 into the VERT field; bits of val beyond 10 are silently masked off */
+{
+       return ((val) << A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__SHIFT) & A3XX_GRAS_CL_GB_CLIP_ADJ_VERT__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_XOFFSET                         0x00002048 /* register holding one full-width 32-bit value */
+#define A3XX_GRAS_CL_VPORT_XOFFSET__MASK                       0xffffffff
+#define A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT                      0
+static inline uint32_t A3XX_GRAS_CL_VPORT_XOFFSET(float val) /* returns fui(val); the shift (0) and mask (all ones) leave it unchanged. fui() is defined elsewhere -- presumably the float's bit pattern as a uint, TODO confirm */
+{
+       return ((fui(val)) << A3XX_GRAS_CL_VPORT_XOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_XOFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_XSCALE                          0x00002049 /* register holding one full-width 32-bit value */
+#define A3XX_GRAS_CL_VPORT_XSCALE__MASK                                0xffffffff
+#define A3XX_GRAS_CL_VPORT_XSCALE__SHIFT                       0
+static inline uint32_t A3XX_GRAS_CL_VPORT_XSCALE(float val) /* returns fui(val); the shift (0) and mask (all ones) leave it unchanged. fui() is defined elsewhere -- presumably the float's bit pattern as a uint, TODO confirm */
+{
+       return ((fui(val)) << A3XX_GRAS_CL_VPORT_XSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_XSCALE__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_YOFFSET                         0x0000204a /* register holding one full-width 32-bit value */
+#define A3XX_GRAS_CL_VPORT_YOFFSET__MASK                       0xffffffff
+#define A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT                      0
+static inline uint32_t A3XX_GRAS_CL_VPORT_YOFFSET(float val) /* returns fui(val); the shift (0) and mask (all ones) leave it unchanged. fui() is defined elsewhere -- presumably the float's bit pattern as a uint, TODO confirm */
+{
+       return ((fui(val)) << A3XX_GRAS_CL_VPORT_YOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_YOFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_YSCALE                          0x0000204b /* register holding one full-width 32-bit value */
+#define A3XX_GRAS_CL_VPORT_YSCALE__MASK                                0xffffffff
+#define A3XX_GRAS_CL_VPORT_YSCALE__SHIFT                       0
+static inline uint32_t A3XX_GRAS_CL_VPORT_YSCALE(float val) /* returns fui(val); the shift (0) and mask (all ones) leave it unchanged. fui() is defined elsewhere -- presumably the float's bit pattern as a uint, TODO confirm */
+{
+       return ((fui(val)) << A3XX_GRAS_CL_VPORT_YSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_YSCALE__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_ZOFFSET                         0x0000204c /* register holding one full-width 32-bit value */
+#define A3XX_GRAS_CL_VPORT_ZOFFSET__MASK                       0xffffffff
+#define A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT                      0
+static inline uint32_t A3XX_GRAS_CL_VPORT_ZOFFSET(float val) /* returns fui(val); the shift (0) and mask (all ones) leave it unchanged. fui() is defined elsewhere -- presumably the float's bit pattern as a uint, TODO confirm */
+{
+       return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZOFFSET__SHIFT) & A3XX_GRAS_CL_VPORT_ZOFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_CL_VPORT_ZSCALE                          0x0000204d /* register holding one full-width 32-bit value */
+#define A3XX_GRAS_CL_VPORT_ZSCALE__MASK                                0xffffffff
+#define A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT                       0
+static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val) /* returns fui(val); the shift (0) and mask (all ones) leave it unchanged. fui() is defined elsewhere -- presumably the float's bit pattern as a uint, TODO confirm */
+{
+       return ((fui(val)) << A3XX_GRAS_CL_VPORT_ZSCALE__SHIFT) & A3XX_GRAS_CL_VPORT_ZSCALE__MASK;
+}
+
+#define REG_A3XX_GRAS_SU_POINT_MINMAX                          0x00002068
+
+#define REG_A3XX_GRAS_SU_POINT_SIZE                            0x00002069
+
+#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE                     0x0000206c /* register with a 24-bit VAL field in bits 0-23 */
+#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK               0x00ffffff
+#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT              0
+static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val) /* multiplies val by 40.0 (in double), truncates to uint32_t and keeps the low 24 bits. NOTE(review): converting a product <= -1 to uint32_t is undefined behaviour in C -- confirm callers never pass a negative scale */
+{
+       return ((((uint32_t)(val * 40.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
+}
+
+#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET                    0x0000206d /* register holding one full-width 32-bit value */
+#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK                  0xffffffff
+#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT                 0
+static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val) /* multiplies val by 44.0 (in double) and truncates to uint32_t. NOTE(review): a product <= -1 makes the conversion undefined in C, and the factor differs from the 40.0 used by A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL -- confirm both */
+{
+       return ((((uint32_t)(val * 44.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK;
+}
+
+#define REG_A3XX_GRAS_SU_MODE_CONTROL                          0x00002070 /* register layout: CULL_FRONT bit 0, CULL_BACK bit 1, LINEHALFWIDTH bits 2-10, POLY_OFFSET bit 11 */
+#define A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT                   0x00000001
+#define A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK                    0x00000002
+#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK          0x000007fc
+#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT         2
+static inline uint32_t A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(uint32_t val) /* returns val shifted left by 2 into the 9-bit LINEHALFWIDTH field; larger values are silently truncated by the mask */
+{
+       return ((val) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK;
+}
+#define A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET                  0x00000800
+
+#define REG_A3XX_GRAS_SC_CONTROL                               0x00002072 /* register with three 4-bit fields: RENDER_MODE bits 4-7, MSAA_SAMPLES bits 8-11, RASTER_MODE bits 12-15 */
+#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK                 0x000000f0
+#define A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT                        4
+static inline uint32_t A3XX_GRAS_SC_CONTROL_RENDER_MODE(enum a3xx_render_mode val) /* returns the render-mode code positioned in bits 4-7 */
+{
+       return ((val) << A3XX_GRAS_SC_CONTROL_RENDER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RENDER_MODE__MASK;
+}
+#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK                        0x00000f00
+#define A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT               8
+static inline uint32_t A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(enum a3xx_msaa_samples val) /* returns the MSAA sample code (an enum code, not a count) positioned in bits 8-11 */
+{
+       return ((val) << A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__SHIFT) & A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES__MASK;
+}
+#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK                 0x0000f000
+#define A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT                        12
+static inline uint32_t A3XX_GRAS_SC_CONTROL_RASTER_MODE(uint32_t val) /* returns val positioned in bits 12-15; this field takes a raw integer, no enum is declared for it */
+{
+       return ((val) << A3XX_GRAS_SC_CONTROL_RASTER_MODE__SHIFT) & A3XX_GRAS_SC_CONTROL_RASTER_MODE__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL                     0x00002074 /* register layout: X bits 0-14, Y bits 16-30, WINDOW_OFFSET_DISABLE flag bit 31 */
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE   0x80000000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK                 0x00007fff
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT                        0
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(uint32_t val) /* returns val in the 15-bit X field; values above 0x7fff are silently truncated */
+{
+       return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X__MASK;
+}
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK                 0x7fff0000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT                        16
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(uint32_t val) /* returns val shifted left by 16 into the 15-bit Y field; values above 0x7fff are silently truncated */
+{
+       return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_SCREEN_SCISSOR_BR                     0x00002075 /* register layout: X bits 0-14, Y bits 16-30, WINDOW_OFFSET_DISABLE flag bit 31 */
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_WINDOW_OFFSET_DISABLE   0x80000000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK                 0x00007fff
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT                        0
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(uint32_t val) /* returns val in the 15-bit X field; values above 0x7fff are silently truncated */
+{
+       return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X__MASK;
+}
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK                 0x7fff0000
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT                        16
+static inline uint32_t A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(uint32_t val) /* returns val shifted left by 16 into the 15-bit Y field; values above 0x7fff are silently truncated */
+{
+       return ((val) << A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL                     0x00002079 /* register layout: X bits 0-14, Y bits 16-30, WINDOW_OFFSET_DISABLE flag bit 31 */
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_WINDOW_OFFSET_DISABLE   0x80000000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK                 0x00007fff
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT                        0
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(uint32_t val) /* returns val in the 15-bit X field; values above 0x7fff are silently truncated */
+{
+       return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X__MASK;
+}
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK                 0x7fff0000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT                        16
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(uint32_t val) /* returns val shifted left by 16 into the 15-bit Y field; values above 0x7fff are silently truncated */
+{
+       return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y__MASK;
+}
+
+#define REG_A3XX_GRAS_SC_WINDOW_SCISSOR_BR                     0x0000207a /* register layout: X bits 0-14, Y bits 16-30, WINDOW_OFFSET_DISABLE flag bit 31 */
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_WINDOW_OFFSET_DISABLE   0x80000000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK                 0x00007fff
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT                        0
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(uint32_t val) /* returns val in the 15-bit X field; values above 0x7fff are silently truncated */
+{
+       return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X__MASK;
+}
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK                 0x7fff0000
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT                        16
+static inline uint32_t A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(uint32_t val) /* returns val shifted left by 16 into the 15-bit Y field; values above 0x7fff are silently truncated */
+{
+       return ((val) << A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__SHIFT) & A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y__MASK;
+}
+
+#define REG_A3XX_RB_MODE_CONTROL                               0x000020c0 /* register layout: GMEM_BYPASS bit 7, RENDER_MODE bits 8-10, MARB_CACHE_SPLIT_MODE bit 15, PACKER_TIMER_ENABLE bit 16 */
+#define A3XX_RB_MODE_CONTROL_GMEM_BYPASS                       0x00000080
+#define A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK                 0x00000700
+#define A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT                        8
+static inline uint32_t A3XX_RB_MODE_CONTROL_RENDER_MODE(enum a3xx_render_mode val) /* returns the render-mode code positioned in the 3-bit field at bits 8-10 (the GRAS_SC_CONTROL counterpart is 4 bits wide) */
+{
+       return ((val) << A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT) & A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK;
+}
+#define A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE             0x00008000
+#define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE               0x00010000
+
+#define REG_A3XX_RB_RENDER_CONTROL                             0x000020c1 /* register layout: BIN_WIDTH bits 4-11, DISABLE_COLOR_PIPE bit 12, ENABLE_GMEM bit 13, ALPHA_TEST_FUNC bits 24-26 */
+#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK                 0x00000ff0
+#define A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT                        4
+static inline uint32_t A3XX_RB_RENDER_CONTROL_BIN_WIDTH(uint32_t val) /* val is divided by 32 (val >> 5, low 5 bits discarded) before being placed in the 8-bit field, so the field stores the width in units of 32 */
+{
+       return ((val >> 5) << A3XX_RB_RENDER_CONTROL_BIN_WIDTH__SHIFT) & A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK;
+}
+#define A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE              0x00001000
+#define A3XX_RB_RENDER_CONTROL_ENABLE_GMEM                     0x00002000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK           0x07000000
+#define A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT          24
+static inline uint32_t A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(enum adreno_compare_func val) /* returns the compare-function code in bits 24-26; enum adreno_compare_func is declared outside this header */
+{
+       return ((val) << A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__SHIFT) & A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC__MASK;
+}
+
+#define REG_A3XX_RB_MSAA_CONTROL                               0x000020c2 /* register layout: DISABLE bit 10, SAMPLES bits 12-15, SAMPLE_MASK bits 16-31 */
+#define A3XX_RB_MSAA_CONTROL_DISABLE                           0x00000400
+#define A3XX_RB_MSAA_CONTROL_SAMPLES__MASK                     0x0000f000
+#define A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT                    12
+static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLES(enum a3xx_msaa_samples val) /* returns the MSAA sample code (an enum code, not a count) positioned in bits 12-15 */
+{
+       return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLES__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLES__MASK;
+}
+#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK                 0xffff0000
+#define A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT                        16
+static inline uint32_t A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(uint32_t val) /* returns the low 16 bits of val moved to bits 16-31; the upper 16 bits of val fall off the shift */
+{
+       return ((val) << A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__SHIFT) & A3XX_RB_MSAA_CONTROL_SAMPLE_MASK__MASK;
+}
+
+#define REG_A3XX_UNKNOWN_20C3                                  0x000020c3
+
+#define REG_A3XX_RB_MRT(i0)                                   (0x000020c4 + 0x4*(i0)) /* base of an indexed register group: 4 registers per index i0, starting at 0x20c4 */
+
+#define REG_A3XX_RB_MRT_CONTROL(i0)                           (0x000020c4 + 0x4*(i0)) /* first register of group i0 (same expression as REG_A3XX_RB_MRT); flags at bits 3-5, ROP_CODE bits 8-11, DITHER_MODE bits 12-13, COMPONENT_ENABLE bits 24-27 */
+#define A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE                   0x00000008
+#define A3XX_RB_MRT_CONTROL_BLEND                              0x00000010
+#define A3XX_RB_MRT_CONTROL_BLEND2                             0x00000020
+#define A3XX_RB_MRT_CONTROL_ROP_CODE__MASK                     0x00000f00
+#define A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT                    8
+static inline uint32_t A3XX_RB_MRT_CONTROL_ROP_CODE(uint32_t val) /* returns val positioned in the 4-bit ROP_CODE field at bits 8-11 */
+{
+       return ((val) << A3XX_RB_MRT_CONTROL_ROP_CODE__SHIFT) & A3XX_RB_MRT_CONTROL_ROP_CODE__MASK;
+}
+#define A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK                  0x00003000
+#define A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT                 12
+static inline uint32_t A3XX_RB_MRT_CONTROL_DITHER_MODE(enum adreno_rb_dither_mode val) /* returns the dither-mode code in bits 12-13; enum adreno_rb_dither_mode is declared outside this header */
+{
+       return ((val) << A3XX_RB_MRT_CONTROL_DITHER_MODE__SHIFT) & A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK;
+}
+#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK             0x0f000000
+#define A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT            24
+static inline uint32_t A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(uint32_t val) /* returns the low 4 bits of val positioned at bits 24-27 */
+{
+       return ((val) << A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__SHIFT) & A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
+}
+
+#define REG_A3XX_RB_MRT_BUF_INFO(i0)                          (0x000020c5 + 0x4*(i0)) /* second register of MRT group i0; COLOR_FORMAT bits 0-5, COLOR_TILE_MODE bits 6-7, COLOR_SWAP bits 10-11, COLOR_BUF_PITCH bits 17-31 */
+#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK                        0x0000003f
+#define A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT               0
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(enum a3xx_color_fmt val) /* returns the colour format code in the 6-bit field at bits 0-5 */
+{
+       return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK;
+}
+#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK             0x000000c0
+#define A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT            6
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(enum a3xx_tile_mode val) /* returns the tile-mode code in the 2-bit field at bits 6-7 */
+{
+       return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE__MASK;
+}
+#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK                  0x00000c00
+#define A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT                 10
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(enum a3xx_color_swap val) /* returns the component-swap code in the 2-bit field at bits 10-11 */
+{
+       return ((val) << A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_SWAP__MASK;
+}
+#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK             0xfffe0000
+#define A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT            17
+static inline uint32_t A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(uint32_t val) /* val is divided by 32 (val >> 5, low 5 bits discarded) and stored in the 15-bit field at bits 17-31, i.e. the pitch is kept in units of 32 */
+{
+       return ((val >> 5) << A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__SHIFT) & A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH__MASK;
+}
+
+#define REG_A3XX_RB_MRT_BUF_BASE(i0)                          (0x000020c6 + 0x4*(i0)) /* third register of MRT group i0; COLOR_BUF_BASE occupies bits 4-31 */
+#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK              0xfffffff0
+#define A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT             4
+static inline uint32_t A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(uint32_t val) /* drops the low 5 bits of val (val >> 5) and places the rest at bit 4, so the result is NOT val itself. NOTE(review): right shift of 5 against a field shift of 4 -- confirm this encoding is intended */
+{
+       return ((val >> 5) << A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__SHIFT) & A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE__MASK;
+}
+
+#define REG_A3XX_RB_MRT_BLEND_CONTROL(i0)                     (0x000020c7 + 0x4*(i0)) /* fourth register of MRT group i0; RGB fields in bits 0-12, ALPHA fields in bits 16-28, CLAMP_ENABLE bit 29 */
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK         0x0000001f
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT                0
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(enum adreno_rb_blend_factor val) /* 5-bit field at bits 0-4; enum adreno_rb_blend_factor is declared outside this header */
+{
+       return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK       0x000000e0
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT      5
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(enum adreno_rb_blend_opcode val) /* 3-bit field at bits 5-7; enum adreno_rb_blend_opcode is declared outside this header */
+{
+       return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK                0x00001f00
+#define A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT       8
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(enum adreno_rb_blend_factor val) /* 5-bit field at bits 8-12 */
+{
+       return ((val) << A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK       0x001f0000
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT      16
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(enum adreno_rb_blend_factor val) /* 5-bit field at bits 16-20 */
+{
+       return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK     0x00e00000
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT    21
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(enum adreno_rb_blend_opcode val) /* 3-bit field at bits 21-23 */
+{
+       return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK      0x1f000000
+#define A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT     24
+static inline uint32_t A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(enum adreno_rb_blend_factor val) /* 5-bit field at bits 24-28 */
+{
+       return ((val) << A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__SHIFT) & A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR__MASK;
+}
+#define A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE                 0x20000000
+/* Register offsets 0x20e4..0x20eb.  Only the offsets are defined here; no
+ * bitfields are described for these registers.  The UNKNOWN_* names carry
+ * the raw offset because no descriptive name is assigned in this header.
+ */
+#define REG_A3XX_RB_BLEND_RED                                  0x000020e4
+
+#define REG_A3XX_RB_BLEND_GREEN                                        0x000020e5
+
+#define REG_A3XX_RB_BLEND_BLUE                                 0x000020e6
+
+#define REG_A3XX_RB_BLEND_ALPHA                                        0x000020e7
+
+#define REG_A3XX_UNKNOWN_20E8                                  0x000020e8
+
+#define REG_A3XX_UNKNOWN_20E9                                  0x000020e9
+
+#define REG_A3XX_UNKNOWN_20EA                                  0x000020ea
+
+#define REG_A3XX_UNKNOWN_20EB                                  0x000020eb
+/* RB_COPY_CONTROL: register 0x20ec.
+ *   MSAA_RESOLVE  bits  0..1   (enum a3xx_msaa_samples)
+ *   MODE          bits  4..6   (enum adreno_rb_copy_control_mode)
+ *   GMEM_BASE     bits 10..31
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ */
+#define REG_A3XX_RB_COPY_CONTROL                               0x000020ec
+#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK                        0x00000003
+#define A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT               0
+static inline uint32_t A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(enum a3xx_msaa_samples val)
+{
+       return ((val) << A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__SHIFT) & A3XX_RB_COPY_CONTROL_MSAA_RESOLVE__MASK;
+}
+#define A3XX_RB_COPY_CONTROL_MODE__MASK                                0x00000070
+#define A3XX_RB_COPY_CONTROL_MODE__SHIFT                       4
+static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mode val)
+{
+       return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK;
+}
+#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK                   0xfffffc00
+#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT                  10
+static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val)
+{ /* val >> 10 followed by << 10: the net effect is val with its low ten
+   * bits cleared, i.e. val rounded down to a multiple of 1024. */
+       return ((val >> 10) << A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT) & A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK;
+}
+/* RB_COPY_DEST_BASE: register 0x20ed.
+ *   BASE   bits 4..31 (mask 0xfffffff0)
+ */
+#define REG_A3XX_RB_COPY_DEST_BASE                             0x000020ed
+#define A3XX_RB_COPY_DEST_BASE_BASE__MASK                      0xfffffff0
+#define A3XX_RB_COPY_DEST_BASE_BASE__SHIFT                     4
+static inline uint32_t A3XX_RB_COPY_DEST_BASE_BASE(uint32_t val)
+{ /* val is divided by 32 (val >> 5, low five bits dropped) and the
+   * quotient is placed at bit 4, then masked to the field.
+   * NOTE(review): pre-scale (5) and field position (4) differ -- confirm
+   * the unit callers are expected to pass. */
+       return ((val >> 5) << A3XX_RB_COPY_DEST_BASE_BASE__SHIFT) & A3XX_RB_COPY_DEST_BASE_BASE__MASK;
+}
+/* RB_COPY_DEST_PITCH: register 0x20ee.
+ *   PITCH   bits 0..31 (the whole register)
+ */
+#define REG_A3XX_RB_COPY_DEST_PITCH                            0x000020ee
+#define A3XX_RB_COPY_DEST_PITCH_PITCH__MASK                    0xffffffff
+#define A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT                   0
+static inline uint32_t A3XX_RB_COPY_DEST_PITCH_PITCH(uint32_t val)
+{ /* The register receives val divided by 32 (val >> 5); the low five
+   * bits of val are discarded.  The shift by 0 and the all-ones mask do
+   * not alter the quotient. */
+       return ((val >> 5) << A3XX_RB_COPY_DEST_PITCH_PITCH__SHIFT) & A3XX_RB_COPY_DEST_PITCH_PITCH__MASK;
+}
+/* RB_COPY_DEST_INFO: register 0x20ef.
+ *   TILE              bits  0..1   (enum a3xx_tile_mode)
+ *   FORMAT            bits  2..7   (enum a3xx_color_fmt)
+ *   SWAP              bits  8..9   (enum a3xx_color_swap)
+ *   COMPONENT_ENABLE  bits 14..17
+ *   ENDIAN            bits 18..20  (enum adreno_rb_surface_endian)
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ */
+#define REG_A3XX_RB_COPY_DEST_INFO                             0x000020ef
+#define A3XX_RB_COPY_DEST_INFO_TILE__MASK                      0x00000003
+#define A3XX_RB_COPY_DEST_INFO_TILE__SHIFT                     0
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_TILE(enum a3xx_tile_mode val)
+{
+       return ((val) << A3XX_RB_COPY_DEST_INFO_TILE__SHIFT) & A3XX_RB_COPY_DEST_INFO_TILE__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_FORMAT__MASK                    0x000000fc
+#define A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT                   2
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_FORMAT(enum a3xx_color_fmt val)
+{
+       return ((val) << A3XX_RB_COPY_DEST_INFO_FORMAT__SHIFT) & A3XX_RB_COPY_DEST_INFO_FORMAT__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_SWAP__MASK                      0x00000300
+#define A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT                     8
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val)
+{
+       return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A3XX_RB_COPY_DEST_INFO_SWAP__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK          0x0003c000
+#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT         14
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val)
+{
+       return ((val) << A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT) & A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK;
+}
+#define A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK                    0x001c0000
+#define A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT                   18
+static inline uint32_t A3XX_RB_COPY_DEST_INFO_ENDIAN(enum adreno_rb_surface_endian val)
+{
+       return ((val) << A3XX_RB_COPY_DEST_INFO_ENDIAN__SHIFT) & A3XX_RB_COPY_DEST_INFO_ENDIAN__MASK;
+}
+/* RB_DEPTH_CONTROL: register 0x2100.
+ *   Z_ENABLE        bit  1     (flag)
+ *   Z_WRITE_ENABLE  bit  2     (flag)
+ *   EARLY_Z_ENABLE  bit  3     (flag)
+ *   ZFUNC           bits 4..6  (enum adreno_compare_func)
+ *   BF_ENABLE       bit  7     (flag)
+ *   Z_TEST_ENABLE   bit  31    (flag)
+ * Flags are single-bit constants to OR in directly; ZFUNC has a helper
+ * that shifts val to bit 4 and masks it to the field.
+ * The register at 0x2101 that follows has an offset only, no named fields.
+ */
+#define REG_A3XX_RB_DEPTH_CONTROL                              0x00002100
+#define A3XX_RB_DEPTH_CONTROL_Z_ENABLE                         0x00000002
+#define A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE                   0x00000004
+#define A3XX_RB_DEPTH_CONTROL_EARLY_Z_ENABLE                   0x00000008
+#define A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK                      0x00000070
+#define A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT                     4
+static inline uint32_t A3XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
+{
+       return ((val) << A3XX_RB_DEPTH_CONTROL_ZFUNC__SHIFT) & A3XX_RB_DEPTH_CONTROL_ZFUNC__MASK;
+}
+#define A3XX_RB_DEPTH_CONTROL_BF_ENABLE                                0x00000080
+#define A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE                    0x80000000
+
+#define REG_A3XX_UNKNOWN_2101                                  0x00002101
+/* RB_DEPTH_INFO: register 0x2102.
+ *   DEPTH_FORMAT  bit  0       (enum adreno_rb_depth_format)
+ *   DEPTH_BASE    bits 11..31
+ */
+#define REG_A3XX_RB_DEPTH_INFO                                 0x00002102
+#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK                  0x00000001
+#define A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT                 0
+static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(enum adreno_rb_depth_format val)
+{ /* Single-bit field: only the lowest bit of val survives the mask. */
+       return ((val) << A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_FORMAT__MASK;
+}
+#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK                    0xfffff800
+#define A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT                   11
+static inline uint32_t A3XX_RB_DEPTH_INFO_DEPTH_BASE(uint32_t val)
+{ /* val is divided by 1024 (val >> 10, low ten bits dropped) and the
+   * quotient is placed at bit 11, then masked to the field.
+   * NOTE(review): pre-scale (10) and field position (11) differ, so the
+   * packed bits are not val with its low bits cleared -- confirm the unit
+   * callers are expected to pass. */
+       return ((val >> 10) << A3XX_RB_DEPTH_INFO_DEPTH_BASE__SHIFT) & A3XX_RB_DEPTH_INFO_DEPTH_BASE__MASK;
+}
+/* RB_DEPTH_PITCH: register 0x2103; the single field spans bits 0..31. */
+#define REG_A3XX_RB_DEPTH_PITCH                                        0x00002103
+#define A3XX_RB_DEPTH_PITCH__MASK                              0xffffffff
+#define A3XX_RB_DEPTH_PITCH__SHIFT                             0
+static inline uint32_t A3XX_RB_DEPTH_PITCH(uint32_t val)
+{ /* The register receives val divided by 8 (val >> 3); the low three
+   * bits of val are discarded.  The shift by 0 and the all-ones mask do
+   * not alter the quotient. */
+       return ((val >> 3) << A3XX_RB_DEPTH_PITCH__SHIFT) & A3XX_RB_DEPTH_PITCH__MASK;
+}
+/* RB_STENCIL_CONTROL: register 0x2104.
+ *   STENCIL_ENABLE     bit  0       (flag)
+ *   STENCIL_ENABLE_BF  bit  2       (flag)
+ *   FUNC               bits  8..10  (enum adreno_compare_func)
+ *   FAIL               bits 11..13  (enum adreno_stencil_op)
+ *   ZPASS              bits 14..16  (enum adreno_stencil_op)
+ *   ZFAIL              bits 17..19  (enum adreno_stencil_op)
+ *   FUNC_BF            bits 20..22  (enum adreno_compare_func)
+ *   FAIL_BF            bits 23..25  (enum adreno_stencil_op)
+ *   ZPASS_BF           bits 26..28  (enum adreno_stencil_op)
+ *   ZFAIL_BF           bits 29..31  (enum adreno_stencil_op)
+ * The *_BF fields repeat FUNC/FAIL/ZPASS/ZFAIL at a second position.
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ */
+#define REG_A3XX_RB_STENCIL_CONTROL                            0x00002104
+#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE                 0x00000001
+#define A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF              0x00000004
+#define A3XX_RB_STENCIL_CONTROL_FUNC__MASK                     0x00000700
+#define A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT                    8
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC(enum adreno_compare_func val)
+{
+       return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_FAIL__MASK                     0x00003800
+#define A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT                    11
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL(enum adreno_stencil_op val)
+{
+       return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZPASS__MASK                    0x0001c000
+#define A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT                   14
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS(enum adreno_stencil_op val)
+{
+       return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK                    0x000e0000
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT                   17
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL(enum adreno_stencil_op val)
+{
+       return ((val) << A3XX_RB_STENCIL_CONTROL_ZFAIL__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK                  0x00700000
+#define A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT                 20
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FUNC_BF(enum adreno_compare_func val)
+{
+       return ((val) << A3XX_RB_STENCIL_CONTROL_FUNC_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FUNC_BF__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK                  0x03800000
+#define A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT                 23
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_FAIL_BF(enum adreno_stencil_op val)
+{
+       return ((val) << A3XX_RB_STENCIL_CONTROL_FAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_FAIL_BF__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK                 0x1c000000
+#define A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT                        26
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZPASS_BF(enum adreno_stencil_op val)
+{
+       return ((val) << A3XX_RB_STENCIL_CONTROL_ZPASS_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK;
+}
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK                 0xe0000000
+#define A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT                        29
+/* Packs the ZFAIL_BF stencil op into bits 29..31 of RB_STENCIL_CONTROL.
+ * This field reaches bit 31, so val is converted to uint32_t before the
+ * shift: if the enum promotes to (signed) int, shifting a value of 4 or
+ * more left by 29 would overflow int, which is undefined behaviour.
+ * Bits that fall outside the field mask are dropped.
+ */
+static inline uint32_t A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(enum adreno_stencil_op val)
+{
+       return ((uint32_t)(val) << A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__SHIFT) & A3XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK;
+}
+/* Register offsets 0x2105..0x2107: offsets only, no fields are described
+ * and no descriptive names are assigned in this header. */
+#define REG_A3XX_UNKNOWN_2105                                  0x00002105
+
+#define REG_A3XX_UNKNOWN_2106                                  0x00002106
+
+#define REG_A3XX_UNKNOWN_2107                                  0x00002107
+/* RB_STENCILREFMASK: register 0x2108.
+ *   STENCILREF        bits  0..7
+ *   STENCILMASK       bits  8..15
+ *   STENCILWRITEMASK  bits 16..23
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask, so only the low eight bits of val are kept.
+ */
+#define REG_A3XX_RB_STENCILREFMASK                             0x00002108
+#define A3XX_RB_STENCILREFMASK_STENCILREF__MASK                        0x000000ff
+#define A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT               0
+static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILREF(uint32_t val)
+{
+       return ((val) << A3XX_RB_STENCILREFMASK_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILREF__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_STENCILMASK__MASK               0x0000ff00
+#define A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT              8
+static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILMASK(uint32_t val)
+{
+       return ((val) << A3XX_RB_STENCILREFMASK_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILMASK__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK          0x00ff0000
+#define A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT         16
+static inline uint32_t A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(uint32_t val)
+{
+       return ((val) << A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_STENCILWRITEMASK__MASK;
+}
+/* RB_STENCILREFMASK_BF: register 0x2109, same field layout as the
+ * register at 0x2108.
+ *   STENCILREF        bits  0..7
+ *   STENCILMASK       bits  8..15
+ *   STENCILWRITEMASK  bits 16..23
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask, so only the low eight bits of val are kept.
+ */
+#define REG_A3XX_RB_STENCILREFMASK_BF                          0x00002109
+#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK             0x000000ff
+#define A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT            0
+static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILREF(uint32_t val)
+{
+       return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILREF__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILREF__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK            0x0000ff00
+#define A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT           8
+static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILMASK(uint32_t val)
+{
+       return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILMASK__MASK;
+}
+#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK       0x00ff0000
+#define A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT      16
+static inline uint32_t A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(uint32_t val)
+{
+       return ((val) << A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__SHIFT) & A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK__MASK;
+}
+/* PA_SC_WINDOW_OFFSET: register 0x210e.
+ *   X  bits  0..15
+ *   Y  bits 16..31
+ * Each helper keeps the low 16 bits of val and places them in its half of
+ * the register; higher bits of val are dropped.
+ */
+#define REG_A3XX_PA_SC_WINDOW_OFFSET                           0x0000210e
+#define A3XX_PA_SC_WINDOW_OFFSET_X__MASK                       0x0000ffff
+#define A3XX_PA_SC_WINDOW_OFFSET_X__SHIFT                      0
+static inline uint32_t A3XX_PA_SC_WINDOW_OFFSET_X(uint32_t val)
+{
+       return ((val) << A3XX_PA_SC_WINDOW_OFFSET_X__SHIFT) & A3XX_PA_SC_WINDOW_OFFSET_X__MASK;
+}
+#define A3XX_PA_SC_WINDOW_OFFSET_Y__MASK                       0xffff0000
+#define A3XX_PA_SC_WINDOW_OFFSET_Y__SHIFT                      16
+static inline uint32_t A3XX_PA_SC_WINDOW_OFFSET_Y(uint32_t val)
+{
+       return ((val) << A3XX_PA_SC_WINDOW_OFFSET_Y__SHIFT) & A3XX_PA_SC_WINDOW_OFFSET_Y__MASK;
+}
+/* PC register offsets 0x21e4 and 0x21ea: offsets only, no fields are
+ * described for these two registers in this header. */
+#define REG_A3XX_PC_VSTREAM_CONTROL                            0x000021e4
+
+#define REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL                    0x000021ea
+/* PC_PRIM_VTX_CNTL: register 0x21ec.
+ *   STRIDE_IN_VPC         bits 0..4
+ *   POLYMODE_FRONT_PTYPE  bits 5..7   (enum adreno_pa_su_sc_draw)
+ *   POLYMODE_BACK_PTYPE   bits 8..10  (enum adreno_pa_su_sc_draw)
+ *   PROVOKING_VTX_LAST    bit  25     (flag, OR it in directly)
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ * PC_RESTART_INDEX (0x21ed) follows with an offset only, no named fields.
+ */
+#define REG_A3XX_PC_PRIM_VTX_CNTL                              0x000021ec
+#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK              0x0000001f
+#define A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT             0
+static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(uint32_t val)
+{
+       return ((val) << A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC__MASK;
+}
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK       0x000000e0
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT      5
+static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+       return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE__MASK;
+}
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK                0x00000700
+#define A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT       8
+static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_su_sc_draw val)
+{
+       return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK;
+}
+#define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST               0x02000000
+
+#define REG_A3XX_PC_RESTART_INDEX                              0x000021ed
+/* HLSQ_CONTROL_0_REG: register 0x2200.
+ *   FSTHREADSIZE         bit  4   (enum a3xx_threadsize)
+ *   FSSUPERTHREADENABLE  bit  6   (flag)
+ *   SPSHADERRESTART      bit  9   (flag)
+ *   RESERVED2            bit  10  (flag)
+ *   CHUNKDISABLE         bit  26  (flag)
+ *   CONSTSWITCHMODE      bit  27  (flag)
+ *   LAZYUPDATEDISABLE    bit  28  (flag)
+ *   SPCONSTFULLUPDATE    bit  29  (flag)
+ *   TPFULLUPDATE         bit  30  (flag)
+ *   SINGLECONTEXT        bit  31  (flag)
+ * Flags are single-bit constants to OR in directly.
+ */
+#define REG_A3XX_HLSQ_CONTROL_0_REG                            0x00002200
+#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK             0x00000010
+#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT            4
+static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val)
+{ /* Single-bit field: only the lowest bit of val ends up in bit 4. */
+       return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK;
+}
+#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE            0x00000040
+#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART                        0x00000200
+#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2                      0x00000400
+#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE                   0x04000000
+#define A3XX_HLSQ_CONTROL_0_REG_CONSTSWITCHMODE                        0x08000000
+#define A3XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE              0x10000000
+#define A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE              0x20000000
+#define A3XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE                   0x40000000
+#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT                  0x80000000
+/* HLSQ_CONTROL_1_REG: register 0x2201.
+ *   VSTHREADSIZE         bit 6  (enum a3xx_threadsize)
+ *   VSSUPERTHREADENABLE  bit 8  (flag)
+ *   RESERVED1            bit 9  (flag)
+ */
+#define REG_A3XX_HLSQ_CONTROL_1_REG                            0x00002201
+#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK             0x00000040
+#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT            6
+static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val)
+{ /* Single-bit field: only the lowest bit of val ends up in bit 6. */
+       return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK;
+}
+#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE            0x00000100
+#define A3XX_HLSQ_CONTROL_1_REG_RESERVED1                      0x00000200
+/* HLSQ_CONTROL_2_REG: register 0x2202.
+ *   PRIMALLOCTHRESHOLD  bits 26..31
+ * HLSQ_CONTROL_3_REG (0x2203) follows with an offset only, no named fields.
+ */
+#define REG_A3XX_HLSQ_CONTROL_2_REG                            0x00002202
+#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK       0xfc000000
+#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT      26
+static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val)
+{ /* Six-bit field at the top of the register: the low six bits of val
+   * are kept, anything above is shifted out. */
+       return ((val) << A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK;
+}
+
+#define REG_A3XX_HLSQ_CONTROL_3_REG                            0x00002203
+/* HLSQ_VS_CONTROL_REG: register 0x2204.
+ *   CONSTLENGTH       bits  0..11
+ *   CONSTSTARTOFFSET  bits 12..23
+ *   INSTRLENGTH       bits 24..31
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ */
+#define REG_A3XX_HLSQ_VS_CONTROL_REG                           0x00002204
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK             0x00000fff
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT            0
+static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val)
+{
+       return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK;
+}
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK                0x00fff000
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT       12
+static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
+{
+       return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK;
+}
+#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK             0xff000000
+#define A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT            24
+static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val)
+{
+       return ((val) << A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH__MASK;
+}
+/* HLSQ_FS_CONTROL_REG: register 0x2205, same field layout as the register
+ * at 0x2204.
+ *   CONSTLENGTH       bits  0..11
+ *   CONSTSTARTOFFSET  bits 12..23
+ *   INSTRLENGTH       bits 24..31
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ */
+#define REG_A3XX_HLSQ_FS_CONTROL_REG                           0x00002205
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK             0x00000fff
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT            0
+static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val)
+{
+       return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK;
+}
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK                0x00fff000
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT       12
+static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
+{
+       return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK;
+}
+#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK             0xff000000
+#define A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT            24
+static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val)
+{
+       return ((val) << A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH__MASK;
+}
+/* HLSQ_CONST_VSPRESV_RANGE_REG: register 0x2206.
+ *   STARTENTRY  bits  0..15
+ *   ENDENTRY    bits 16..31
+ * Each helper keeps the low 16 bits of val and places them in its half of
+ * the register; higher bits of val are dropped.
+ */
+#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG                  0x00002206
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK     0x0000ffff
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT    0
+static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
+{
+       return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK;
+}
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK       0xffff0000
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT      16
+static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
+{
+       return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK;
+}
+/* HLSQ_CONST_FSPRESV_RANGE_REG: register 0x2207, same field layout as the
+ * register at 0x2206.
+ *   STARTENTRY  bits  0..15
+ *   ENDENTRY    bits 16..31
+ * Each helper keeps the low 16 bits of val and places them in its half of
+ * the register; higher bits of val are dropped.
+ */
+#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG                  0x00002207
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK     0x0000ffff
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT    0
+static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
+{
+       return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK;
+}
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK       0xffff0000
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT      16
+static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
+{
+       return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK;
+}
+/* HLSQ_CL_* register offsets, 0x220a..0x221a (not contiguous).  Offsets
+ * only: no bitfields are described for these registers in this header.
+ * NOTE(review): KERNEL_GROUP_X (0x2215) and KERNEL_GROUP_Z (0x2217) are
+ * defined but nothing names 0x2216 -- presumably a KERNEL_GROUP_Y register
+ * is simply not listed; confirm against the register database.
+ */
+#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG                         0x0000220a
+
+#define REG_A3XX_HLSQ_CL_NDRANGE_1_REG                         0x0000220b
+
+#define REG_A3XX_HLSQ_CL_NDRANGE_2_REG                         0x0000220c
+
+#define REG_A3XX_HLSQ_CL_CONTROL_0_REG                         0x00002211
+
+#define REG_A3XX_HLSQ_CL_CONTROL_1_REG                         0x00002212
+
+#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG                      0x00002214
+
+#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG                    0x00002215
+
+#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG                    0x00002217
+
+#define REG_A3XX_HLSQ_CL_WG_OFFSET_REG                         0x0000221a
+/* VFD_CONTROL_0: register 0x2240.
+ *   TOTALATTRTOVS      bits  0..17
+ *   PACKETSIZE         bits 18..21
+ *   STRMDECINSTRCNT    bits 22..26
+ *   STRMFETCHINSTRCNT  bits 27..31
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ */
+#define REG_A3XX_VFD_CONTROL_0                                 0x00002240
+#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK                 0x0003ffff
+#define A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT                        0
+static inline uint32_t A3XX_VFD_CONTROL_0_TOTALATTRTOVS(uint32_t val)
+{
+       return ((val) << A3XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT) & A3XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK;
+}
+#define A3XX_VFD_CONTROL_0_PACKETSIZE__MASK                    0x003c0000
+#define A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT                   18
+static inline uint32_t A3XX_VFD_CONTROL_0_PACKETSIZE(uint32_t val)
+{
+       return ((val) << A3XX_VFD_CONTROL_0_PACKETSIZE__SHIFT) & A3XX_VFD_CONTROL_0_PACKETSIZE__MASK;
+}
+#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK               0x07c00000
+#define A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT              22
+static inline uint32_t A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(uint32_t val)
+{
+       return ((val) << A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMDECINSTRCNT__MASK;
+}
+#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK             0xf8000000
+#define A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT            27
+static inline uint32_t A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(uint32_t val)
+{
+       return ((val) << A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__SHIFT) & A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT__MASK;
+}
+/* VFD_CONTROL_1: register 0x2241.
+ *   MAXSTORAGE  bits  0..15
+ *   REGID4VTX   bits 16..23
+ *   REGID4INST  bits 24..31
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ */
+#define REG_A3XX_VFD_CONTROL_1                                 0x00002241
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK                    0x0000ffff
+#define A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT                   0
+static inline uint32_t A3XX_VFD_CONTROL_1_MAXSTORAGE(uint32_t val)
+{
+       return ((val) << A3XX_VFD_CONTROL_1_MAXSTORAGE__SHIFT) & A3XX_VFD_CONTROL_1_MAXSTORAGE__MASK;
+}
+#define A3XX_VFD_CONTROL_1_REGID4VTX__MASK                     0x00ff0000
+#define A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT                    16
+static inline uint32_t A3XX_VFD_CONTROL_1_REGID4VTX(uint32_t val)
+{
+       return ((val) << A3XX_VFD_CONTROL_1_REGID4VTX__SHIFT) & A3XX_VFD_CONTROL_1_REGID4VTX__MASK;
+}
+#define A3XX_VFD_CONTROL_1_REGID4INST__MASK                    0xff000000
+#define A3XX_VFD_CONTROL_1_REGID4INST__SHIFT                   24
+static inline uint32_t A3XX_VFD_CONTROL_1_REGID4INST(uint32_t val)
+{
+       return ((val) << A3XX_VFD_CONTROL_1_REGID4INST__SHIFT) & A3XX_VFD_CONTROL_1_REGID4INST__MASK;
+}
+/* VFD register offsets 0x2242..0x2245 (offsets only, no named fields),
+ * followed by the VFD_FETCH array: entry i0 starts at 0x2246 + 2*i0, i.e.
+ * each entry spans two consecutive register offsets.
+ */
+#define REG_A3XX_VFD_INDEX_MIN                                 0x00002242
+
+#define REG_A3XX_VFD_INDEX_MAX                                 0x00002243
+
+#define REG_A3XX_VFD_INSTANCEID_OFFSET                         0x00002244
+
+#define REG_A3XX_VFD_INDEX_OFFSET                              0x00002245
+
+#define REG_A3XX_VFD_FETCH(i0)                                (0x00002246 + 0x2*(i0))
+/* VFD_FETCH_INSTR_0(i0): register 0x2246 + 2*i0.
+ *   FETCHSIZE   bits  0..6
+ *   BUFSTRIDE   bits  7..16
+ *   SWITCHNEXT  bit  17      (flag, OR it in directly)
+ *   INDEXCODE   bits 18..23
+ *   STEPRATE    bits 24..31
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ * VFD_FETCH_INSTR_1(i0) is the next offset (0x2247 + 2*i0) and has no
+ * named fields here.  VFD_DECODE(i0) starts a separate array at
+ * 0x2266 + i0.
+ */
+#define REG_A3XX_VFD_FETCH_INSTR_0(i0)                        (0x00002246 + 0x2*(i0))
+#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK                 0x0000007f
+#define A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT                        0
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(uint32_t val)
+{
+       return ((val) << A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_FETCHSIZE__MASK;
+}
+#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK                 0x0001ff80
+#define A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT                        7
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(uint32_t val)
+{
+       return ((val) << A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE__MASK;
+}
+#define A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT                      0x00020000
+#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK                 0x00fc0000
+#define A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT                        18
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_INDEXCODE(uint32_t val)
+{
+       return ((val) << A3XX_VFD_FETCH_INSTR_0_INDEXCODE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_INDEXCODE__MASK;
+}
+#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK                  0xff000000
+#define A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT                 24
+static inline uint32_t A3XX_VFD_FETCH_INSTR_0_STEPRATE(uint32_t val)
+{
+       return ((val) << A3XX_VFD_FETCH_INSTR_0_STEPRATE__SHIFT) & A3XX_VFD_FETCH_INSTR_0_STEPRATE__MASK;
+}
+
+#define REG_A3XX_VFD_FETCH_INSTR_1(i0)                        (0x00002247 + 0x2*(i0))
+
+#define REG_A3XX_VFD_DECODE(i0)                                       (0x00002266 + 0x1*(i0))
+/* VFD_DECODE_INSTR(i0): register 0x2266 + i0.
+ *   WRITEMASK      bits  0..3
+ *   CONSTFILL      bit   4      (flag)
+ *   FORMAT         bits  6..11  (enum a3xx_vtx_fmt)
+ *   REGID          bits 12..19
+ *   SHIFTCNT       bits 24..28
+ *   LASTCOMPVALID  bit  29      (flag)
+ *   SWITCHNEXT     bit  30      (flag)
+ * Flags are single-bit constants to OR in directly.  Each helper shifts
+ * val to its field's first bit and ANDs with the field mask; bits that do
+ * not fit the field are silently dropped.
+ */
+#define REG_A3XX_VFD_DECODE_INSTR(i0)                         (0x00002266 + 0x1*(i0))
+#define A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK                  0x0000000f
+#define A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT                 0
+static inline uint32_t A3XX_VFD_DECODE_INSTR_WRITEMASK(uint32_t val)
+{
+       return ((val) << A3XX_VFD_DECODE_INSTR_WRITEMASK__SHIFT) & A3XX_VFD_DECODE_INSTR_WRITEMASK__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_CONSTFILL                                0x00000010
+#define A3XX_VFD_DECODE_INSTR_FORMAT__MASK                     0x00000fc0
+#define A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT                    6
+static inline uint32_t A3XX_VFD_DECODE_INSTR_FORMAT(enum a3xx_vtx_fmt val)
+{
+       return ((val) << A3XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A3XX_VFD_DECODE_INSTR_FORMAT__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_REGID__MASK                      0x000ff000
+#define A3XX_VFD_DECODE_INSTR_REGID__SHIFT                     12
+static inline uint32_t A3XX_VFD_DECODE_INSTR_REGID(uint32_t val)
+{
+       return ((val) << A3XX_VFD_DECODE_INSTR_REGID__SHIFT) & A3XX_VFD_DECODE_INSTR_REGID__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK                   0x1f000000
+#define A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT                  24
+static inline uint32_t A3XX_VFD_DECODE_INSTR_SHIFTCNT(uint32_t val)
+{
+       return ((val) << A3XX_VFD_DECODE_INSTR_SHIFTCNT__SHIFT) & A3XX_VFD_DECODE_INSTR_SHIFTCNT__MASK;
+}
+#define A3XX_VFD_DECODE_INSTR_LASTCOMPVALID                    0x20000000
+#define A3XX_VFD_DECODE_INSTR_SWITCHNEXT                       0x40000000
+/* VFD_VS_THREADING_THRESHOLD: register 0x227e.
+ *   REGID_THRESHOLD  bits 0..3
+ *   REGID_VTXCNT     bits 8..15
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ */
+#define REG_A3XX_VFD_VS_THREADING_THRESHOLD                    0x0000227e
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK  0x0000000f
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT 0
+static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(uint32_t val)
+{
+       return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD__MASK;
+}
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK     0x0000ff00
+#define A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT    8
+static inline uint32_t A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(uint32_t val)
+{
+       return ((val) << A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__SHIFT) & A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT__MASK;
+}
+/* VPC_ATTR: register 0x2280.
+ *   TOTALATTR   bits  0..11
+ *   THRDASSIGN  bits 12..27
+ *   LMSIZE      bits 28..31
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask; bits that do not fit the field are silently dropped.
+ */
+#define REG_A3XX_VPC_ATTR                                      0x00002280
+#define A3XX_VPC_ATTR_TOTALATTR__MASK                          0x00000fff
+#define A3XX_VPC_ATTR_TOTALATTR__SHIFT                         0
+static inline uint32_t A3XX_VPC_ATTR_TOTALATTR(uint32_t val)
+{
+       return ((val) << A3XX_VPC_ATTR_TOTALATTR__SHIFT) & A3XX_VPC_ATTR_TOTALATTR__MASK;
+}
+#define A3XX_VPC_ATTR_THRDASSIGN__MASK                         0x0ffff000
+#define A3XX_VPC_ATTR_THRDASSIGN__SHIFT                                12
+static inline uint32_t A3XX_VPC_ATTR_THRDASSIGN(uint32_t val)
+{
+       return ((val) << A3XX_VPC_ATTR_THRDASSIGN__SHIFT) & A3XX_VPC_ATTR_THRDASSIGN__MASK;
+}
+#define A3XX_VPC_ATTR_LMSIZE__MASK                             0xf0000000
+#define A3XX_VPC_ATTR_LMSIZE__SHIFT                            28
+static inline uint32_t A3XX_VPC_ATTR_LMSIZE(uint32_t val)
+{
+       return ((val) << A3XX_VPC_ATTR_LMSIZE__SHIFT) & A3XX_VPC_ATTR_LMSIZE__MASK;
+}
+/* VPC_PACK: register 0x2281.
+ *   NUMFPNONPOSVAR  bits  8..15
+ *   NUMNONPOSVSVAR  bits 16..23
+ * Bits 0..7 and 24..31 have no named field here.  Each helper shifts val
+ * to its field's first bit and ANDs with the field mask, keeping only the
+ * low eight bits of val.
+ */
+#define REG_A3XX_VPC_PACK                                      0x00002281
+#define A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK                     0x0000ff00
+#define A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT                    8
+static inline uint32_t A3XX_VPC_PACK_NUMFPNONPOSVAR(uint32_t val)
+{
+       return ((val) << A3XX_VPC_PACK_NUMFPNONPOSVAR__SHIFT) & A3XX_VPC_PACK_NUMFPNONPOSVAR__MASK;
+}
+#define A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK                     0x00ff0000
+#define A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT                    16
+static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val)
+{
+       return ((val) << A3XX_VPC_PACK_NUMNONPOSVSVAR__SHIFT) & A3XX_VPC_PACK_NUMNONPOSVSVAR__MASK;
+}
+/* VPC varying register offsets (no bitfields described here).
+ *   VPC_VARYING_INTERP(i0)  / _INTERP_MODE(i0)   0x2282 + i0
+ *   VPC_VARYING_PS_REPL(i0) / _PS_REPL_MODE(i0)  0x2286 + i0
+ *   VPC_VARY_CYLWRAP_ENABLE_0 / _1               0x228a, 0x228b
+ * In each indexed pair both macros expand to the same offset: one names
+ * the array, the other the register inside each array element.
+ */
+#define REG_A3XX_VPC_VARYING_INTERP(i0)                               (0x00002282 + 0x1*(i0))
+
+#define REG_A3XX_VPC_VARYING_INTERP_MODE(i0)                  (0x00002282 + 0x1*(i0))
+
+#define REG_A3XX_VPC_VARYING_PS_REPL(i0)                      (0x00002286 + 0x1*(i0))
+
+#define REG_A3XX_VPC_VARYING_PS_REPL_MODE(i0)                 (0x00002286 + 0x1*(i0))
+
+#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0                     0x0000228a
+
+#define REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_1                     0x0000228b
+/* SP_SP_CTRL_REG: register 0x22c0.
+ *   RESOLVE    bit  16      (flag, OR it in directly)
+ *   CONSTMODE  bits 18..19
+ *   SLEEPMODE  bits 20..21
+ *   LOMODE     bits 22..23
+ * Each helper shifts val to its field's first bit and ANDs with the field
+ * mask, keeping only the low two bits of val.
+ */
+#define REG_A3XX_SP_SP_CTRL_REG                                        0x000022c0
+#define A3XX_SP_SP_CTRL_REG_RESOLVE                            0x00010000
+#define A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK                    0x000c0000
+#define A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT                   18
+static inline uint32_t A3XX_SP_SP_CTRL_REG_CONSTMODE(uint32_t val)
+{
+       return ((val) << A3XX_SP_SP_CTRL_REG_CONSTMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_CONSTMODE__MASK;
+}
+#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK                    0x00300000
+#define A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT                   20
+static inline uint32_t A3XX_SP_SP_CTRL_REG_SLEEPMODE(uint32_t val)
+{
+       return ((val) << A3XX_SP_SP_CTRL_REG_SLEEPMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_SLEEPMODE__MASK;
+}
+#define A3XX_SP_SP_CTRL_REG_LOMODE__MASK                       0x00c00000
+#define A3XX_SP_SP_CTRL_REG_LOMODE__SHIFT                      22
+static inline uint32_t A3XX_SP_SP_CTRL_REG_LOMODE(uint32_t val)
+{
+       return ((val) << A3XX_SP_SP_CTRL_REG_LOMODE__SHIFT) & A3XX_SP_SP_CTRL_REG_LOMODE__MASK;
+}
+
+#define REG_A3XX_SP_VS_CTRL_REG0                               0x000022c4
+#define A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK                  0x00000001
+#define A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT                 0
+/* THREADMODE: shift val into place and clip it to the single-bit field. */
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val)
+{
+       const uint32_t field = val << A3XX_SP_VS_CTRL_REG0_THREADMODE__SHIFT;
+       return field & A3XX_SP_VS_CTRL_REG0_THREADMODE__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK             0x00000002
+#define A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT            1
+/* INSTRBUFFERMODE: shift val into place and clip it to the single-bit field. */
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val)
+{
+       const uint32_t field = val << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT;
+       return field & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID                      0x00000004
+#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK            0x000003f0
+#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT           4
+/* HALFREGFOOTPRINT: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT;
+       return field & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK            0x0003fc00
+#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT           10
+/* FULLREGFOOTPRINT: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT;
+       return field & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK             0x000c0000
+#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT            18
+/* INOUTREGOVERLAP: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT;
+       return field & A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK                  0x00100000
+#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT                 20
+/* THREADSIZE: shift val into place and clip it to the single-bit field. */
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
+{
+       const uint32_t field = val << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT;
+       return field & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE                   0x00200000
+#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE                      0x00400000
+#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK                      0xff000000
+#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT                     24
+/* LENGTH: shift val into the top byte; higher bits of val fall off the end. */
+static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT;
+       return field & A3XX_SP_VS_CTRL_REG0_LENGTH__MASK;
+}
+
+#define REG_A3XX_SP_VS_CTRL_REG1                               0x000022c5
+#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK                 0x000003ff
+#define A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT                        0
+/* CONSTLENGTH: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__SHIFT;
+       return field & A3XX_SP_VS_CTRL_REG1_CONSTLENGTH__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK              0x000ffc00
+#define A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT             10
+/* CONSTFOOTPRINT: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT;
+       return field & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK;
+}
+#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK          0x3f000000
+#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT         24
+/* INITIALOUTSTANDING: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT;
+       return field & A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK;
+}
+
+#define REG_A3XX_SP_VS_PARAM_REG                               0x000022c6
+#define A3XX_SP_VS_PARAM_REG_POSREGID__MASK                    0x000000ff
+#define A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT                   0
+/* POSREGID: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_PARAM_REG_POSREGID(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_PARAM_REG_POSREGID__SHIFT;
+       return field & A3XX_SP_VS_PARAM_REG_POSREGID__MASK;
+}
+#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK                  0x0000ff00
+#define A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT                 8
+/* PSIZEREGID: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT;
+       return field & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK;
+}
+#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK               0xfff00000
+#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT              20
+/* TOTALVSOUTVAR: shift val into the top 12 bits; higher bits of val fall off. */
+static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT;
+       return field & A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK;
+}
+
+/* Register array, stride 1; each element packs an A and a B (REGID, COMPMASK) pair. */
+#define REG_A3XX_SP_VS_OUT(i0)                                (0x000022c7 + 0x1*(i0))
+
+#define REG_A3XX_SP_VS_OUT_REG(i0)                            (0x000022c7 + 0x1*(i0))
+#define A3XX_SP_VS_OUT_REG_A_REGID__MASK                       0x000001ff
+#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT                      0
+/* A_REGID: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT;
+       return field & A3XX_SP_VS_OUT_REG_A_REGID__MASK;
+}
+#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK                    0x00001e00
+#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT                   9
+/* A_COMPMASK: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT;
+       return field & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK;
+}
+#define A3XX_SP_VS_OUT_REG_B_REGID__MASK                       0x01ff0000
+#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT                      16
+/* B_REGID: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT;
+       return field & A3XX_SP_VS_OUT_REG_B_REGID__MASK;
+}
+#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK                    0x1e000000
+#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT                   25
+/* B_COMPMASK: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT;
+       return field & A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK;
+}
+
+/* Register array, stride 1; each element packs four byte-wide OUTLOC fields. */
+#define REG_A3XX_SP_VS_VPC_DST(i0)                            (0x000022d0 + 0x1*(i0))
+
+#define REG_A3XX_SP_VS_VPC_DST_REG(i0)                        (0x000022d0 + 0x1*(i0))
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK                   0x000000ff
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT                  0
+/* OUTLOC0: shift val into byte 0 and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT;
+       return field & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK;
+}
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK                   0x0000ff00
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT                  8
+/* OUTLOC1: shift val into byte 1 and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT;
+       return field & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK;
+}
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK                   0x00ff0000
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT                  16
+/* OUTLOC2: shift val into byte 2 and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT;
+       return field & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK;
+}
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK                   0xff000000
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT                  24
+/* OUTLOC3: shift val into byte 3; higher bits of val fall off the end. */
+static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT;
+       return field & A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK;
+}
+
+#define REG_A3XX_SP_VS_OBJ_OFFSET_REG                          0x000022d4
+#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK      0x01ff0000
+#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT     16
+/* CONSTOBJECTOFFSET: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT;
+       return field & A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
+}
+#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK                0xfe000000
+#define A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT       25
+/* SHADEROBJOFFSET: shift val into the top 7 bits; higher bits of val fall off. */
+static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT;
+       return field & A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
+}
+
+/* Four consecutive SP_VS registers (0x22d5-0x22d8); no bitfield helpers are defined for them here. */
+#define REG_A3XX_SP_VS_OBJ_START_REG                           0x000022d5
+
+#define REG_A3XX_SP_VS_PVT_MEM_CTRL_REG                                0x000022d6
+
+#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG                                0x000022d7
+
+#define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG                                0x000022d8
+
+#define REG_A3XX_SP_VS_LENGTH_REG                              0x000022df
+#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK               0xffffffff
+#define A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT              0
+/* SHADERLENGTH spans the whole register, so val passes through unchanged. */
+static inline uint32_t A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__SHIFT;
+       return field & A3XX_SP_VS_LENGTH_REG_SHADERLENGTH__MASK;
+}
+
+#define REG_A3XX_SP_FS_CTRL_REG0                               0x000022e0
+#define A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK                  0x00000001
+#define A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT                 0
+/* THREADMODE: shift val into place and clip it to the single-bit field. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADMODE(enum a3xx_threadmode val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG0_THREADMODE__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG0_THREADMODE__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK             0x00000002
+#define A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT            1
+/* INSTRBUFFERMODE: shift val into place and clip it to the single-bit field. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffermode val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_CACHEINVALID                      0x00000004
+#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK            0x000003f0
+#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT           4
+/* HALFREGFOOTPRINT: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK            0x0003fc00
+#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT           10
+/* FULLREGFOOTPRINT: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK             0x000c0000
+#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT            18
+/* INOUTREGOVERLAP: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK                  0x00100000
+#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT                 20
+/* THREADSIZE: shift val into place and clip it to the single-bit field. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE                   0x00200000
+#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE                      0x00400000
+#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK                      0xff000000
+#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT                     24
+/* LENGTH: shift val into the top byte; higher bits of val fall off the end. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG0_LENGTH__MASK;
+}
+
+#define REG_A3XX_SP_FS_CTRL_REG1                               0x000022e1
+#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK                 0x000003ff
+#define A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT                        0
+/* CONSTLENGTH: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG1_CONSTLENGTH__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK              0x000ffc00
+#define A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT             10
+/* CONSTFOOTPRINT: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK          0x00f00000
+#define A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT         20
+/* INITIALOUTSTANDING: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK;
+}
+#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK           0x3f000000
+#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT          24
+/* HALFPRECVAROFFSET: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT;
+       return field & A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK;
+}
+
+#define REG_A3XX_SP_FS_OBJ_OFFSET_REG                          0x000022e2
+#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK      0x01ff0000
+#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT     16
+/* CONSTOBJECTOFFSET: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT;
+       return field & A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK;
+}
+#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK                0xfe000000
+#define A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT       25
+/* SHADEROBJOFFSET: shift val into the top 7 bits; higher bits of val fall off. */
+static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__SHIFT;
+       return field & A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET__MASK;
+}
+
+/* SP_FS register offsets in 0x22e3-0x22ec; no bitfield helpers are defined for them here. */
+#define REG_A3XX_SP_FS_OBJ_START_REG                           0x000022e3
+
+#define REG_A3XX_SP_FS_PVT_MEM_CTRL_REG                                0x000022e4
+
+#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG                                0x000022e5
+
+#define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG                                0x000022e6
+
+#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0                    0x000022e8
+
+#define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_1                    0x000022e9
+
+#define REG_A3XX_SP_FS_OUTPUT_REG                              0x000022ec
+
+/* Register array, stride 1; each element holds a REGID byte and a HALF_PRECISION flag. */
+#define REG_A3XX_SP_FS_MRT(i0)                                (0x000022f0 + 0x1*(i0))
+
+#define REG_A3XX_SP_FS_MRT_REG(i0)                            (0x000022f0 + 0x1*(i0))
+#define A3XX_SP_FS_MRT_REG_REGID__MASK                         0x000000ff
+#define A3XX_SP_FS_MRT_REG_REGID__SHIFT                                0
+/* REGID: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_FS_MRT_REG_REGID(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_MRT_REG_REGID__SHIFT;
+       return field & A3XX_SP_FS_MRT_REG_REGID__MASK;
+}
+#define A3XX_SP_FS_MRT_REG_HALF_PRECISION                      0x00000100
+
+/* Register array, stride 1; each element carries an MRTFORMAT field. */
+#define REG_A3XX_SP_FS_IMAGE_OUTPUT(i0)                               (0x000022f4 + 0x1*(i0))
+
+#define REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i0)                   (0x000022f4 + 0x1*(i0))
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK            0x0000003f
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT           0
+/* MRTFORMAT: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(enum a3xx_color_fmt val)
+{
+       const uint32_t field = val << A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__SHIFT;
+       return field & A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT__MASK;
+}
+
+#define REG_A3XX_SP_FS_LENGTH_REG                              0x000022ff
+#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK               0xffffffff
+#define A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT              0
+/* SHADERLENGTH spans the whole register, so val passes through unchanged. */
+static inline uint32_t A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(uint32_t val)
+{
+       const uint32_t field = val << A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__SHIFT;
+       return field & A3XX_SP_FS_LENGTH_REG_SHADERLENGTH__MASK;
+}
+
+#define REG_A3XX_TPL1_TP_VS_TEX_OFFSET                         0x00002340
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK         0x000000ff
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT                0
+/* SAMPLEROFFSET: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val)
+{
+       const uint32_t field = val << A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__SHIFT;
+       return field & A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET__MASK;
+}
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK          0x0000ff00
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT         8
+/* MEMOBJOFFSET: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val)
+{
+       const uint32_t field = val << A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__SHIFT;
+       return field & A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET__MASK;
+}
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK          0xffff0000
+#define A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT         16
+/* BASETABLEPTR: shift val into the upper half-word; higher bits of val fall off. */
+static inline uint32_t A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(uint32_t val)
+{
+       const uint32_t field = val << A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__SHIFT;
+       return field & A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR__MASK;
+}
+
+#define REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR             0x00002341
+
+#define REG_A3XX_TPL1_TP_FS_TEX_OFFSET                         0x00002342
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK         0x000000ff
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT                0
+/* SAMPLEROFFSET: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(uint32_t val)
+{
+       const uint32_t field = val << A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__SHIFT;
+       return field & A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET__MASK;
+}
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK          0x0000ff00
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT         8
+/* MEMOBJOFFSET: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(uint32_t val)
+{
+       const uint32_t field = val << A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__SHIFT;
+       return field & A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET__MASK;
+}
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK          0xffff0000
+#define A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT         16
+/* BASETABLEPTR: shift val into the upper half-word; higher bits of val fall off. */
+static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val)
+{
+       const uint32_t field = val << A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__SHIFT;
+       return field & A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR__MASK;
+}
+
+#define REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR             0x00002343
+
+/* VBIF register offsets (0x3001-0x305f); no bitfield helpers are defined for them here. */
+#define REG_A3XX_VBIF_CLKON                                    0x00003001
+
+#define REG_A3XX_VBIF_FIXED_SORT_EN                            0x0000300c
+
+#define REG_A3XX_VBIF_FIXED_SORT_SEL0                          0x0000300d
+
+#define REG_A3XX_VBIF_FIXED_SORT_SEL1                          0x0000300e
+
+#define REG_A3XX_VBIF_ABIT_SORT                                        0x0000301c
+
+#define REG_A3XX_VBIF_ABIT_SORT_CONF                           0x0000301d
+
+#define REG_A3XX_VBIF_GATE_OFF_WRREQ_EN                                0x0000302a
+
+#define REG_A3XX_VBIF_IN_RD_LIM_CONF0                          0x0000302c
+
+#define REG_A3XX_VBIF_IN_RD_LIM_CONF1                          0x0000302d
+
+#define REG_A3XX_VBIF_IN_WR_LIM_CONF0                          0x00003030
+
+#define REG_A3XX_VBIF_IN_WR_LIM_CONF1                          0x00003031
+
+#define REG_A3XX_VBIF_OUT_RD_LIM_CONF0                         0x00003034
+
+#define REG_A3XX_VBIF_OUT_WR_LIM_CONF0                         0x00003035
+
+#define REG_A3XX_VBIF_DDR_OUT_MAX_BURST                                0x00003036
+
+#define REG_A3XX_VBIF_ARB_CTL                                  0x0000303c
+
+#define REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB                      0x00003049
+
+#define REG_A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0                   0x00003058
+
+#define REG_A3XX_VBIF_OUT_AXI_AOOO_EN                          0x0000305e
+
+#define REG_A3XX_VBIF_OUT_AXI_AOOO                             0x0000305f
+
+#define REG_A3XX_VSC_BIN_SIZE                                  0x00000c01
+#define A3XX_VSC_BIN_SIZE_WIDTH__MASK                          0x0000001f
+#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT                         0
+/*
+ * WIDTH: val is first divided by 32 (>> 5), so its low five bits are discarded
+ * and the field stores the size in units of 32.
+ */
+static inline uint32_t A3XX_VSC_BIN_SIZE_WIDTH(uint32_t val)
+{
+       const uint32_t scaled = val >> 5;
+       const uint32_t field = scaled << A3XX_VSC_BIN_SIZE_WIDTH__SHIFT;
+       return field & A3XX_VSC_BIN_SIZE_WIDTH__MASK;
+}
+#define A3XX_VSC_BIN_SIZE_HEIGHT__MASK                         0x000003e0
+#define A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT                                5
+/*
+ * HEIGHT: val is first divided by 32 (>> 5), so its low five bits are discarded
+ * and the field stores the size in units of 32.
+ */
+static inline uint32_t A3XX_VSC_BIN_SIZE_HEIGHT(uint32_t val)
+{
+       const uint32_t scaled = val >> 5;
+       const uint32_t field = scaled << A3XX_VSC_BIN_SIZE_HEIGHT__SHIFT;
+       return field & A3XX_VSC_BIN_SIZE_HEIGHT__MASK;
+}
+
+#define REG_A3XX_VSC_SIZE_ADDRESS                              0x00000c02
+
+/* Register array with a stride of 3 registers per element; CONFIG is the first of each triple. */
+#define REG_A3XX_VSC_PIPE(i0)                                 (0x00000c06 + 0x3*(i0))
+
+#define REG_A3XX_VSC_PIPE_CONFIG(i0)                          (0x00000c06 + 0x3*(i0))
+#define A3XX_VSC_PIPE_CONFIG_X__MASK                           0x000003ff
+#define A3XX_VSC_PIPE_CONFIG_X__SHIFT                          0
+/* X: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_X(uint32_t val)
+{
+       const uint32_t field = val << A3XX_VSC_PIPE_CONFIG_X__SHIFT;
+       return field & A3XX_VSC_PIPE_CONFIG_X__MASK;
+}
+#define A3XX_VSC_PIPE_CONFIG_Y__MASK                           0x000ffc00
+#define A3XX_VSC_PIPE_CONFIG_Y__SHIFT                          10
+/* Y: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_Y(uint32_t val)
+{
+       const uint32_t field = val << A3XX_VSC_PIPE_CONFIG_Y__SHIFT;
+       return field & A3XX_VSC_PIPE_CONFIG_Y__MASK;
+}
+#define A3XX_VSC_PIPE_CONFIG_W__MASK                           0x00f00000
+#define A3XX_VSC_PIPE_CONFIG_W__SHIFT                          20
+/* W: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_W(uint32_t val)
+{
+       const uint32_t field = val << A3XX_VSC_PIPE_CONFIG_W__SHIFT;
+       return field & A3XX_VSC_PIPE_CONFIG_W__MASK;
+}
+#define A3XX_VSC_PIPE_CONFIG_H__MASK                           0x0f000000
+#define A3XX_VSC_PIPE_CONFIG_H__SHIFT                          24
+/* H: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_VSC_PIPE_CONFIG_H(uint32_t val)
+{
+       const uint32_t field = val << A3XX_VSC_PIPE_CONFIG_H__SHIFT;
+       return field & A3XX_VSC_PIPE_CONFIG_H__MASK;
+}
+
+/* Array registers with a stride of 3 registers per element (i0 selects the element). */
+#define REG_A3XX_VSC_PIPE_DATA_ADDRESS(i0)                    (0x00000c07 + 0x3*(i0))
+
+#define REG_A3XX_VSC_PIPE_DATA_LENGTH(i0)                     (0x00000c08 + 0x3*(i0))
+
+/* Named only by their offset; no description is available in this header. */
+#define REG_A3XX_UNKNOWN_0C3D                                  0x00000c3d
+
+#define REG_A3XX_UNKNOWN_0C81                                  0x00000c81
+
+/* Register array with a stride of 4: element i0 exposes X, Y, Z and W at consecutive offsets. */
+#define REG_A3XX_GRAS_CL_USER_PLANE(i0)                               (0x00000ca0 + 0x4*(i0))
+
+#define REG_A3XX_GRAS_CL_USER_PLANE_X(i0)                     (0x00000ca0 + 0x4*(i0))
+
+#define REG_A3XX_GRAS_CL_USER_PLANE_Y(i0)                     (0x00000ca1 + 0x4*(i0))
+
+#define REG_A3XX_GRAS_CL_USER_PLANE_Z(i0)                     (0x00000ca2 + 0x4*(i0))
+
+#define REG_A3XX_GRAS_CL_USER_PLANE_W(i0)                     (0x00000ca3 + 0x4*(i0))
+
+#define REG_A3XX_RB_GMEM_BASE_ADDR                             0x00000cc0
+
+#define REG_A3XX_RB_WINDOW_SIZE                                        0x00000ce0
+#define A3XX_RB_WINDOW_SIZE_WIDTH__MASK                                0x00003fff
+#define A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT                       0
+/* WIDTH: shift val into place and clip it to the 14-bit field. */
+static inline uint32_t A3XX_RB_WINDOW_SIZE_WIDTH(uint32_t val)
+{
+       const uint32_t field = val << A3XX_RB_WINDOW_SIZE_WIDTH__SHIFT;
+       return field & A3XX_RB_WINDOW_SIZE_WIDTH__MASK;
+}
+#define A3XX_RB_WINDOW_SIZE_HEIGHT__MASK                       0x0fffc000
+#define A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT                      14
+/* HEIGHT: shift val into place and clip it to the 14-bit field. */
+static inline uint32_t A3XX_RB_WINDOW_SIZE_HEIGHT(uint32_t val)
+{
+       const uint32_t field = val << A3XX_RB_WINDOW_SIZE_HEIGHT__SHIFT;
+       return field & A3XX_RB_WINDOW_SIZE_HEIGHT__MASK;
+}
+
+/* Bare register offsets in the 0x0e00-0x0e82 range; UNKNOWN_* entries are named only by their offset. */
+#define REG_A3XX_UNKNOWN_0E00                                  0x00000e00
+
+#define REG_A3XX_UNKNOWN_0E43                                  0x00000e43
+
+#define REG_A3XX_VFD_PERFCOUNTER0_SELECT                       0x00000e44
+
+#define REG_A3XX_VPC_VPC_DEBUG_RAM_SEL                         0x00000e61
+
+#define REG_A3XX_VPC_VPC_DEBUG_RAM_READ                                0x00000e62
+
+#define REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG                   0x00000e82
+
+#define REG_A3XX_UCHE_CACHE_INVALIDATE0_REG                    0x00000ea0
+#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK             0x0fffffff
+#define A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT            0
+/* ADDR: keep the low 28 bits of val; the top four bits are dropped. */
+static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(uint32_t val)
+{
+       const uint32_t field = val << A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__SHIFT;
+       return field & A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR__MASK;
+}
+
+#define REG_A3XX_UCHE_CACHE_INVALIDATE1_REG                    0x00000ea1
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK             0x0fffffff
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT            0
+/* ADDR: keep the low 28 bits of val; the top four bits are dropped. */
+static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(uint32_t val)
+{
+       const uint32_t field = val << A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__SHIFT;
+       return field & A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR__MASK;
+}
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK           0x30000000
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT          28
+/* OPCODE: shift val into place and clip it to the 2-bit field. */
+static inline uint32_t A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(enum a3xx_cache_opcode val)
+{
+       const uint32_t field = val << A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__SHIFT;
+       return field & A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE__MASK;
+}
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE           0x80000000
+
+/* Eight consecutive SP_PERFCOUNTERn_SELECT registers, 0x0ec4-0x0ecb. */
+#define REG_A3XX_SP_PERFCOUNTER0_SELECT                                0x00000ec4
+
+#define REG_A3XX_SP_PERFCOUNTER1_SELECT                                0x00000ec5
+
+#define REG_A3XX_SP_PERFCOUNTER2_SELECT                                0x00000ec6
+
+#define REG_A3XX_SP_PERFCOUNTER3_SELECT                                0x00000ec7
+
+#define REG_A3XX_SP_PERFCOUNTER4_SELECT                                0x00000ec8
+
+#define REG_A3XX_SP_PERFCOUNTER5_SELECT                                0x00000ec9
+
+#define REG_A3XX_SP_PERFCOUNTER6_SELECT                                0x00000eca
+
+#define REG_A3XX_SP_PERFCOUNTER7_SELECT                                0x00000ecb
+
+/* Named only by their offset; no description is available in this header. */
+#define REG_A3XX_UNKNOWN_0EE0                                  0x00000ee0
+
+#define REG_A3XX_UNKNOWN_0F03                                  0x00000f03
+
+/* TEX_SAMP_n values are small indices starting at 0, unlike the register offsets above. */
+#define REG_A3XX_TEX_SAMP_0                                    0x00000000
+#define A3XX_TEX_SAMP_0_XY_MAG__MASK                           0x0000000c
+#define A3XX_TEX_SAMP_0_XY_MAG__SHIFT                          2
+/* XY_MAG: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_TEX_SAMP_0_XY_MAG(enum a3xx_tex_filter val)
+{
+       const uint32_t field = val << A3XX_TEX_SAMP_0_XY_MAG__SHIFT;
+       return field & A3XX_TEX_SAMP_0_XY_MAG__MASK;
+}
+#define A3XX_TEX_SAMP_0_XY_MIN__MASK                           0x00000030
+#define A3XX_TEX_SAMP_0_XY_MIN__SHIFT                          4
+/* XY_MIN: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_TEX_SAMP_0_XY_MIN(enum a3xx_tex_filter val)
+{
+       const uint32_t field = val << A3XX_TEX_SAMP_0_XY_MIN__SHIFT;
+       return field & A3XX_TEX_SAMP_0_XY_MIN__MASK;
+}
+#define A3XX_TEX_SAMP_0_WRAP_S__MASK                           0x000001c0
+#define A3XX_TEX_SAMP_0_WRAP_S__SHIFT                          6
+/* WRAP_S: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_TEX_SAMP_0_WRAP_S(enum a3xx_tex_clamp val)
+{
+       const uint32_t field = val << A3XX_TEX_SAMP_0_WRAP_S__SHIFT;
+       return field & A3XX_TEX_SAMP_0_WRAP_S__MASK;
+}
+#define A3XX_TEX_SAMP_0_WRAP_T__MASK                           0x00000e00
+#define A3XX_TEX_SAMP_0_WRAP_T__SHIFT                          9
+/* WRAP_T: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_TEX_SAMP_0_WRAP_T(enum a3xx_tex_clamp val)
+{
+       const uint32_t field = val << A3XX_TEX_SAMP_0_WRAP_T__SHIFT;
+       return field & A3XX_TEX_SAMP_0_WRAP_T__MASK;
+}
+#define A3XX_TEX_SAMP_0_WRAP_R__MASK                           0x00007000
+#define A3XX_TEX_SAMP_0_WRAP_R__SHIFT                          12
+/* WRAP_R: shift val into place and clip it to the field mask. */
+static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val)
+{
+       const uint32_t field = val << A3XX_TEX_SAMP_0_WRAP_R__SHIFT;
+       return field & A3XX_TEX_SAMP_0_WRAP_R__MASK;
+}
+#define A3XX_TEX_SAMP_0_UNNORM_COORDS                          0x80000000
+
+#define REG_A3XX_TEX_SAMP_1                                    0x00000001
+
+/* TEX_CONST_n values are small indices starting at 0, unlike the register offsets above. */
+#define REG_A3XX_TEX_CONST_0                                   0x00000000
+#define A3XX_TEX_CONST_0_TILED                                 0x00000001
+#define A3XX_TEX_CONST_0_SWIZ_X__MASK                          0x00000070
+#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT                         4
+/* SWIZ_X: shift val into place and clip it to the 3-bit field. */
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum a3xx_tex_swiz val)
+{
+       const uint32_t field = val << A3XX_TEX_CONST_0_SWIZ_X__SHIFT;
+       return field & A3XX_TEX_CONST_0_SWIZ_X__MASK;
+}
+#define A3XX_TEX_CONST_0_SWIZ_Y__MASK                          0x00000380
+#define A3XX_TEX_CONST_0_SWIZ_Y__SHIFT                         7
+/* SWIZ_Y: shift val into place and clip it to the 3-bit field. */
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Y(enum a3xx_tex_swiz val)
+{
+       const uint32_t field = val << A3XX_TEX_CONST_0_SWIZ_Y__SHIFT;
+       return field & A3XX_TEX_CONST_0_SWIZ_Y__MASK;
+}
+#define A3XX_TEX_CONST_0_SWIZ_Z__MASK                          0x00001c00
+#define A3XX_TEX_CONST_0_SWIZ_Z__SHIFT                         10
+/* SWIZ_Z: shift val into place and clip it to the 3-bit field. */
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_Z(enum a3xx_tex_swiz val)
+{
+       const uint32_t field = val << A3XX_TEX_CONST_0_SWIZ_Z__SHIFT;
+       return field & A3XX_TEX_CONST_0_SWIZ_Z__MASK;
+}
+#define A3XX_TEX_CONST_0_SWIZ_W__MASK                          0x0000e000
+#define A3XX_TEX_CONST_0_SWIZ_W__SHIFT                         13
+/* SWIZ_W: shift val into place and clip it to the 3-bit field. */
+static inline uint32_t A3XX_TEX_CONST_0_SWIZ_W(enum a3xx_tex_swiz val)
+{
+       const uint32_t field = val << A3XX_TEX_CONST_0_SWIZ_W__SHIFT;
+       return field & A3XX_TEX_CONST_0_SWIZ_W__MASK;
+}
+#define A3XX_TEX_CONST_0_FMT__MASK                             0x1fc00000
+#define A3XX_TEX_CONST_0_FMT__SHIFT                            22
+/* FMT: shift val into place and clip it to the 7-bit field. */
+static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val)
+{
+       const uint32_t field = val << A3XX_TEX_CONST_0_FMT__SHIFT;
+       return field & A3XX_TEX_CONST_0_FMT__MASK;
+}
+
+#define REG_A3XX_TEX_CONST_1                                   0x00000001
+#define A3XX_TEX_CONST_1_HEIGHT__MASK                          0x00003fff
+#define A3XX_TEX_CONST_1_HEIGHT__SHIFT                         0
+/* HEIGHT: shift val into place and clip it to the 14-bit field. */
+static inline uint32_t A3XX_TEX_CONST_1_HEIGHT(uint32_t val)
+{
+       const uint32_t field = val << A3XX_TEX_CONST_1_HEIGHT__SHIFT;
+       return field & A3XX_TEX_CONST_1_HEIGHT__MASK;
+}
+#define A3XX_TEX_CONST_1_WIDTH__MASK                           0x0fffc000
+#define A3XX_TEX_CONST_1_WIDTH__SHIFT                          14
+/* WIDTH: shift val into place and clip it to the 14-bit field. */
+static inline uint32_t A3XX_TEX_CONST_1_WIDTH(uint32_t val)
+{
+       const uint32_t field = val << A3XX_TEX_CONST_1_WIDTH__SHIFT;
+       return field & A3XX_TEX_CONST_1_WIDTH__MASK;
+}
+#define A3XX_TEX_CONST_1_FETCHSIZE__MASK                       0xf0000000
+#define A3XX_TEX_CONST_1_FETCHSIZE__SHIFT                      28
+/*
+ * Pack val into the FETCHSIZE field (bits 28-31) of TEX_CONST_1.
+ * val is widened to uint32_t before shifting: the field reaches bit 31, so
+ * shifting the enum directly would overflow a signed int for any val >= 8 on
+ * compilers that give the enum a signed underlying type.
+ */
+static inline uint32_t A3XX_TEX_CONST_1_FETCHSIZE(enum a3xx_tex_fetchsize val)
+{
+       return ((uint32_t)val << A3XX_TEX_CONST_1_FETCHSIZE__SHIFT) & A3XX_TEX_CONST_1_FETCHSIZE__MASK;
+}
+
+#define REG_A3XX_TEX_CONST_2                                   0x00000002
+#define A3XX_TEX_CONST_2_INDX__MASK                            0x000000ff
+#define A3XX_TEX_CONST_2_INDX__SHIFT                           0
+/* INDX: shift val into place and clip it to the low byte. */
+static inline uint32_t A3XX_TEX_CONST_2_INDX(uint32_t val)
+{
+       const uint32_t field = val << A3XX_TEX_CONST_2_INDX__SHIFT;
+       return field & A3XX_TEX_CONST_2_INDX__MASK;
+}
+#define A3XX_TEX_CONST_2_PITCH__MASK                           0x3ffff000
+#define A3XX_TEX_CONST_2_PITCH__SHIFT                          12
+/* PITCH: shift val into place and clip it to the 18-bit field. */
+static inline uint32_t A3XX_TEX_CONST_2_PITCH(uint32_t val)
+{
+       const uint32_t field = val << A3XX_TEX_CONST_2_PITCH__SHIFT;
+       return field & A3XX_TEX_CONST_2_PITCH__MASK;
+}
+#define A3XX_TEX_CONST_2_SWAP__MASK                            0xc0000000
+#define A3XX_TEX_CONST_2_SWAP__SHIFT                           30
+/*
+ * Pack val into the SWAP field (bits 30-31) of TEX_CONST_2.
+ * val is widened to uint32_t before shifting: the field reaches bit 31, so
+ * shifting the enum directly would overflow a signed int for any val >= 2 on
+ * compilers that give the enum a signed underlying type.
+ */
+static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val)
+{
+       return ((uint32_t)val << A3XX_TEX_CONST_2_SWAP__SHIFT) & A3XX_TEX_CONST_2_SWAP__MASK;
+}
+
+#define REG_A3XX_TEX_CONST_3                                   0x00000003
+
+
+#endif /* A3XX_XML */
diff --git a/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c b/src/gallium/drivers/freedreno/a3xx/disasm-a3xx.c
new file mode 100644 (file)
index 0000000..4db095f
--- /dev/null
@@ -0,0 +1,946 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <assert.h>
+
+#include <util/u_debug.h>
+
+#include "disasm.h"
+#include "instr-a3xx.h"
+
+/* Debug flag bits tested (as PRINT_VERBOSE) by the print_instr_cat*()
+ * helpers.  Nothing in this file assigns it, so it keeps its zero
+ * initialisation.
+ */
+static enum debug_t debug;
+
+/* Every printf() call below is routed through debug_printf(). */
+#define printf debug_printf
+
+/* Indentation prefix indexed by nesting level.  Levels 0-9 give that many
+ * tabs; the remaining six entries are the string "x".  The table is
+ * read-only, so the pointers themselves are const as well.
+ */
+static const char * const levels[] = {
+               "",
+               "\t",
+               "\t\t",
+               "\t\t\t",
+               "\t\t\t\t",
+               "\t\t\t\t\t",
+               "\t\t\t\t\t\t",
+               "\t\t\t\t\t\t\t",
+               "\t\t\t\t\t\t\t\t",
+               "\t\t\t\t\t\t\t\t\t",
+               "x",
+               "x",
+               "x",
+               "x",
+               "x",
+               "x",
+};
+
+/* Letter printed for register component index 0..3; never re-pointed. */
+static const char * const component = "xyzw";
+
+/* Textual suffix for each TYPE_* value, as printed in mnemonics such as
+ * "mov.f32f32".  Read-only, so the pointers themselves are const as well.
+ */
+static const char * const type[] = {
+               [TYPE_F16] = "f16",
+               [TYPE_F32] = "f32",
+               [TYPE_U16] = "u16",
+               [TYPE_U32] = "u32",
+               [TYPE_S16] = "s16",
+               [TYPE_S32] = "s32",
+               [TYPE_U8]  = "u8",
+               [TYPE_S8]  = "s8",
+};
+
+/* Print one register operand.
+ *
+ *   full     - false adds the "h" (half) prefix
+ *   r        - print the "(r)" flag
+ *   c        - const file ('c') rather than general register file ('r')
+ *   im       - operand is an immediate; reg.iim_val is printed instead
+ *   neg/abs  - print the "(neg)", "(abs)" or "(absneg)" modifier
+ *   addr_rel - operand is relative to a0.x, reg.iim_val being the offset
+ */
+static void print_reg(reg_t reg, bool full, bool r, bool c, bool im,
+               bool neg, bool abs, bool addr_rel)
+{
+       const char file = c ? 'c' : 'r';
+       const char *prec = full ? "" : "h";
+
+       // XXX I prefer - and || for neg/abs, but preserving format used
+       // by libllvm-a3xx for easy diffing..
+
+       if (neg) {
+               if (abs)
+                       printf("(absneg)");
+               else
+                       printf("(neg)");
+       } else if (abs) {
+               printf("(abs)");
+       }
+
+       if (r)
+               printf("(r)");
+
+       if (im) {
+               printf("%d", reg.iim_val);
+               return;
+       }
+
+       if (addr_rel) {
+               /* I would just use %+d but trying to make it diff'able with
+                * libllvm-a3xx...
+                */
+               if (reg.iim_val < 0)
+                       printf("%s%c<a0.x - %d>", prec, file, -reg.iim_val);
+               else if (reg.iim_val > 0)
+                       printf("%s%c<a0.x + %d>", prec, file, reg.iim_val);
+               else
+                       printf("%s%c<a0.x>", prec, file);
+               return;
+       }
+
+       /* the address and predicate registers have their own names */
+       if (!c && (reg.num == REG_A0)) {
+               printf("a0.%c", component[reg.comp]);
+               return;
+       }
+
+       if (!c && (reg.num == REG_P0)) {
+               printf("p0.%c", component[reg.comp]);
+               return;
+       }
+
+       printf("%s%c%d.%c", prec, file, reg.num, component[reg.comp]);
+}
+
+/* Tracking for registers used, read-before-write (input), and
+ * write-after-read (output.. but not 100%)..
+ *
+ * Each tracked slot is one register component, see regidx().
+ */
+
+/* number of slots a regmask_t can track */
+#define MAX_REG 128
+
+/* One bit per slot, kept separately for full and half precision registers. */
+typedef struct {
+       uint8_t full[MAX_REG/8];
+       uint8_t half[MAX_REG/8];
+} regmask_t;
+
+/* Set the tracking bit of slot `num` to `val` (0 or 1) in the full or half
+ * precision bitmap of `regmask`.
+ *
+ * An out-of-range slot trips the assert in debug builds.  When asserts are
+ * compiled out (NDEBUG) the request is ignored rather than writing past the
+ * end of the bitmap.
+ */
+static void regmask_set(regmask_t *regmask, unsigned num, bool full, unsigned val)
+{
+       uint8_t *bits = full ? regmask->full : regmask->half;
+       unsigned i = num / 8;
+       unsigned j = num % 8;
+
+       assert(num < MAX_REG);
+       if (num >= MAX_REG)
+               return;
+
+       /* clear the old bit, then merge in the new value */
+       bits[i] = (bits[i] & ~(1 << j)) | (val << j);
+}
+
+/* Return the tracking bit (0 or 1) of slot `num` from the full or half
+ * precision bitmap of `regmask`.
+ *
+ * An out-of-range slot trips the assert in debug builds.  When asserts are
+ * compiled out (NDEBUG) it reads as 0 rather than indexing past the end of
+ * the bitmap.
+ */
+static unsigned regmask_get(regmask_t *regmask, unsigned num, bool full)
+{
+       const uint8_t *bits = full ? regmask->full : regmask->half;
+       unsigned i = num / 8;
+       unsigned j = num % 8;
+
+       assert(num < MAX_REG);
+       if (num >= MAX_REG)
+               return 0;
+
+       return (bits[i] >> j) & 0x1;
+}
+
+/* Flatten a register into a tracking slot index: four consecutive slots per
+ * register number, selected by the component.
+ */
+static unsigned regidx(reg_t reg)
+{
+       unsigned base = 4 * reg.num;
+
+       return base + reg.comp;
+}
+
+/* Register tracking state for the shader being disassembled; zeroed at the
+ * start of disasm_a3xx().
+ */
+static struct {
+       regmask_t used;     /* read or written at least once */
+       regmask_t rbw;      /* read before write */
+       regmask_t war;      /* write after read */
+       regmask_t cnst;     /* used consts */
+} regs;
+
+/* Print one run of consecutive set slots, as " N" or " first-last".  A
+ * `first` of MAX_REG means that no run has been started, so nothing is
+ * printed.
+ */
+static void print_reg_sequence(int first, int last)
+{
+       if (first == MAX_REG)
+               return;
+
+       if (first == last)
+               printf(" %d", first);
+       else
+               printf(" %d-%d", first, last);
+}
+
+/* Print the set slots of the full or half precision bitmap of `regmask` as a
+ * list of ranges, followed by the number of set slots and the highest one.
+ *
+ * The range printing lives in a file-scope helper rather than a nested
+ * function, since nested functions are a GNU C extension.
+ */
+static void print_regs(regmask_t *regmask, bool full)
+{
+       int num, max = 0, cnt = 0;
+       /* MAX_REG is the "no run started yet" marker */
+       int first = MAX_REG, last = MAX_REG;
+
+       for (num = 0; num < MAX_REG; num++) {
+               if (!regmask_get(regmask, num, full))
+                       continue;
+
+               /* a gap ends the current run and starts a new one */
+               if (num != (last + 1)) {
+                       print_reg_sequence(first, last);
+                       first = num;
+               }
+               last = num;
+               max = num;
+               cnt++;
+       }
+
+       /* flush the final run */
+       print_reg_sequence(first, last);
+
+       printf(" (cnt=%d, max=%d)", cnt, max);
+}
+
+/* Print the collected register statistics (used, input, const and estimated
+ * output registers, each for half and full precision), indented by
+ * levels[level].
+ */
+static void print_reg_stats(int level)
+{
+       const char *indent = levels[level];
+
+       printf("%sRegister Stats:\n", indent);
+
+       printf("%s- used (half):", indent);
+       print_regs(&regs.used, false);
+       printf("\n");
+       printf("%s- used (full):", indent);
+       print_regs(&regs.used, true);
+       printf("\n");
+
+       /* inputs: registers that were read before being written */
+       printf("%s- input (half):", indent);
+       print_regs(&regs.rbw, false);
+       printf("\n");
+       printf("%s- input (full):", indent);
+       print_regs(&regs.rbw, true);
+       printf("\n");
+
+       printf("%s- const (half):", indent);
+       print_regs(&regs.cnst, false);
+       printf("\n");
+       printf("%s- const (full):", indent);
+       print_regs(&regs.cnst, true);
+       printf("\n");
+
+       /* outputs: registers written and not read again afterwards */
+       printf("%s- output (half):", indent);
+       print_regs(&regs.war, false);
+       printf("  (estimated)\n");
+       printf("%s- output (full):", indent);
+       print_regs(&regs.war, true);
+       printf("  (estimated)\n");
+}
+
+/* we have to process the dst register after src to avoid tripping up
+ * the read-before-write detection
+ *
+ * print_reg_dst() records the destination here; process_reg_dst() commits
+ * it to the register masks once the whole instruction has been printed.
+ */
+static unsigned last_dst;
+static bool last_dst_full;
+static bool last_dst_valid = false;
+
+/* current instruction repeat flag: */
+static unsigned repeat;
+
+/* Commit the destination recorded by print_reg_dst(), if any, to the
+ * tracking masks: repeat + 1 consecutive slots are marked as written and as
+ * used.  The pending destination is consumed.
+ */
+static void process_reg_dst(void)
+{
+       unsigned n;
+
+       if (last_dst_valid) {
+               for (n = 0; n <= repeat; n++) {
+                       const unsigned slot = last_dst + n;
+
+                       regmask_set(&regs.war, slot, last_dst_full, 1);
+                       regmask_set(&regs.used, slot, last_dst_full, 1);
+               }
+       }
+
+       last_dst_valid = false;
+}
+
+/* Print a destination register and remember it so that process_reg_dst()
+ * can account for the write after the sources have been processed.
+ */
+static void print_reg_dst(reg_t reg, bool full, bool addr_rel)
+{
+       /* presumably the special registers a0.c and p0.c don't count.. */
+       const bool tracked = !addr_rel && !reg_special(reg);
+
+       if (tracked) {
+               last_dst_valid = true;
+               last_dst_full = full;
+               last_dst = regidx(reg);
+       }
+
+       print_reg(reg, full, false, false, false, false, false, addr_rel);
+}
+
+/* Print a source operand and update the tracking masks for it.
+ *
+ * A plain register read is flagged as an input (read-before-write) if it has
+ * not been used yet, loses its pending output flag and is marked used.  A
+ * const operand is recorded in the const mask.  With the (r) flag the
+ * repeat + 1 consecutive slots are touched, otherwise only the first one.
+ */
+static void print_reg_src(reg_t reg, bool full, bool r, bool c, bool im,
+               bool neg, bool abs, bool addr_rel)
+{
+       const unsigned extra = r ? repeat : 0;
+       unsigned n;
+
+       /* presumably the special registers a0.c and p0.c don't count.. */
+       if (!addr_rel && !c && !im && !reg_special(reg)) {
+               const unsigned base = regidx(reg);
+
+               for (n = 0; n <= extra; n++) {
+                       const unsigned slot = base + n;
+
+                       if (!regmask_get(&regs.used, slot, full))
+                               regmask_set(&regs.rbw, slot, full, 1);
+
+                       regmask_set(&regs.war, slot, full, 0);
+                       regmask_set(&regs.used, slot, full, 1);
+               }
+       } else if (c) {
+               const unsigned base = regidx(reg);
+
+               for (n = 0; n <= extra; n++)
+                       regmask_set(&regs.cnst, base + n, full, 1);
+       }
+
+       print_reg(reg, full, r, c, im, neg, abs, addr_rel);
+}
+
+
+/* Print the operands of a category 0 instruction: kill and br print the
+ * (optionally inverted) predicate component, br/jump/call print the
+ * immediate.  Other opcodes have no operands.
+ */
+static void print_instr_cat0(instr_t *instr)
+{
+       instr_cat0_t *cat0 = &instr->cat0;
+       const char *inv = cat0->inv ? "!" : "";
+
+       if (cat0->opc == OPC_KILL) {
+               printf(" %sp0.%c", inv,
+                               component[cat0->comp]);
+       } else if (cat0->opc == OPC_BR) {
+               printf(" %sp0.%c, #%d", inv,
+                               component[cat0->comp], cat0->immed);
+       } else if ((cat0->opc == OPC_JUMP) || (cat0->opc == OPC_CALL)) {
+               printf(" #%d", cat0->immed);
+       }
+
+       /* dump the unknown fields when any of them is set */
+       if (debug & PRINT_VERBOSE) {
+               if (cat0->dummy1|cat0->dummy2|cat0->dummy3|cat0->dummy4)
+                       printf("\t{0: %x,%x,%x,%x}", cat0->dummy1, cat0->dummy2, cat0->dummy3, cat0->dummy4);
+       }
+}
+
+/* Print a category 1 (move/convert) instruction.  The opcode table has an
+ * empty name for this category, so the mnemonic ("mov", "cov" or "mova")
+ * and its type suffixes are printed here, followed by destination and
+ * source.
+ */
+static void print_instr_cat1(instr_t *instr)
+{
+       instr_cat1_t *cat1 = &instr->cat1;
+
+       // XXX maybe a bug in libllvm disassembler?
+       if (cat1->src_rel)
+               printf("(ul)");
+
+       /* same type on both sides is a move, otherwise a conversion */
+       if (cat1->src_type == cat1->dst_type) {
+               if ((cat1->src_type == TYPE_S16) && (((reg_t)cat1->dst).num == REG_A0)) {
+                       /* special case (mnemonic?): */
+                       printf("mova");
+               } else {
+                       printf("mov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
+               }
+       } else {
+               printf("cov.%s%s", type[cat1->src_type], type[cat1->dst_type]);
+       }
+
+       printf(" ");
+
+       if (cat1->even)
+               printf("(even)");
+
+       if (cat1->pos_inf)
+               printf("(pos_infinity)");
+
+       print_reg_dst((reg_t)(cat1->dst), type_size(cat1->dst_type) == 32,
+                       cat1->dst_rel);
+
+       printf(", ");
+
+       /* ugg, have to special case this.. vs print_reg().. */
+       if (cat1->src_im) {
+               /* immediates are printed according to the source type */
+               if (type_float(cat1->src_type))
+                       printf("(%f)", cat1->fim_val);
+               else
+                       printf("%d", cat1->iim_val);
+       } else if (cat1->src_rel && !cat1->src_c) {
+               /* I would just use %+d but trying to make it diff'able with
+                * libllvm-a3xx...
+                */
+               /* NOTE(review): prints a 'c' prefix although src_c is clear;
+                * presumably matches the reference disassembler - confirm.
+                */
+               if (cat1->off < 0)
+                       printf("c<a0.x - %d>", -cat1->off);
+               else if (cat1->off > 0)
+                       printf("c<a0.x + %d>", cat1->off);
+               else
+                       printf("c<a0.x>");
+       } else {
+               print_reg_src((reg_t)(cat1->src), type_size(cat1->src_type) == 32,
+                               cat1->src_r, cat1->src_c, cat1->src_im, false, false, false);
+       }
+
+       /* flag a non-zero value in the field named must_be_0 */
+       if ((debug & PRINT_VERBOSE) && (cat1->must_be_0))
+               printf("\t{1: %x}", cat1->must_be_0);
+}
+
+/* Print the operands of a category 2 instruction: the condition suffix for
+ * the compare opcodes, then destination, first source and, unless the opcode
+ * is one of the single-source ones listed below, second source.
+ */
+static void print_instr_cat2(instr_t *instr)
+{
+       instr_cat2_t *cat2 = &instr->cat2;
+       static const char *cond[] = {
+                       "lt",
+                       "le",
+                       "gt",
+                       "ge",
+                       "eq",
+                       "ne",
+                       "?6?",
+       };
+
+       switch (cat2->opc) {
+       case OPC_CMPS_F:
+       case OPC_CMPS_U:
+       case OPC_CMPS_S:
+       case OPC_CMPV_F:
+       case OPC_CMPV_U:
+       case OPC_CMPV_S:
+               /* The encoded condition is not range checked elsewhere; a
+                * value beyond the table is printed in the same "?N?" form
+                * instead of indexing past its end.
+                */
+               if (cat2->cond < (sizeof(cond) / sizeof(cond[0])))
+                       printf(".%s", cond[cat2->cond]);
+               else
+                       printf(".?%d?", cat2->cond);
+               break;
+       default:
+               break;
+       }
+
+       printf(" ");
+       if (cat2->ei)
+               printf("(ei)");
+       /* dst_half flips the destination precision relative to the sources */
+       print_reg_dst((reg_t)(cat2->dst), cat2->full ^ cat2->dst_half, false);
+       printf(", ");
+       print_reg_src((reg_t)(cat2->src1), cat2->full, cat2->src1_r,
+                       cat2->src1_c, cat2->src1_im, cat2->src1_neg,
+                       cat2->src1_abs, cat2->src1_rel);
+       switch (cat2->opc) {
+       case OPC_ABSNEG_F:
+       case OPC_ABSNEG_S:
+       case OPC_CLZ_B:
+       case OPC_CLZ_S:
+       case OPC_SIGN_F:
+       case OPC_FLOOR_F:
+       case OPC_CEIL_F:
+       case OPC_RNDNE_F:
+       case OPC_RNDAZ_F:
+       case OPC_TRUNC_F:
+       case OPC_NOT_B:
+       case OPC_BFREV_B:
+       case OPC_SETRM:
+       case OPC_CBITS_B:
+               /* these only have one src reg */
+               break;
+       default:
+               printf(", ");
+               print_reg_src((reg_t)(cat2->src2), cat2->full, cat2->src2_r,
+                               cat2->src2_c, cat2->src2_im, cat2->src2_neg,
+                               cat2->src2_abs, cat2->src2_rel);
+               break;
+       }
+}
+
+/* Print the operands of a category 3 (three source) instruction:
+ * destination followed by src1, src2 and src3.
+ */
+static void print_instr_cat3(instr_t *instr)
+{
+       instr_cat3_t *cat3 = &instr->cat3;
+       bool full;
+
+       // XXX is this based on opc or some other bit?
+       switch (cat3->opc) {
+       case OPC_MAD_F16:
+       case OPC_MAD_U16:
+       case OPC_MAD_S16:
+       case OPC_SEL_B16:
+       case OPC_SEL_S16:
+       case OPC_SEL_F16:
+       case OPC_SAD_S16:
+       case OPC_SAD_S32:  // really??
+               /* these opcodes operate on half precision registers */
+               full = false;
+               break;
+       default:
+               full = true;
+               break;
+       }
+
+       printf(" ");
+       /* dst_half flips the destination precision relative to the sources */
+       print_reg_dst((reg_t)(cat3->dst), full ^ cat3->dst_half, false);
+
+       printf(", ");
+       print_reg_src((reg_t)(cat3->src1), full,
+                       cat3->src1_r, cat3->src1_c, false, cat3->src1_neg,
+                       false, cat3->src1_rel);
+
+       /* src2 is never printed as a0.x relative */
+       printf(", ");
+       print_reg_src((reg_t)cat3->src2, full,
+                       cat3->src2_r, cat3->src2_c, false, cat3->src2_neg,
+                       false, false);
+
+       printf(", ");
+       print_reg_src((reg_t)(cat3->src3), full,
+                       cat3->src3_r, cat3->src3_c, false, cat3->src3_neg,
+                       false, cat3->src3_rel);
+}
+
+/* Print the operands of a category 4 (single source) instruction:
+ * destination followed by the source.
+ */
+static void print_instr_cat4(instr_t *instr)
+{
+       instr_cat4_t *cat4 = &instr->cat4;
+
+       printf(" ");
+       /* dst_half flips the destination precision relative to the source */
+       print_reg_dst((reg_t)(cat4->dst), cat4->full ^ cat4->dst_half, false);
+
+       printf(", ");
+       print_reg_src((reg_t)(cat4->src), cat4->full,
+                       cat4->src_r, cat4->src_c, cat4->src_im,
+                       cat4->src_neg, cat4->src_abs, cat4->src_rel);
+
+       /* dump the unknown fields when any of them is set */
+       if (debug & PRINT_VERBOSE) {
+               if (cat4->dummy1|cat4->dummy2)
+                       printf("\t{4: %x,%x}", cat4->dummy1, cat4->dummy2);
+       }
+}
+
+/* Print the flags and operands of a category 5 instruction: flag suffixes,
+ * result type, write mask and destination, then the sources and the
+ * sampler / texture indices that the opcode takes.
+ */
+static void print_instr_cat5(instr_t *instr)
+{
+       /* which optional operands each opcode prints */
+       static const struct cat5_operands {
+               bool src1, src2, samp, tex;
+       } info[0x1f] = {
+                       [OPC_ISAM]     = { true,  false, true,  true,  },
+                       [OPC_ISAML]    = { true,  true,  true,  true,  },
+                       [OPC_ISAMM]    = { true,  false, true,  true,  },
+                       [OPC_SAM]      = { true,  false, true,  true,  },
+                       [OPC_SAMB]     = { true,  true,  true,  true,  },
+                       [OPC_SAML]     = { true,  true,  true,  true,  },
+                       [OPC_SAMGQ]    = { true,  false, true,  true,  },
+                       [OPC_GETLOD]   = { true,  false, true,  true,  },
+                       [OPC_CONV]     = { true,  true,  true,  true,  },
+                       [OPC_CONVM]    = { true,  true,  true,  true,  },
+                       [OPC_GETSIZE]  = { true,  false, false, true,  },
+                       [OPC_GETBUF]   = { false, false, false, true,  },
+                       [OPC_GETPOS]   = { true,  false, false, true,  },
+                       [OPC_GETINFO]  = { false, false, false, true,  },
+                       [OPC_DSX]      = { true,  false, false, false, },
+                       [OPC_DSY]      = { true,  false, false, false, },
+                       [OPC_GATHER4R] = { true,  false, true,  true,  },
+                       [OPC_GATHER4G] = { true,  false, true,  true,  },
+                       [OPC_GATHER4B] = { true,  false, true,  true,  },
+                       [OPC_GATHER4A] = { true,  false, true,  true,  },
+                       [OPC_SAMGP0]   = { true,  false, true,  true,  },
+                       [OPC_SAMGP1]   = { true,  false, true,  true,  },
+                       [OPC_SAMGP2]   = { true,  false, true,  true,  },
+                       [OPC_SAMGP3]   = { true,  false, true,  true,  },
+                       [OPC_DSXPP_1]  = { true,  false, false, false, },
+                       [OPC_DSYPP_1]  = { true,  false, false, false, },
+                       [OPC_RGETPOS]  = { false, false, false, false, },
+                       [OPC_RGETINFO] = { false, false, false, false, },
+       };
+       instr_cat5_t *cat5 = &instr->cat5;
+       const struct cat5_operands *ops = &info[cat5->opc];
+       int bit;
+
+       if (cat5->is_3d)
+               printf(".3d");
+       if (cat5->is_a)
+               printf(".a");
+       if (cat5->is_o)
+               printf(".o");
+       if (cat5->is_p)
+               printf(".p");
+       if (cat5->is_s)
+               printf(".s");
+       if (cat5->is_s2en)
+               printf(".s2en");
+
+       printf(" ");
+
+       /* dsxpp.1 / dsypp.1 are printed without a type */
+       if ((cat5->opc != OPC_DSXPP_1) && (cat5->opc != OPC_DSYPP_1))
+               printf("(%s)", type[cat5->type]);
+
+       /* write mask, one letter per enabled component */
+       printf("(");
+       for (bit = 0; bit < 4; bit++) {
+               if (cat5->wrmask & (1 << bit))
+                       printf("%c", "xyzw"[bit]);
+       }
+       printf(")");
+
+       print_reg_dst((reg_t)(cat5->dst), type_size(cat5->type) == 32, false);
+
+       if (ops->src1) {
+               printf(", ");
+               print_reg_src((reg_t)(cat5->src1), cat5->full, false, false, false,
+                               false, false, false);
+       }
+
+       if (cat5->is_s2en) {
+               /* .s2en form: two more register sources, the last one
+                * always printed as half precision
+                */
+               printf(", ");
+               print_reg_src((reg_t)(cat5->s2en.src2), cat5->full, false, false, false,
+                               false, false, false);
+               printf(", ");
+               print_reg_src((reg_t)(cat5->s2en.src3), false, false, false, false,
+                               false, false, false);
+       } else {
+               if (cat5->is_o || ops->src2) {
+                       printf(", ");
+                       print_reg_src((reg_t)(cat5->norm.src2), cat5->full,
+                                       false, false, false, false, false, false);
+               }
+               if (ops->samp)
+                       printf(", s#%d", cat5->norm.samp);
+               if (ops->tex)
+                       printf(", t#%d", cat5->norm.tex);
+       }
+
+       /* dump the unknown fields when any of them is set */
+       if (debug & PRINT_VERBOSE) {
+               if (cat5->is_s2en) {
+                       if (cat5->s2en.dummy1|cat5->s2en.dummy2|cat5->dummy2)
+                               printf("\t{5: %x,%x,%x}", cat5->s2en.dummy1, cat5->s2en.dummy2, cat5->dummy2);
+               } else if (cat5->norm.dummy1|cat5->dummy2) {
+                       printf("\t{5: %x,%x}", cat5->norm.dummy1, cat5->dummy2);
+               }
+       }
+}
+
+/* Sign-extend the nbits wide two's complement value held in val. */
+static int32_t u2i(uint32_t val, int nbits)
+{
+       /* 1 when the top bit of the field is set */
+       const uint32_t sign = val >> (nbits - 1);
+       /* every bit above the field */
+       const uint32_t high = ~((1 << nbits) - 1);
+
+       return (sign * high) | val;
+}
+
+/* Print the operands of a category 6 (load/store) instruction: the type
+ * suffix, then the operands in the syntax of the opcode group (load,
+ * prefetch, store or sti), then the trailing immediate.  Opcodes outside
+ * those groups print only the type suffix and the immediate.
+ */
+static void print_instr_cat6(instr_t *instr)
+{
+       instr_cat6_t *cat6 = &instr->cat6;
+
+       printf(".%s ", type[cat6->type]);
+
+       switch (cat6->opc) {
+       case OPC_LDG:
+       case OPC_LDP:
+       case OPC_LDL:
+       case OPC_LDLW:
+       case OPC_LDLV:
+               /* load instructions: */
+               print_reg_dst((reg_t)(cat6->a.dst), type_size(cat6->type) == 32, false);
+               printf(",");
+               /* letter in front of the [address] operand */
+               switch (cat6->opc) {
+               case OPC_LDG:
+                       printf("g");
+                       break;
+               case OPC_LDP:
+                       printf("p");
+                       break;
+               case OPC_LDL:
+               case OPC_LDLW:
+               case OPC_LDLV:
+                       printf("l");
+                       break;
+               }
+               printf("[");
+               print_reg_src((reg_t)(cat6->a.src), true,
+                               false, false, false, false, false, false);
+               if (cat6->a.off)
+                       printf("%+d", cat6->a.off);
+               printf("]");
+               break;
+       case OPC_PREFETCH:
+               /* similar to load instructions: */
+               printf("g[");
+               print_reg_src((reg_t)(cat6->a.src), true,
+                               false, false, false, false, false, false);
+               if (cat6->a.off)
+                       printf("%+d", cat6->a.off);
+               printf("]");
+               break;
+       case OPC_STG:
+       case OPC_STP:
+       case OPC_STL:
+       case OPC_STLW:
+               /* store instructions: */
+               switch (cat6->opc) {
+               case OPC_STG:
+                       printf("g");
+                       break;
+               case OPC_STP:
+                       printf("p");
+                       break;
+               case OPC_STL:
+               case OPC_STLW:
+                       printf("l");
+                       break;
+               }
+               printf("[");
+               print_reg_dst((reg_t)(cat6->b.dst), true, false);
+               /* offset is split over off_hi/off and sign-extended from 13 bits */
+               if (cat6->b.off || cat6->b.off_hi)
+                       printf("%+d", u2i((cat6->b.off_hi << 8) | cat6->b.off, 13));
+               printf("]");
+               printf(",");
+               print_reg_src((reg_t)(cat6->b.src), type_size(cat6->type) == 32,
+                               false, false, false, false, false, false);
+
+               break;
+       case OPC_STI:
+               /* sti has same encoding as other store instructions, but
+                * slightly different syntax:
+                */
+               print_reg_dst((reg_t)(cat6->b.dst), false /* XXX is it always half? */, false);
+               if (cat6->b.off || cat6->b.off_hi)
+                       printf("%+d", u2i((cat6->b.off_hi << 8) | cat6->b.off, 13));
+               printf(",");
+               print_reg_src((reg_t)(cat6->b.src), type_size(cat6->type) == 32,
+                               false, false, false, false, false, false);
+               break;
+       }
+
+       printf(", %d", cat6->iim_val);
+
+       /* In verbose mode, dump the unknown fields when set and flag fields
+        * named must_be_* that do not hold the expected value.  Only the
+        * opcodes listed here are checked.
+        */
+       if (debug & PRINT_VERBOSE) {
+               switch (cat6->opc) {
+               case OPC_LDG:
+               case OPC_LDP:
+                       /* load instructions: */
+                       if (cat6->a.dummy1|cat6->a.dummy2|cat6->a.dummy3)
+                               printf("\t{6: %x,%x,%x}", cat6->a.dummy1, cat6->a.dummy2, cat6->a.dummy3);
+                       if ((cat6->a.must_be_one1 != 1) || (cat6->a.must_be_one2 != 1))
+                               printf("{?? %d,%d ??}", cat6->a.must_be_one1, cat6->a.must_be_one2);
+                       break;
+               case OPC_STG:
+               case OPC_STP:
+               case OPC_STI:
+                       /* store instructions: */
+                       if (cat6->b.dummy1|cat6->b.dummy2)
+                               printf("\t{6: %x,%x}", cat6->b.dummy1, cat6->b.dummy2);
+                       if ((cat6->b.must_be_one1 != 1) || (cat6->b.must_be_one2 != 1) ||
+                                       (cat6->b.must_be_zero1 != 0))
+                               printf("{?? %d,%d,%d ??}", cat6->b.must_be_one1, cat6->b.must_be_one2,
+                                               cat6->b.must_be_zero1);
+                       break;
+               }
+       }
+}
+
+/* size of largest OPC field of all the instruction categories: */
+#define NOPC_BITS 6
+
+struct opc_info {
+       uint16_t cat;
+       uint16_t opc;
+       const char *name;
+       void (*print)(instr_t *instr);
+} opcs[1 << (3+NOPC_BITS)] = {
+#define OPC(cat, opc, name) [((cat) << NOPC_BITS) | (opc)] = { (cat), (opc), #name, print_instr_cat##cat }
+       /* category 0: */
+       OPC(0, OPC_NOP,          nop),
+       OPC(0, OPC_BR,           br),
+       OPC(0, OPC_JUMP,         jump),
+       OPC(0, OPC_CALL,         call),
+       OPC(0, OPC_RET,          ret),
+       OPC(0, OPC_KILL,         kill),
+       OPC(0, OPC_END,          end),
+       OPC(0, OPC_EMIT,         emit),
+       OPC(0, OPC_CUT,          cut),
+       OPC(0, OPC_CHMASK,       chmask),
+       OPC(0, OPC_CHSH,         chsh),
+       OPC(0, OPC_FLOW_REV,     flow_rev),
+
+       /* category 1: */
+       OPC(1, 0, ),
+
+       /* category 2: */
+       OPC(2, OPC_ADD_F,        add.f),
+       OPC(2, OPC_MIN_F,        min.f),
+       OPC(2, OPC_MAX_F,        max.f),
+       OPC(2, OPC_MUL_F,        mul.f),
+       OPC(2, OPC_SIGN_F,       sign.f),
+       OPC(2, OPC_CMPS_F,       cmps.f),
+       OPC(2, OPC_ABSNEG_F,     absneg.f),
+       OPC(2, OPC_CMPV_F,       cmpv.f),
+       OPC(2, OPC_FLOOR_F,      floor.f),
+       OPC(2, OPC_CEIL_F,       ceil.f),
+       OPC(2, OPC_RNDNE_F,      rndne.f),
+       OPC(2, OPC_RNDAZ_F,      rndaz.f),
+       OPC(2, OPC_TRUNC_F,      trunc.f),
+       OPC(2, OPC_ADD_U,        add.u),
+       OPC(2, OPC_ADD_S,        add.s),
+       OPC(2, OPC_SUB_U,        sub.u),
+       OPC(2, OPC_SUB_S,        sub.s),
+       OPC(2, OPC_CMPS_U,       cmps.u),
+       OPC(2, OPC_CMPS_S,       cmps.s),
+       OPC(2, OPC_MIN_U,        min.u),
+       OPC(2, OPC_MIN_S,        min.s),
+       OPC(2, OPC_MAX_U,        max.u),
+       OPC(2, OPC_MAX_S,        max.s),
+       OPC(2, OPC_ABSNEG_S,     absneg.s),
+       OPC(2, OPC_AND_B,        and.b),
+       OPC(2, OPC_OR_B,         or.b),
+       OPC(2, OPC_NOT_B,        not.b),
+       OPC(2, OPC_XOR_B,        xor.b),
+       OPC(2, OPC_CMPV_U,       cmpv.u),
+       OPC(2, OPC_CMPV_S,       cmpv.s),
+       OPC(2, OPC_MUL_U,        mul.u),
+       OPC(2, OPC_MUL_S,        mul.s),
+       OPC(2, OPC_MULL_U,       mull.u),
+       OPC(2, OPC_BFREV_B,      bfrev.b),
+       OPC(2, OPC_CLZ_S,        clz.s),
+       OPC(2, OPC_CLZ_B,        clz.b),
+       OPC(2, OPC_SHL_B,        shl.b),
+       OPC(2, OPC_SHR_B,        shr.b),
+       OPC(2, OPC_ASHR_B,       ashr.b),
+       OPC(2, OPC_BARY_F,       bary.f),
+       OPC(2, OPC_MGEN_B,       mgen.b),
+       OPC(2, OPC_GETBIT_B,     getbit.b),
+       OPC(2, OPC_SETRM,        setrm),
+       OPC(2, OPC_CBITS_B,      cbits.b),
+       OPC(2, OPC_SHB,          shb),
+       OPC(2, OPC_MSAD,         msad),
+
+       /* category 3: */
+       OPC(3, OPC_MAD_U16,      mad.u16),
+       OPC(3, OPC_MADSH_U16,    madsh.u16),
+       OPC(3, OPC_MAD_S16,      mad.s16),
+       OPC(3, OPC_MADSH_M16,    madsh.m16),
+       OPC(3, OPC_MAD_U24,      mad.u24),
+       OPC(3, OPC_MAD_S24,      mad.s24),
+       OPC(3, OPC_MAD_F16,      mad.f16),
+       OPC(3, OPC_MAD_F32,      mad.f32),
+       OPC(3, OPC_SEL_B16,      sel.b16),
+       OPC(3, OPC_SEL_B32,      sel.b32),
+       OPC(3, OPC_SEL_S16,      sel.s16),
+       OPC(3, OPC_SEL_S32,      sel.s32),
+       OPC(3, OPC_SEL_F16,      sel.f16),
+       OPC(3, OPC_SEL_F32,      sel.f32),
+       OPC(3, OPC_SAD_S16,      sad.s16),
+       OPC(3, OPC_SAD_S32,      sad.s32),
+
+       /* category 4: */
+       OPC(4, OPC_RCP,          rcp),
+       OPC(4, OPC_RSQ,          rsq),
+       OPC(4, OPC_LOG2,         log2),
+       OPC(4, OPC_EXP2,         exp2),
+       OPC(4, OPC_SIN,          sin),
+       OPC(4, OPC_COS,          cos),
+       OPC(4, OPC_SQRT,         sqrt),
+
+       /* category 5: */
+       OPC(5, OPC_ISAM,         isam),
+       OPC(5, OPC_ISAML,        isaml),
+       OPC(5, OPC_ISAMM,        isamm),
+       OPC(5, OPC_SAM,          sam),
+       OPC(5, OPC_SAMB,         samb),
+       OPC(5, OPC_SAML,         saml),
+       OPC(5, OPC_SAMGQ,        samgq),
+       OPC(5, OPC_GETLOD,       getlod),
+       OPC(5, OPC_CONV,         conv),
+       OPC(5, OPC_CONVM,        convm),
+       OPC(5, OPC_GETSIZE,      getsize),
+       OPC(5, OPC_GETBUF,       getbuf),
+       OPC(5, OPC_GETPOS,       getpos),
+       OPC(5, OPC_GETINFO,      getinfo),
+       OPC(5, OPC_DSX,          dsx),
+       OPC(5, OPC_DSY,          dsy),
+       OPC(5, OPC_GATHER4R,     gather4r),
+       OPC(5, OPC_GATHER4G,     gather4g),
+       OPC(5, OPC_GATHER4B,     gather4b),
+       OPC(5, OPC_GATHER4A,     gather4a),
+       OPC(5, OPC_SAMGP0,       samgp0),
+       OPC(5, OPC_SAMGP1,       samgp1),
+       OPC(5, OPC_SAMGP2,       samgp2),
+       OPC(5, OPC_SAMGP3,       samgp3),
+       OPC(5, OPC_DSXPP_1,      dsxpp.1),
+       OPC(5, OPC_DSYPP_1,      dsypp.1),
+       OPC(5, OPC_RGETPOS,      rgetpos),
+       OPC(5, OPC_RGETINFO,     rgetinfo),
+
+
+       /* category 6: */
+       OPC(6, OPC_LDG,          ldg),
+       OPC(6, OPC_LDL,          ldl),
+       OPC(6, OPC_LDP,          ldp),
+       OPC(6, OPC_STG,          stg),
+       OPC(6, OPC_STL,          stl),
+       OPC(6, OPC_STP,          stp),
+       OPC(6, OPC_STI,          sti),
+       OPC(6, OPC_G2L,          g2l),
+       OPC(6, OPC_L2G,          l2g),
+       OPC(6, OPC_PREFETCH,     prefetch),
+       OPC(6, OPC_LDLW,         ldlw),
+       OPC(6, OPC_STLW,         stlw),
+       OPC(6, OPC_RESFMT,       resfmt),
+       OPC(6, OPC_RESINFO,      resinf),
+       OPC(6, OPC_ATOMIC_ADD_L,     atomic.add.l),
+       OPC(6, OPC_ATOMIC_SUB_L,     atomic.sub.l),
+       OPC(6, OPC_ATOMIC_XCHG_L,    atomic.xchg.l),
+       OPC(6, OPC_ATOMIC_INC_L,     atomic.inc.l),
+       OPC(6, OPC_ATOMIC_DEC_L,     atomic.dec.l),
+       OPC(6, OPC_ATOMIC_CMPXCHG_L, atomic.cmpxchg.l),
+       OPC(6, OPC_ATOMIC_MIN_L,     atomic.min.l),
+       OPC(6, OPC_ATOMIC_MAX_L,     atomic.max.l),
+       OPC(6, OPC_ATOMIC_AND_L,     atomic.and.l),
+       OPC(6, OPC_ATOMIC_OR_L,      atomic.or.l),
+       OPC(6, OPC_ATOMIC_XOR_L,     atomic.xor.l),
+       OPC(6, OPC_LDGB_TYPED_4D,    ldgb.typed.4d),
+       OPC(6, OPC_STGB_4D_4,    stgb.4d.4),
+       OPC(6, OPC_STIB,         stib),
+       OPC(6, OPC_LDC_4,        ldc.4),
+       OPC(6, OPC_LDLV,         ldlv),
+
+
+#undef OPC
+};
+
+/* Look up the opcode table entry for an instruction. */
+#define GETINFO(instr) (&(opcs[((instr)->opc_cat << NOPC_BITS) | getopc(instr)]))
+
+/* Return the opcode field of the instruction's category.  Category 1 and
+ * categories that are not handled here yield 0.
+ */
+static uint32_t getopc(instr_t *instr)
+{
+       uint32_t opc = 0;
+
+       switch (instr->opc_cat) {
+       case 0:
+               opc = instr->cat0.opc;
+               break;
+       case 2:
+               opc = instr->cat2.opc;
+               break;
+       case 3:
+               opc = instr->cat3.opc;
+               break;
+       case 4:
+               opc = instr->cat4.opc;
+               break;
+       case 5:
+               opc = instr->cat5.opc;
+               break;
+       case 6:
+               opc = instr->cat6.opc;
+               break;
+       default:
+               break;
+       }
+
+       return opc;
+}
+
+/* Disassemble and print one instruction, i.e. the two dwords at `dwords`.
+ *
+ *   level - index into levels[] for the indentation prefix
+ *   n     - instruction number printed at the start of the line
+ *
+ * The line starts with the number and both raw dwords (high one first, each
+ * as 8 hex digits followed by a literal 'x'), then the flags, the mnemonic
+ * and the operands.  The destination register is committed to the register
+ * tracking afterwards.
+ */
+static void print_instr(uint32_t *dwords, int level, int n)
+{
+       instr_t *instr = (instr_t *)dwords;
+       uint32_t opc = getopc(instr);
+       const struct opc_info *info;
+
+       printf("%s%04d[%08xx_%08xx] ", levels[level], n, dwords[1], dwords[0]);
+
+#if 0
+       /* print unknown bits: */
+       if (debug & PRINT_RAW)
+               printf("[%08xx_%08xx] ", dwords[1] & 0x001ff800, dwords[0] & 0x00000000);
+
+       if (debug & PRINT_VERBOSE)
+               printf("%d,%02d ", instr->opc_cat, opc);
+#endif
+
+       /* NOTE: order flags are printed is a bit fugly.. but for now I
+        * try to match the order in llvm-a3xx disassembler for easy
+        * diff'ing..
+        */
+
+       if (instr->sync)
+               printf("(sy)");
+       if ((instr->opc_cat <= 4) && instr->ss)
+               printf("(ss)");
+       if (instr->jmp_tgt)
+               printf("(jp)");
+
+       /* the repeat count only applies to categories 0-4; it is kept in
+        * the file-level `repeat` for the register tracking helpers
+        */
+       repeat = 0;
+       if ((instr->opc_cat <= 4) && instr->repeat) {
+               repeat = instr->repeat;
+               printf("(rpt%d)", instr->repeat);
+       }
+
+       if (((2 <= instr->opc_cat) && (instr->opc_cat <= 4)) && instr->ul)
+               printf("(ul)");
+
+       info = GETINFO(instr);
+
+       if (info->name) {
+               printf("%s", info->name);
+               info->print(instr);
+       } else {
+               /* no table entry for this category/opcode pair */
+               printf("unknown(%d,%d)", instr->opc_cat, opc);
+       }
+
+       printf("\n");
+
+       process_reg_dst();
+}
+
+/* Disassemble a shader and print it, followed by the register statistics.
+ *
+ *   dwords     - the instruction stream, two dwords per instruction
+ *   sizedwords - length of the stream in dwords; expected to be even
+ *   level      - index into levels[] for the indentation prefix
+ *   type       - shader stage; not used by this function
+ *
+ * Always returns 0.
+ *
+ * An odd sizedwords trips the assert in debug builds.  When asserts are
+ * compiled out (NDEBUG) the incomplete trailing dword is skipped rather than
+ * reading one dword past the end of the buffer.
+ */
+int disasm_a3xx(uint32_t *dwords, int sizedwords, int level, enum shader_t type)
+{
+       int i;
+
+       (void)type;
+
+       assert((sizedwords % 2) == 0);
+
+       /* start from clean register tracking state */
+       memset(&regs, 0, sizeof(regs));
+
+       /* only complete two-dword instructions are decoded */
+       for (i = 0; (i + 1) < sizedwords; i += 2)
+               print_instr(&dwords[i], level, i/2);
+
+       print_reg_stats(level);
+
+       return 0;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.c b/src/gallium/drivers/freedreno/a3xx/fd3_blend.c
new file mode 100644 (file)
index 0000000..395228d
--- /dev/null
@@ -0,0 +1,87 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd3_blend.h"
+#include "fd3_context.h"
+#include "fd3_util.h"
+
+/* Create the a3xx blend state object for a gallium blend CSO.
+ *
+ * pctx: the pipe context (not used here)
+ * cso:  the gallium blend state to translate
+ *
+ * Returns a newly allocated fd3_blend_stateobj, or NULL if the state is
+ * not supported (logicop, independent blend) or allocation fails.
+ */
+void *
+fd3_blend_state_create(struct pipe_context *pctx,
+               const struct pipe_blend_state *cso)
+{
+       /* Independent blend is rejected below, and in that mode gallium
+        * only defines rt[0], which applies to every render target.  So
+        * all MRT slots are programmed from rt[0] rather than from the
+        * unspecified rt[1..n] entries:
+        */
+       const struct pipe_rt_blend_state *rt = &cso->rt[0];
+       struct fd3_blend_stateobj *so;
+       unsigned i;
+
+       if (cso->logicop_enable) {
+               DBG("Unsupported! logicop");
+               return NULL;
+       }
+
+       if (cso->independent_blend_enable) {
+               DBG("Unsupported! independent blend state");
+               return NULL;
+       }
+
+       so = CALLOC_STRUCT(fd3_blend_stateobj);
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+
+       for (i = 0; i < ARRAY_SIZE(so->rb_mrt); i++) {
+               so->rb_mrt[i].blend_control =
+                               A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(fd_blend_factor(rt->rgb_src_factor)) |
+                               A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(fd_blend_func(rt->rgb_func)) |
+                               A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(fd_blend_factor(rt->rgb_dst_factor)) |
+                               A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(fd_blend_factor(rt->alpha_src_factor)) |
+                               A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(fd_blend_func(rt->alpha_func)) |
+                               A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(fd_blend_factor(rt->alpha_dst_factor)) |
+                               A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE;
+
+               so->rb_mrt[i].control =
+                               A3XX_RB_MRT_CONTROL_ROP_CODE(12) |
+                               A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(rt->colormask);
+
+               if (rt->blend_enable)
+                       so->rb_mrt[i].control |=
+                                       A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE |
+                                       A3XX_RB_MRT_CONTROL_BLEND |
+                                       A3XX_RB_MRT_CONTROL_BLEND2;
+
+               if (cso->dither)
+                       so->rb_mrt[i].control |= A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS);
+       }
+
+       return so;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_blend.h b/src/gallium/drivers/freedreno/a3xx/fd3_blend.h
new file mode 100644 (file)
index 0000000..d269d74
--- /dev/null
@@ -0,0 +1,52 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_BLEND_H_
+#define FD3_BLEND_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+/* Driver-side blend state: the gallium blend CSO plus the per-render-target
+ * register values computed once by fd3_blend_state_create().
+ */
+struct fd3_blend_stateobj {
+       struct pipe_blend_state base;   /* copy of the gallium state */
+       struct {
+               uint32_t blend_control; /* A3XX_RB_MRT_BLEND_CONTROL_* bits */
+               uint32_t control;       /* A3XX_RB_MRT_CONTROL_* bits */
+       } rb_mrt[4];
+};
+
+/* Downcast a gallium blend state pointer to the driver's state object
+ * (the gallium state is the first member of the driver struct).
+ */
+static INLINE struct fd3_blend_stateobj *
+fd3_blend_stateobj(struct pipe_blend_state *blend)
+{
+       struct fd3_blend_stateobj *so = (struct fd3_blend_stateobj *)blend;
+
+       return so;
+}
+
+/* Build a fd3_blend_stateobj from a gallium blend CSO.  Returns NULL for
+ * unsupported state (logicop, independent blend) or on allocation failure.
+ */
+void * fd3_blend_state_create(struct pipe_context *pctx,
+               const struct pipe_blend_state *cso);
+
+#endif /* FD3_BLEND_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.c
new file mode 100644 (file)
index 0000000..d844cc0
--- /dev/null
@@ -0,0 +1,1240 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include <stdarg.h>
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_info.h"
+#include "tgsi/tgsi_strings.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+
+#include "fd3_compiler.h"
+#include "fd3_program.h"
+#include "fd3_util.h"
+
+#include "instr-a3xx.h"
+#include "ir-a3xx.h"
+
+/* ************************************************************************* */
+/* split this out or find some helper to use.. like main/bitset.h.. */
+
+/* number of scalar register ids tracked per precision */
+#define MAX_REG 256
+
+/* bitmask with one bit per register id: the first MAX_REG bits are for
+ * full precision registers, the second MAX_REG bits for half precision
+ */
+typedef uint8_t regmask_t[2 * MAX_REG / 8];
+
+/* Bit position of 'reg' within a regmask_t: half precision registers
+ * are offset by MAX_REG so they do not collide with full precision ones.
+ */
+static unsigned regmask_idx(struct ir3_register *reg)
+{
+       unsigned idx = reg->num;
+
+       assert(idx < MAX_REG);
+
+       return (reg->flags & IR3_REG_HALF) ? (idx + MAX_REG) : idx;
+}
+
+/* Mark 'reg' in the mask. */
+static void regmask_set(regmask_t regmask, struct ir3_register *reg)
+{
+       unsigned bit = regmask_idx(reg);
+       unsigned byte = bit / 8;
+
+       regmask[byte] |= 1 << (bit % 8);
+}
+
+/* Returns non-zero if 'reg' is marked in the mask. */
+static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg)
+{
+       unsigned bit = regmask_idx(reg);
+       unsigned byte = bit / 8;
+
+       return regmask[byte] & (1 << (bit % 8));
+}
+
+/* ************************************************************************* */
+
+/* Per-shader state carried through the TGSI -> a3xx translation. */
+struct fd3_compile_context {
+       const struct tgsi_token *tokens;
+       struct ir3_shader *ir;
+       struct fd3_shader_stateobj *so;
+
+       struct tgsi_parse_context parser;
+       unsigned type;
+
+       struct tgsi_shader_info info;
+
+       /* last input dst (for setting (ei) flag): */
+       struct ir3_register *last_input;
+
+       unsigned next_inloc;
+       unsigned num_internal_temps;
+
+       /* track registers which need to synchronize w/ "complex alu" cat3
+        * instruction pipeline:
+        */
+       regmask_t needs_ss;
+
+       /* track registers which need to synchronize with texture fetch
+        * pipeline:
+        */
+       regmask_t needs_sy;
+
+       /* inputs start at r0, outputs start after last input, and
+        * temporaries start after last output.
+        *
+        * We could be more clever, because this is not a hw restriction,
+        * but probably best just to implement an optimizing pass to
+        * reduce the # of registers used and get rid of redundant mov's
+        * (to output register).
+        */
+       unsigned base_reg[TGSI_FILE_COUNT];
+
+       /* idx/slot for last compiler generated immediate */
+       unsigned immediate_idx;
+
+       /* stack of branch instructions that start (potentially nested)
+        * branches, so that we can fix up the branch target on the
+        * corresponding END instruction
+        */
+       struct ir3_instruction *branch[16];
+       unsigned int branch_count;
+
+       /* used when dst is same as one of the src, to avoid overwriting a
+        * src element before the remaining scalar instructions that make
+        * up the vector operation
+        */
+       struct tgsi_dst_register tmp_dst;
+       struct tgsi_src_register tmp_src;
+};
+
+/* Reset the compile context for a new shader, scan the TGSI tokens to lay
+ * out the register files, and start the token parser.
+ *
+ * Returns the tgsi_parse_init() result (TGSI_PARSE_OK on success).
+ */
+static unsigned
+compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
+               const struct tgsi_token *tokens)
+{
+       unsigned num_const, num_imm, num_in, num_out;
+       unsigned status;
+
+       ctx->so = so;
+       ctx->ir = so->ir;
+       ctx->tokens = tokens;
+
+       ctx->branch_count = 0;
+       ctx->num_internal_temps = 0;
+       ctx->next_inloc = 8;
+       ctx->last_input = NULL;
+
+       memset(ctx->base_reg, 0, sizeof(ctx->base_reg));
+       memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
+       memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
+
+       tgsi_scan_shader(tokens, &ctx->info);
+
+       num_const = ctx->info.file_count[TGSI_FILE_CONSTANT];
+       num_imm   = ctx->info.file_count[TGSI_FILE_IMMEDIATE];
+       num_in    = ctx->info.file_count[TGSI_FILE_INPUT];
+       num_out   = ctx->info.file_count[TGSI_FILE_OUTPUT];
+
+       /* const file layout: constants first, then immediates */
+       ctx->base_reg[TGSI_FILE_CONSTANT]  = 0;
+       ctx->base_reg[TGSI_FILE_IMMEDIATE] = num_const;
+
+       /* register file layout: inputs, then outputs, then temporaries */
+       ctx->base_reg[TGSI_FILE_INPUT]     = 0;
+       ctx->base_reg[TGSI_FILE_OUTPUT]    = num_in;
+       ctx->base_reg[TGSI_FILE_TEMPORARY] = num_in + num_out;
+
+       so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
+
+       /* NOTE(review): this counts constants as well as immediates, while
+        * get_immediate() uses the value as a scalar index into
+        * so->immediates[] -- confirm the constant count belongs here.
+        */
+       ctx->immediate_idx = 4 * (num_const + num_imm);
+
+       status = tgsi_parse_init(&ctx->parser, tokens);
+       if (status == TGSI_PARSE_OK)
+               ctx->type = ctx->parser.FullHeader.Processor.Processor;
+
+       return status;
+}
+
+/* Release the TGSI parser set up by compile_init(). */
+static void
+compile_free(struct fd3_compile_context *ctx)
+{
+       tgsi_parse_free(&ctx->parser);
+}
+
+/* Describes how a TGSI instruction gets translated. */
+struct instr_translater {
+       /* handler which emits the native instruction(s): */
+       void (*fxn)(const struct instr_translater *t,
+                       struct fd3_compile_context *ctx,
+                       struct tgsi_full_instruction *inst);
+       opc_t opc;
+       opc_t hopc;    /* opc to use for half_precision mode, if different */
+       /* handler specific argument, for example trans_dotp() uses it as
+        * the number of components and trans_samp() as the TGSI opcode:
+        */
+       unsigned arg;
+};
+
+/* Append the destination register for component 'chan' of the TGSI dst
+ * to 'instr'.  Only the OUTPUT and TEMPORARY files are supported.
+ */
+static struct ir3_register *
+add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+               const struct tgsi_dst_register *dst, unsigned chan)
+{
+       unsigned reg_flags = 0, reg_num = 0;
+
+       if ((dst->File == TGSI_FILE_OUTPUT) ||
+                       (dst->File == TGSI_FILE_TEMPORARY)) {
+               /* translate the TGSI index into the flat register space: */
+               reg_num = dst->Index + ctx->base_reg[dst->File];
+       } else {
+               DBG("unsupported dst register file: %s",
+                       tgsi_file_name(dst->File));
+               assert(0);
+       }
+
+       if (ctx->so->half_precision)
+               reg_flags |= IR3_REG_HALF;
+
+       return ir3_reg_create(instr, regid(reg_num, chan), reg_flags);
+}
+
+/* Append a source register for component 'chan' of the TGSI src to
+ * 'instr'.  If the register is still pending on the needs_ss/needs_sy
+ * masks, the matching sync flag is set on 'instr' and that mask is
+ * cleared.
+ */
+static struct ir3_register *
+add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+               const struct tgsi_src_register *src, unsigned chan)
+{
+       unsigned reg_flags = 0, reg_num = 0;
+       struct ir3_register *src_reg;
+
+       switch (src->File) {
+       case TGSI_FILE_IMMEDIATE:
+               /* TODO if possible, use actual immediate instead of const.. but
+                * TGSI has vec4 immediates, we can only embed scalar (of limited
+                * size, depending on instruction..)
+                */
+       case TGSI_FILE_CONSTANT:
+               reg_flags |= IR3_REG_CONST;
+               /* fallthrough */
+       case TGSI_FILE_INPUT:
+       case TGSI_FILE_TEMPORARY:
+               reg_num = src->Index + ctx->base_reg[src->File];
+               break;
+       default:
+               DBG("unsupported src register file: %s",
+                       tgsi_file_name(src->File));
+               assert(0);
+               break;
+       }
+
+       /* source modifiers: */
+       if (src->Absolute)
+               reg_flags |= IR3_REG_ABS;
+       if (src->Negate)
+               reg_flags |= IR3_REG_NEGATE;
+       if (ctx->so->half_precision)
+               reg_flags |= IR3_REG_HALF;
+
+       src_reg = ir3_reg_create(instr, regid(reg_num, chan), reg_flags);
+
+       /* one sync flag covers everything outstanding, so the whole mask
+        * is reset once the flag has been set:
+        */
+       if (regmask_get(ctx->needs_ss, src_reg)) {
+               instr->flags |= IR3_INSTR_SS;
+               memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
+       }
+
+       if (regmask_get(ctx->needs_sy, src_reg)) {
+               instr->flags |= IR3_INSTR_SY;
+               memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
+       }
+
+       return src_reg;
+}
+
+/* Fill in 'src' so that it reads back the register described by 'dst',
+ * with an identity swizzle and no modifiers.
+ */
+static void
+src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
+{
+       /* same register as dst: */
+       src->File      = dst->File;
+       src->Index     = dst->Index;
+       src->Indirect  = dst->Indirect;
+       src->Dimension = dst->Dimension;
+
+       /* no modifiers: */
+       src->Negate    = 0;
+       src->Absolute  = 0;
+
+       /* identity swizzle: */
+       src->SwizzleW  = TGSI_SWIZZLE_W;
+       src->SwizzleZ  = TGSI_SWIZZLE_Z;
+       src->SwizzleY  = TGSI_SWIZZLE_Y;
+       src->SwizzleX  = TGSI_SWIZZLE_X;
+}
+
+/* Allocate a compiler-internal temporary, for use by the sequence of
+ * instructions generated from a single TGSI op.  'tmp_dst' and 'tmp_src'
+ * are filled in to write and read that temporary respectively.
+ */
+static void
+get_internal_temp(struct fd3_compile_context *ctx,
+               struct tgsi_dst_register *tmp_dst,
+               struct tgsi_src_register *tmp_src)
+{
+       /* internal temps are numbered after the shader's own temporaries: */
+       int slot = ctx->num_internal_temps++;
+
+       tmp_dst->Index     = ctx->info.file_count[TGSI_FILE_TEMPORARY] + slot;
+       tmp_dst->File      = TGSI_FILE_TEMPORARY;
+       tmp_dst->Dimension = 0;
+       tmp_dst->Indirect  = 0;
+       tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
+
+       src_from_dst(tmp_src, tmp_dst);
+}
+
+/* Make 'reg' refer to an immediate holding 'val', reusing an existing
+ * slot if one already holds the value (or its negation, in which case the
+ * Negate modifier is set), otherwise appending a new immediate.
+ *
+ * ctx: compile context, whose so->immediates table is searched/extended
+ * reg: filled in as a TGSI_FILE_IMMEDIATE source, swizzled so that every
+ *      channel reads the selected scalar
+ * val: bit pattern of the float to load (callers pass fui(...))
+ */
+static void
+get_immediate(struct fd3_compile_context *ctx,
+               struct tgsi_src_register *reg, uint32_t val)
+{
+       unsigned neg, swiz, idx, i;
+       /* actually maps 1:1 currently.. not sure if that is safe to rely on: */
+       static const unsigned swiz2tgsi[] = {
+                       TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+       };
+       /* 'val' is a float bit pattern and the Negate modifier negates the
+        * float, so the "negated" twin differs only in the sign bit.  An
+        * integer negation (-val) would match an unrelated float, for
+        * example -4.0 for 1.0:
+        */
+       const uint32_t negated = val ^ 0x80000000;
+
+       for (i = 0; i < ctx->immediate_idx; i++) {
+               swiz = i % 4;
+               idx  = i / 4;
+
+               if (ctx->so->immediates[idx].val[swiz] == val) {
+                       neg = 0;
+                       break;
+               }
+
+               if (ctx->so->immediates[idx].val[swiz] == negated) {
+                       neg = 1;
+                       break;
+               }
+       }
+
+       if (i == ctx->immediate_idx) {
+               /* need to generate a new immediate: */
+               swiz = i % 4;
+               idx  = i / 4;
+               neg  = 0;
+               ctx->so->immediates[idx].val[swiz] = val;
+               ctx->so->immediates_count = idx + 1;
+               ctx->immediate_idx++;
+       }
+
+       reg->File      = TGSI_FILE_IMMEDIATE;
+       reg->Indirect  = 0;
+       reg->Dimension = 0;
+       reg->Index     = idx;
+       reg->Absolute  = 0;
+       reg->Negate    = neg;
+       /* replicate the chosen scalar to every channel: */
+       reg->SwizzleX  = swiz2tgsi[swiz];
+       reg->SwizzleY  = swiz2tgsi[swiz];
+       reg->SwizzleZ  = swiz2tgsi[swiz];
+       reg->SwizzleW  = swiz2tgsi[swiz];
+}
+
+/* Float type matching the shader's precision mode. */
+static type_t
+get_type(struct fd3_compile_context *ctx)
+{
+       if (ctx->so->half_precision)
+               return TYPE_F16;
+       return TYPE_F32;
+}
+
+/* Returns the swizzle selected for channel 'chan' (0..3) of 'src'. */
+static unsigned
+src_swiz(struct tgsi_src_register *src, int chan)
+{
+       if (chan == 0)
+               return src->SwizzleX;
+       if (chan == 1)
+               return src->SwizzleY;
+       if (chan == 2)
+               return src->SwizzleZ;
+       if (chan == 3)
+               return src->SwizzleW;
+
+       /* not a valid channel: */
+       assert(0);
+       return 0;
+}
+
+/* Emit a vec4 move from 'src' to 'dst' as four scalar instructions: a
+ * mov for each channel enabled in the dst writemask and a nop for each
+ * disabled one.
+ */
+static void
+create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
+               struct tgsi_src_register *src)
+{
+       type_t mov_type = get_type(ctx);
+       unsigned chan;
+
+       for (chan = 0; chan < 4; chan++) {
+               struct ir3_instruction *mov;
+
+               if (!(dst->WriteMask & (1 << chan))) {
+                       /* channel not written, keep the slot with a nop: */
+                       ir3_instr_create(ctx->ir, 0, OPC_NOP);
+                       continue;
+               }
+
+               /* move to destination: */
+               mov = ir3_instr_create(ctx->ir, 1, 0);
+               mov->cat1.src_type = mov_type;
+               mov->cat1.dst_type = mov_type;
+               add_dst_reg(ctx, mov, dst, chan);
+               add_src_reg(ctx, mov, src, src_swiz(src, chan));
+       }
+}
+
+/* Pick the destination to use while translating 'inst': normally the
+ * instruction's own dst, but if that register is also one of the
+ * sources, an internal temporary (ctx->tmp_dst, with the same writemask)
+ * is returned instead.  Pair with put_dst().
+ */
+static struct tgsi_dst_register *
+get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
+{
+       struct tgsi_dst_register *orig = &inst->Dst[0].Register;
+       unsigned n;
+
+       for (n = 0; n < inst->Instruction.NumSrcRegs; n++) {
+               struct tgsi_src_register *src = &inst->Src[n].Register;
+
+               if (src->File != orig->File)
+                       continue;
+               if (src->Index != orig->Index)
+                       continue;
+
+               /* dst aliases this src, redirect to a temporary: */
+               get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src);
+               ctx->tmp_dst.WriteMask = orig->WriteMask;
+               return &ctx->tmp_dst;
+       }
+
+       return orig;
+}
+
+/* Counterpart of get_dst(): if 'dst' was redirected to the internal
+ * temporary, move the result back into the instruction's real dst.
+ */
+static void
+put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
+               struct tgsi_dst_register *dst)
+{
+       struct tgsi_dst_register *orig = &inst->Dst[0].Register;
+
+       if (dst == orig)
+               return;
+
+       create_mov(ctx, orig, &ctx->tmp_src);
+}
+
+/* helper to generate the necessary repeat and/or additional instructions
+ * to turn a scalar instruction into a vector operation:
+ *
+ * instr: already created instruction with no registers added yet; it is
+ *        used for the first enabled channel and cloned for the others
+ * dst:   destination, its WriteMask selects which channels are emitted
+ * nsrcs: number of sources, followed by 'nsrcs' pairs of variadic args:
+ *        a 'struct tgsi_src_register *' and 'unsigned' extra register
+ *        flags to OR into that source
+ */
+static void
+vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
+               struct tgsi_dst_register *dst, int nsrcs, ...)
+{
+       va_list ap;
+       int i, j, n = 0;
+
+       /* add all registers with component 0 first, the components get
+        * patched per channel below:
+        */
+       add_dst_reg(ctx, instr, dst, 0);
+
+       va_start(ap, nsrcs);
+       for (j = 0; j < nsrcs; j++) {
+               struct tgsi_src_register *src =
+                               va_arg(ap, struct tgsi_src_register *);
+               unsigned flags = va_arg(ap, unsigned);
+               add_src_reg(ctx, instr, src, 0)->flags |= flags;
+       }
+       va_end(ap);
+
+       for (i = 0; i < 4; i++) {
+               if (dst->WriteMask & (1 << i)) {
+                       struct ir3_instruction *cur;
+
+                       if (n++ == 0) {
+                               cur = instr;
+                       } else {
+                               /* clones do not inherit the sync/jump-target
+                                * flags of the first instruction:
+                                */
+                               cur = ir3_instr_clone(instr);
+                               cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP);
+                       }
+
+                       /* fix-up dst register component: */
+                       cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i);
+
+                       /* fix-up src register component: */
+                       va_start(ap, nsrcs);
+                       for (j = 0; j < nsrcs; j++) {
+                               struct tgsi_src_register *src =
+                                               va_arg(ap, struct tgsi_src_register *);
+                               /* skip over the flags argument: */
+                               (void)va_arg(ap, unsigned);
+                               cur->regs[j+1]->num =
+                                       regid(cur->regs[j+1]->num >> 2,
+                                               src_swiz(src, i));
+                       }
+                       va_end(ap);
+               }
+       }
+
+       /* pad w/ nop's.. at least until we are clever enough to
+        * figure out if we really need to..
+        */
+       for (; n < 4; n++) {
+               ir3_instr_create(instr->shader, 0, OPC_NOP);
+       }
+}
+
+/*
+ * Handlers for TGSI instructions which do not have a 1:1 mapping to
+ * native instructions:
+ */
+
+/* Dot product of the first t->arg components of src0 and src1, built
+ * from one mul.f followed by (t->arg - 1) mad's accumulating into tmp.x.
+ * The scalar result is then replicated into every channel enabled in
+ * the dst writemask.
+ */
+static void
+trans_dotp(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct ir3_instruction *instr;
+       struct tgsi_dst_register tmp_dst;
+       struct tgsi_src_register tmp_src;
+       struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
+       struct tgsi_src_register *src0 = &inst->Src[0].Register;
+       struct tgsi_src_register *src1 = &inst->Src[1].Register;
+       unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW };
+       unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW };
+       opc_t opc_mad    = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32;
+       unsigned i;
+
+       assert(inst->Instruction.NumSrcRegs == 2);
+       assert(inst->Instruction.NumDstRegs == 1);
+
+       get_internal_temp(ctx, &tmp_dst, &tmp_src);
+
+       /* Blob compiler never seems to use a const in src1 position for
+        * mad.*, although there does seem (according to disassembler
+        * hidden in libllvm-a3xx.so) to be a bit to indicate that src1
+        * is a const.  Not sure if this is a hw bug, or simply that the
+        * disassembler lies.
+        */
+       if ((src1->File == TGSI_FILE_IMMEDIATE) ||
+                       (src1->File == TGSI_FILE_CONSTANT)) {
+
+               /* the mov to tmp unswizzles src1, so now we have tmp.xyzw:
+                */
+               for (i = 0; i < 4; i++)
+                       swiz1[i] = i;
+
+               /* the first mul.f will clobber tmp.x, but that is ok
+                * because after that point we no longer need tmp.x:
+                */
+               create_mov(ctx, &tmp_dst, src1);
+               src1 = &tmp_src;
+       }
+
+       instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+       add_dst_reg(ctx, instr, &tmp_dst, 0);
+       add_src_reg(ctx, instr, src0, swiz0[0]);
+       add_src_reg(ctx, instr, src1, swiz1[0]);
+
+       for (i = 1; i < t->arg; i++) {
+               ir3_instr_create(ctx->ir, 0, OPC_NOP);
+
+               instr = ir3_instr_create(ctx->ir, 3, opc_mad);
+               add_dst_reg(ctx, instr, &tmp_dst, 0);
+               add_src_reg(ctx, instr, src0, swiz0[i]);
+               add_src_reg(ctx, instr, src1, swiz1[i]);
+               add_src_reg(ctx, instr, &tmp_src, 0);
+       }
+
+       ir3_instr_create(ctx->ir, 0, OPC_NOP);
+
+       /* pad out to multiple of 4 scalar instructions: */
+       for (i = 2 * t->arg; i % 4; i++) {
+               ir3_instr_create(ctx->ir, 0, OPC_NOP);
+       }
+
+       /* The result only exists in tmp.x, so every dst channel must read
+        * tmp.x.  With the identity swizzle dst.yzw would instead receive
+        * whatever happens to be in tmp.yzw.  All uses of tmp_src above
+        * pass an explicit component, so changing the swizzle here only
+        * affects the final mov:
+        */
+       tmp_src.SwizzleX = TGSI_SWIZZLE_X;
+       tmp_src.SwizzleY = TGSI_SWIZZLE_X;
+       tmp_src.SwizzleZ = TGSI_SWIZZLE_X;
+       tmp_src.SwizzleW = TGSI_SWIZZLE_X;
+
+       create_mov(ctx, dst, &tmp_src);
+}
+
+/* LRP(a,b,c) = (a * b) + ((1 - a) * c)
+ *
+ * Expanded into four vectorized ops using two internal temporaries and
+ * a 1.0 immediate.
+ */
+static void
+trans_lrp(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct tgsi_src_register *a = &inst->Src[0].Register;
+       struct tgsi_src_register *b = &inst->Src[1].Register;
+       struct tgsi_src_register *c = &inst->Src[2].Register;
+       struct tgsi_dst_register prod_dst, rest_dst;
+       struct tgsi_src_register prod_src, rest_src;
+       struct tgsi_src_register one;
+       struct ir3_instruction *op;
+
+       get_internal_temp(ctx, &prod_dst, &prod_src);
+       get_internal_temp(ctx, &rest_dst, &rest_src);
+
+       get_immediate(ctx, &one, fui(1.0));
+
+       /* prod = a * b */
+       op = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+       vectorize(ctx, op, &prod_dst, 2, a, 0, b, 0);
+
+       /* rest = 1 + (-a) */
+       op = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+       vectorize(ctx, op, &rest_dst, 2, &one, 0, a, IR3_REG_NEGATE);
+
+       /* rest = rest * c */
+       op = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+       vectorize(ctx, op, &rest_dst, 2, &rest_src, 0, c, 0);
+
+       /* dst = prod + rest */
+       op = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+       vectorize(ctx, op, &inst->Dst[0].Register, 2,
+                       &prod_src, 0, &rest_src, 0);
+}
+
+/* FRC(x) = x - FLOOR(x)
+ *
+ * tmp = floor(x) is computed into an internal temporary, then
+ * dst = x + (-tmp).
+ */
+static void
+trans_frac(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct ir3_instruction *instr;
+       struct tgsi_dst_register tmp_dst;
+       struct tgsi_src_register tmp_src;
+       /* The final instruction reads the original src while writing dst
+        * one channel at a time.  If dst is the same register as src (with
+        * a non-identity swizzle) an earlier channel's write would clobber
+        * a value a later channel still has to read, so we need get_dst():
+        */
+       struct tgsi_dst_register *dst = get_dst(ctx, inst);
+
+       get_internal_temp(ctx, &tmp_dst, &tmp_src);
+
+       /* tmp = FLOOR(x) */
+       instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F);
+       vectorize(ctx, instr, &tmp_dst, 1,
+                       &inst->Src[0].Register, 0);
+
+       /* dst = x - tmp */
+       instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+       vectorize(ctx, instr, dst, 2,
+                       &inst->Src[0].Register, 0,
+                       &tmp_src, IR3_REG_NEGATE);
+
+       /* if dst was redirected, move the result to the real dst: */
+       put_dst(ctx, inst, dst);
+}
+
+/* POW(a,b) = EXP2(b * LOG2(a))
+ *
+ * Scalar operation on src0.x / src1.x, computed in tmp.x; the result is
+ * then replicated into every channel enabled in the dst writemask.
+ */
+static void
+trans_pow(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct ir3_instruction *instr;
+       struct ir3_register *r;
+       struct tgsi_dst_register tmp_dst;
+       struct tgsi_src_register tmp_src;
+       struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
+       struct tgsi_src_register *src0 = &inst->Src[0].Register;
+       struct tgsi_src_register *src1 = &inst->Src[1].Register;
+
+       assert(inst->Instruction.NumSrcRegs == 2);
+       assert(inst->Instruction.NumDstRegs == 1);
+
+       get_internal_temp(ctx, &tmp_dst, &tmp_src);
+
+       /* log2 Rtmp, Rsrc0 */
+       ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+       instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
+       r = add_dst_reg(ctx, instr, &tmp_dst, 0);
+       add_src_reg(ctx, instr, src0, src0->SwizzleX);
+       regmask_set(ctx->needs_ss, r);
+
+       /* mul.f Rtmp, Rtmp, Rsrc1 */
+       instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
+       add_dst_reg(ctx, instr, &tmp_dst, 0);
+       add_src_reg(ctx, instr, &tmp_src, 0);
+       add_src_reg(ctx, instr, src1, src1->SwizzleX);
+
+       /* blob compiler seems to ensure there are at least 6 instructions
+        * between a "simple" (non-cat4) instruction and a dependent cat4..
+        * probably we need to handle this in some other places too.
+        */
+       ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
+
+       /* exp2 Rdst, Rtmp */
+       instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
+       r = add_dst_reg(ctx, instr, &tmp_dst, 0);
+       add_src_reg(ctx, instr, &tmp_src, 0);
+       regmask_set(ctx->needs_ss, r);
+
+       /* Only tmp.x holds the result, so every dst channel must read
+        * tmp.x.  With the identity swizzle dst.yzw would be loaded from
+        * tmp.yzw, which were never written.  All uses of tmp_src above
+        * pass an explicit component, so this only affects the final mov:
+        */
+       tmp_src.SwizzleX = TGSI_SWIZZLE_X;
+       tmp_src.SwizzleY = TGSI_SWIZZLE_X;
+       tmp_src.SwizzleZ = TGSI_SWIZZLE_X;
+       tmp_src.SwizzleW = TGSI_SWIZZLE_X;
+
+       create_mov(ctx, dst, &tmp_src);
+}
+
+/* texture fetch/sample instructions:
+ *
+ * t->arg selects TEX vs TXP and t->opc is the native sample opcode.  The
+ * coordinate components are shuffled into an internal temporary first
+ * when they are not already in consecutive components.
+ */
+static void
+trans_samp(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       /* For each consecutive slot the sample instruction reads, the
+        * coord channel that has to end up there; -1 ends the list.
+        *
+        * These are static tables rather than compound literals: a
+        * compound literal created inside the switch body only lives
+        * until the end of that block, which would leave 'order'
+        * dangling by the time it is used below.
+        */
+       static const int8_t order_tex_2d[4] = { 0,  1, -1, -1 };
+       static const int8_t order_tex_3d[4] = { 0,  1,  2, -1 };
+       static const int8_t order_txp_2d[4] = { 0,  1,  3, -1 };
+       static const int8_t order_txp_3d[4] = { 0,  1,  2,  3 };
+       struct ir3_register *r;
+       struct ir3_instruction *instr;
+       struct tgsi_dst_register tmp_dst;
+       struct tgsi_src_register tmp_src;
+       struct tgsi_src_register *coord = &inst->Src[0].Register;
+       struct tgsi_src_register *samp  = &inst->Src[1].Register;
+       unsigned tex = inst->Texture.Texture;
+       const int8_t *order;
+       unsigned i, j, flags = 0;
+
+       switch (t->arg) {
+       case TGSI_OPCODE_TEX:
+               order = (tex == TGSI_TEXTURE_2D) ? order_tex_2d : order_tex_3d;
+               break;
+       case TGSI_OPCODE_TXP:
+               order = (tex == TGSI_TEXTURE_2D) ? order_txp_2d : order_txp_3d;
+               flags |= IR3_INSTR_P;
+               break;
+       default:
+               /* not reachable with a valid translation table; bail out
+                * instead of using 'order' uninitialized when asserts are
+                * compiled out:
+                */
+               assert(0);
+               return;
+       }
+
+       if (tex == TGSI_TEXTURE_3D)
+               flags |= IR3_INSTR_3D;
+
+       assert(inst->Instruction.NumSrcRegs == 2);
+       assert(inst->Instruction.NumDstRegs == 1);
+
+       /* The texture sample instructions need to coord in successive
+        * registers/components (ie. src.xy but not src.yx).  And TXP
+        * needs the .w component in .z for 2D..  so in some cases we
+        * might need to emit some mov instructions to shuffle things
+        * around:
+        *
+        * Slot i is read from component (first component + i), and must
+        * hold coord channel order[i], so no shuffle is needed only if
+        * the swizzle of channel order[i] is exactly that component.
+        */
+       for (i = 1; (i < 4) && (order[i] >= 0); i++) {
+               if (src_swiz(coord, order[i]) != (src_swiz(coord, 0) + i)) {
+                       type_t type_mov = get_type(ctx);
+
+                       /* need to move things around: */
+                       get_internal_temp(ctx, &tmp_dst, &tmp_src);
+
+                       for (j = 0; (j < 4) && (order[j] >= 0); j++) {
+                               instr = ir3_instr_create(ctx->ir, 1, 0);
+                               instr->cat1.src_type = type_mov;
+                               instr->cat1.dst_type = type_mov;
+                               add_dst_reg(ctx, instr, &tmp_dst, j);
+                               add_src_reg(ctx, instr, coord,
+                                               src_swiz(coord, order[j]));
+                       }
+
+                       coord = &tmp_src;
+
+                       if (j < 4)
+                               ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1;
+
+                       break;
+               }
+       }
+
+       instr = ir3_instr_create(ctx->ir, 5, t->opc);
+       instr->cat5.type = get_type(ctx);
+       instr->cat5.samp = samp->Index;
+       instr->cat5.tex  = samp->Index;
+       instr->flags |= flags;
+
+       r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0);
+       r->wrmask = inst->Dst[0].Register.WriteMask;
+
+       add_src_reg(ctx, instr, coord, coord->SwizzleX);
+
+       /* consumers of the result must wait for the texture fetch: */
+       regmask_set(ctx->needs_sy, r);
+}
+
+/* CMP(a,b,c) = (a < 0) ? b : c
+ *
+ * Emitted as three native instructions: a cmps.f.ge of src0 against 0.0
+ * into an internal temporary, an add.s of -1 on that temporary, and a
+ * sel.{f32,f16} that selects between src2 and src1 using the temporary.
+ */
+static void
+trans_cmp(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct ir3_instruction *instr;
+       struct tgsi_dst_register tmp_dst;
+       struct tgsi_src_register tmp_src;
+       struct tgsi_src_register constval;
+       /* final instruction uses original src1 and src2, so we need get_dst() */
+       struct tgsi_dst_register *dst = get_dst(ctx, inst);
+
+       get_internal_temp(ctx, &tmp_dst, &tmp_src);
+
+       /* cmps.f.ge tmp, src0, 0.0 */
+       instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+       instr->cat2.condition = IR3_COND_GE;
+       get_immediate(ctx, &constval, fui(0.0));
+       vectorize(ctx, instr, &tmp_dst, 2,
+                       &inst->Src[0].Register, 0,
+                       &constval, 0);
+
+       /* add.s tmp, tmp, -1 */
+       instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
+       instr->repeat = 3;
+       add_dst_reg(ctx, instr, &tmp_dst, 0);
+       add_src_reg(ctx, instr, &tmp_src, 0);
+       ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
+
+       /* sel.{f32,f16} dst, src2, tmp, src1
+        *
+        * Write through the register returned by get_dst() (as the other
+        * translaters do) rather than inst->Dst[0] directly, so that the
+        * put_dst() below finalizes the register that was actually written.
+        */
+       instr = ir3_instr_create(ctx->ir, 3, ctx->so->half_precision ?
+                       OPC_SEL_F16 : OPC_SEL_F32);
+       vectorize(ctx, instr, dst, 3,
+                       &inst->Src[2].Register, 0,
+                       &tmp_src, 0,
+                       &inst->Src[1].Register, 0);
+
+       put_dst(ctx, inst, dst);
+}
+
+/*
+ * Conditional / Flow control
+ */
+
+/* Return the index of 'instr' within the shader's instruction list, or ~0
+ * if it is not present.
+ */
+static unsigned
+find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
+{
+       unsigned idx = 0;
+
+       while (idx < ctx->ir->instrs_count) {
+               if (ctx->ir->instrs[idx] == instr) {
+                       return idx;
+               }
+               idx++;
+       }
+
+       return ~0;
+}
+
+/* Remember a branch/jump instruction whose target is not yet known; it is
+ * patched later by pop_branch().
+ */
+static void
+push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
+{
+       /* store at the current top of the stack, then grow the stack: */
+       ctx->branch[ctx->branch_count] = instr;
+       ctx->branch_count++;
+}
+
+/* Emit the landing point for the innermost open branch (a nop carrying the
+ * (jp) flag), pop that branch from the stack and patch its immediate with
+ * the distance from the branch to the landing point.
+ */
+static void
+pop_branch(struct fd3_compile_context *ctx)
+{
+       struct ir3_instruction *instr;
+       unsigned idx;
+
+       /* an ELSE/ENDIF without a matching IF would underflow the stack: */
+       assert(ctx->branch_count > 0);
+
+       /* if we were clever enough, we'd patch this up after the fact,
+        * and set (jp) flag on whatever the next instruction was, rather
+        * than inserting an extra nop..
+        */
+       instr = ir3_instr_create(ctx->ir, 0, OPC_NOP);
+       instr->flags |= IR3_INSTR_JP;
+
+       /* pop the branch instruction from the stack and fix up branch target: */
+       instr = ctx->branch[--ctx->branch_count];
+       idx = find_instruction(ctx, instr);
+
+       /* find_instruction() returns ~0 when the instruction is unknown,
+        * which would turn the offset below into garbage:
+        */
+       assert(idx != ~0u);
+
+       instr->cat0.immed = ctx->ir->instrs_count - idx - 1;
+}
+
+/* We probably don't really want to translate if/else/endif into branches..
+ * the blob driver evaluates both legs of the if and then uses the sel
+ * instruction to pick which sides of the branch to "keep".. but figuring
+ * that out will take somewhat more compiler smarts.  So hopefully branches
+ * don't kill performance too badly.
+ */
+static void
+trans_if(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct tgsi_src_register *cond = &inst->Src[0].Register;
+       struct tgsi_src_register zero;
+       struct ir3_instruction *cmp;
+       struct ir3_instruction *br;
+
+       get_immediate(ctx, &zero, fui(0.0));
+
+       /* compare the condition's first swizzled component against 0.0,
+        * writing the result to p0.x:
+        */
+       cmp = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
+       ir3_reg_create(cmp, regid(REG_P0, 0), 0);
+       add_src_reg(ctx, cmp, &zero, zero.SwizzleX);
+       add_src_reg(ctx, cmp, cond, cond->SwizzleX);
+       cmp->cat2.condition = IR3_COND_EQ;
+
+       /* the branch target is filled in by pop_branch() at ELSE/ENDIF: */
+       br = ir3_instr_create(ctx->ir, 0, OPC_BR);
+       push_branch(ctx, br);
+}
+
+static void
+trans_else(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       /* the "if" leg ends with a jump over the "else" leg; its target is
+        * only known once ENDIF is reached:
+        */
+       struct ir3_instruction *jump = ir3_instr_create(ctx->ir, 0, OPC_JUMP);
+
+       /* resolve the branch pushed by IF so it lands just after the jump,
+        * then leave the jump pending in its place:
+        */
+       pop_branch(ctx);
+       push_branch(ctx, jump);
+}
+
+/* ENDIF: resolve the innermost pending branch (pushed by IF, or by ELSE
+ * if there was one) so that it targets this point.
+ */
+static void
+trans_endif(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       pop_branch(ctx);
+}
+
+/*
+ * Handlers for TGSI instructions which do have 1:1 mapping to native
+ * instructions:
+ */
+
+/* Category 0: instructions with neither sources nor destination; the
+ * native opcode comes straight from the translater table entry.
+ */
+static void
+instr_cat0(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct ir3_shader *ir = ctx->ir;
+
+       assert(inst->Instruction.NumDstRegs == 0);
+       assert(inst->Instruction.NumSrcRegs == 0);
+
+       ir3_instr_create(ir, 0, t->opc);
+}
+
+/* Category 1 (mov): copy src to dst, falling back to an add.f with 0.0
+ * when the source carries a negate modifier.
+ */
+static void
+instr_cat1(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct tgsi_src_register *src = &inst->Src[0].Register;
+       struct tgsi_dst_register *dst = get_dst(ctx, inst);
+
+       assert(inst->Instruction.NumDstRegs == 1);
+       assert(inst->Instruction.NumSrcRegs == 1);
+
+       if (!src->Negate) {
+               /* create_mov() generates vector sequence, so no vectorize() */
+               create_mov(ctx, dst, src);
+       } else {
+               /* mov instructions can't handle a negate on src.
+                *
+                * Since right now we uniformly use either TYPE_F16 or
+                * TYPE_F32, and don't utilize the conversion possibilities
+                * of mov instructions, we can get away with substituting an
+                * add.f which can handle negate.  Might need to revisit this
+                * in the future if we start supporting widening/narrowing or
+                * conversion to/from integer..
+                */
+               struct tgsi_src_register zero;
+               struct ir3_instruction *add;
+
+               add = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
+               get_immediate(ctx, &zero, fui(0.0));
+               vectorize(ctx, add, dst, 2, src, 0, &zero, 0);
+       }
+
+       put_dst(ctx, inst, dst);
+}
+
+/* Category 2 (ALU): translate a TGSI instruction that maps 1:1 onto a
+ * native cat2 opcode.  t->opc selects the opcode and t->arg the condition
+ * code (only meaningful for the compare opcodes).  Some cat2 opcodes take
+ * a single source, the rest take two.
+ */
+static void
+instr_cat2(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct tgsi_dst_register *dst = get_dst(ctx, inst);
+       struct ir3_instruction *instr;
+
+       assert(inst->Instruction.NumDstRegs == 1);
+
+       instr = ir3_instr_create(ctx->ir, 2, t->opc);
+       instr->cat2.condition = t->arg;
+
+       switch (t->opc) {
+       case OPC_ABSNEG_F:
+       case OPC_ABSNEG_S:
+       case OPC_CLZ_B:
+       case OPC_CLZ_S:
+       case OPC_SIGN_F:
+       case OPC_FLOOR_F:
+       case OPC_CEIL_F:
+       case OPC_RNDNE_F:
+       case OPC_RNDAZ_F:
+       case OPC_TRUNC_F:
+       case OPC_NOT_B:
+       case OPC_BFREV_B:
+       case OPC_SETRM:
+       case OPC_CBITS_B:
+               /* these only have one src reg (eg. TGSI FLR -> floor.f), so
+                * the source count must be checked per case rather than
+                * unconditionally requiring two:
+                */
+               assert(inst->Instruction.NumSrcRegs == 1);
+               vectorize(ctx, instr, dst, 1,
+                               &inst->Src[0].Register, 0);
+               break;
+       default:
+               assert(inst->Instruction.NumSrcRegs == 2);
+               vectorize(ctx, instr, dst, 2,
+                               &inst->Src[0].Register, 0,
+                               &inst->Src[1].Register, 0);
+               break;
+       }
+
+       put_dst(ctx, inst, dst);
+}
+
+/* Category 3 (three-source ALU): picks t->hopc in half-precision mode and
+ * t->opc otherwise.
+ */
+static void
+instr_cat3(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct tgsi_dst_register *dst = get_dst(ctx, inst);
+       struct tgsi_src_register *middle = &inst->Src[1].Register;
+       struct tgsi_dst_register scratch_dst;
+       struct tgsi_src_register scratch_src;
+       struct ir3_instruction *instr;
+
+       assert(inst->Instruction.NumDstRegs == 1);
+       assert(inst->Instruction.NumSrcRegs == 3);
+
+       /* Blob compiler never seems to use a const in src1 position..
+        * although there does seem (according to disassembler hidden
+        * in libllvm-a3xx.so) to be a bit to indicate that src1 is a
+        * const.  Not sure if this is a hw bug, or simply that the
+        * disassembler lies.  Either way, copy such a source into an
+        * internal temporary first:
+        */
+       switch (middle->File) {
+       case TGSI_FILE_CONSTANT:
+       case TGSI_FILE_IMMEDIATE:
+               get_internal_temp(ctx, &scratch_dst, &scratch_src);
+               create_mov(ctx, &scratch_dst, middle);
+               middle = &scratch_src;
+               break;
+       default:
+               break;
+       }
+
+       instr = ir3_instr_create(ctx->ir, 3,
+                       ctx->so->half_precision ? t->hopc : t->opc);
+       vectorize(ctx, instr, dst, 3,
+                       &inst->Src[0].Register, 0,
+                       middle, 0,
+                       &inst->Src[2].Register, 0);
+
+       put_dst(ctx, inst, dst);
+}
+
+/* Category 4 (single-source instructions such as rcp/rsq/sqrt/exp2/log2):
+ * a nop with repeat=5 is emitted ahead of the instruction, and the result
+ * register is recorded in the needs_ss mask.
+ */
+static void
+instr_cat4(const struct instr_translater *t,
+               struct fd3_compile_context *ctx,
+               struct tgsi_full_instruction *inst)
+{
+       struct tgsi_dst_register *dst = get_dst(ctx, inst);
+       struct ir3_instruction *nop;
+       struct ir3_instruction *instr;
+
+       assert(inst->Instruction.NumDstRegs == 1);
+       assert(inst->Instruction.NumSrcRegs == 1);
+
+       nop = ir3_instr_create(ctx->ir, 0, OPC_NOP);
+       nop->repeat = 5;
+
+       instr = ir3_instr_create(ctx->ir, 4, t->opc);
+       vectorize(ctx, instr, dst, 1,
+                       &inst->Src[0].Register, 0);
+
+       /* regs[0] is the destination register of the new instruction: */
+       regmask_set(ctx->needs_ss, instr->regs[0]);
+
+       put_dst(ctx, inst, dst);
+}
+
+/* Dispatch table, indexed by TGSI opcode.  Each populated entry names the
+ * handler (.fxn) plus whatever the handler needs: the native opcode
+ * (.opc), its half-precision variant (.hopc) and/or an extra argument
+ * (.arg).  Opcodes without an entry have a NULL .fxn and are reported as
+ * unknown by compile_instructions().
+ */
+static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
+#define INSTR(n, f, ...) \
+       [TGSI_OPCODE_ ## n] = { .fxn = (f), ##__VA_ARGS__ }
+
+       INSTR(MOV,          instr_cat1),
+       INSTR(RCP,          instr_cat4, .opc = OPC_RCP),
+       INSTR(RSQ,          instr_cat4, .opc = OPC_RSQ),
+       INSTR(SQRT,         instr_cat4, .opc = OPC_SQRT),
+       INSTR(MUL,          instr_cat2, .opc = OPC_MUL_F),
+       INSTR(ADD,          instr_cat2, .opc = OPC_ADD_F),
+       INSTR(DP2,          trans_dotp, .arg = 2),
+       INSTR(DP3,          trans_dotp, .arg = 3),
+       INSTR(DP4,          trans_dotp, .arg = 4),
+       INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
+       INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
+       INSTR(SLT,          instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT),
+       INSTR(SGE,          instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE),
+       INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
+       INSTR(LRP,          trans_lrp),
+       INSTR(FRC,          trans_frac),
+       INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
+       INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
+       INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
+       INSTR(POW,          trans_pow),
+       /* COS and SIN must map to their own native opcode, not each other's: */
+       INSTR(COS,          instr_cat4, .opc = OPC_COS),
+       INSTR(SIN,          instr_cat4, .opc = OPC_SIN),
+       INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
+       INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
+       INSTR(CMP,          trans_cmp),
+       INSTR(IF,           trans_if),
+       INSTR(ELSE,         trans_else),
+       INSTR(ENDIF,        trans_endif),
+       INSTR(END,          instr_cat0, .opc = OPC_END),
+};
+
+/* Handle a TGSI input declaration: record one entry in so->inputs[] per
+ * declared register and, for fragment shaders, emit the bary.f instruction
+ * that loads it.
+ */
+static void
+decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
+{
+       struct fd3_shader_stateobj *so = ctx->so;
+       unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
+       unsigned flags = so->half_precision ? IR3_REG_HALF : 0;
+       unsigned idx;
+
+       for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
+               /* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
+               unsigned ncomp = 4;
+               unsigned slot = so->inputs_count++;
+               unsigned r = regid(idx + base, 0);
+               struct ir3_instruction *bary;
+
+               DBG("decl in -> r%d", idx + base);   // XXX
+
+               so->inputs[slot].compmask = (1 << ncomp) - 1;
+               so->inputs[slot].regid = r;
+               so->inputs[slot].inloc = ctx->next_inloc;
+               ctx->next_inloc += ncomp;
+
+               so->total_in += ncomp;
+
+               /* only frag shaders need the corresponding bary instr: */
+               if (ctx->type != TGSI_PROCESSOR_FRAGMENT)
+                       continue;
+
+               bary = ir3_instr_create(ctx->ir, 2, OPC_BARY_F);
+               bary->repeat = ncomp - 1;
+
+               /* dst register (remembered so the final one can be flagged
+                * once all declarations have been seen):
+                */
+               ctx->last_input = ir3_reg_create(bary, r, flags);
+
+               /* input position: */
+               ir3_reg_create(bary, 0, IR3_REG_IMMED | IR3_REG_R)->iim_val =
+                               so->inputs[slot].inloc - 8;
+
+               /* input base (always r0.x): */
+               ir3_reg_create(bary, regid(0,0), 0);
+       }
+}
+
+/* Handle a TGSI output declaration by recording which register holds each
+ * output.  Vertex shaders accept POSITION, PSIZE, COLOR and GENERIC
+ * semantics; fragment shaders only COLOR.  Anything else is logged and
+ * asserts.
+ */
+static void
+decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
+{
+       struct fd3_shader_stateobj *so = ctx->so;
+       unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
+       unsigned name = decl->Semantic.Name;
+       unsigned i;
+
+       assert(decl->Declaration.Semantic);  // TODO is this ever not true?
+
+       DBG("decl out[%d] -> r%d", name, decl->Range.First + base);   // XXX
+
+       if (ctx->type == TGSI_PROCESSOR_VERTEX) {
+               switch (name) {
+               case TGSI_SEMANTIC_POSITION:
+                       so->pos_regid = regid(decl->Range.First + base, 0);
+                       break;
+               case TGSI_SEMANTIC_PSIZE:
+                       so->psize_regid = regid(decl->Range.First + base, 0);
+                       break;
+               case TGSI_SEMANTIC_COLOR:
+               case TGSI_SEMANTIC_GENERIC:
+                       /* one so->outputs[] entry per declared register: */
+                       for (i = decl->Range.First; i <= decl->Range.Last; i++)
+                               so->outputs[so->outputs_count++].regid = regid(i + base, 0);
+                       break;
+               default:
+                       DBG("unknown VS semantic name: %s",
+                                       tgsi_semantic_names[name]);
+                       assert(0);
+               }
+       } else {
+               switch (name) {
+               case TGSI_SEMANTIC_COLOR:
+                       so->color_regid = regid(decl->Range.First + base, 0);
+                       break;
+               default:
+                       /* this is the non-vertex (fragment) path, so say so: */
+                       DBG("unknown FS semantic name: %s",
+                                       tgsi_semantic_names[name]);
+                       assert(0);
+               }
+       }
+}
+
+/* Handle a TGSI sampler declaration: only the number of samplers is
+ * tracked.
+ */
+static void
+decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
+{
+       struct fd3_shader_stateobj *so = ctx->so;
+
+       so->samplers_count++;
+}
+
+/* Walk the TGSI token stream held by ctx->parser and dispatch each token:
+ * declarations go to decl_out()/decl_in()/decl_samp(), immediates are
+ * copied into ctx->so->immediates[], and instructions are handed to the
+ * matching entry in translaters[].  Afterwards the first generated
+ * instruction gets the IR3_INSTR_SS and IR3_INSTR_SY flags, and the last
+ * input register recorded by decl_in() (if any) gets IR3_REG_EI.
+ */
+static void
+compile_instructions(struct fd3_compile_context *ctx)
+{
+       struct ir3_shader *ir = ctx->ir;
+
+       while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
+               tgsi_parse_token(&ctx->parser);
+
+               switch (ctx->parser.FullToken.Token.Type) {
+               case TGSI_TOKEN_TYPE_DECLARATION: {
+                       struct tgsi_full_declaration *decl =
+                                       &ctx->parser.FullToken.FullDeclaration;
+                       /* declarations of any other register file are ignored here: */
+                       if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
+                               decl_out(ctx, decl);
+                       } else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+                               decl_in(ctx, decl);
+                       } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
+                               decl_samp(ctx, decl);
+                       }
+                       break;
+               }
+               case TGSI_TOKEN_TYPE_IMMEDIATE: {
+                       /* TODO: if we know the immediate is small enough, and only
+                        * used with instructions that can embed an immediate, we
+                        * can skip this:
+                        */
+                       struct tgsi_full_immediate *imm =
+                                       &ctx->parser.FullToken.FullImmediate;
+                       unsigned n = ctx->so->immediates_count++;
+                       /* NOTE(review): copies a fixed 16 bytes and does not check n
+                        * against the capacity of immediates[] - confirm both.
+                        */
+                       memcpy(ctx->so->immediates[n].val, imm->u, 16);
+                       break;
+               }
+               case TGSI_TOKEN_TYPE_INSTRUCTION: {
+                       struct tgsi_full_instruction *inst =
+                                       &ctx->parser.FullToken.FullInstruction;
+                       unsigned opc = inst->Instruction.Opcode;
+                       const struct instr_translater *t = &translaters[opc];
+
+                       if (t->fxn) {
+                               t->fxn(t, ctx, inst);
+                               /* reset the internal temp count after each
+                                * translated instruction:
+                                */
+                               ctx->num_internal_temps = 0;
+                       } else {
+                               /* no handler in the table: dump the shader and bail */
+                               debug_printf("unknown TGSI opc: %s\n",
+                                               tgsi_get_opcode_name(opc));
+                               tgsi_dump(ctx->tokens, 0);
+                               assert(0);
+                       }
+
+                       break;
+               }
+               default:
+                       break;
+               }
+       }
+
+       if (ir->instrs_count > 0)
+               ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
+
+       if (ctx->last_input)
+               ctx->last_input->flags |= IR3_REG_EI;
+}
+
+/* Compile a TGSI token stream into so->ir.  'so' must not already carry
+ * an ir.  Returns 0 on success, or -1 if compile_init() does not report
+ * TGSI_PARSE_OK.
+ */
+int
+fd3_compile_shader(struct fd3_shader_stateobj *so,
+               const struct tgsi_token *tokens)
+{
+       struct fd3_compile_context ctx;
+
+       assert(!so->ir);
+
+       so->ir = ir3_shader_create();
+
+       /* start every special output at regid(63,0); decl_out() overwrites
+        * the ones the shader actually declares:
+        */
+       so->color_regid = regid(63,0);
+       so->pos_regid   = regid(63,0);
+       so->psize_regid = regid(63,0);
+
+       if (compile_init(&ctx, so, tokens) == TGSI_PARSE_OK) {
+               compile_instructions(&ctx);
+               compile_free(&ctx);
+               return 0;
+       }
+
+       return -1;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h b/src/gallium/drivers/freedreno/a3xx/fd3_compiler.h
new file mode 100644 (file)
index 0000000..1116f59
--- /dev/null
@@ -0,0 +1,38 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_COMPILER_H_
+#define FD3_COMPILER_H_
+
+#include "fd3_program.h"
+#include "fd3_util.h"
+
+/* Compile the TGSI token stream 'tokens' into so->ir (which must be unset
+ * on entry).  Returns 0 on success and -1 on failure.
+ */
+int fd3_compile_shader(struct fd3_shader_stateobj *so,
+               const struct tgsi_token *tokens);
+
+#endif /* FD3_COMPILER_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.c b/src/gallium/drivers/freedreno/a3xx/fd3_context.c
new file mode 100644 (file)
index 0000000..3ae9b29
--- /dev/null
@@ -0,0 +1,118 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "fd3_context.h"
+#include "fd3_blend.h"
+#include "fd3_draw.h"
+#include "fd3_emit.h"
+#include "fd3_gmem.h"
+#include "fd3_program.h"
+#include "fd3_rasterizer.h"
+#include "fd3_texture.h"
+#include "fd3_zsa.h"
+
+/* pipe_context::destroy hook: tear down the a3xx program state, then hand
+ * off to the generic fd_context_destroy().
+ *
+ * NOTE(review): the bos (vs_pvt_mem, fs_pvt_mem, vsc_size_mem,
+ * vsc_pipe_mem) and vertex buffers (solid_vbuf, blit_texcoord_vbuf)
+ * allocated in fd3_context_create() are not released here - confirm
+ * whether something else frees them, otherwise they leak per context.
+ */
+static void
+fd3_context_destroy(struct pipe_context *pctx)
+{
+       fd3_prog_fini(pctx);
+       fd_context_destroy(pctx);
+}
+
+/* TODO we could combine a few of these small buffers (solid_vbuf,
+ * blit_texcoord_vbuf, and vsc_size_mem, into a single buffer and
+ * save a tiny bit of memory
+ */
+
+/* Create the immutable vertex buffer holding the six floats below and
+ * upload them.  Returns the new resource, or NULL if the buffer could not
+ * be created.
+ */
+static struct pipe_resource *
+create_solid_vertexbuf(struct pipe_context *pctx)
+{
+       /* presumably two (x, y, z) corner vertices - TODO confirm layout */
+       static const float init_shader_const[] = {
+                       -1.000000, +1.000000, +1.000000,
+                       +1.000000, -1.000000, +1.000000,
+       };
+       struct pipe_resource *prsc = pipe_buffer_create(pctx->screen,
+                       PIPE_BIND_CUSTOM, PIPE_USAGE_IMMUTABLE, sizeof(init_shader_const));
+
+       /* don't try to upload into a buffer that failed to allocate: */
+       if (!prsc)
+               return NULL;
+
+       pipe_buffer_write(pctx, prsc, 0,
+                       sizeof(init_shader_const), init_shader_const);
+       return prsc;
+}
+
+/* Create the 16 byte dynamic vertex buffer used for blit texcoords; the
+ * contents are not initialized here.
+ */
+static struct pipe_resource *
+create_blit_texcoord_vertexbuf(struct pipe_context *pctx)
+{
+       return pipe_buffer_create(pctx->screen,
+                       PIPE_BIND_CUSTOM, PIPE_USAGE_DYNAMIC, 16);
+}
+
+/* Create an a3xx pipe_context: allocate the fd3_context, hook up the a3xx
+ * state-object constructors and per-module init functions, run the generic
+ * fd_context_init(), then allocate the bos and vertex buffers the a3xx
+ * backend needs.  Returns NULL if the context allocation or
+ * fd_context_init() fails.
+ */
+struct pipe_context *
+fd3_context_create(struct pipe_screen *pscreen, void *priv)
+{
+       struct fd_screen *screen = fd_screen(pscreen);
+       struct fd3_context *fd3_ctx = CALLOC_STRUCT(fd3_context);
+       struct pipe_context *pctx;
+
+       if (!fd3_ctx)
+               return NULL;
+
+       pctx = &fd3_ctx->base.base;
+
+       fd3_ctx->base.screen = fd_screen(pscreen);
+
+       /* a3xx specific hooks are installed before the generic init runs: */
+       pctx->destroy = fd3_context_destroy;
+       pctx->create_blend_state = fd3_blend_state_create;
+       pctx->create_rasterizer_state = fd3_rasterizer_state_create;
+       pctx->create_depth_stencil_alpha_state = fd3_zsa_state_create;
+
+       fd3_draw_init(pctx);
+       fd3_gmem_init(pctx);
+       fd3_texture_init(pctx);
+       fd3_prog_init(pctx);
+
+       /* NOTE(review): on failure fd3_ctx is not freed here - presumably
+        * fd_context_init() cleans up via pctx->destroy; confirm.
+        */
+       pctx = fd_context_init(&fd3_ctx->base, pscreen, priv);
+       if (!pctx)
+               return NULL;
+
+       /* NOTE(review): none of the allocations below are checked for failure */
+       fd3_ctx->vs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+       fd3_ctx->fs_pvt_mem = fd_bo_new(screen->dev, 0x2000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+       fd3_ctx->vsc_size_mem = fd_bo_new(screen->dev, 0x1000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+       fd3_ctx->vsc_pipe_mem = fd_bo_new(screen->dev, 0x40000,
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+
+       fd3_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
+       fd3_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
+
+       return pctx;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
new file mode 100644 (file)
index 0000000..3829ab5
--- /dev/null
@@ -0,0 +1,68 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_CONTEXT_H_
+#define FD3_CONTEXT_H_
+
+#include "freedreno_drmif.h"
+
+#include "freedreno_context.h"
+
+/* a3xx specific context state, layered on top of the generic fd_context. */
+struct fd3_context {
+       /* generic context; kept as the first member because fd3_context()
+        * casts a struct fd_context pointer straight to this type:
+        */
+       struct fd_context base;
+
+       /* bos for the vertex and fragment shader stages (presumably their
+        * private memory, going by the names - TODO confirm):
+        */
+       struct fd_bo *vs_pvt_mem, *fs_pvt_mem;
+
+       /* not sure how big this actually needs to be.. the blob driver
+        * combines it w/ the solid_vertexbuf, we could probably do the
+        * same to save an extra bo allocation..
+        */
+       struct fd_bo *vsc_size_mem;
+
+       struct fd_bo *vsc_pipe_mem;
+
+       /* vertex buf used for clear/gmem->mem vertices, and mem->gmem
+        * vertices:
+        */
+       struct pipe_resource *solid_vbuf;
+
+       /* vertex buf used for mem->gmem tex coords:
+        */
+       struct pipe_resource *blit_texcoord_vbuf;
+};
+
+/* Downcast a generic fd_context to the a3xx context that embeds it.  This
+ * is a plain pointer cast, so it relies on 'base' being the first member
+ * of struct fd3_context.
+ */
+static INLINE struct fd3_context *
+fd3_context(struct fd_context *ctx)
+{
+       return (struct fd3_context *)ctx;
+}
+
+/* Create a pipe_context for an a3xx device on 'pscreen'; returns NULL on
+ * failure.
+ */
+struct pipe_context *
+fd3_context_create(struct pipe_screen *pscreen, void *priv);
+
+#endif /* FD3_CONTEXT_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
new file mode 100644 (file)
index 0000000..953d45e
--- /dev/null
@@ -0,0 +1,236 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_prim.h"
+
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd3_draw.h"
+#include "fd3_context.h"
+#include "fd3_emit.h"
+#include "fd3_program.h"
+#include "fd3_util.h"
+#include "fd3_zsa.h"
+
+
+/* Flatten the bound vertex elements and their vertex buffers into an
+ * array of fd3_vertex_buf and emit it to the ring.  Does nothing when no
+ * vertex elements are bound.
+ */
+static void
+emit_vertexbufs(struct fd_context *ctx)
+{
+       struct fd_vertex_stateobj *vtx = ctx->vtx;
+       struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
+       struct fd3_vertex_buf bufs[PIPE_MAX_ATTRIBS];
+       unsigned idx;
+
+       if (vtx->num_elements == 0)
+               return;
+
+       for (idx = 0; idx < vtx->num_elements; idx++) {
+               struct pipe_vertex_element *elem = &vtx->pipe[idx];
+               struct pipe_vertex_buffer *vb =
+                               &vertexbuf->vb[elem->vertex_buffer_index];
+               struct fd3_vertex_buf *out = &bufs[idx];
+
+               /* the element's offset is relative to the start of its buffer: */
+               out->offset = vb->buffer_offset + elem->src_offset;
+               out->stride = vb->stride;
+               out->prsc   = vb->buffer;
+               out->format = elem->src_format;
+       }
+
+       fd3_emit_vertex_bufs(ctx->ring, &ctx->prog, bufs, vtx->num_elements);
+}
+
+/* Emit everything needed for one draw: dirty state, vertex buffers (only
+ * when FD_DIRTY_VTXBUF is set), the per-draw index range / offset /
+ * restart-index registers, and finally the draw itself via
+ * fd_draw_emit().
+ */
+static void
+fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
+{
+       struct fd_ringbuffer *ring = ctx->ring;
+       /* snapshot of the dirty bits, used for both checks below: */
+       unsigned dirty = ctx->dirty;
+
+       fd3_emit_state(ctx, dirty);
+
+       if (dirty & FD_DIRTY_VTXBUF)
+               emit_vertexbufs(ctx);
+
+       OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
+       OUT_RING(ring, 0x0000000b);                  /* PC_VERTEX_REUSE_BLOCK_CNTL */
+
+       OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+       OUT_RING(ring, 0x0000000);
+
+       /* four consecutive registers starting at VFD_INDEX_MIN: */
+       OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+       OUT_RING(ring, info->min_index);        /* VFD_INDEX_MIN */
+       OUT_RING(ring, info->max_index + 1);    /* VFD_INDEX_MAX */
+       OUT_RING(ring, info->start_instance);   /* VFD_INSTANCEID_OFFSET */
+       OUT_RING(ring, info->start);            /* VFD_INDEX_OFFSET */
+
+       /* 0xffffffff is written when primitive restart is not in use: */
+       OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
+       OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
+                       info->restart_index : 0xffffffff);
+
+       fd_draw_emit(ctx, info);
+}
+
+/* Clear the buffers selected by 'buffers' (PIPE_CLEAR_* bits) by drawing a
+ * DI_PT_RECTLIST primitive with the context's solid-fill program
+ * (ctx->solid_prog / fd3_ctx->solid_vbuf).
+ *
+ * color:   clear color, uploaded as 4 dwords of fragment shader constants
+ * depth:   clear depth, programmed as the viewport z scale
+ * stencil: clear stencil value, programmed as the stencil reference
+ *
+ * Depth, stencil and per-MRT component-enable state is overwritten directly
+ * in the ring here rather than going through the CSO state objects.
+ */
+static void
+fd3_clear(struct fd_context *ctx, unsigned buffers,
+               const union pipe_color_union *color, double depth, unsigned stencil)
+{
+       struct fd3_context *fd3_ctx = fd3_context(ctx);
+       struct fd_ringbuffer *ring = ctx->ring;
+       unsigned ce, i;
+
+       /* emit generic state now: */
+       fd3_emit_state(ctx, ctx->dirty & (FD_DIRTY_VIEWPORT |
+                       FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR));
+
+       OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
+       OUT_RING(ring, 0X3c0000ff);
+
+       fd3_emit_rbrc_draw_state(ring,
+                       A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+
+       /* depth: when clearing, always pass and write Z, with the viewport z
+        * transform set to offset 0.0 / scale 'depth'; otherwise ZFUNC is
+        * FUNC_NEVER with neither Z_ENABLE nor Z_WRITE_ENABLE set:
+        */
+       if (buffers & PIPE_CLEAR_DEPTH) {
+               OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+               OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE |
+                               A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
+                               A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_ALWAYS));
+
+               OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_ZOFFSET, 2);
+               OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+               OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(depth));
+       } else {
+               OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+               OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+       }
+
+       /* stencil: when clearing, front-face test always passes and replaces
+        * the stored value with the reference ('stencil') on z-pass.
+        *
+        * NOTE(review): the second dword of this packet is built with the
+        * front-face STENCILREFMASK_* macros plus a raw 0xff000000, whereas
+        * the else branch uses the STENCILREFMASK_BF_* macros -- confirm the
+        * two register layouts match and what the top byte means.
+        */
+       if (buffers & PIPE_CLEAR_STENCIL) {
+               OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
+               OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(stencil) |
+                               A3XX_RB_STENCILREFMASK_STENCILMASK(stencil) |
+                               A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+               OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
+                               A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
+                               0xff000000 | // XXX ???
+                               A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+
+               OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+               OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+                               A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
+                               A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+                               A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_REPLACE) |
+                               A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+                               A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+                               A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+                               A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+                               A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+       } else {
+               OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
+               OUT_RING(ring, A3XX_RB_STENCILREFMASK_STENCILREF(0) |
+                               A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
+                               A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0));
+               OUT_RING(ring, A3XX_RB_STENCILREFMASK_BF_STENCILREF(0) |
+                               A3XX_RB_STENCILREFMASK_BF_STENCILMASK(0) |
+                               A3XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0));
+
+               OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+               OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+                               A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+                               A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+                               A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+                               A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+                               A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+                               A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+                               A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+       }
+
+       /* color component-enable mask applied to all four MRTs below: all
+        * components when clearing color, none otherwise:
+        */
+       if (buffers & PIPE_CLEAR_COLOR) {
+               ce = 0xf;
+       } else {
+               ce = 0x0;
+       }
+
+       for (i = 0; i < 4; i++) {
+               OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+               OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(12) |
+                               A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_ALWAYS) |
+                               A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(ce));
+
+               OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
+               OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
+                               A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+                               A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
+                               A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
+                               A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+                               A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO) |
+                               A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE);
+       }
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+       OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
+
+       /* solid-fill program with a single vec3 float vertex buffer: */
+       fd3_program_emit(ring, &ctx->solid_prog);
+
+       fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
+                       { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
+               }, 1);
+
+       /* clear color goes to fragment shader const register 0 (4 dwords): */
+       fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
+
+       OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+       OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
+                       A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+                       A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+                       A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+       OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+       OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
+       OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
+       OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
+       OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
+       OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
+       OUT_RING(ring, 0xffffffff);   /* PC_RESTART_INDEX */
+
+       OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+       OUT_RING(ring, PERFCOUNTER_STOP);
+
+       /* draw the rect (auto-index, 2 indices), then wait for idle: */
+       OUT_PKT3(ring, CP_DRAW_INDX, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+                       INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+       OUT_RING(ring, 2);                                      /* NumIndices */
+
+       OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+       OUT_RING(ring, 0x00000000);
+}
+
+/* Install the a3xx draw and clear entry points (fd3_draw / fd3_clear) into
+ * the freedreno context wrapped by pctx.
+ */
+void
+fd3_draw_init(struct pipe_context *pctx)
+{
+       struct fd_context *fd_ctx = fd_context(pctx);
+
+       fd_ctx->clear = fd3_clear;
+       fd_ctx->draw = fd3_draw;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.h b/src/gallium/drivers/freedreno/a3xx/fd3_draw.h
new file mode 100644 (file)
index 0000000..09b1243
--- /dev/null
@@ -0,0 +1,38 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_DRAW_H_
+#define FD3_DRAW_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_draw.h"
+
+/* Hook the a3xx draw and clear implementations into the context behind
+ * pctx.
+ */
+void fd3_draw_init(struct pipe_context *pctx);
+
+#endif /* FD3_DRAW_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
new file mode 100644 (file)
index 0000000..1d048b0
--- /dev/null
@@ -0,0 +1,581 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_helpers.h"
+#include "util/u_format.h"
+
+#include "freedreno_resource.h"
+
+#include "fd3_emit.h"
+#include "fd3_blend.h"
+#include "fd3_context.h"
+#include "fd3_program.h"
+#include "fd3_rasterizer.h"
+#include "fd3_texture.h"
+#include "fd3_util.h"
+#include "fd3_zsa.h"
+
+/* Emit a UCHE cache invalidate followed by a CP_LOAD_STATE packet that loads
+ * shader constants into state block 'sb'.
+ *
+ * regid:          base const register
+ * offset:         byte offset applied to dwords (direct case), or the offset
+ *                 handed to the reloc for prsc's bo (indirect case)
+ * sizedwords:     size of const value buffer
+ * prsc or dwords: buffer containing constant values; with a non-NULL prsc
+ *                 the packet is SS_INDIRECT and carries no inline payload,
+ *                 otherwise it is SS_DIRECT and sizedwords dwords are copied
+ *                 into the ring
+ */
+void
+fd3_emit_constant(struct fd_ringbuffer *ring,
+               enum adreno_state_block sb,
+               uint32_t regid, uint32_t offset, uint32_t sizedwords,
+               const uint32_t *dwords, struct pipe_resource *prsc)
+{
+       /* only the direct form carries its values inline: */
+       const uint32_t payload = prsc ? 0 : sizedwords;
+       const enum adreno_state_src src = prsc ? SS_INDIRECT : SS_DIRECT;
+       const uint32_t *vals;
+       uint32_t n;
+
+       /* we have this sometimes, not others.. perhaps we could be clever
+        * and figure out actually when we need to invalidate cache:
+        */
+       OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+       OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
+       OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
+                       A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
+                       A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
+
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + payload);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid/2) |
+                       CP_LOAD_STATE_0_STATE_SRC(src) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                       CP_LOAD_STATE_0_NUM_UNIT(sizedwords/2));
+
+       if (prsc) {
+               /* indirect: point the packet at the resource's bo, done */
+               struct fd_bo *bo = fd_resource(prsc)->bo;
+               OUT_RELOC(ring, bo, offset,
+                               CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+               return;
+       }
+
+       /* direct: header dword, then the values starting 'offset' bytes in */
+       OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
+                       CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
+       vals = (const uint32_t *)((const uint8_t *)dwords + offset);
+       for (n = 0; n < payload; n++) {
+               OUT_RING(ring, vals[n]);
+       }
+}
+
+/* Emit the user constant buffers bound in 'constbuf', followed by the
+ * shader's immediates, into state block 'sb'.
+ *
+ * User buffers are packed back to back starting at const register 0, in
+ * ascending buffer-index order.  'shader' may be NULL (fd3_emit_state()
+ * passes NULL when the corresponding shader is not dirty): in that case no
+ * immediates are emitted and the constlen limit cannot be applied.
+ */
+static void
+emit_constants(struct fd_ringbuffer *ring,
+               enum adreno_state_block sb,
+               struct fd_constbuf_stateobj *constbuf,
+               struct fd3_shader_stateobj *shader)
+{
+       uint32_t enabled_mask = constbuf->enabled_mask;
+       uint32_t base = 0;
+       unsigned i;
+
+       // XXX TODO only emit dirty consts.. but we need to keep track if
+       // they are clobbered by a clear, gmem2mem, or mem2gmem..
+       constbuf->dirty_mask = enabled_mask;
+
+       /* emit user constants: */
+       while (enabled_mask) {
+               unsigned index = ffs(enabled_mask) - 1;
+               struct pipe_constant_buffer *cb = &constbuf->cb[index];
+               unsigned size = align(cb->buffer_size, 4) / 4; /* size in dwords */
+
+               // I expect that size should be a multiple of vec4's:
+               assert(size == align(size, 4));
+
+               /* gallium could have const-buffer still bound, even though the
+                * shader is not using it.  Writing consts above constlen (or
+                * rather, HLSQ_{VS,FS}_CONTROL_REG.CONSTLENGTH) will cause a
+                * hang.
+                *
+                * shader is NULL when the caller only wants the user consts
+                * re-emitted, so the limit can only be checked when we
+                * actually have a shader to read it from:
+                */
+               if (shader && ((base / 4) >= shader->constlen))
+                       break;
+
+               /* unsigned constant: index can be 31, and shifting a signed
+                * 1 into the sign bit is undefined:
+                */
+               if (constbuf->dirty_mask & (1u << index)) {
+                       fd3_emit_constant(ring, sb, base,
+                                       cb->buffer_offset, size,
+                                       cb->user_buffer, cb->buffer);
+                       constbuf->dirty_mask &= ~(1u << index);
+               }
+
+               base += size;
+               enabled_mask &= ~(1u << index);
+       }
+
+       /* emit shader immediates: */
+       if (shader) {
+               for (i = 0; i < shader->immediates_count; i++) {
+                       fd3_emit_constant(ring, sb,
+                                       4 * (shader->first_immediate + i),
+                                       0, 4, shader->immediates[i].val, NULL);
+               }
+       }
+}
+
+/* Texture state layout used by the CP_LOAD_STATE packets and the
+ * TPL1_TP_{VS,FS}_TEX_OFFSET registers in this file: vertex-stage
+ * sampler/texture entries start at VERT_TEX_OFF, fragment-stage entries at
+ * FRAG_TEX_OFF, and each texture gets BASETABLE_SZ mipaddr slots.
+ */
+#define VERT_TEX_OFF    0
+#define FRAG_TEX_OFF    16
+#define BASETABLE_SZ    14
+
+/* Emit sampler state, texture constants and mipmap address table for the
+ * textures in 'tex' via three CP_LOAD_STATE packets.
+ *
+ * sb selects the stage (SB_VERT_TEX or SB_FRAG_TEX).  Nothing is emitted
+ * when there are no samplers.
+ */
+static void
+emit_textures(struct fd_ringbuffer *ring,
+               enum adreno_state_block sb,
+               struct fd_texture_stateobj *tex)
+{
+       /* per-stage starting slot for sampler/texture state: */
+       static const unsigned tex_off[] = {
+                       [SB_VERT_TEX] = VERT_TEX_OFF,
+                       [SB_FRAG_TEX] = FRAG_TEX_OFF,
+       };
+       /* per-stage state block that holds the mipaddr table: */
+       static const enum adreno_state_block mipaddr[] = {
+                       [SB_VERT_TEX] = SB_VERT_MIPADDR,
+                       [SB_FRAG_TEX] = SB_FRAG_MIPADDR,
+       };
+       unsigned idx, slot;
+
+       assert(tex->num_samplers == tex->num_textures);  // TODO check..
+
+       if (!tex->num_samplers)
+               return;
+
+       /* output sampler state (2 dwords per sampler): */
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
+                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                       CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
+       OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+                       CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+       for (idx = 0; idx < tex->num_samplers; idx++) {
+               struct fd3_sampler_stateobj *samp =
+                               fd3_sampler_stateobj(tex->samplers[idx]);
+               OUT_RING(ring, samp->texsamp0);
+               OUT_RING(ring, samp->texsamp1);
+       }
+
+       /* emit texture state (4 dwords per texture): */
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + (4 * tex->num_textures));
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
+                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(sb) |
+                       CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
+       OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+                       CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+       for (idx = 0; idx < tex->num_textures; idx++) {
+               struct fd3_pipe_sampler_view *sv =
+                               fd3_pipe_sampler_view(tex->textures[idx]);
+               OUT_RING(ring, sv->texconst0);
+               OUT_RING(ring, sv->texconst1);
+               /* INDX is this texture's first slot in the mipaddr table: */
+               OUT_RING(ring, sv->texconst2 |
+                               A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * idx));
+               OUT_RING(ring, sv->texconst3);
+       }
+
+       /* emit mipaddrs (BASETABLE_SZ dwords per texture): */
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + (BASETABLE_SZ * tex->num_textures));
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * tex_off[sb]) |
+                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(mipaddr[sb]) |
+                       CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * tex->num_textures));
+       OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+                       CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+       for (idx = 0; idx < tex->num_textures; idx++) {
+               struct fd3_pipe_sampler_view *sv =
+                               fd3_pipe_sampler_view(tex->textures[idx]);
+
+               /* first slot: the texture's bo */
+               OUT_RELOC(ring, sv->tex_resource->bo, 0, 0);
+
+               /* I think each entry is a ptr to mipmap level.. for now, just
+                * pad the remaining slots w/ null's until mipmap support is
+                * actually implemented..
+                */
+               for (slot = BASETABLE_SZ - 1; slot > 0; slot--) {
+                       OUT_RING(ring, 0x00000000);
+               }
+       }
+}
+
+/* Emit a CACHE_FLUSH event, a zero-index point-list draw, a CP_NOP packet
+ * with four zeroed payload dwords, and finally a wait-for-idle.
+ */
+static void
+emit_cache_flush(struct fd_ringbuffer *ring)
+{
+       unsigned pad;
+
+       OUT_PKT3(ring, CP_EVENT_WRITE, 1);
+       OUT_RING(ring, CACHE_FLUSH);
+
+       OUT_PKT3(ring, CP_DRAW_INDX, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, DRAW(DI_PT_POINTLIST, DI_SRC_SEL_AUTO_INDEX,
+                       INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+       OUT_RING(ring, 0);                                      /* NumIndices */
+
+       /* four dwords of zero padding: */
+       OUT_PKT3(ring, CP_NOP, 4);
+       for (pad = 0; pad < 4; pad++) {
+               OUT_RING(ring, 0x00000000);
+       }
+
+       OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
+       OUT_RING(ring, 0x00000000);
+}
+
+/* emit texture state for mem->gmem restore operation.. eventually it would
+ * be good to get rid of this and use normal CSO/etc state for more of these
+ * special cases, but for now the compiler is not sufficient..
+ *
+ * A single fragment-stage sampler/texture is set up at FRAG_TEX_OFF that
+ * reads from the resource behind psurf, using nearest filtering and the
+ * surface's format, width and height.
+ */
+void
+fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
+{
+       struct fd_resource *rsc = fd_resource(psurf->texture);
+
+       /* output sampler state: */
+       OUT_PKT3(ring, CP_LOAD_STATE, 4);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
+                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
+                       CP_LOAD_STATE_0_NUM_UNIT(1));
+       OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+                       CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+       OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
+                       A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
+                       A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
+                       A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
+                       A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
+       OUT_RING(ring, 0x00000000);
+
+       /* emit texture state: */
+       OUT_PKT3(ring, CP_LOAD_STATE, 6);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
+                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
+                       CP_LOAD_STATE_0_NUM_UNIT(1));
+       OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+                       CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+       /* NOTE(review): the meaning of the 0x40000000 bit is not known here */
+       OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) |
+                       0x40000000 | // XXX
+                       fd3_tex_swiz(psurf->format,  PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_GREEN,
+                                       PIPE_SWIZZLE_RED, PIPE_SWIZZLE_ALPHA));
+       OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(psurf->format)) |
+                       A3XX_TEX_CONST_1_WIDTH(psurf->width) |
+                       A3XX_TEX_CONST_1_HEIGHT(psurf->height));
+       OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp) |
+                       A3XX_TEX_CONST_2_INDX(0));
+       OUT_RING(ring, 0x00000000);
+
+       /* emit mipaddrs (only a single entry, pointing at the resource's bo): */
+       OUT_PKT3(ring, CP_LOAD_STATE, 3);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) |
+                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) |
+                       CP_LOAD_STATE_0_NUM_UNIT(1));
+       OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
+                       CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+       OUT_RELOC(ring, rsc->bo, 0, 0);
+}
+
+/* Program one VFD fetch instruction and one VFD decode instruction per
+ * vertex buffer.
+ *
+ * prog:  program whose vertex shader supplies the input regid/compmask
+ * vbufs: array of n vertex buffer descriptions
+ * n:     number of entries in vbufs; clamped to the vertex shader's
+ *        inputs_count
+ *
+ * SWITCHNEXT is set on every entry except the last one emitted.
+ */
+void
+fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
+               struct fd_program_stateobj *prog,
+               struct fd3_vertex_buf *vbufs, uint32_t n)
+{
+       struct fd3_shader_stateobj *vp = prog->vp;
+       uint32_t idx;
+
+       n = MIN2(n, vp->inputs_count);
+
+       for (idx = 0; idx < n; idx++) {
+               struct fd3_vertex_buf *vb = &vbufs[idx];
+               struct fd_resource *rsc = fd_resource(vb->prsc);
+               enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(vb->format);
+               bool is_last = ((idx + 1) == n);
+               uint32_t blocksize = util_format_get_blocksize(vb->format);
+
+               /* fetch: where and how much to read per vertex */
+               OUT_PKT0(ring, REG_A3XX_VFD_FETCH(idx), 2);
+               OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(blocksize - 1) |
+                               A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
+                               COND(!is_last, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
+                               A3XX_VFD_FETCH_INSTR_0_INDEXCODE(idx) |
+                               A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
+               OUT_RELOC(ring, rsc->bo, vb->offset, 0);
+
+               /* decode: format and destination register of the input */
+               OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(idx), 1);
+               OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
+                               A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[idx].compmask) |
+                               A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
+                               A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[idx].regid) |
+                               A3XX_VFD_DECODE_INSTR_SHIFTCNT(blocksize) |
+                               A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
+                               COND(!is_last, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
+       }
+}
+
+/* Emit the context state selected by the 'dirty' bitmask (FD_DIRTY_* bits)
+ * into ctx->ring, then clear those bits from ctx->dirty.  Each state group
+ * below is written only when one of its bits is set in 'dirty'.
+ */
+void
+fd3_emit_state(struct fd_context *ctx, uint32_t dirty)
+{
+       struct fd_ringbuffer *ring = ctx->ring;
+
+       if (dirty & FD_DIRTY_SAMPLE_MASK) {
+               OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
+               OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
+                               A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
+                               A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
+       }
+
+       /* depth/stencil/alpha: precomputed register values from the zsa
+        * state object, with the current stencil reference values or'd in:
+        */
+       if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
+               struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
+               struct pipe_stencil_ref *sr = &ctx->stencil_ref;
+
+               fd3_emit_rbrc_draw_state(ring, zsa->rb_render_control);
+
+               OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+               OUT_RING(ring, zsa->rb_depth_control);
+
+               OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+               OUT_RING(ring, zsa->rb_stencil_control);
+
+               OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
+               OUT_RING(ring, zsa->rb_stencilrefmask |
+                               A3XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
+               OUT_RING(ring, zsa->rb_stencilrefmask_bf |
+                               A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
+       }
+
+       if (dirty & FD_DIRTY_RASTERIZER) {
+               struct fd3_rasterizer_stateobj *rasterizer =
+                               fd3_rasterizer_stateobj(ctx->rasterizer);
+
+               OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+               OUT_RING(ring, rasterizer->gras_su_mode_control);
+
+               OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
+               OUT_RING(ring, rasterizer->gras_su_point_minmax);
+               OUT_RING(ring, rasterizer->gras_su_point_size);
+
+               OUT_PKT0(ring, REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
+               OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
+               OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
+
+               OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+               OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
+       }
+
+       /* PC_PRIM_VTX_CNTL combines rasterizer state with STRIDE_IN_VPC, which
+        * is derived from the fragment shader's total_in (rounded up to a
+        * multiple of 4, divided by 4); any non-zero stride is raised to at
+        * least 2:
+        */
+       if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
+               struct fd3_rasterizer_stateobj *rasterizer =
+                               fd3_rasterizer_stateobj(ctx->rasterizer);
+               struct fd3_shader_stateobj *fp = ctx->prog.fp;
+               uint32_t stride_in_vpc;
+
+               stride_in_vpc = align(fp->total_in, 4) / 4;
+               if (stride_in_vpc > 0)
+                       stride_in_vpc = MAX2(stride_in_vpc, 2);
+
+               OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+               OUT_RING(ring, rasterizer->pc_prim_vtx_cntl |
+                               A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc));
+       }
+
+       /* scissor: BR is written as (max - 1); ctx->max_scissor is grown to
+        * cover every scissor rect emitted:
+        */
+       if (dirty & FD_DIRTY_SCISSOR) {
+               OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
+               OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(ctx->scissor.minx) |
+                               A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(ctx->scissor.miny));
+               OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(ctx->scissor.maxx - 1) |
+                               A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(ctx->scissor.maxy - 1));
+
+               ctx->max_scissor.minx = MIN2(ctx->max_scissor.minx, ctx->scissor.minx);
+               ctx->max_scissor.miny = MIN2(ctx->max_scissor.miny, ctx->scissor.miny);
+               ctx->max_scissor.maxx = MAX2(ctx->max_scissor.maxx, ctx->scissor.maxx);
+               ctx->max_scissor.maxy = MAX2(ctx->max_scissor.maxy, ctx->scissor.maxy);
+       }
+
+       /* viewport: x/y offsets are the gallium translate values minus 0.5: */
+       if (dirty & FD_DIRTY_VIEWPORT) {
+               OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
+               OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5));
+               OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0]));
+               OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(ctx->viewport.translate[1] - 0.5));
+               OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(ctx->viewport.scale[1]));
+               OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(ctx->viewport.translate[2]));
+               OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
+       }
+
+       if (dirty & FD_DIRTY_PROG)
+               fd3_program_emit(ring, &ctx->prog);
+
+       /* constants: the shader is only handed to emit_constants() when its
+        * FD_SHADER_DIRTY_* bit is set; otherwise NULL is passed, so
+        * emit_constants() has to cope with a NULL shader:
+        */
+       if (dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) {
+               struct fd_program_stateobj *prog = &ctx->prog;
+
+               emit_constants(ring,  SB_VERT_SHADER,
+                               &ctx->constbuf[PIPE_SHADER_VERTEX],
+                               (prog->dirty & FD_SHADER_DIRTY_VP) ? prog->vp : NULL);
+               emit_constants(ring, SB_FRAG_SHADER,
+                               &ctx->constbuf[PIPE_SHADER_FRAGMENT],
+                               (prog->dirty & FD_SHADER_DIRTY_FP) ? prog->fp : NULL);
+       }
+
+       /* blend: per-MRT control and blend-control words from the blend CSO: */
+       if (dirty & FD_DIRTY_BLEND) {
+               struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
+               uint32_t i;
+
+               for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) {
+                       OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+                       OUT_RING(ring, blend->rb_mrt[i].control);
+
+                       OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
+                       OUT_RING(ring, blend->rb_mrt[i].blend_control);
+               }
+       }
+
+       if (dirty & FD_DIRTY_VERTTEX)
+               emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
+
+       if (dirty & FD_DIRTY_FRAGTEX)
+               emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
+
+       /* everything requested has been emitted: */
+       ctx->dirty &= ~dirty;
+}
+
+/* emit setup at begin of new cmdstream buffer (don't rely on previous
+ * state, there could have been a context switch between ioctls):
+ *
+ * Writes a fixed set of initial register values into ctx->ring, points the
+ * VS/FS private memory and VSC size registers at the buffers owned by the
+ * fd3 context, and finishes with a cache flush.  Registers named UNKNOWN_*
+ * are written with fixed values whose meaning is not documented here.
+ */
+void
+fd3_emit_restore(struct fd_context *ctx)
+{
+       struct fd3_context *fd3_ctx = fd3_context(ctx);
+       struct fd_ringbuffer *ring = ctx->ring;
+       int i;
+
+       OUT_PKT3(ring, CP_REG_RMW, 3);
+       OUT_RING(ring, REG_A3XX_RBBM_CLOCK_CTL);
+       OUT_RING(ring, 0xfffcffff);
+       OUT_RING(ring, 0x00000000);
+
+       OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
+       OUT_RING(ring, 0x00007fff);
+
+       /* shader private memory buffers: */
+       OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_CTRL_REG, 3);
+       OUT_RING(ring, 0x08000001);                  /* SP_VS_PVT_MEM_CTRL_REG */
+       OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0);  /* SP_VS_PVT_MEM_ADDR_REG */
+       OUT_RING(ring, 0x00000000);                  /* SP_VS_PVT_MEM_SIZE_REG */
+
+       OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_CTRL_REG, 3);
+       OUT_RING(ring, 0x08000001);                  /* SP_FS_PVT_MEM_CTRL_REG */
+       OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0);  /* SP_FS_PVT_MEM_ADDR_REG */
+       OUT_RING(ring, 0x00000000);                  /* SP_FS_PVT_MEM_SIZE_REG */
+
+       OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
+       OUT_RING(ring, 0x0000000b);                  /* PC_VERTEX_REUSE_BLOCK_CNTL */
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+       OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                       A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+                       A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+
+       OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 2);
+       OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
+                       A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
+                       A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
+       OUT_RING(ring, 0x00000000);        /* UNKNOWN_20C3 */
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
+       OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
+                       A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));
+
+       OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C81, 1);
+       OUT_RING(ring, 0x00000001);        /* UNKNOWN_0C81 */
+
+       /* base offsets of the per-stage sampler/texture/mipaddr state; these
+        * use the same VERT_TEX_OFF/FRAG_TEX_OFF/BASETABLE_SZ values as the
+        * CP_LOAD_STATE packets emitted elsewhere in this file:
+        */
+       OUT_PKT0(ring, REG_A3XX_TPL1_TP_VS_TEX_OFFSET, 1);
+       OUT_RING(ring, A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(VERT_TEX_OFF) |
+                       A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(VERT_TEX_OFF) |
+                       A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * VERT_TEX_OFF));
+
+       OUT_PKT0(ring, REG_A3XX_TPL1_TP_FS_TEX_OFFSET, 1);
+       OUT_RING(ring, A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(FRAG_TEX_OFF) |
+                       A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(FRAG_TEX_OFF) |
+                       A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ * FRAG_TEX_OFF));
+
+       OUT_PKT0(ring, REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0, 2);
+       OUT_RING(ring, 0x00000000);        /* VPC_VARY_CYLWRAP_ENABLE_0 */
+       OUT_RING(ring, 0x00000000);        /* VPC_VARY_CYLWRAP_ENABLE_1 */
+
+       OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E43, 1);
+       OUT_RING(ring, 0x00000001);        /* UNKNOWN_0E43 */
+
+       OUT_PKT0(ring, REG_A3XX_UNKNOWN_0F03, 1);
+       OUT_RING(ring, 0x00000001);        /* UNKNOWN_0F03 */
+
+       OUT_PKT0(ring, REG_A3XX_UNKNOWN_0EE0, 1);
+       OUT_RING(ring, 0x00000003);        /* UNKNOWN_0EE0 */
+
+       OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1);
+       OUT_RING(ring, 0x00000001);        /* UNKNOWN_0C3D */
+
+       OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E00, 1);
+       OUT_RING(ring, 0x00000000);        /* UNKNOWN_0E00 */
+
+       OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2);
+       OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) |
+                       A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(0));
+       OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
+                       A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
+
+       OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 1);
+       OUT_RING(ring, 0x00000001);        /* UCHE_CACHE_MODE_CONTROL_REG */
+
+       OUT_PKT0(ring, REG_A3XX_VSC_SIZE_ADDRESS, 1);
+       OUT_RELOC(ring, fd3_ctx->vsc_size_mem, 0, 0); /* VSC_SIZE_ADDRESS */
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+       OUT_RING(ring, 0x00000000);                  /* GRAS_CL_CLIP_CNTL */
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
+       OUT_RING(ring, 0xffc00010);        /* GRAS_SU_POINT_MINMAX */
+       OUT_RING(ring, 0x00000008);        /* GRAS_SU_POINT_SIZE */
+
+       OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
+       OUT_RING(ring, 0xffffffff);        /* PC_RESTART_INDEX */
+
+       OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1);
+       OUT_RING(ring, A3XX_PA_SC_WINDOW_OFFSET_X(0) |
+                       A3XX_PA_SC_WINDOW_OFFSET_Y(0));
+
+       OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
+       OUT_RING(ring, 0x00000000);        /* RB_BLEND_RED */
+       OUT_RING(ring, 0x00000000);        /* RB_BLEND_GREEN */
+       OUT_RING(ring, 0x00000000);        /* RB_BLEND_BLUE */
+       OUT_RING(ring, 0x3c0000ff);        /* RB_BLEND_ALPHA */
+
+       /* zero all six user clip planes: */
+       for (i = 0; i < 6; i++) {
+               OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(i), 4);
+               OUT_RING(ring, 0x00000000);    /* GRAS_CL_USER_PLANE[i].X */
+               OUT_RING(ring, 0x00000000);    /* GRAS_CL_USER_PLANE[i].Y */
+               OUT_RING(ring, 0x00000000);    /* GRAS_CL_USER_PLANE[i].Z */
+               OUT_RING(ring, 0x00000000);    /* GRAS_CL_USER_PLANE[i].W */
+       }
+
+       emit_cache_flush(ring);
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_emit.h b/src/gallium/drivers/freedreno/a3xx/fd3_emit.h
new file mode 100644 (file)
index 0000000..668e5dd
--- /dev/null
@@ -0,0 +1,89 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_EMIT_H
+#define FD3_EMIT_H
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+#include "fd3_util.h"
+
+
+struct fd_ringbuffer;
+enum adreno_state_block;
+
+void fd3_emit_constant(struct fd_ringbuffer *ring,
+               enum adreno_state_block sb,
+               uint32_t regid, uint32_t offset, uint32_t sizedwords,
+               const uint32_t *dwords, struct pipe_resource *prsc);
+
+void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
+               struct pipe_surface *psurf);
+
+/* Minimal description of one vertex buffer binding, as consumed by
+ * fd3_emit_vertex_bufs().
+ *
+ * NOTE: this only exists because there are no proper vertex/vertexbuf
+ * state objects for the internal clear and mem2gmem/gmem2mem draws.
+ */
+struct fd3_vertex_buf {
+       unsigned offset;              /* start offset within prsc */
+       unsigned stride;              /* per-vertex stride (presumably bytes) */
+       struct pipe_resource *prsc;   /* resource backing the buffer */
+       enum pipe_format format;      /* format of the vertex element */
+};
+
+void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
+               struct fd_program_stateobj *prog,
+               struct fd3_vertex_buf *vbufs, uint32_t n);
+void fd3_emit_state(struct fd_context *ctx, uint32_t dirty);
+void fd3_emit_restore(struct fd_context *ctx);
+
+
+/* RB_RENDER_CONTROL is written from two places: the draw/clear state
+ * and the GMEM/binning code.  The latter only decides on the bin-width
+ * (and whether binning is used at all) after the draw/clear state has
+ * been emitted, so both sides update the register with a CP_REG_RMW
+ * (read-modify-write) packet instead of overwriting it.
+ *
+ * This is the draw/clear side: the mask dword names the BIN_WIDTH and
+ * ENABLE_GMEM bits, and 'val' carries the new draw-state bits.
+ */
+static inline void
+fd3_emit_rbrc_draw_state(struct fd_ringbuffer *ring, uint32_t val)
+{
+       const uint32_t tile_bits = A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK |
+                       A3XX_RB_RENDER_CONTROL_ENABLE_GMEM;
+
+       OUT_PKT3(ring, CP_REG_RMW, 3);
+       OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL);
+       OUT_RING(ring, tile_bits);
+       OUT_RING(ring, val);
+}
+
+/* Emit a CP_REG_RMW packet against RB_RENDER_CONTROL on behalf of the
+ * GMEM/tiling code.  The mask dword is the complement of the BIN_WIDTH
+ * and ENABLE_GMEM bits, and 'val' carries the new tile-state bits (the
+ * bin width and, optionally, ENABLE_GMEM).
+ */
+static inline void
+fd3_emit_rbrc_tile_state(struct fd_ringbuffer *ring, uint32_t val)
+{
+       const uint32_t tile_bits = A3XX_RB_RENDER_CONTROL_BIN_WIDTH__MASK |
+                       A3XX_RB_RENDER_CONTROL_ENABLE_GMEM;
+
+       OUT_PKT3(ring, CP_REG_RMW, 3);
+       OUT_RING(ring, REG_A3XX_RB_RENDER_CONTROL);
+       OUT_RING(ring, ~tile_bits);
+       OUT_RING(ring, val);
+}
+
+#endif /* FD3_EMIT_H */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
new file mode 100644 (file)
index 0000000..16ec959
--- /dev/null
@@ -0,0 +1,486 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "freedreno_state.h"
+#include "freedreno_resource.h"
+
+#include "fd3_gmem.h"
+#include "fd3_context.h"
+#include "fd3_emit.h"
+#include "fd3_program.h"
+#include "fd3_util.h"
+#include "fd3_zsa.h"
+
+
+/* Program RB_MRT_BUF_INFO/RB_MRT_BUF_BASE and SP_FS_IMAGE_OUTPUT_REG for
+ * all four MRT slots.
+ *
+ *   nr_bufs - number of valid entries in 'bufs'; slots at or beyond this
+ *             index are emitted with format 0, zero pitch and zero base
+ *   bufs    - surfaces to take the color format/swap from
+ *   bases   - optional (may be NULL) per-buffer base values; each one is
+ *             scaled by the resource's cpp before being emitted
+ *   bin_w   - bin width used to derive the buffer pitch (bin_w * cpp)
+ */
+static void
+emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
+               struct pipe_surface **bufs, uint32_t *bases, uint32_t bin_w)
+{
+       unsigned i;
+
+       for (i = 0; i < 4; i++) {
+               /* defaults for slots with no surface bound: */
+               enum a3xx_color_fmt format = 0;
+               enum a3xx_color_swap swap = WZYX;
+               uint32_t stride = 0;
+               uint32_t base = 0;
+
+               if (i < nr_bufs) {
+                       struct pipe_surface *psurf = bufs[i];
+                       struct fd_resource *res = fd_resource(psurf->texture);
+
+                       format = fd3_pipe2color(psurf->format);
+                       swap = fd3_pipe2swap(psurf->format);
+                       stride = bin_w * res->cpp;
+
+                       if (bases) {
+                               base = bases[i] * res->cpp;
+                       }
+               }
+
+               OUT_PKT0(ring, REG_A3XX_RB_MRT_BUF_INFO(i), 2);
+               OUT_RING(ring, A3XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
+                               A3XX_RB_MRT_BUF_INFO_COLOR_BUF_PITCH(stride) |
+                               A3XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(TILE_32X32) |
+                               A3XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap));
+               OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
+
+               OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
+               OUT_RING(ring, A3XX_SP_FS_IMAGE_OUTPUT_REG_MRTFORMAT(format));
+       }
+}
+
+/* Base of the depth/stencil buffer within GMEM: the bin area
+ * (bin_w * bin_h) rounded up to a multiple of 0x4000.  Callers in this
+ * file either program it directly as the depth base or scale it by a
+ * cpp value first.
+ */
+static uint32_t
+depth_base(struct fd_gmem_stateobj *gmem)
+{
+       uint32_t bin_area = gmem->bin_w * gmem->bin_h;
+
+       return align(bin_area, 0x4000);
+}
+
+/* transfer from gmem to system memory (ie. normal RAM) */
+
+/* Emit the copy of a single surface out of GMEM: programs the RB_COPY_*
+ * registers with the destination described by 'psurf' (bo, pitch,
+ * format, swap) and then issues an auto-indexed RECTLIST draw.
+ *
+ *   mode - RB copy-control mode to use
+ *   base - value programmed into the GMEM_BASE field of RB_COPY_CONTROL
+ */
+static void
+emit_gmem2mem_surf(struct fd_ringbuffer *ring,
+               enum adreno_rb_copy_control_mode mode,
+               uint32_t base, struct pipe_surface *psurf)
+{
+       struct fd_resource *rsc = fd_resource(psurf->texture);
+       enum pipe_format pfmt = psurf->format;
+
+       OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
+
+       /* RB_COPY_CONTROL */
+       OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
+                       A3XX_RB_COPY_CONTROL_MODE(mode) |
+                       A3XX_RB_COPY_CONTROL_GMEM_BASE(base));
+
+       /* RB_COPY_DEST_BASE */
+       OUT_RELOCS(ring, rsc->bo, 0, 0, -1);
+
+       /* RB_COPY_DEST_PITCH */
+       OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(rsc->pitch * rsc->cpp));
+
+       /* RB_COPY_DEST_INFO */
+       OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
+                       A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(pfmt)) |
+                       A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(0xf) |
+                       A3XX_RB_COPY_DEST_INFO_ENDIAN(ENDIAN_NONE) |
+                       A3XX_RB_COPY_DEST_INFO_SWAP(fd3_pipe2swap(pfmt)));
+
+       OUT_PKT3(ring, CP_DRAW_INDX, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+                       INDEX_SIZE_IGN, IGNORE_VISIBILITY));
+       OUT_RING(ring, 2);                                      /* NumIndices */
+}
+
+static void
+fd3_emit_tile_gmem2mem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+               uint32_t bin_w, uint32_t bin_h)
+{      /* Emits the resolve (gmem -> system memory) commands for one tile:
+        * sets up depth/stencil/rasterizer state for an RB_RESOLVE_PASS,
+        * binds the solid program and its vertex buffer, issues one copy
+        * per buffer selected in ctx->resolve, then switches RB and GRAS
+        * back to RB_RENDERING_PASS.  The xoff/yoff/bin_w/bin_h parameters
+        * are not read in this function.
+        */
+       struct fd3_context *fd3_ctx = fd3_context(ctx);
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+
+       OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
+
+       OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_NEVER) |
+                       A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+                       A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+                       A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+                       A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_NEVER) |
+                       A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+                       A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+                       A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+       OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);   /* front, then back-face value */
+       OUT_RING(ring, 0xff000000 |
+                       A3XX_RB_STENCILREFMASK_STENCILREF(0) |
+                       A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
+                       A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+       OUT_RING(ring, 0xff000000 |
+                       A3XX_RB_STENCILREFMASK_STENCILREF(0) |
+                       A3XX_RB_STENCILREFMASK_STENCILMASK(0) |
+                       A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff));
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
+       OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+       OUT_RING(ring, 0x00000000);   /* GRAS_CL_CLIP_CNTL */
+
+       OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);   /* enter resolve pass */
+       OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+
+       fd3_emit_rbrc_draw_state(ring,
+                       A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE |
+                       A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_NEVER));
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+       OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
+                       A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+                       A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+       OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+       OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
+                       A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+                       A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+                       A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);   /* scissor spans the whole framebuffer */
+       OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+                       A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+       OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
+                       A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));
+
+       OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+       OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
+       OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
+       OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
+       OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
+
+       fd3_program_emit(ring, &ctx->solid_prog);
+
+       fd3_emit_vertex_bufs(ring, &ctx->solid_prog, (struct fd3_vertex_buf[]) {
+                       { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
+               }, 1);
+
+       if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {   /* NOTE(review): the depth
+                * base below is scaled by the cpp of color buffer 0, and
+                * pfb->cbufs[0] is dereferenced without a NULL check --
+                * confirm a color buffer is always bound on this path.
+                */
+               uint32_t base = depth_base(&ctx->gmem) *
+                               fd_resource(pfb->cbufs[0]->texture)->cpp;
+               emit_gmem2mem_surf(ring, RB_COPY_DEPTH_STENCIL, base, pfb->zsbuf);
+       }
+
+       if (ctx->resolve & FD_BUFFER_COLOR) {   /* color is copied from gmem base 0 */
+               emit_gmem2mem_surf(ring, RB_COPY_RESOLVE, 0, pfb->cbufs[0]);
+       }
+
+       OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);   /* back to rendering pass */
+       OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+       OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                       A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+                       A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+}
+
+/* transfer from system memory to gmem */
+
+/* Restore a single surface into GMEM: point MRT slot 0 at 'base' using
+ * the surface's format, bind the surface as the restore texture, then
+ * issue an auto-indexed RECTLIST draw.
+ */
+static void
+emit_mem2gmem_surf(struct fd_ringbuffer *ring, uint32_t base,
+               struct pipe_surface *psurf, uint32_t bin_w)
+{
+       uint32_t draw_initiator;
+
+       emit_mrt(ring, 1, &psurf, &base, bin_w);
+
+       fd3_emit_gmem_restore_tex(ring, psurf);
+
+       draw_initiator = DRAW(DI_PT_RECTLIST, DI_SRC_SEL_AUTO_INDEX,
+                       INDEX_SIZE_IGN, IGNORE_VISIBILITY);
+
+       OUT_PKT3(ring, CP_DRAW_INDX, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, draw_initiator);
+       OUT_RING(ring, 2);                                      /* NumIndices */
+}
+
+static void
+fd3_emit_tile_mem2gmem(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+               uint32_t bin_w, uint32_t bin_h)
+{      /* Emits the restore (system memory -> gmem) commands for the tile at
+        * (xoff, yoff) with size bin_w x bin_h: writes the tile's normalized
+        * texture coordinates into blit_texcoord_vbuf, sets up blend,
+        * viewport, scissor and depth/stencil state, binds the blit program,
+        * and then restores each buffer selected in ctx->restore.
+        */
+       struct fd3_context *fd3_ctx = fd3_context(ctx);
+       struct fd_gmem_stateobj *gmem = &ctx->gmem;
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+       float x0, y0, x1, y1;
+       unsigned i;
+
+       /* write texture coordinates to vertexbuf: */
+       x0 = ((float)xoff) / ((float)pfb->width);
+       x1 = ((float)xoff + bin_w) / ((float)pfb->width);
+       y0 = ((float)yoff) / ((float)pfb->height);
+       y1 = ((float)yoff + bin_h) / ((float)pfb->height);
+
+       OUT_PKT3(ring, CP_MEM_WRITE, 5);   /* destination address + four floats */
+       OUT_RELOC(ring, fd_resource(fd3_ctx->blit_texcoord_vbuf)->bo, 0, 0);
+       OUT_RING(ring, fui(x0));
+       OUT_RING(ring, fui(y0));
+       OUT_RING(ring, fui(x1));
+       OUT_RING(ring, fui(y1));
+
+       for (i = 0; i < 4; i++) {   /* same control/blend setup for all four MRTs */
+               OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
+               OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(12) |
+                               A3XX_RB_MRT_CONTROL_DITHER_MODE(DITHER_DISABLE) |
+                               A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE(0xf));
+
+               OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
+               OUT_RING(ring, A3XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(FACTOR_ONE) |
+                               A3XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+                               A3XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(FACTOR_ZERO) |
+                               A3XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(FACTOR_ONE) |
+                               A3XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(BLEND_DST_PLUS_SRC) |
+                               A3XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(FACTOR_ZERO) |
+                               A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE);
+       }
+
+       fd3_emit_rbrc_tile_state(ring,
+                       A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
+
+       OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_LESS));
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
+       OUT_RING(ring, A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);   /* GRAS_CL_CLIP_CNTL */
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);   /* viewport sized to this tile */
+       OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET((float)bin_w/2.0 - 0.5));
+       OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE((float)bin_w/2.0));
+       OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET((float)bin_h/2.0 - 0.5));
+       OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-(float)bin_h/2.0));
+       OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(0.0));
+       OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(1.0));
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);   /* tile-local: (0,0)..(bin_w-1,bin_h-1) */
+       OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
+                       A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
+       OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(bin_w - 1) |
+                       A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(bin_h - 1));
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+       OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(0) |
+                       A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(0));
+       OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(bin_w - 1) |
+                       A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bin_h - 1));
+
+       OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
+       OUT_RING(ring, 0x2 |   /* NOTE(review): raw bit with no named field here -- confirm meaning */
+                       A3XX_RB_STENCIL_CONTROL_FUNC(FUNC_ALWAYS) |
+                       A3XX_RB_STENCIL_CONTROL_FAIL(STENCIL_KEEP) |
+                       A3XX_RB_STENCIL_CONTROL_ZPASS(STENCIL_KEEP) |
+                       A3XX_RB_STENCIL_CONTROL_ZFAIL(STENCIL_KEEP) |
+                       A3XX_RB_STENCIL_CONTROL_FUNC_BF(FUNC_ALWAYS) |
+                       A3XX_RB_STENCIL_CONTROL_FAIL_BF(STENCIL_KEEP) |
+                       A3XX_RB_STENCIL_CONTROL_ZPASS_BF(STENCIL_KEEP) |
+                       A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(STENCIL_KEEP));
+
+       fd3_emit_rbrc_draw_state(ring,
+                       A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(FUNC_ALWAYS));
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+       OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                       A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+                       A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
+
+       OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
+       OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(2) |
+                       A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
+                       A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(PC_DRAW_TRIANGLES) |
+                       A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST);
+
+       OUT_PKT0(ring, REG_A3XX_VFD_INDEX_MIN, 4);
+       OUT_RING(ring, 0);            /* VFD_INDEX_MIN */
+       OUT_RING(ring, 2);            /* VFD_INDEX_MAX */
+       OUT_RING(ring, 0);            /* VFD_INSTANCEID_OFFSET */
+       OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
+
+       fd3_program_emit(ring, &ctx->blit_prog);
+
+       fd3_emit_vertex_bufs(ring, &ctx->blit_prog, (struct fd3_vertex_buf[]) {
+                       { .prsc = fd3_ctx->blit_texcoord_vbuf, .stride = 8, .format = PIPE_FORMAT_R32G32_FLOAT },
+                       { .prsc = fd3_ctx->solid_vbuf, .stride = 12, .format = PIPE_FORMAT_R32G32B32_FLOAT },
+               }, 2);
+
+       /* for gmem pitch/base calculations, we need to use the non-
+        * truncated tile sizes:
+        */
+       bin_w = gmem->bin_w;
+       bin_h = gmem->bin_h;
+
+       if (ctx->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))   /* depth/stencil lives at depth_base() */
+               emit_mem2gmem_surf(ring, depth_base(gmem), pfb->zsbuf, bin_w);
+
+       if (ctx->restore & FD_BUFFER_COLOR)   /* color lives at gmem base 0 */
+               emit_mem2gmem_surf(ring, 0, pfb->cbufs[0], bin_w);
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
+       OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                       A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
+                       A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));
+}
+
+/* Program all eight VSC_PIPE register groups (config, data address,
+ * data length).  Binning is not used yet, so every bin is assigned to
+ * pipe 0, which gets the vsc_pipe_mem buffer; pipes 1..7 are emitted
+ * with an all-zero config, address and length.
+ */
+static void
+update_vsc_pipe(struct fd_context *ctx)
+{
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct fd_gmem_stateobj *gmem = &ctx->gmem;
+       struct fd_bo *bo = fd3_context(ctx)->vsc_pipe_mem;
+       int pipe;
+
+       for (pipe = 0; pipe < 8; pipe++) {
+               OUT_PKT0(ring, REG_A3XX_VSC_PIPE(pipe), 3);
+
+               if (pipe == 0) {
+                       /* the one pipe in use covers the whole bin grid: */
+                       OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(0) |
+                                       A3XX_VSC_PIPE_CONFIG_Y(0) |
+                                       A3XX_VSC_PIPE_CONFIG_W(gmem->nbins_x) |
+                                       A3XX_VSC_PIPE_CONFIG_H(gmem->nbins_y));
+                       OUT_RELOC(ring, bo, 0, 0);              /* VSC_PIPE[0].DATA_ADDRESS */
+                       OUT_RING(ring, fd_bo_size(bo) - 32);    /* VSC_PIPE[0].DATA_LENGTH */
+               } else {
+                       OUT_RING(ring, A3XX_VSC_PIPE_CONFIG_X(0) |
+                                       A3XX_VSC_PIPE_CONFIG_Y(0) |
+                                       A3XX_VSC_PIPE_CONFIG_W(0) |
+                                       A3XX_VSC_PIPE_CONFIG_H(0));
+                       OUT_RING(ring, 0x00000000);         /* VSC_PIPE[pipe].DATA_ADDRESS */
+                       OUT_RING(ring, 0x00000000);         /* VSC_PIPE[pipe].DATA_LENGTH */
+               }
+       }
+}
+
+/* Tile-pass setup, emitted before the first tile: re-emit the baseline
+ * context state, then program the VSC bin size and pipe configuration.
+ */
+static void
+fd3_emit_tile_init(struct fd_context *ctx)
+{
+       struct fd_gmem_stateobj *gmem = &ctx->gmem;
+       struct fd_ringbuffer *ring = ctx->ring;
+       uint32_t bin_size;
+
+       fd3_emit_restore(ctx);
+
+       /* The bin size always comes from gmem->bin_w/h; the per-tile
+        * bin_w/h values handed to the other hooks may be truncated at
+        * the right and bottom edge tiles.
+        */
+       bin_size = A3XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
+                       A3XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h);
+
+       OUT_PKT0(ring, REG_A3XX_VSC_BIN_SIZE, 1);
+       OUT_RING(ring, bin_size);
+
+       /* TODO this is only needed when the gmem stateobj changes, or
+        * more precisely when the number of bins changes.
+        */
+       update_vsc_pipe(ctx);
+}
+
+/* Per-tile setup, emitted before mem2gmem: programs the depth buffer
+ * info/pitch, the window size and the RB render mode.  The tile
+ * position and size parameters are not read here.
+ */
+static void
+fd3_emit_tile_prep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+               uint32_t bin_w, uint32_t bin_h)
+{
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+       struct fd_gmem_stateobj *gmem = &ctx->gmem;
+       uint32_t depth_info = A3XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base(gmem));
+       uint32_t depth_pitch = 0x00000000;
+
+       /* format and pitch are only meaningful with a zsbuf bound: */
+       if (pfb->zsbuf) {
+               enum pipe_format zsfmt = pfb->zsbuf->format;
+               uint32_t cpp = util_format_get_blocksize(zsfmt);
+
+               depth_info |= A3XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(zsfmt));
+               depth_pitch = A3XX_RB_DEPTH_PITCH(cpp * gmem->bin_w);
+       }
+
+       OUT_PKT0(ring, REG_A3XX_RB_DEPTH_INFO, 2);
+       OUT_RING(ring, depth_info);                 /* RB_DEPTH_INFO */
+       OUT_RING(ring, depth_pitch);                /* RB_DEPTH_PITCH */
+
+       OUT_PKT0(ring, REG_A3XX_RB_WINDOW_SIZE, 1);
+       OUT_RING(ring, A3XX_RB_WINDOW_SIZE_WIDTH(pfb->width) |
+                       A3XX_RB_WINDOW_SIZE_HEIGHT(pfb->height));
+
+       OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 1);
+       OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
+                       A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE);
+}
+
+/* Per-tile setup, emitted right before the IB to the rendering cmds:
+ * selects the bin, points the MRTs at GMEM, enables GMEM rendering with
+ * the full bin width, and sets the window offset and screen scissor to
+ * the tile rectangle.
+ */
+static void
+fd3_emit_tile_renderprep(struct fd_context *ctx, uint32_t xoff, uint32_t yoff,
+               uint32_t bin_w, uint32_t bin_h)
+{
+       struct fd_ringbuffer *ring = ctx->ring;
+       struct fd_gmem_stateobj *gmem = &ctx->gmem;
+       struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
+
+       /* inclusive tile rectangle in framebuffer coordinates: */
+       const uint32_t left = xoff;
+       const uint32_t top = yoff;
+       const uint32_t right = xoff + bin_w - 1;
+       const uint32_t bottom = yoff + bin_h - 1;
+
+       OUT_PKT3(ring, CP_SET_BIN, 3);
+       OUT_RING(ring, 0x00000000);
+       OUT_RING(ring, CP_SET_BIN_1_X1(left) | CP_SET_BIN_1_Y1(top));
+       OUT_RING(ring, CP_SET_BIN_2_X2(right) | CP_SET_BIN_2_Y2(bottom));
+
+       emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL, gmem->bin_w);
+
+       fd3_emit_rbrc_tile_state(ring,
+                       A3XX_RB_RENDER_CONTROL_ENABLE_GMEM |
+                       A3XX_RB_RENDER_CONTROL_BIN_WIDTH(gmem->bin_w));
+
+       /* window offset and screen scissor for the current tile: */
+       OUT_PKT0(ring, REG_A3XX_PA_SC_WINDOW_OFFSET, 1);
+       OUT_RING(ring, A3XX_PA_SC_WINDOW_OFFSET_X(xoff) |
+                       A3XX_PA_SC_WINDOW_OFFSET_Y(yoff));
+
+       OUT_PKT0(ring, REG_A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 2);
+       OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_TL_X(left) |
+                       A3XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(top));
+       OUT_RING(ring, A3XX_GRAS_SC_SCREEN_SCISSOR_BR_X(right) |
+                       A3XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(bottom));
+}
+
+/* Install the a3xx implementations of the per-tile GMEM hooks on the
+ * freedreno context behind 'pctx'.
+ */
+void
+fd3_gmem_init(struct pipe_context *pctx)
+{
+       struct fd_context *fd = fd_context(pctx);
+
+       /* setup hooks: */
+       fd->emit_tile_init = fd3_emit_tile_init;
+       fd->emit_tile_prep = fd3_emit_tile_prep;
+       fd->emit_tile_renderprep = fd3_emit_tile_renderprep;
+
+       /* transfers between system memory and gmem: */
+       fd->emit_tile_mem2gmem = fd3_emit_tile_mem2gmem;
+       fd->emit_tile_gmem2mem = fd3_emit_tile_gmem2mem;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.h b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.h
new file mode 100644 (file)
index 0000000..91b0286
--- /dev/null
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_GMEM_H_
+#define FD3_GMEM_H_
+
+#include "pipe/p_context.h"
+
+void fd3_gmem_init(struct pipe_context *pctx);
+
+#endif /* FD3_GMEM_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
new file mode 100644 (file)
index 0000000..b5a027e
--- /dev/null
@@ -0,0 +1,642 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "fd3_program.h"
+#include "fd3_compiler.h"
+#include "fd3_texture.h"
+#include "fd3_util.h"
+
+/* Release a shader state object and everything it owns.
+ *
+ * This is also the cleanup path for a partially constructed object:
+ * create_shader() jumps here when compiling or assembling fails, at
+ * which point so->bo (and possibly so->ir) is still NULL.  Each member
+ * is therefore only released if it was actually created.
+ */
+static void
+delete_shader(struct fd3_shader_stateobj *so)
+{
+       if (!so)
+               return;
+
+       if (so->ir)
+               ir3_shader_destroy(so->ir);
+       if (so->bo)
+               fd_bo_del(so->bo);
+       free(so);
+}
+
+/* Assemble so->ir into machine code and upload it into a newly
+ * allocated buffer object.
+ *
+ * On success so->bo holds the shader binary and so->instrlen /
+ * so->constlen are derived from the assembler's info.  On any failure
+ * (assembler returned nothing, bo allocation failed, bo could not be
+ * mapped) so->bo is left NULL, which is what the caller tests for; this
+ * relies on so->bo being NULL on entry, as it is for a freshly
+ * CALLOC'd state object.  The temporary assembler output is always
+ * freed.
+ */
+static void
+assemble_shader(struct pipe_context *pctx, struct fd3_shader_stateobj *so)
+{
+       struct fd_context *ctx = fd_context(pctx);
+       uint32_t sz, *bin;
+       void *map;
+
+       bin = ir3_shader_assemble(so->ir, &so->info);
+       if (!bin)
+               return;
+
+       sz = so->info.sizedwords * 4;
+
+       so->bo = fd_bo_new(ctx->screen->dev, sz,
+                       DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
+                       DRM_FREEDRENO_GEM_TYPE_KMEM);
+       if (!so->bo)
+               goto out;
+
+       map = fd_bo_map(so->bo);
+       if (!map) {
+               /* a bo we can't write to is useless; report it as a failure */
+               fd_bo_del(so->bo);
+               so->bo = NULL;
+               goto out;
+       }
+
+       memcpy(map, bin, sz);
+
+       so->instrlen = so->info.sizedwords / 8;
+       so->constlen = so->info.max_const + 1;
+
+out:
+       free(bin);
+}
+
+/* Vertex shader inputs are loaded into registers before the shader
+ * starts executing, so the max_reg computed from the shader's
+ * instructions alone can under-report the registers actually in use.
+ * Raise info.max_reg so that it covers every input's register as well
+ * (regid >> 2, presumably turning a scalar register id into a
+ * full-register index).
+ */
+static void
+fixup_vp_regfootprint(struct fd3_shader_stateobj *so)
+{
+       struct ir3_shader_info *info = &so->info;
+       unsigned n;
+
+       for (n = 0; n < so->inputs_count; n++)
+               info->max_reg = MAX2(info->max_reg, so->inputs[n].regid >> 2);
+}
+
+/* Build a shader state object of the given type from the TGSI tokens in
+ * 'cso': compile, assemble into a buffer object and, for vertex
+ * shaders, fix up the register footprint.  With FD_DBG_DISASM set, both
+ * the TGSI input and the assembled result are dumped.
+ *
+ * Returns the new state object, or NULL if allocation, compilation or
+ * assembly failed.
+ */
+static struct fd3_shader_stateobj *
+create_shader(struct pipe_context *pctx, const struct pipe_shader_state *cso,
+               enum shader_t type)
+{
+       struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
+
+       if (!so)
+               return NULL;
+
+       so->type = type;
+
+       if (fd_mesa_debug & FD_DBG_DISASM) {
+               DBG("dump tgsi: type=%d", so->type);
+               tgsi_dump(cso->tokens, 0);
+       }
+
+       /* With a full precision color register we seem to get wrong colors
+        * (maybe a swap/endianness or hw issue?).  The blob driver also
+        * only seems to use a half precision register for color output (as
+        * far as found so far), even with highp precision.  So force half
+        * precision for frag shaders:
+        */
+       if (type == SHADER_FRAGMENT)
+               so->half_precision = true;
+
+       if (fd3_compile_shader(so, cso->tokens)) {
+               debug_error("compile failed!");
+               goto fail;
+       }
+
+       assemble_shader(pctx, so);
+       if (!so->bo) {
+               debug_error("assemble failed!");
+               goto fail;
+       }
+
+       if (type == SHADER_VERTEX)
+               fixup_vp_regfootprint(so);
+
+       if (fd_mesa_debug & FD_DBG_DISASM) {
+               DBG("disassemble: type=%d", so->type);
+               disasm_a3xx(fd_bo_map(so->bo), so->info.sizedwords, 0, so->type);
+       }
+
+       return so;
+
+fail:
+       delete_shader(so);
+       return NULL;
+}
+
+/* Create a fragment shader state object from 'cso'; returns NULL on
+ * failure.
+ */
+static void *
+fd3_fp_state_create(struct pipe_context *pctx,
+               const struct pipe_shader_state *cso)
+{
+       struct fd3_shader_stateobj *fp = create_shader(pctx, cso, SHADER_FRAGMENT);
+
+       return fp;
+}
+
+/* Destroy a fragment shader state object created by
+ * fd3_fp_state_create().
+ */
+static void
+fd3_fp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       delete_shader(hwcso);
+}
+
+/* Make 'hwcso' the current fragment shader and flag the program state
+ * dirty so that it gets re-emitted.
+ */
+static void
+fd3_fp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd_context *fd = fd_context(pctx);
+
+       fd->prog.fp = hwcso;
+
+       fd->dirty |= FD_DIRTY_PROG;
+       fd->prog.dirty |= FD_SHADER_DIRTY_FP;
+}
+
+/* Create a vertex shader state object from 'cso'; returns NULL on
+ * failure.
+ */
+static void *
+fd3_vp_state_create(struct pipe_context *pctx,
+               const struct pipe_shader_state *cso)
+{
+       struct fd3_shader_stateobj *vp = create_shader(pctx, cso, SHADER_VERTEX);
+
+       return vp;
+}
+
+/* Destroy a vertex shader state object created by
+ * fd3_vp_state_create().
+ */
+static void
+fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
+{
+       delete_shader(hwcso);
+}
+
+/* pipe_context::bind_vs_state hook: record the bound vertex shader and
+ * flag both the vertex-shader and the overall program state as dirty.
+ */
+static void
+fd3_vp_state_bind(struct pipe_context *pctx, void *hwcso)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       fdctx->prog.vp = hwcso;
+       fdctx->prog.dirty = fdctx->prog.dirty | FD_SHADER_DIRTY_VP;
+       fdctx->dirty = fdctx->dirty | FD_DIRTY_PROG;
+}
+
+/* Emit a CP_LOAD_STATE packet carrying the shader's instructions inline
+ * in the ringbuffer (SS_DIRECT).  The destination state block is picked
+ * from so->type: the vertex shader block for SHADER_VERTEX, the fragment
+ * shader block for anything else.
+ */
+static void
+emit_shader(struct fd_ringbuffer *ring, struct fd3_shader_stateobj *so)
+{
+       struct ir3_shader_info *info = &so->info;
+       enum adreno_state_block block = (so->type == SHADER_VERTEX) ?
+                       SB_VERT_SHADER : SB_FRAG_SHADER;
+       uint32_t *dwords, n;
+
+       // XXX use SS_INDIRECT
+       dwords = fd_bo_map(so->bo);
+
+       /* two header dwords followed by the shader binary itself: */
+       OUT_PKT3(ring, CP_LOAD_STATE, 2 + info->sizedwords);
+       OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) |
+                       CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
+                       CP_LOAD_STATE_0_STATE_BLOCK(block) |
+                       CP_LOAD_STATE_0_NUM_UNIT(so->instrlen));
+       OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
+                       CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
+
+       n = 0;
+       while (n < info->sizedwords) {
+               OUT_RING(ring, dwords[n]);
+               n++;
+       }
+}
+
+/* Emit the register state and the shader instructions for a
+ * vertex/fragment program pair into the ringbuffer.
+ *
+ * Both prog->vp and prog->fp are dereferenced unconditionally, so they
+ * must be non-NULL.  Linkage between the two stages is positional: the
+ * i'th fragment shader input is fed from the i'th vertex shader output.
+ */
+void
+fd3_program_emit(struct fd_ringbuffer *ring,
+               struct fd_program_stateobj *prog)
+{
+       struct fd3_shader_stateobj *vp = prog->vp;
+       struct fd3_shader_stateobj *fp = prog->fp;
+       struct ir3_shader_info *vsi = &vp->info;
+       struct ir3_shader_info *fsi = &fp->info;
+       int i;
+
+       /* we could probably divide this up into things that need to be
+        * emitted if frag-prog is dirty vs if vert-prog is dirty..
+        */
+
+       OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
+       OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
+                       A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART |
+                       A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
+       OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
+                       A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE);
+       OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
+       OUT_RING(ring, 0x00000000);        /* HLSQ_CONTROL_3_REG */
+       OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
+                       A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
+                       A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vp->instrlen));
+       OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
+                       A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
+                       A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fp->instrlen));
+
+       OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
+       OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
+                       A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
+                       // XXX "resolve" (?) bit set on gmem->mem pass..
+//                     COND(!uniforms, A3XX_SP_SP_CTRL_REG_RESOLVE) |
+                       // XXX sometimes 0, sometimes 1:
+                       A3XX_SP_SP_CTRL_REG_LOMODE(1));
+
+       /* emit unknown sequence of perfcounter disables that the blob
+        * emits as part of the program state..
+        */
+       for (i = 0; i < 6; i++) {
+               OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER0_SELECT, 1);
+               OUT_RING(ring, 0x00000000);    /* SP_PERFCOUNTER0_SELECT */
+
+               OUT_PKT0(ring, REG_A3XX_SP_PERFCOUNTER4_SELECT, 1);
+               OUT_RING(ring, 0x00000000);    /* SP_PERFCOUNTER4_SELECT */
+       }
+
+       OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1);
+       OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen));
+
+       /* three consecutive registers: SP_VS_CTRL_REG0, SP_VS_CTRL_REG1 and
+        * SP_VS_PARAM_REG:
+        */
+       OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
+       OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
+                       A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
+                       A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
+                       A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
+                       A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
+                       A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
+                       A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
+                       COND(vp->samplers_count > 0, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
+                       A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen));
+       OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
+                       A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
+                       A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vsi->max_const, 0)));
+       OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(vp->pos_regid) |
+                       A3XX_SP_VS_PARAM_REG_PSIZEREGID(vp->psize_regid) |
+                       A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(vp->outputs_count));
+
+       /* every fragment shader input needs a vertex shader output to pair
+        * with in the loops below:
+        */
+       assert(vp->outputs_count >= fp->inputs_count);
+
+       /* Each SP_VS_OUT_REG packs two varyings (A and B), so i advances by
+        * two per register.
+        * NOTE(review): when inputs_count is odd, the B half reads
+        * outputs[]/inputs[] entry [inputs_count], one past the last valid
+        * one; presumably still zero from CALLOC_STRUCT -- confirm.
+        */
+       for (i = 0; i < fp->inputs_count; ) {
+               uint32_t reg = 0;
+
+               OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i/2), 1);
+
+               reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[i].regid);
+               reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[i].compmask);
+               i++;
+
+               reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[i].regid);
+               reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[i].compmask);
+               i++;
+
+               OUT_RING(ring, reg);
+       }
+
+       /* Each SP_VS_VPC_DST_REG packs four output locations, taken from the
+        * fragment shader's inloc values; i advances by four per register,
+        * so entries up to the next multiple of four are read as well.
+        */
+       for (i = 0; i < fp->inputs_count; ) {
+               uint32_t reg = 0;
+
+               OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i/4), 1);
+
+               reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[i++].inloc);
+               reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[i++].inloc);
+               reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[i++].inloc);
+               reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[i++].inloc);
+
+               OUT_RING(ring, reg);
+       }
+
+#if 0
+       /* for some reason, when I write SP_{VS,FS}_OBJ_START_REG I get:
+[  666.663665] kgsl kgsl-3d0: |a3xx_err_callback| RBBM | AHB bus error | READ | addr=201 | ports=1:3
+[  666.664001] kgsl kgsl-3d0: |a3xx_err_callback| ringbuffer AHB error interrupt
+[  670.680909] kgsl kgsl-3d0: |adreno_idle| spun too long waiting for RB to idle
+[  670.681062] kgsl kgsl-3d0: |kgsl-3d0| Dump Started
+[  670.681123] kgsl kgsl-3d0: POWER: FLAGS = 00000007 | ACTIVE POWERLEVEL = 00000001
+[  670.681214] kgsl kgsl-3d0: POWER: INTERVAL TIMEOUT = 0000000A
+[  670.681367] kgsl kgsl-3d0: GRP_CLK = 325000000
+[  670.681489] kgsl kgsl-3d0: BUS CLK = 0
+        */
+       OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2);
+       OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) |
+                       A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
+       OUT_RELOC(ring, vp->bo, 0, 0);    /* SP_VS_OBJ_START_REG */
+#endif
+
+       OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
+       OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
+
+       OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
+       OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
+                       A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
+                       A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
+                       A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
+                       A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
+                       A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
+                       A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
+                       COND(fp->samplers_count > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
+                       A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
+       OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
+                       A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
+                       A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
+                       A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
+
+#if 0
+       OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
+       OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
+                       A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(128 - fp->instrlen));
+       OUT_RELOC(ring, fp->bo, 0, 0);    /* SP_FS_OBJ_START_REG */
+#endif
+
+       OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
+       OUT_RING(ring, 0x00000000);        /* SP_FS_FLAT_SHAD_MODE_REG_0 */
+       OUT_RING(ring, 0x00000000);        /* SP_FS_FLAT_SHAD_MODE_REG_1 */
+
+       OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
+       OUT_RING(ring, 0x00000000);        /* SP_FS_OUTPUT_REG */
+
+       /* only the first of the four MRT registers gets the shader's color
+        * output register; the other three are written with regid 0:
+        */
+       OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4);
+       OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(fp->color_regid) |
+                       COND(fp->half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION));
+       OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
+       OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
+       OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
+
+       OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
+       OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
+                       A3XX_VPC_ATTR_THRDASSIGN(1) |
+                       A3XX_VPC_ATTR_LMSIZE(1));
+       OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) |
+                       A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
+
+       OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
+       OUT_RING(ring, fp->vinterp[0]);    /* VPC_VARYING_INTERP[0].MODE */
+       OUT_RING(ring, fp->vinterp[1]);    /* VPC_VARYING_INTERP[1].MODE */
+       OUT_RING(ring, fp->vinterp[2]);    /* VPC_VARYING_INTERP[2].MODE */
+       OUT_RING(ring, fp->vinterp[3]);    /* VPC_VARYING_INTERP[3].MODE */
+
+       OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
+       OUT_RING(ring, fp->vpsrepl[0]);    /* VPC_VARYING_PS_REPL[0].MODE */
+       OUT_RING(ring, fp->vpsrepl[1]);    /* VPC_VARYING_PS_REPL[1].MODE */
+       OUT_RING(ring, fp->vpsrepl[2]);    /* VPC_VARYING_PS_REPL[2].MODE */
+       OUT_RING(ring, fp->vpsrepl[3]);    /* VPC_VARYING_PS_REPL[3].MODE */
+
+       OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
+       OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
+                       A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));
+
+       /* upload the shader instructions themselves; each upload is followed
+        * by a write of zero to VFD_PERFCOUNTER0_SELECT:
+        */
+       emit_shader(ring, vp);
+
+       OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+       OUT_RING(ring, 0x00000000);        /* VFD_PERFCOUNTER0_SELECT */
+
+       emit_shader(ring, fp);
+
+       OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
+       OUT_RING(ring, 0x00000000);        /* VFD_PERFCOUNTER0_SELECT */
+
+       OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
+       OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(vp->total_in) |
+                       A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
+                       A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(vp->inputs_count) |
+                       A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(vp->inputs_count));
+       OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
+                       A3XX_VFD_CONTROL_1_REGID4VTX(regid(63,0)) |
+                       A3XX_VFD_CONTROL_1_REGID4INST(regid(63,0)));
+}
+
+/* once the compiler is good enough, we should construct TGSI in the
+ * core freedreno driver, and then let the a2xx/a3xx parts compile
+ * the internal shaders from TGSI the same as regular shaders.  This
+ * would be the first step towards handling most of clear (and the
+ * gmem<->mem blits) from the core via normal state changes and shader
+ * state objects.
+ *
+ * (Well, there would still be some special bits, because there are
+ * some registers that don't get set for normal draw, but this should
+ * be relatively small and could be handled via callbacks from core
+ * into a2xx/a3xx..)
+ */
+/* Wrap a hand-built ir3 shader in a shader state object and assemble it.
+ *
+ * Takes over 'ir' in every case: it is destroyed here if the state
+ * object cannot be allocated, and otherwise stored in so->ir.
+ *
+ * Returns NULL when allocation or assembly fails.  Assembly failure was
+ * previously only caught by an assert, so builds with asserts disabled
+ * handed back an object without a bo; it now takes the same
+ * debug_error() + delete_shader() route that create_shader() uses
+ * (delete_shader() presumably releases so->ir as well -- confirm).
+ */
+static struct fd3_shader_stateobj *
+create_internal_shader(struct pipe_context *pctx, enum shader_t type,
+               struct ir3_shader *ir)
+{
+       struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj);
+
+       if (!so) {
+               ir3_shader_destroy(ir);
+               return NULL;
+       }
+
+       so->type = type;
+       so->ir = ir;
+
+       assemble_shader(pctx, so);
+       if (!so->bo) {
+               debug_error("assemble failed!");
+               delete_shader(so);
+               return NULL;
+       }
+
+       return so;
+}
+
+/* Hand-assembled fragment shader of the internal blit program:
+ *    (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x
+ *    (rpt5)nop
+ *    sam (f32)(xyzw)r0.x, r0.z, s#0, t#0
+ *    (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x
+ *    end
+ *
+ * Returns NULL if create_internal_shader() fails.
+ */
+static struct fd3_shader_stateobj *
+create_blit_fp(struct pipe_context *pctx)
+{
+       struct ir3_shader *shader = ir3_shader_create();
+       struct ir3_instruction *bary, *nop, *sam, *cov;
+       struct fd3_shader_stateobj *so;
+       unsigned i;
+
+       /* (sy)(ss)(rpt1)bary.f (ei)r0.z, (r)0, r0.x */
+       bary = ir3_instr_create(shader, 2, OPC_BARY_F);
+       bary->flags = IR3_INSTR_SY | IR3_INSTR_SS;
+       bary->repeat = 1;
+       /* dst: (ei)r0.z */
+       ir3_reg_create(bary, regid(0,2), IR3_REG_EI);
+       /* src: (r)0 */
+       ir3_reg_create(bary, 0, IR3_REG_R | IR3_REG_IMMED)->iim_val = 0;
+       /* src: r0.x */
+       ir3_reg_create(bary, regid(0,0), 0);
+
+       /* (rpt5)nop */
+       nop = ir3_instr_create(shader, 0, OPC_NOP);
+       nop->repeat = 5;
+
+       /* sam (f32)(xyzw)r0.x, r0.z, s#0, t#0 */
+       sam = ir3_instr_create(shader, 5, OPC_SAM);
+       sam->cat5.samp = 0;
+       sam->cat5.tex  = 0;
+       sam->cat5.type = TYPE_F32;
+       /* dst: (xyzw)r0.x */
+       ir3_reg_create(sam, regid(0,0), 0)->wrmask = 0xf;
+       /* src: r0.z */
+       ir3_reg_create(sam, regid(0,2), 0);
+
+       /* (sy)(rpt3)cov.f32f16 hr0.x, (r)r0.x
+        * (mov/cov instructions have no opc)
+        */
+       cov = ir3_instr_create(shader, 1, 0);
+       cov->flags = IR3_INSTR_SY;
+       cov->repeat = 3;
+       cov->cat1.src_type = TYPE_F32;
+       cov->cat1.dst_type = TYPE_F16;
+       /* dst: hr0.x */
+       ir3_reg_create(cov, regid(0,0), IR3_REG_HALF);
+       /* src: (r)r0.x */
+       ir3_reg_create(cov, regid(0,0), IR3_REG_R);
+
+       /* end */
+       ir3_instr_create(shader, 0, OPC_END);
+
+       so = create_internal_shader(pctx, SHADER_FRAGMENT, shader);
+       if (so == NULL)
+               return NULL;
+
+       /* color is written to the half-precision register hr0.x: */
+       so->color_regid = regid(0,0);
+       so->half_precision = true;
+
+       /* a single two-component input at inloc 8, plus one sampler: */
+       so->inputs_count = 1;
+       so->inputs[0].inloc = 8;
+       so->inputs[0].compmask = 0x3;
+       so->total_in = 2;
+       so->samplers_count = 1;
+
+       for (i = 0; i < 4; i++)
+               so->vpsrepl[i] = 0x99999999;
+
+       return so;
+}
+
+/* Hand-assembled vertex shader of the internal blit program, consisting
+ * of a single instruction:
+ *    (sy)(ss)end
+ *
+ * Returns NULL if create_internal_shader() fails.
+ */
+static struct fd3_shader_stateobj *
+create_blit_vp(struct pipe_context *pctx)
+{
+       struct ir3_shader *shader = ir3_shader_create();
+       struct ir3_instruction *end;
+       struct fd3_shader_stateobj *so;
+
+       /* (sy)(ss)end */
+       end = ir3_instr_create(shader, 0, OPC_END);
+       end->flags = IR3_INSTR_SY | IR3_INSTR_SS;
+
+       so = create_internal_shader(pctx, SHADER_VERTEX, shader);
+       if (so) {
+               /* two vec4 inputs, in r0 and r1 (8 scalars in total): */
+               so->inputs_count = 2;
+               so->inputs[0].regid = regid(0,0);
+               so->inputs[0].compmask = 0xf;
+               so->inputs[1].regid = regid(1,0);
+               so->inputs[1].compmask = 0xf;
+               so->total_in = 8;
+
+               /* position comes from r1.x, the one varying from r0.x: */
+               so->pos_regid = regid(1,0);
+               so->psize_regid = regid(63,0);
+               so->outputs_count = 1;
+               so->outputs[0].regid = regid(0,0);
+
+               fixup_vp_regfootprint(so);
+       }
+
+       return so;
+}
+
+/* Hand-assembled fragment shader of the internal solid program; it
+ * copies the half-precision constant hc0 into the color register:
+ *    (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x
+ *    end
+ *
+ * Returns NULL if create_internal_shader() fails.
+ */
+static struct fd3_shader_stateobj *
+create_solid_fp(struct pipe_context *pctx)
+{
+       struct ir3_shader *shader = ir3_shader_create();
+       struct ir3_instruction *mov;
+       struct fd3_shader_stateobj *so;
+
+       /* (sy)(ss)(rpt3)mov.f16f16 hr0.x, (r)hc0.x
+        * (mov/cov instructions have no opc)
+        */
+       mov = ir3_instr_create(shader, 1, 0);
+       mov->flags = IR3_INSTR_SY | IR3_INSTR_SS;
+       mov->repeat = 3;
+       mov->cat1.src_type = TYPE_F16;
+       mov->cat1.dst_type = TYPE_F16;
+       /* dst: hr0.x */
+       ir3_reg_create(mov, regid(0,0), IR3_REG_HALF);
+       /* src: (r)hc0.x */
+       ir3_reg_create(mov, regid(0,0),
+                       IR3_REG_HALF | IR3_REG_CONST | IR3_REG_R);
+
+       /* end */
+       ir3_instr_create(shader, 0, OPC_END);
+
+       so = create_internal_shader(pctx, SHADER_FRAGMENT, shader);
+       if (so) {
+               /* no inputs; color is written to hr0.x: */
+               so->inputs_count = 0;
+               so->total_in = 0;
+               so->color_regid = regid(0,0);
+               so->half_precision = true;
+       }
+
+       return so;
+}
+
+/* Hand-assembled vertex shader of the internal solid program, consisting
+ * of a single instruction:
+ *    (sy)(ss)end
+ *
+ * Returns NULL if create_internal_shader() fails.
+ */
+static struct fd3_shader_stateobj *
+create_solid_vp(struct pipe_context *pctx)
+{
+       struct ir3_shader *shader = ir3_shader_create();
+       struct ir3_instruction *end;
+       struct fd3_shader_stateobj *so;
+
+       /* (sy)(ss)end */
+       end = ir3_instr_create(shader, 0, OPC_END);
+       end->flags = IR3_INSTR_SY | IR3_INSTR_SS;
+
+       so = create_internal_shader(pctx, SHADER_VERTEX, shader);
+       if (so) {
+               /* one vec4 input in r0 (4 scalars in total): */
+               so->inputs_count = 1;
+               so->inputs[0].regid = regid(0,0);
+               so->inputs[0].compmask = 0xf;
+               so->total_in = 4;
+
+               /* position comes from r0.x; there are no varyings: */
+               so->pos_regid = regid(0,0);
+               so->psize_regid = regid(63,0);
+               so->outputs_count = 0;
+
+               fixup_vp_regfootprint(so);
+       }
+
+       return so;
+}
+
+/* Install the a3xx shader state hooks on the context and create the
+ * internal "solid" and "blit" programs.  The create_*() results are
+ * stored as returned, without a NULL check.
+ */
+void
+fd3_prog_init(struct pipe_context *pctx)
+{
+       struct fd_context *fdctx = fd_context(pctx);
+
+       /* vertex shader state hooks: */
+       pctx->create_vs_state = fd3_vp_state_create;
+       pctx->bind_vs_state = fd3_vp_state_bind;
+       pctx->delete_vs_state = fd3_vp_state_delete;
+
+       /* fragment shader state hooks: */
+       pctx->create_fs_state = fd3_fp_state_create;
+       pctx->bind_fs_state = fd3_fp_state_bind;
+       pctx->delete_fs_state = fd3_fp_state_delete;
+
+       /* internal programs: */
+       fdctx->solid_prog.fp = create_solid_fp(pctx);
+       fdctx->solid_prog.vp = create_solid_vp(pctx);
+       fdctx->blit_prog.fp = create_blit_fp(pctx);
+       fdctx->blit_prog.vp = create_blit_vp(pctx);
+}
+
+/* Release the internal programs created by fd3_prog_init().
+ *
+ * fd3_prog_init() stores the create_*() results unchecked, and those are
+ * NULL when shader creation failed, so skip any program that was never
+ * created instead of handing NULL to delete_shader().
+ */
+void
+fd3_prog_fini(struct pipe_context *pctx)
+{
+       struct fd_context *ctx = fd_context(pctx);
+
+       if (ctx->solid_prog.vp)
+               delete_shader(ctx->solid_prog.vp);
+       if (ctx->solid_prog.fp)
+               delete_shader(ctx->solid_prog.fp);
+       if (ctx->blit_prog.vp)
+               delete_shader(ctx->blit_prog.vp);
+       if (ctx->blit_prog.fp)
+               delete_shader(ctx->blit_prog.fp);
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.h b/src/gallium/drivers/freedreno/a3xx/fd3_program.h
new file mode 100644 (file)
index 0000000..9b50d34
--- /dev/null
@@ -0,0 +1,116 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_PROGRAM_H_
+#define FD3_PROGRAM_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_context.h"
+
+#include "ir-a3xx.h"
+#include "disasm.h"
+
+/* Driver-side state object for one a3xx shader (vertex or fragment).
+ * It holds the assembled shader plus the register/linkage information
+ * that fd3_program_emit() needs.
+ */
+struct fd3_shader_stateobj {
+       /* shader stage (SHADER_VERTEX or SHADER_FRAGMENT): */
+       enum shader_t type;
+
+       /* buffer holding the shader instructions; mapped with fd_bo_map()
+        * when the shader is emitted:
+        */
+       struct fd_bo *bo;
+
+       /* info about the shader (size in dwords, highest register and
+        * const used) and the ir it was built from:
+        */
+       struct ir3_shader_info info;
+       struct ir3_shader *ir;
+
+       /* is shader using (or more precisely, is color_regid) half-
+        * precision register?
+        */
+       bool half_precision;
+
+       /* special output register locations: */
+       uint8_t pos_regid, psize_regid, color_regid;
+
+       /* the instructions length is in units of instruction groups
+        * (4 instructions, 8 dwords):
+        */
+       unsigned instrlen;
+
+       /* the constants length is in units of vec4's, and is the sum of
+        * the uniforms and the built-in compiler constants
+        */
+       unsigned constlen;
+
+       /* About Linkage:
+        *   + Let the frag shader determine the position/compmask for the
+        *     varyings, since it is the place where we know if the varying
+        *     is actually used, and if so, which components are used.  So
+        *     what the hw calls "outloc" is taken from the "inloc" of the
+        *     frag shader.
+        *   + From the vert shader, we only need the output regid
+        */
+
+       /* varyings/outputs: */
+       unsigned outputs_count;
+       struct {
+               uint8_t regid;
+       } outputs[16];
+
+       /* vertices/inputs: */
+       unsigned inputs_count;
+       struct {
+               uint8_t regid;
+               uint8_t compmask;
+               /* in theory inloc of fs should match outloc of vs: */
+               uint8_t inloc;
+       } inputs[16];
+
+       unsigned total_in;       /* sum of inputs (scalar) */
+
+       /* samplers: */
+       unsigned samplers_count;
+
+       /* const reg # of first immediate, ie. 1 == c1
+        * (not regid, because TGSI thinks in terms of vec4 registers,
+        * not scalar registers)
+        */
+       unsigned first_immediate;
+       unsigned immediates_count;
+       struct {
+               uint32_t val[4];
+       } immediates[64];
+
+       /* so far, only used for blit_prog shader.. values for
+        * VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE
+        */
+       uint32_t vinterp[4], vpsrepl[4];
+};
+
+/* emit the register state and shader instructions for the vp/fp pair
+ * in prog into the ringbuffer:
+ */
+void fd3_program_emit(struct fd_ringbuffer *ring,
+               struct fd_program_stateobj *prog);
+
+/* init installs the shader state hooks on the context and creates the
+ * internal solid/blit programs; fini deletes those programs again:
+ */
+void fd3_prog_init(struct pipe_context *pctx);
+void fd3_prog_fini(struct pipe_context *pctx);
+
+#endif /* FD3_PROGRAM_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.c
new file mode 100644 (file)
index 0000000..8f6c0fe
--- /dev/null
@@ -0,0 +1,92 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd3_rasterizer.h"
+#include "fd3_context.h"
+#include "fd3_util.h"
+
+/* Build the a3xx rasterizer state object for a gallium rasterizer CSO:
+ * a copy of the pipe state plus the register values derived from it.
+ * Returns NULL if the allocation fails.
+ */
+void *
+fd3_rasterizer_state_create(struct pipe_context *pctx,
+               const struct pipe_rasterizer_state *cso)
+{
+       struct fd3_rasterizer_stateobj *rast;
+       uint32_t mode_control, prim_vtx_cntl;
+
+       rast = CALLOC_STRUCT(fd3_rasterizer_stateobj);
+       if (rast == NULL)
+               return NULL;
+
+       rast->base = *cso;
+
+       /* Not handled yet (TODO): line_stipple_enable, half_pixel_center,
+        * point_size, psize_min/psize_max, front_ccw, multisample.
+        */
+
+       /* GRAS_SU_MODE_CONTROL: line width, culling and polygon offset */
+       mode_control = A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(cso->line_width/2);
+       if (cso->cull_face & PIPE_FACE_FRONT)
+               mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_FRONT;
+       if (cso->cull_face & PIPE_FACE_BACK)
+               mode_control |= A3XX_GRAS_SU_MODE_CONTROL_CULL_BACK;
+       if (cso->offset_tri)
+               mode_control |= A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET;
+
+       /* PC_PRIM_VTX_CNTL: polygon fill modes and provoking vertex */
+       prim_vtx_cntl =
+               A3XX_PC_PRIM_VTX_CNTL_POLYMODE_FRONT_PTYPE(fd_polygon_mode(cso->fill_front)) |
+               A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(fd_polygon_mode(cso->fill_back));
+       if (!cso->flatshade_first)
+               prim_vtx_cntl |= A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST;
+
+       rast->gras_su_mode_control = mode_control;
+       rast->pc_prim_vtx_cntl = prim_vtx_cntl;
+
+       /* fixed values for now: */
+       rast->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */
+       rast->gras_su_point_minmax = 0xffc00010;  /* ??? */
+       rast->gras_su_point_size   = 0x00000008;  /* ??? */
+
+       /* polygon offset: */
+       rast->gras_su_poly_offset_scale =
+                       A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
+       rast->gras_su_poly_offset_offset =
+                       A3XX_GRAS_SU_POLY_OFFSET_OFFSET(cso->offset_units);
+
+       return rast;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h b/src/gallium/drivers/freedreno/a3xx/fd3_rasterizer.h
new file mode 100644 (file)
index 0000000..7e9c1f5
--- /dev/null
@@ -0,0 +1,56 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_RASTERIZER_H_
+#define FD3_RASTERIZER_H_
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+/* a3xx rasterizer state object: a copy of the gallium rasterizer state
+ * plus the register values that fd3_rasterizer_state_create() derives
+ * from it.
+ */
+struct fd3_rasterizer_stateobj {
+       /* first member, so that the fd3_rasterizer_stateobj() helper can
+        * cast a pointer to the base state to the containing object:
+        */
+       struct pipe_rasterizer_state base;
+       uint32_t gras_su_point_minmax;
+       uint32_t gras_su_point_size;
+       uint32_t gras_su_poly_offset_scale;
+       uint32_t gras_su_poly_offset_offset;
+
+       uint32_t gras_su_mode_control;
+       uint32_t gras_cl_clip_cntl;
+       uint32_t pc_prim_vtx_cntl;
+};
+
+/* Cast a gallium rasterizer state pointer to the a3xx state object that
+ * embeds it ('base' is the first member of fd3_rasterizer_stateobj).
+ */
+static INLINE struct fd3_rasterizer_stateobj *
+fd3_rasterizer_stateobj(struct pipe_rasterizer_state *rast)
+{
+       struct fd3_rasterizer_stateobj *so =
+                       (struct fd3_rasterizer_stateobj *)rast;
+
+       return so;
+}
+
+/* create a fd3_rasterizer_stateobj from cso (NULL on allocation failure): */
+void * fd3_rasterizer_state_create(struct pipe_context *pctx,
+               const struct pipe_rasterizer_state *cso);
+
+#endif /* FD3_RASTERIZER_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.c b/src/gallium/drivers/freedreno/a3xx/fd3_screen.c
new file mode 100644 (file)
index 0000000..9bb19ab
--- /dev/null
@@ -0,0 +1,105 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_screen.h"
+#include "util/u_format.h"
+
+#include "fd3_screen.h"
+#include "fd3_context.h"
+#include "fd3_util.h"
+
+/* pipe_screen::is_format_supported hook.
+ *
+ * Collects the subset of the requested bind flags that the format can
+ * be used with, and reports the format as supported only when that
+ * subset equals 'usage'.  Out-of-range targets, multisampling
+ * (sample_count > 1) and formats rejected by util_format_is_supported()
+ * are refused up front.
+ */
+static boolean
+fd3_screen_is_format_supported(struct pipe_screen *pscreen,
+               enum pipe_format format,
+               enum pipe_texture_target target,
+               unsigned sample_count,
+               unsigned usage)
+{
+       const unsigned target_binds = PIPE_BIND_RENDER_TARGET |
+                       PIPE_BIND_DISPLAY_TARGET |
+                       PIPE_BIND_SCANOUT |
+                       PIPE_BIND_SHARED;
+       const unsigned transfer_binds = PIPE_BIND_TRANSFER_READ |
+                       PIPE_BIND_TRANSFER_WRITE;
+       unsigned supported;
+
+       if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
+                       (sample_count > 1) || /* TODO add MSAA */
+                       !util_format_is_supported(format, usage)) {
+               DBG("not supported: format=%s, target=%d, sample_count=%d, usage=%x",
+                               util_format_name(format), target, sample_count, usage);
+               return FALSE;
+       }
+
+       /* transfers are accepted for every format that got this far: */
+       supported = usage & transfer_binds;
+
+       if (usage & PIPE_BIND_VERTEX_BUFFER) {
+               if (fd3_pipe2vtx(format) != ~0)
+                       supported |= PIPE_BIND_VERTEX_BUFFER;
+       }
+
+       if (usage & PIPE_BIND_SAMPLER_VIEW) {
+               if (fd3_pipe2tex(format) != ~0)
+                       supported |= PIPE_BIND_SAMPLER_VIEW;
+       }
+
+       /* render/display targets need both a color and a texture format: */
+       if (usage & target_binds) {
+               if ((fd3_pipe2color(format) != ~0) &&
+                               (fd3_pipe2tex(format) != ~0))
+                       supported |= usage & target_binds;
+       }
+
+       /* depth/stencil needs both a depth and a texture format: */
+       if (usage & PIPE_BIND_DEPTH_STENCIL) {
+               if ((fd_pipe2depth(format) != ~0) &&
+                               (fd3_pipe2tex(format) != ~0))
+                       supported |= PIPE_BIND_DEPTH_STENCIL;
+       }
+
+       if (usage & PIPE_BIND_INDEX_BUFFER) {
+               if (fd_pipe2index(format) != ~0)
+                       supported |= PIPE_BIND_INDEX_BUFFER;
+       }
+
+       if (supported != usage) {
+               DBG("not supported: format=%s, target=%d, sample_count=%d, "
+                               "usage=%x, retval=%x", util_format_name(format),
+                               target, sample_count, usage, supported);
+       }
+
+       return supported == usage;
+}
+
+/* Install the a3xx specific screen hooks (format query and context
+ * creation).
+ */
+void
+fd3_screen_init(struct pipe_screen *pscreen)
+{
+       pscreen->is_format_supported = fd3_screen_is_format_supported;
+       pscreen->context_create = fd3_context_create;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_screen.h b/src/gallium/drivers/freedreno/a3xx/fd3_screen.h
new file mode 100644 (file)
index 0000000..38204d3
--- /dev/null
@@ -0,0 +1,36 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_SCREEN_H_
+#define FD3_SCREEN_H_
+
+#include "pipe/p_screen.h"
+
+/* Sets the a3xx context_create and is_format_supported callbacks on pscreen. */
+void fd3_screen_init(struct pipe_screen *pscreen);
+
+#endif /* FD3_SCREEN_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
new file mode 100644 (file)
index 0000000..ae08b8a
--- /dev/null
@@ -0,0 +1,140 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+#include "util/u_inlines.h"
+
+#include "fd3_texture.h"
+#include "fd3_util.h"
+
+/* Translate a gallium PIPE_TEX_WRAP_* value into the a3xx clamp mode.
+ * Unknown values are logged and yield 0.
+ */
+static enum a3xx_tex_clamp
+tex_clamp(unsigned wrap)
+{
+       enum a3xx_tex_clamp mode;
+
+       /* Only the modes that could be coaxed out of the GLESv2 blob driver
+        * are known; the hardware probably supports more.  Until then plain
+        * and border clamping fall back to clamp-to-edge, and every mirrored
+        * mode falls back to mirror-repeat.
+        *
+        * TODO once the basics work, just try other values and see what
+        * happens.
+        */
+       switch (wrap) {
+       case PIPE_TEX_WRAP_MIRROR_REPEAT:
+       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
+       case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
+       case PIPE_TEX_WRAP_MIRROR_CLAMP:
+               mode = A3XX_TEX_MIRROR_REPEAT;
+               break;
+       case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
+       case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
+       case PIPE_TEX_WRAP_CLAMP:
+               mode = A3XX_TEX_CLAMP_TO_EDGE;
+               break;
+       case PIPE_TEX_WRAP_REPEAT:
+               mode = A3XX_TEX_REPEAT;
+               break;
+       default:
+               DBG("invalid wrap: %u", wrap);
+               mode = 0;
+               break;
+       }
+
+       return mode;
+}
+
+/* Translate a gallium PIPE_TEX_FILTER_* value into the a3xx filter mode.
+ * Unknown values are logged and yield 0.
+ */
+static enum a3xx_tex_filter
+tex_filter(unsigned filter)
+{
+       if (filter == PIPE_TEX_FILTER_NEAREST)
+               return A3XX_TEX_NEAREST;
+
+       if (filter == PIPE_TEX_FILTER_LINEAR)
+               return A3XX_TEX_LINEAR;
+
+       DBG("invalid filter: %u", filter);
+       return 0;
+}
+
+/* Build the a3xx sampler state object for 'cso': keeps a copy of the
+ * gallium state and pre-computes the two TEX_SAMP dwords.  Returns NULL
+ * if the allocation fails.
+ */
+static void *
+fd3_sampler_state_create(struct pipe_context *pctx,
+               const struct pipe_sampler_state *cso)
+{
+       struct fd3_sampler_stateobj *samp;
+       uint32_t filter_bits, wrap_bits;
+
+       samp = CALLOC_STRUCT(fd3_sampler_stateobj);
+       if (samp == NULL)
+               return NULL;
+
+       filter_bits =
+                       A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
+                       A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter));
+       wrap_bits =
+                       A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
+                       A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
+                       A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
+
+       samp->base = *cso;
+       samp->texsamp0 = filter_bits | wrap_bits;
+       samp->texsamp1 = 0x00000000;  /* ??? */
+
+       return samp;
+}
+
+/* Create a sampler view of 'prsc' described by the template 'cso'.
+ *
+ * The template is copied into the new view and the four TEX_CONST dwords
+ * are pre-computed from the view format/swizzle and the resource's size
+ * and pitch.  Returns NULL if the allocation fails.
+ */
+static struct pipe_sampler_view *
+fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
+               const struct pipe_sampler_view *cso)
+{
+       struct fd3_pipe_sampler_view *so = CALLOC_STRUCT(fd3_pipe_sampler_view);
+       struct fd_resource *rsc = fd_resource(prsc);
+
+       if (!so)
+               return NULL;
+
+       /* start from the caller's template, then overwrite the fields that
+        * must describe this particular view object:
+        */
+       so->base = *cso;
+       /* presumably takes a reference on prsc on behalf of the view (no
+        * previous pointer to release, hence NULL) -- confirm against
+        * u_inlines.h
+        */
+       pipe_reference(NULL, &prsc->reference);
+       so->base.texture = prsc;
+       so->base.reference.count = 1;
+       so->base.context = pctx;
+
+       so->tex_resource =  rsc;
+
+       so->texconst0 =
+                       0x40000000 | /* ??? */
+                       A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(cso->format)) |
+                       fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
+                                               cso->swizzle_b, cso->swizzle_a);
+       /* dimensions are taken from the resource's width0/height0 fields: */
+       so->texconst1 =
+                       A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
+                       A3XX_TEX_CONST_1_WIDTH(prsc->width0) |
+                       A3XX_TEX_CONST_1_HEIGHT(prsc->height0);
+       /* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
+       /* NOTE(review): pitch * cpp looks like a pixels -> bytes conversion;
+        * confirm the units of fd_resource::pitch.
+        */
+       so->texconst2 =
+                       A3XX_TEX_CONST_2_PITCH(rsc->pitch * rsc->cpp);
+       so->texconst3 = 0x00000000;  /* ??? */
+
+       return &so->base;
+}
+
+/* Hook the a3xx sampler-view and sampler-state constructors into 'pctx'. */
+void
+fd3_texture_init(struct pipe_context *pctx)
+{
+       pctx->create_sampler_view = fd3_sampler_view_create;
+       pctx->create_sampler_state = fd3_sampler_state_create;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.h b/src/gallium/drivers/freedreno/a3xx/fd3_texture.h
new file mode 100644 (file)
index 0000000..a83f527
--- /dev/null
@@ -0,0 +1,68 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_TEXTURE_H_
+#define FD3_TEXTURE_H_
+
+#include "pipe/p_context.h"
+
+#include "freedreno_texture.h"
+#include "freedreno_resource.h"
+
+#include "fd3_context.h"
+#include "fd3_util.h"
+
+/* a3xx sampler state: the gallium state plus pre-computed register values. */
+struct fd3_sampler_stateobj {
+       /* copy of the gallium state; first member so that the pointer casts
+        * between the two struct types line up:
+        */
+       struct pipe_sampler_state base;
+       /* TEX_SAMP dwords computed at create time (texsamp0 carries the
+        * min/mag filter and S/T/R wrap fields):
+        */
+       uint32_t texsamp0, texsamp1;
+};
+
+/* Downcast a gallium sampler state pointer to the a3xx object wrapping it. */
+static INLINE struct fd3_sampler_stateobj *
+fd3_sampler_stateobj(struct pipe_sampler_state *samp)
+{
+       struct fd3_sampler_stateobj *so = (struct fd3_sampler_stateobj *)samp;
+
+       return so;
+}
+
+/* a3xx sampler view: the gallium view plus pre-computed register values. */
+struct fd3_pipe_sampler_view {
+       /* gallium view; first member so that the pointer casts between the
+        * two struct types line up:
+        */
+       struct pipe_sampler_view base;
+       /* the driver-level resource behind base.texture: */
+       struct fd_resource *tex_resource;
+       /* TEX_CONST dwords computed when the view is created: */
+       uint32_t texconst0, texconst1, texconst2, texconst3;
+};
+
+/* Downcast a gallium sampler view pointer to the a3xx object wrapping it. */
+static INLINE struct fd3_pipe_sampler_view *
+fd3_pipe_sampler_view(struct pipe_sampler_view *pview)
+{
+       struct fd3_pipe_sampler_view *view =
+                       (struct fd3_pipe_sampler_view *)pview;
+
+       return view;
+}
+
+/* NOTE(review): fd3_texture.c as added alongside this header does not
+ * define fd3_get_const_idx() -- confirm that a definition exists elsewhere,
+ * or drop the declaration.
+ */
+unsigned fd3_get_const_idx(struct fd_context *ctx,
+               struct fd_texture_stateobj *tex, unsigned samp_id);
+
+/* Installs the a3xx create_sampler_state / create_sampler_view hooks. */
+void fd3_texture_init(struct pipe_context *pctx);
+
+#endif /* FD3_TEXTURE_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.c b/src/gallium/drivers/freedreno/a3xx/fd3_util.c
new file mode 100644 (file)
index 0000000..a08bc23
--- /dev/null
@@ -0,0 +1,348 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_format.h"
+
+#include "fd3_util.h"
+
+/* convert pipe format to vertex buffer format:
+ *
+ * Returns the a3xx_vtx_fmt matching the component count, width and numeric
+ * type of 'format', or ~0 when the format is not listed below.
+ *
+ * Formats that differ only in channel naming or ordering (A8/I8/L8/R8, the
+ * RGBA/BGRA/ARGB/ABGR 8888 variants, ...) share a single return value, so
+ * the result does not encode component order.
+ */
+enum a3xx_vtx_fmt
+fd3_pipe2vtx(enum pipe_format format)
+{
+       switch (format) {
+       /* 8-bit buffers. */
+       case PIPE_FORMAT_A8_UNORM:
+       case PIPE_FORMAT_I8_UNORM:
+       case PIPE_FORMAT_L8_UNORM:
+       case PIPE_FORMAT_R8_UNORM:
+       case PIPE_FORMAT_L8_SRGB:
+               return VFMT_NORM_UBYTE_8;
+
+       case PIPE_FORMAT_A8_SNORM:
+       case PIPE_FORMAT_I8_SNORM:
+       case PIPE_FORMAT_L8_SNORM:
+       case PIPE_FORMAT_R8_SNORM:
+               return VFMT_NORM_BYTE_8;
+
+       case PIPE_FORMAT_A8_UINT:
+       case PIPE_FORMAT_I8_UINT:
+       case PIPE_FORMAT_L8_UINT:
+       case PIPE_FORMAT_R8_UINT:
+               return VFMT_UBYTE_8;
+
+       case PIPE_FORMAT_A8_SINT:
+       case PIPE_FORMAT_I8_SINT:
+       case PIPE_FORMAT_L8_SINT:
+       case PIPE_FORMAT_R8_SINT:
+               return VFMT_BYTE_8;
+
+       /* 16-bit buffers. */
+       case PIPE_FORMAT_R16_UNORM:
+       case PIPE_FORMAT_A16_UNORM:
+       case PIPE_FORMAT_L16_UNORM:
+       case PIPE_FORMAT_I16_UNORM:
+       case PIPE_FORMAT_Z16_UNORM:
+               return VFMT_NORM_USHORT_16;
+
+       case PIPE_FORMAT_R16_SNORM:
+       case PIPE_FORMAT_A16_SNORM:
+       case PIPE_FORMAT_L16_SNORM:
+       case PIPE_FORMAT_I16_SNORM:
+               return VFMT_NORM_SHORT_16;
+
+       case PIPE_FORMAT_R16_UINT:
+       case PIPE_FORMAT_A16_UINT:
+       case PIPE_FORMAT_L16_UINT:
+       case PIPE_FORMAT_I16_UINT:
+               return VFMT_USHORT_16;
+
+       case PIPE_FORMAT_R16_SINT:
+       case PIPE_FORMAT_A16_SINT:
+       case PIPE_FORMAT_L16_SINT:
+       case PIPE_FORMAT_I16_SINT:
+               return VFMT_SHORT_16;
+
+       case PIPE_FORMAT_L8A8_UNORM:
+       case PIPE_FORMAT_R8G8_UNORM:
+               return VFMT_NORM_UBYTE_8_8;
+
+       case PIPE_FORMAT_L8A8_SNORM:
+       case PIPE_FORMAT_R8G8_SNORM:
+               return VFMT_NORM_BYTE_8_8;
+
+       case PIPE_FORMAT_L8A8_UINT:
+       case PIPE_FORMAT_R8G8_UINT:
+               return VFMT_UBYTE_8_8;
+
+       case PIPE_FORMAT_L8A8_SINT:
+       case PIPE_FORMAT_R8G8_SINT:
+               return VFMT_BYTE_8_8;
+
+       /* 24-bit buffers. */
+       case PIPE_FORMAT_R8G8B8_UNORM:
+               return VFMT_NORM_UBYTE_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8_SNORM:
+               return VFMT_NORM_BYTE_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8_UINT:
+               return VFMT_UBYTE_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8_SINT:
+               return VFMT_BYTE_8_8_8;
+
+       /* 32-bit buffers. */
+       case PIPE_FORMAT_A8B8G8R8_UNORM:
+       case PIPE_FORMAT_A8R8G8B8_UNORM:
+       case PIPE_FORMAT_B8G8R8A8_UNORM:
+       case PIPE_FORMAT_B8G8R8X8_UNORM:
+       case PIPE_FORMAT_R8G8B8A8_UNORM:
+       case PIPE_FORMAT_R8G8B8X8_UNORM:
+       case PIPE_FORMAT_X8B8G8R8_UNORM:
+       case PIPE_FORMAT_X8R8G8B8_UNORM:
+       case PIPE_FORMAT_A8B8G8R8_SRGB:
+       case PIPE_FORMAT_B8G8R8A8_SRGB:
+               return VFMT_NORM_UBYTE_8_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8A8_SNORM:
+       case PIPE_FORMAT_R8G8B8X8_SNORM:
+               return VFMT_NORM_BYTE_8_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8A8_UINT:
+       case PIPE_FORMAT_R8G8B8X8_UINT:
+               return VFMT_UBYTE_8_8_8_8;
+
+       case PIPE_FORMAT_R8G8B8A8_SINT:
+       case PIPE_FORMAT_R8G8B8X8_SINT:
+               return VFMT_BYTE_8_8_8_8;
+
+/* TODO probably need gles3 blob drivers to find the 32bit int formats:
+       case PIPE_FORMAT_R32_UINT:
+       case PIPE_FORMAT_R32_SINT:
+       case PIPE_FORMAT_A32_UINT:
+       case PIPE_FORMAT_A32_SINT:
+       case PIPE_FORMAT_L32_UINT:
+       case PIPE_FORMAT_L32_SINT:
+       case PIPE_FORMAT_I32_UINT:
+       case PIPE_FORMAT_I32_SINT:
+*/
+
+       case PIPE_FORMAT_R32_FLOAT:
+       case PIPE_FORMAT_A32_FLOAT:
+       case PIPE_FORMAT_L32_FLOAT:
+       case PIPE_FORMAT_I32_FLOAT:
+       case PIPE_FORMAT_Z32_FLOAT:
+               return VFMT_FLOAT_32;
+
+       case PIPE_FORMAT_R32_FIXED:
+               return VFMT_FIXED_32;
+
+       /* 64-bit buffers. */
+       case PIPE_FORMAT_R16G16B16A16_UNORM:
+               return VFMT_NORM_USHORT_16_16_16_16;
+
+       case PIPE_FORMAT_R16G16B16A16_SNORM:
+               return VFMT_NORM_SHORT_16_16_16_16;
+
+       case PIPE_FORMAT_R16G16B16A16_UINT:
+               return VFMT_USHORT_16_16_16_16;
+
+       case PIPE_FORMAT_R16G16B16A16_SINT:
+               return VFMT_SHORT_16_16_16_16;
+
+       case PIPE_FORMAT_R32G32_FLOAT:
+       case PIPE_FORMAT_L32A32_FLOAT:
+               return VFMT_FLOAT_32_32;
+
+       case PIPE_FORMAT_R32G32_FIXED:
+               return VFMT_FIXED_32_32;
+
+/* TODO probably need gles3 blob drivers to find the 32bit int formats:
+       case PIPE_FORMAT_R32G32_SINT:
+       case PIPE_FORMAT_R32G32_UINT:
+       case PIPE_FORMAT_L32A32_UINT:
+       case PIPE_FORMAT_L32A32_SINT:
+*/
+
+       /* 96-bit buffers. */
+       case PIPE_FORMAT_R32G32B32_FLOAT:
+               return VFMT_FLOAT_32_32_32;
+
+       case PIPE_FORMAT_R32G32B32_FIXED:
+               return VFMT_FIXED_32_32_32;
+
+       /* 128-bit buffers. */
+       case PIPE_FORMAT_R32G32B32A32_FLOAT:
+               return VFMT_FLOAT_32_32_32_32;
+
+       case PIPE_FORMAT_R32G32B32A32_FIXED:
+               return VFMT_FIXED_32_32_32_32;
+
+/* TODO probably need gles3 blob drivers to find the 32bit int formats:
+       case PIPE_FORMAT_R32G32B32A32_SNORM:
+       case PIPE_FORMAT_R32G32B32A32_UNORM:
+       case PIPE_FORMAT_R32G32B32A32_SINT:
+       case PIPE_FORMAT_R32G32B32A32_UINT:
+*/
+
+       /* anything else is reported as unsupported: */
+       default:
+               return ~0;
+       }
+}
+
+/* Map a pipe format onto the a3xx texture sampler format.  Formats that
+ * are not listed yield ~0.
+ */
+enum a3xx_tex_fmt
+fd3_pipe2tex(enum pipe_format format)
+{
+       switch (format) {
+       /* 32bpp color, and packed depth+stencil: */
+       case PIPE_FORMAT_B8G8R8X8_UNORM:
+       case PIPE_FORMAT_B8G8R8A8_UNORM:
+       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+               return TFMT_NORM_UINT_8_8_8_8;
+
+       case PIPE_FORMAT_Z24X8_UNORM:
+               return TFMT_NORM_UINT_X8Z24;
+
+       case PIPE_FORMAT_Z16_UNORM:
+               return TFMT_NORM_UINT_8_8;
+
+       /* single 8-bit channel: */
+       case PIPE_FORMAT_I8_UNORM:
+       case PIPE_FORMAT_A8_UNORM:
+       case PIPE_FORMAT_L8_UNORM:
+               return TFMT_NORM_UINT_8;
+
+       // TODO add more..
+
+       default:
+               return ~0;
+       }
+}
+
+/* Select the texture fetch size for 'format'.  Formats that are not
+ * listed yield TFETCH_DISABLE.
+ */
+enum a3xx_tex_fetchsize
+fd3_pipe2fetchsize(enum pipe_format format)
+{
+       switch (format) {
+       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+       case PIPE_FORMAT_Z24X8_UNORM:
+       case PIPE_FORMAT_B8G8R8X8_UNORM:
+       case PIPE_FORMAT_B8G8R8A8_UNORM:
+               return TFETCH_4_BYTE;
+
+       case PIPE_FORMAT_Z16_UNORM:
+               return TFETCH_2_BYTE;
+
+       case PIPE_FORMAT_I8_UNORM:
+       case PIPE_FORMAT_A8_UNORM:
+       case PIPE_FORMAT_L8_UNORM:
+               return TFETCH_1_BYTE;
+
+       // TODO add more..
+
+       default:
+               return TFETCH_DISABLE;  /* safe default */
+       }
+}
+
+/* Map a pipe format onto the color format used for MRT / copy-dest when
+ * the surface is a render target.  Formats that are not listed yield ~0.
+ */
+enum a3xx_color_fmt
+fd3_pipe2color(enum pipe_format format)
+{
+       switch (format) {
+       case PIPE_FORMAT_L8_UNORM:
+       case PIPE_FORMAT_R8_UNORM:
+               return RB_A8_UNORM;
+
+       case PIPE_FORMAT_Z16_UNORM:
+               return RB_Z16_UNORM;
+
+       case PIPE_FORMAT_B8G8R8X8_UNORM:
+       case PIPE_FORMAT_B8G8R8A8_UNORM:
+       /* for DEPTHX_24_8, blob driver also seems to use R8G8B8A8 fmt.. */
+       case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+       case PIPE_FORMAT_Z24X8_UNORM:
+               return RB_R8G8B8A8_UNORM;
+
+       // TODO add more..
+
+       default:
+               return ~0;
+       }
+}
+
+/* Select the component swap for 'format': WXYZ for the two BGRA/BGRX
+ * 8888 formats, WZYX for everything else (including Z24X8 / Z24S8).
+ */
+enum a3xx_color_swap
+fd3_pipe2swap(enum pipe_format format)
+{
+       if ((format == PIPE_FORMAT_B8G8R8A8_UNORM) ||
+                       (format == PIPE_FORMAT_B8G8R8X8_UNORM))
+               return WXYZ;
+
+       return WZYX;
+}
+
+/* Translate one PIPE_SWIZZLE_* selector into the a3xx texture swizzle.
+ * Unrecognized selectors are treated like PIPE_SWIZZLE_RED.
+ */
+static inline enum a3xx_tex_swiz
+tex_swiz(unsigned swiz)
+{
+       switch (swiz) {
+       case PIPE_SWIZZLE_ONE:   return A3XX_TEX_ONE;
+       case PIPE_SWIZZLE_ZERO:  return A3XX_TEX_ZERO;
+       case PIPE_SWIZZLE_ALPHA: return A3XX_TEX_W;
+       case PIPE_SWIZZLE_BLUE:  return A3XX_TEX_Z;
+       case PIPE_SWIZZLE_GREEN: return A3XX_TEX_Y;
+       case PIPE_SWIZZLE_RED:
+       default:
+               return A3XX_TEX_X;
+       }
+}
+
+/* Compute the SWIZ_X/Y/Z/W fields of TEX_CONST_0 by composing the
+ * format's own channel swizzle (from its util_format_description) with
+ * the swizzle requested by the sampler view.
+ */
+uint32_t
+fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
+               unsigned swizzle_b, unsigned swizzle_a)
+{
+       const struct util_format_description *fmt_desc =
+                       util_format_description(format);
+       /* Lookup table indexed by the format's per-channel swizzle value:
+        * the first four slots hold the view's selectors, the remaining
+        * slots hold constants.
+        */
+       uint8_t lut[] = {
+                       swizzle_r, swizzle_g, swizzle_b, swizzle_a,
+                       PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE,
+                       PIPE_SWIZZLE_ONE, PIPE_SWIZZLE_ONE,
+       };
+       uint32_t x, y, z, w;
+
+       x = A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(lut[fmt_desc->swizzle[0]]));
+       y = A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(lut[fmt_desc->swizzle[1]]));
+       z = A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(lut[fmt_desc->swizzle[2]]));
+       w = A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(lut[fmt_desc->swizzle[3]]));
+
+       return x | y | z | w;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_util.h b/src/gallium/drivers/freedreno/a3xx/fd3_util.h
new file mode 100644 (file)
index 0000000..e9ec15f
--- /dev/null
@@ -0,0 +1,56 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_UTIL_H_
+#define FD3_UTIL_H_
+
+#include "freedreno_util.h"
+
+#include "a3xx.xml.h"
+
+/* pipe_format -> a3xx enum converters.  The vtx/tex/color variants return
+ * ~0 for formats they do not list; fetchsize falls back to TFETCH_DISABLE
+ * and swap falls back to WZYX.
+ */
+enum a3xx_vtx_fmt fd3_pipe2vtx(enum pipe_format format);
+enum a3xx_tex_fmt fd3_pipe2tex(enum pipe_format format);
+enum a3xx_tex_fetchsize fd3_pipe2fetchsize(enum pipe_format format);
+enum a3xx_color_fmt fd3_pipe2color(enum pipe_format format);
+enum a3xx_color_swap fd3_pipe2swap(enum pipe_format format);
+
+/* Returns the TEX_CONST_0 swizzle bits for a view of 'format' with the
+ * given per-channel PIPE_SWIZZLE_* selectors.
+ */
+uint32_t fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r,
+               unsigned swizzle_g, unsigned swizzle_b, unsigned swizzle_a);
+
+/* Pack a register number and a component selector into a register id:
+ * the number occupies bits 2 and up, the component the low two bits.
+ *
+ * comp:
+ *   0 - x
+ *   1 - y
+ *   2 - z
+ *   3 - w
+ */
+static inline uint32_t regid(int num, int comp)
+{
+       const int base = num << 2;
+       const int lane = comp & 0x3;
+
+       return base | lane;
+}
+
+#endif /* FD3_UTIL_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_zsa.c b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.c
new file mode 100644 (file)
index 0000000..857ab8f
--- /dev/null
@@ -0,0 +1,100 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+
+#include "pipe/p_state.h"
+#include "util/u_string.h"
+#include "util/u_memory.h"
+
+#include "fd3_zsa.h"
+#include "fd3_context.h"
+#include "fd3_util.h"
+
+/* Create the a3xx depth/stencil/alpha state object for 'cso'.
+ *
+ * Keeps a copy of the gallium state and pre-computes the RB_DEPTH_CONTROL,
+ * RB_STENCIL_CONTROL, RB_STENCILREFMASK(_BF) and RB_RENDER_CONTROL bits
+ * that depend on it.  Returns NULL if the allocation fails.
+ */
+void *
+fd3_zsa_state_create(struct pipe_context *pctx,
+               const struct pipe_depth_stencil_alpha_state *cso)
+{
+       struct fd3_zsa_stateobj *so;
+
+       /* the register fields below are built up with |=, which presumably
+        * relies on CALLOC_STRUCT handing back zeroed memory -- confirm
+        */
+       so = CALLOC_STRUCT(fd3_zsa_stateobj);
+       if (!so)
+               return NULL;
+
+       so->base = *cso;
+
+       /* the compare func is recorded whether or not depth test is enabled: */
+       so->rb_depth_control |=
+                       A3XX_RB_DEPTH_CONTROL_ZFUNC(cso->depth.func); /* maps 1:1 */
+
+       if (cso->depth.enabled)
+               so->rb_depth_control |=
+                       A3XX_RB_DEPTH_CONTROL_Z_ENABLE |
+                       A3XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE;
+
+       if (cso->depth.writemask)
+               so->rb_depth_control |= A3XX_RB_DEPTH_CONTROL_Z_WRITE_ENABLE;
+
+       if (cso->stencil[0].enabled) {
+               const struct pipe_stencil_state *s = &cso->stencil[0];
+
+               so->rb_stencil_control |=
+                       A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE |
+                       A3XX_RB_STENCIL_CONTROL_FUNC(s->func) | /* maps 1:1 */
+                       A3XX_RB_STENCIL_CONTROL_FAIL(fd_stencil_op(s->fail_op)) |
+                       A3XX_RB_STENCIL_CONTROL_ZPASS(fd_stencil_op(s->zpass_op)) |
+                       A3XX_RB_STENCIL_CONTROL_ZFAIL(fd_stencil_op(s->zfail_op));
+               so->rb_stencilrefmask |=
+                       0xff000000 | /* ??? */
+                       A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(s->writemask) |
+                       A3XX_RB_STENCILREFMASK_STENCILMASK(s->valuemask);
+
+               /* back-face state is only looked at when front-face stencil
+                * is enabled:
+                */
+               if (cso->stencil[1].enabled) {
+                       const struct pipe_stencil_state *bs = &cso->stencil[1];
+
+                       so->rb_stencil_control |=
+                               A3XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF |
+                               A3XX_RB_STENCIL_CONTROL_FUNC_BF(bs->func) | /* maps 1:1 */
+                               A3XX_RB_STENCIL_CONTROL_FAIL_BF(fd_stencil_op(bs->fail_op)) |
+                               A3XX_RB_STENCIL_CONTROL_ZPASS_BF(fd_stencil_op(bs->zpass_op)) |
+                               A3XX_RB_STENCIL_CONTROL_ZFAIL_BF(fd_stencil_op(bs->zfail_op));
+                       /* NOTE(review): the back-face value is built with the
+                        * front-face RB_STENCILREFMASK field macros; that is
+                        * only right if both registers share one layout --
+                        * confirm against a3xx.xml.h
+                        */
+                       so->rb_stencilrefmask_bf |=
+                               0xff000000 | /* ??? */
+                               A3XX_RB_STENCILREFMASK_STENCILWRITEMASK(bs->writemask) |
+                               A3XX_RB_STENCILREFMASK_STENCILMASK(bs->valuemask);
+               }
+       }
+
+       if (cso->alpha.enabled) {
+               /* plain assignment: nothing has written rb_render_control
+                * before this point
+                */
+               so->rb_render_control =
+                       A3XX_RB_RENDER_CONTROL_ALPHA_TEST_FUNC(cso->alpha.func);
+               // TODO alpha_ref and alpha_test_enable??
+       }
+
+       so->rb_render_control |= 0x2000;  /* ??? */
+
+       return so;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h b/src/gallium/drivers/freedreno/a3xx/fd3_zsa.h
new file mode 100644 (file)
index 0000000..0cc80a8
--- /dev/null
@@ -0,0 +1,56 @@
+/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
+
+/*
+ * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark <robclark@freedesktop.org>
+ */
+
+#ifndef FD3_ZSA_H_
+#define FD3_ZSA_H_
+
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+
+#include "freedreno_util.h"
+
+/* a3xx depth/stencil/alpha state: the gallium state plus the register
+ * bits derived from it by fd3_zsa_state_create().
+ */
+struct fd3_zsa_stateobj {
+       /* copy of the gallium state; first member so that the pointer casts
+        * between the two struct types line up:
+        */
+       struct pipe_depth_stencil_alpha_state base;
+       uint32_t rb_render_control;     /* alpha test func + 0x2000 */
+       uint32_t rb_depth_control;      /* depth func, test and write enables */
+       uint32_t rb_stencil_control;    /* front and back stencil func/ops */
+       uint32_t rb_stencilrefmask;     /* front-face write/value masks */
+       uint32_t rb_stencilrefmask_bf;  /* back-face write/value masks */
+};
+
+/* Downcast a gallium zsa state pointer to the a3xx object wrapping it. */
+static INLINE struct fd3_zsa_stateobj *
+fd3_zsa_stateobj(struct pipe_depth_stencil_alpha_state *zsa)
+{
+       struct fd3_zsa_stateobj *so = (struct fd3_zsa_stateobj *)zsa;
+
+       return so;
+}
+
+/* Allocates and fills a struct fd3_zsa_stateobj from 'cso'; returns NULL
+ * when the allocation fails.
+ */
+void * fd3_zsa_state_create(struct pipe_context *pctx,
+               const struct pipe_depth_stencil_alpha_state *cso);
+
+#endif /* FD3_ZSA_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h b/src/gallium/drivers/freedreno/a3xx/instr-a3xx.h
new file mode 100644 (file)
index 0000000..464a7e9
--- /dev/null
@@ -0,0 +1,532 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef INSTR_A3XX_H_
+#define INSTR_A3XX_H_
+
+/* shorthand for the GCC-style packed attribute; applied to every
+ * instruction encoding struct/union in this header
+ */
+#define PACKED __attribute__((__packed__))
+
+#include <stdint.h>
+#include <assert.h>
+
+/* Opcode values for all instruction categories, folded into one enum.
+ * Numbering restarts in each category, so the same value names different
+ * operations (e.g. OPC_NOP, OPC_ADD_F, OPC_MAD_U16 and OPC_RCP are all 0);
+ * an opcode is only meaningful together with its category.
+ */
+typedef enum {
+       /* category 0: */
+       OPC_NOP = 0,
+       OPC_BR = 1,
+       OPC_JUMP = 2,
+       OPC_CALL = 3,
+       OPC_RET = 4,
+       OPC_KILL = 5,
+       OPC_END = 6,
+       OPC_EMIT = 7,
+       OPC_CUT = 8,
+       OPC_CHMASK = 9,
+       OPC_CHSH = 10,
+       OPC_FLOW_REV = 11,
+
+       /* category 1: */
+       /* no opc.. all category 1 are variants of mov */
+
+       /* category 2: */
+       OPC_ADD_F = 0,
+       OPC_MIN_F = 1,
+       OPC_MAX_F = 2,
+       OPC_MUL_F = 3,
+       OPC_SIGN_F = 4,
+       OPC_CMPS_F = 5,
+       OPC_ABSNEG_F = 6,
+       OPC_CMPV_F = 7,
+       /* 8 - invalid */
+       OPC_FLOOR_F = 9,
+       OPC_CEIL_F = 10,
+       OPC_RNDNE_F = 11,
+       OPC_RNDAZ_F = 12,
+       OPC_TRUNC_F = 13,
+       /* 14-15 - invalid */
+       OPC_ADD_U = 16,
+       OPC_ADD_S = 17,
+       OPC_SUB_U = 18,
+       OPC_SUB_S = 19,
+       OPC_CMPS_U = 20,
+       OPC_CMPS_S = 21,
+       OPC_MIN_U = 22,
+       OPC_MIN_S = 23,
+       OPC_MAX_U = 24,
+       OPC_MAX_S = 25,
+       OPC_ABSNEG_S = 26,
+       /* 27 - invalid */
+       OPC_AND_B = 28,
+       OPC_OR_B = 29,
+       OPC_NOT_B = 30,
+       OPC_XOR_B = 31,
+       /* 32 - invalid */
+       OPC_CMPV_U = 33,
+       OPC_CMPV_S = 34,
+       /* 35-47 - invalid */
+       OPC_MUL_U = 48,
+       OPC_MUL_S = 49,
+       OPC_MULL_U = 50,
+       OPC_BFREV_B = 51,
+       OPC_CLZ_S = 52,
+       OPC_CLZ_B = 53,
+       OPC_SHL_B = 54,
+       OPC_SHR_B = 55,
+       OPC_ASHR_B = 56,
+       OPC_BARY_F = 57,
+       OPC_MGEN_B = 58,
+       OPC_GETBIT_B = 59,
+       OPC_SETRM = 60,
+       OPC_CBITS_B = 61,
+       OPC_SHB = 62,
+       OPC_MSAD = 63,
+
+       /* category 3: */
+       OPC_MAD_U16 = 0,
+       OPC_MADSH_U16 = 1,
+       OPC_MAD_S16 = 2,
+       OPC_MADSH_M16 = 3,   /* should this be .s16? */
+       OPC_MAD_U24 = 4,
+       OPC_MAD_S24 = 5,
+       OPC_MAD_F16 = 6,
+       OPC_MAD_F32 = 7,
+       OPC_SEL_B16 = 8,
+       OPC_SEL_B32 = 9,
+       OPC_SEL_S16 = 10,
+       OPC_SEL_S32 = 11,
+       OPC_SEL_F16 = 12,
+       OPC_SEL_F32 = 13,
+       OPC_SAD_S16 = 14,
+       OPC_SAD_S32 = 15,
+
+       /* category 4: */
+       OPC_RCP = 0,
+       OPC_RSQ = 1,
+       OPC_LOG2 = 2,
+       OPC_EXP2 = 3,
+       OPC_SIN = 4,
+       OPC_COS = 5,
+       OPC_SQRT = 6,
+       // 7-63 - invalid
+
+       /* category 5: */
+       OPC_ISAM = 0,
+       OPC_ISAML = 1,
+       OPC_ISAMM = 2,
+       OPC_SAM = 3,
+       OPC_SAMB = 4,
+       OPC_SAML = 5,
+       OPC_SAMGQ = 6,
+       OPC_GETLOD = 7,
+       OPC_CONV = 8,
+       OPC_CONVM = 9,
+       OPC_GETSIZE = 10,
+       OPC_GETBUF = 11,
+       OPC_GETPOS = 12,
+       OPC_GETINFO = 13,
+       OPC_DSX = 14,
+       OPC_DSY = 15,
+       OPC_GATHER4R = 16,
+       OPC_GATHER4G = 17,
+       OPC_GATHER4B = 18,
+       OPC_GATHER4A = 19,
+       OPC_SAMGP0 = 20,
+       OPC_SAMGP1 = 21,
+       OPC_SAMGP2 = 22,
+       OPC_SAMGP3 = 23,
+       OPC_DSXPP_1 = 24,
+       OPC_DSYPP_1 = 25,
+       OPC_RGETPOS = 26,
+       OPC_RGETINFO = 27,
+
+       /* category 6: */
+       OPC_LDG = 0,        /* load-global */
+       OPC_LDL = 1,
+       OPC_LDP = 2,
+       OPC_STG = 3,        /* store-global */
+       OPC_STL = 4,
+       OPC_STP = 5,
+       OPC_STI = 6,
+       OPC_G2L = 7,
+       OPC_L2G = 8,
+       OPC_PREFETCH = 9,
+       OPC_LDLW = 10,
+       OPC_STLW = 11,
+       OPC_RESFMT = 14,
+       OPC_RESINFO = 15,
+       OPC_ATOMIC_ADD_L = 16,
+       OPC_ATOMIC_SUB_L = 17,
+       OPC_ATOMIC_XCHG_L = 18,
+       OPC_ATOMIC_INC_L = 19,
+       OPC_ATOMIC_DEC_L = 20,
+       OPC_ATOMIC_CMPXCHG_L = 21,
+       OPC_ATOMIC_MIN_L = 22,
+       OPC_ATOMIC_MAX_L = 23,
+       OPC_ATOMIC_AND_L = 24,
+       OPC_ATOMIC_OR_L = 25,
+       OPC_ATOMIC_XOR_L = 26,
+       OPC_LDGB_TYPED_4D = 27,
+       OPC_STGB_4D_4 = 28,
+       OPC_STIB = 29,
+       OPC_LDC_4 = 30,
+       OPC_LDLV = 31,
+
+} opc_t;
+
+/* Operand data types; the values fit the 3-bit type fields of the
+ * instruction encodings in this header.  Bit width and float-ness of each
+ * type are reported by type_size() and type_float().
+ */
+typedef enum {
+       TYPE_F16 = 0,
+       TYPE_F32 = 1,
+       TYPE_U16 = 2,
+       TYPE_U32 = 3,
+       TYPE_S16 = 4,
+       TYPE_S32 = 5,
+       TYPE_U8  = 6,
+       TYPE_S8  = 7,  // XXX I assume?
+} type_t;
+
+/* Width in bits (32, 16 or 8) of a value of the given type.  An
+ * out-of-range type asserts, and yields 0 when asserts are compiled out.
+ */
+static inline uint32_t type_size(type_t type)
+{
+       if ((type == TYPE_F32) || (type == TYPE_U32) || (type == TYPE_S32))
+               return 32;
+
+       if ((type == TYPE_F16) || (type == TYPE_U16) || (type == TYPE_S16))
+               return 16;
+
+       if ((type == TYPE_U8) || (type == TYPE_S8))
+               return 8;
+
+       assert(0); /* invalid type */
+       return 0;
+}
+
+/* non-zero for the two floating point types, zero for everything else */
+static inline int type_float(type_t type)
+{
+       switch (type) {
+       case TYPE_F16:
+       case TYPE_F32:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/* One register operand as it appears inside an instruction: 11 bits that
+ * hold either a component/register-number pair or a signed immediate.
+ * The dummyN members give access to the same bits as plain integers of
+ * different widths.
+ */
+typedef union PACKED {
+       /* normal gpr or const src register: */
+       struct PACKED {
+               uint32_t comp  : 2;   /* component within the register */
+               uint32_t num   : 9;   /* register number */
+       };
+       /* for immediate val: */
+       int32_t  iim_val   : 11;
+       /* to make compiler happy: */
+       uint32_t dummy32;
+       uint32_t dummy11   : 11;
+       uint32_t dummy8    : 8;
+} reg_t;
+
+/* special registers, as values of reg_t.num (tested by reg_special()): */
+#define REG_A0 61       /* address register */
+#define REG_P0 62       /* predicate register */
+
+/* non-zero if the operand names the address (a0) or predicate (p0)
+ * register rather than an ordinary one
+ */
+static inline int reg_special(reg_t reg)
+{
+       switch (reg.num) {
+       case REG_A0:
+       case REG_P0:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/* Encoding of a category 0 instruction (opcodes OPC_NOP..OPC_FLOW_REV):
+ * two dwords of 32 bits each.  The dummyN fields are bits without a known
+ * meaning here; they only keep the named fields at the right position.
+ */
+typedef struct PACKED {
+       /* dword0: */
+       int16_t  immed    : 16;
+       uint32_t dummy1   : 16;
+
+       /* dword1: */
+       uint32_t dummy2   : 8;
+       uint32_t repeat   : 3;
+       uint32_t dummy3   : 1;
+       uint32_t ss       : 1;
+       uint32_t dummy4   : 7;
+       uint32_t inv      : 1;
+       uint32_t comp     : 2;
+       uint32_t opc      : 4;
+       uint32_t jmp_tgt  : 1;
+       uint32_t sync     : 1;
+       uint32_t opc_cat  : 3;   /* instruction category, 0 here */
+} instr_cat0_t;
+
+/* Encoding of a category 1 instruction (all variants of mov).  dword0 is
+ * a union of three source forms: a register, an address-relative offset,
+ * or a full 32-bit immediate (integer or float); the src_rel/src_im bits
+ * in dword1 say which form is in use.
+ */
+typedef struct PACKED {
+       /* dword0: */
+       union PACKED {
+               /* for normal src register: */
+               struct PACKED {
+                       uint32_t src : 11;
+                       uint32_t pad : 21;
+               };
+               /* for address relative: */
+               struct PACKED {
+                       int32_t  off : 10;
+                       uint32_t must_be_3 : 2;
+                       uint32_t unknown : 20;
+               };
+               /* for immediate: */
+               int32_t iim_val;
+               float   fim_val;
+       };
+
+       /* dword1: */
+       uint32_t dst        : 8;
+       uint32_t repeat     : 3;
+       uint32_t src_r      : 1;
+       uint32_t ss         : 1;
+       uint32_t src_rel    : 1;   /* dword0 holds the address relative form */
+       uint32_t dst_type   : 3;   /* type_t value */
+       uint32_t dst_rel    : 1;
+       uint32_t src_type   : 3;   /* type_t value */
+       uint32_t src_c      : 1;
+       uint32_t src_im     : 1;   /* dword0 holds an immediate */
+       uint32_t even       : 1;
+       uint32_t pos_inf    : 1;
+       uint32_t must_be_0  : 2;
+       uint32_t jmp_tgt    : 1;
+       uint32_t sync       : 1;
+       uint32_t opc_cat    : 3;   /* instruction category, 1 here */
+} instr_cat1_t;
+
+/* Encoding of a category 2 instruction: two 11-bit source operands in
+ * dword0, each followed by its own modifier bits, and the destination
+ * plus control bits in dword1.
+ */
+typedef struct PACKED {
+       /* dword0: */
+       uint32_t src1     : 11;
+       uint32_t src1_rel : 1;   /* relative address */
+       uint32_t src1_c   : 1;   /* const */
+       uint32_t src1_im  : 1;   /* immediate */
+       uint32_t src1_neg : 1;   /* negate */
+       uint32_t src1_abs : 1;   /* absolute value */
+
+       uint32_t src2     : 11;
+       uint32_t src2_rel : 1;   /* relative address */
+       uint32_t src2_c   : 1;   /* const */
+       uint32_t src2_im  : 1;   /* immediate */
+       uint32_t src2_neg : 1;   /* negate */
+       uint32_t src2_abs : 1;   /* absolute value */
+
+       /* dword1: */
+       uint32_t dst      : 8;
+       uint32_t repeat   : 3;
+       uint32_t src1_r   : 1;
+       uint32_t ss       : 1;
+       uint32_t ul       : 1;   /* dunno */
+       uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
+       uint32_t ei       : 1;
+       uint32_t cond     : 3;
+       uint32_t src2_r   : 1;
+       uint32_t full     : 1;   /* not half */
+       uint32_t opc      : 6;
+       uint32_t jmp_tgt  : 1;
+       uint32_t sync     : 1;
+       uint32_t opc_cat  : 3;   /* instruction category, 2 here */
+} instr_cat2_t;
+
+/* Encoding of a category 3 (three source) instruction.  src1 and src3
+ * are 11-bit fields in dword0; src2 is only 8 bits wide, lives in dword1
+ * and has no 'rel' bit.  The modifier bits of all three sources are
+ * interleaved in dword0, so field order here does not group by operand.
+ */
+typedef struct PACKED {
+       /* dword0: */
+       uint32_t src1     : 11;
+       uint32_t src1_rel : 1;
+       uint32_t src1_c   : 1;
+       uint32_t src2_c   : 1;
+       uint32_t src1_neg : 1;
+       uint32_t src2_r   : 1;
+       uint32_t src3     : 11;
+       uint32_t src3_rel : 1;
+       uint32_t src3_c   : 1;
+       uint32_t src3_r   : 1;
+       uint32_t src2_neg : 1;
+       uint32_t src3_neg : 1;
+
+       /* dword1: */
+       uint32_t dst      : 8;
+       uint32_t repeat   : 3;
+       uint32_t src1_r   : 1;
+       uint32_t ss       : 1;
+       uint32_t ul       : 1;
+       uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
+       uint32_t src2     : 8;
+       uint32_t opc      : 4;
+       uint32_t jmp_tgt  : 1;
+       uint32_t sync     : 1;
+       uint32_t opc_cat  : 3;   /* instruction category, 3 here */
+} instr_cat3_t;
+
+/* Encoding of a category 4 instruction (OPC_RCP..OPC_SQRT): a single
+ * 11-bit source with modifier bits in dword0, destination and control
+ * bits in dword1.
+ */
+typedef struct PACKED {
+       /* dword0: */
+       uint32_t src      : 11;
+       uint32_t src_rel  : 1;
+       uint32_t src_c    : 1;
+       uint32_t src_im   : 1;
+       uint32_t src_neg  : 1;
+       uint32_t src_abs  : 1;
+       uint32_t dummy1   : 16;  /* seem to be ignored */
+
+       /* dword1: */
+       uint32_t dst      : 8;
+       uint32_t repeat   : 3;
+       uint32_t src_r    : 1;
+       uint32_t ss       : 1;
+       uint32_t ul       : 1;
+       uint32_t dst_half : 1;   /* or widen/narrow.. ie. dst hrN <-> rN */
+       uint32_t dummy2   : 5;   /* seem to be ignored */
+       uint32_t full     : 1;   /* not half */
+       uint32_t opc      : 6;
+       uint32_t jmp_tgt  : 1;
+       uint32_t sync     : 1;
+       uint32_t opc_cat  : 3;   /* instruction category, 4 here */
+} instr_cat4_t;
+
+/* Encoding of a category 5 instruction (OPC_ISAM..OPC_RGETINFO).  dword0
+ * has two alternative layouts: 'norm' carries samp/tex indices in the
+ * instruction itself, 's2en' carries a third source register instead.
+ * The is_s2en bit in dword1 tells which layout applies.
+ */
+typedef struct PACKED {
+       /* dword0: */
+       union PACKED {
+               /* normal case: */
+               struct PACKED {
+                       uint32_t full     : 1;   /* not half */
+                       uint32_t src1     : 8;
+                       uint32_t src2     : 8;
+                       uint32_t dummy1   : 4;   /* seem to be ignored */
+                       uint32_t samp     : 4;
+                       uint32_t tex      : 7;
+               } norm;
+               /* s2en case: */
+               struct PACKED {
+                       uint32_t full     : 1;   /* not half */
+                       uint32_t src1     : 8;
+                       uint32_t src2     : 11;
+                       uint32_t dummy1   : 1;
+                       uint32_t src3     : 8;
+                       uint32_t dummy2   : 3;
+               } s2en;
+               /* same in either case: */
+               // XXX I think, confirm this
+               struct PACKED {
+                       uint32_t full     : 1;   /* not half */
+                       uint32_t src1     : 8;
+                       uint32_t pad      : 23;
+               };
+       };
+
+       /* dword1: */
+       uint32_t dst      : 8;
+       uint32_t wrmask   : 4;   /* write-mask */
+       uint32_t type     : 3;   /* type_t value */
+       uint32_t dummy2   : 1;   /* seems to be ignored */
+       uint32_t is_3d    : 1;
+
+       uint32_t is_a     : 1;
+       uint32_t is_s     : 1;
+       uint32_t is_s2en  : 1;   /* dword0 uses the s2en layout */
+       uint32_t is_o     : 1;
+       uint32_t is_p     : 1;
+
+       uint32_t opc      : 5;
+       uint32_t jmp_tgt  : 1;
+       uint32_t sync     : 1;
+       uint32_t opc_cat  : 3;   /* instruction category, 5 here */
+} instr_cat5_t;
+
+/* used for load instructions: a signed 13-bit offset and the source
+ * register sit in dword0, the destination register in dword1.  The
+ * iim_val/type/opc/jmp_tgt/sync/opc_cat fields are at the same bit
+ * positions as in the store layout (instr_cat6b_t).
+ */
+typedef struct PACKED {
+       /* dword0: */
+       uint32_t must_be_one1 : 1;
+       int16_t  off      : 13;
+       uint32_t src      : 8;
+       uint32_t dummy1   : 1;
+       uint32_t must_be_one2 : 1;
+       int32_t  iim_val  : 8;
+
+       /* dword1: */
+       uint32_t dst      : 8;
+       uint32_t dummy2   : 9;
+       uint32_t type     : 3;   /* type_t value */
+       uint32_t dummy3   : 2;
+       uint32_t opc      : 5;
+       uint32_t jmp_tgt  : 1;
+       uint32_t sync     : 1;
+       uint32_t opc_cat  : 3;   /* instruction category, 6 here */
+} instr_cat6a_t;
+
+/* used for store instructions: the offset is split in two, with the low
+ * 8 bits in dword1 ('off') and the next 5 bits in dword0 ('off_hi').
+ * The iim_val/type/opc/jmp_tgt/sync/opc_cat fields are at the same bit
+ * positions as in the load layout (instr_cat6a_t).
+ */
+typedef struct PACKED {
+       /* dword0: */
+       uint32_t must_be_zero1 : 1;
+       uint32_t src      : 8;
+       uint32_t off_hi   : 5;   /* high bits of 'off'... ugly! */
+       uint32_t dummy1   : 9;
+       uint32_t must_be_one1 : 1;
+       int32_t  iim_val  : 8;
+
+       /* dword1: */
+       uint16_t off      : 8;   /* low bits of the offset */
+       uint32_t must_be_one2 : 1;
+       uint32_t dst      : 8;
+       uint32_t type     : 3;   /* type_t value */
+       uint32_t dummy2   : 2;
+       uint32_t opc      : 5;
+       uint32_t jmp_tgt  : 1;
+       uint32_t sync     : 1;
+       uint32_t opc_cat  : 3;   /* instruction category, 6 here */
+} instr_cat6b_t;
+
+/* Encoding of a category 6 instruction: the load ('a') and store ('b')
+ * layouts, plus an anonymous view of the fields that sit at the same bit
+ * positions in both.
+ */
+typedef union PACKED {
+       instr_cat6a_t a;
+       instr_cat6b_t b;
+       struct PACKED {
+               /* dword0: */
+               uint32_t pad1     : 24;
+               int32_t  iim_val  : 8;
+
+               /* dword1: */
+               uint32_t pad2     : 17;
+               uint32_t type     : 3;
+               uint32_t pad3     : 2;
+               uint32_t opc      : 5;
+               uint32_t jmp_tgt  : 1;
+               uint32_t sync     : 1;
+               uint32_t opc_cat  : 3;
+       };
+} instr_cat6_t;
+
+/* A complete 64-bit instruction: a union of the per-category encodings
+ * plus an anonymous view of the bits found at the same position in
+ * several categories.  opc_cat (the top three bits) selects the category.
+ */
+typedef union PACKED {
+       instr_cat0_t cat0;
+       instr_cat1_t cat1;
+       instr_cat2_t cat2;
+       instr_cat3_t cat3;
+       instr_cat4_t cat4;
+       instr_cat5_t cat5;
+       instr_cat6_t cat6;
+       struct PACKED {
+               /* pad1 covers all of dword0 plus the low 8 bits of dword1;
+                * everything after it is in dword1:
+                */
+               uint64_t pad1     : 40;
+               uint32_t repeat   : 3;  /* cat0-cat4 */
+               uint32_t pad2     : 1;
+               uint32_t ss       : 1;  /* cat1-cat4 (cat0??) */
+               uint32_t ul       : 1;  /* cat2-cat4 (and cat1 in blob.. which may be bug??) */
+               uint32_t pad3     : 13;
+               uint32_t jmp_tgt  : 1;
+               uint32_t sync     : 1;
+               uint32_t opc_cat  : 3;
+
+       };
+} instr_t;
+
+#endif /* INSTR_A3XX_H_ */
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.c
new file mode 100644 (file)
index 0000000..76e8b11
--- /dev/null
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ir-a3xx.h"
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <errno.h>
+
+#include "freedreno_util.h"
+#include "instr-a3xx.h"
+
+/* Bump allocator: hands out the next chunk of the heap embedded in the
+ * shader and advances the heap index by 'sz' rounded up to a multiple of
+ * four.  Nothing is freed individually; it all goes away with the shader.
+ *
+ * NOTE(review): there is no check that the request still fits in the
+ * heap -- confirm the heap is sized for the largest possible shader.
+ */
+static void * ir3_alloc(struct ir3_shader *shader, int sz)
+{
+       void *chunk = shader->heap + shader->heap_idx;
+
+       shader->heap_idx += align(sz, 4);
+
+       return chunk;
+}
+
+/* Allocate a new, zero-initialized shader.  Returns NULL if the
+ * allocation fails; release with ir3_shader_destroy().
+ */
+struct ir3_shader * ir3_shader_create(void)
+{
+       struct ir3_shader *shader = calloc(1, sizeof(*shader));
+
+       return shader;
+}
+
+/* Free a shader from ir3_shader_create().  Instructions and registers
+ * handed out by ir3_alloc() point into the shader itself, so they become
+ * invalid at the same time.
+ */
+void ir3_shader_destroy(struct ir3_shader *shader)
+{
+       free(shader);
+}
+
+/* Check used by the emit functions: if 'cond' is false, fire the regular
+ * assert() and then make the enclosing function return -1, so the failure
+ * is still reported to the caller when asserts are compiled out.  Only
+ * usable inside functions returning int.
+ */
+#define iassert(cond) do { \
+       if (!(cond)) { \
+               assert(cond); \
+               return -1; \
+       } } while (0)
+
+/* Encode one register operand into the value stored in an instruction's
+ * register field, and record it in the shader's register usage stats.
+ *
+ * 'valid_flags' is the set of IR3_REG_x flags the caller accepts on this
+ * operand; any other flag trips the assert.  'repeat' only extends the
+ * tracked range when the operand has IR3_REG_R set.
+ *
+ * NOTE(review): the highest register number is computed in an int8_t,
+ * which looks too narrow for the 9-bit register number of reg_t --
+ * confirm that large const indices cannot occur.
+ */
+static uint32_t reg(struct ir3_register *reg, struct ir3_shader_info *info,
+               uint32_t repeat, uint32_t valid_flags)
+{
+       reg_t enc = { .dummy32 = 0 };
+       int8_t last;
+
+       assert(!(reg->flags & ~valid_flags));
+
+       /* immediates carry a value rather than a register number, so there
+        * is nothing to track:
+        */
+       if (reg->flags & IR3_REG_IMMED) {
+               enc.iim_val = reg->iim_val;
+               return enc.dummy32;
+       }
+
+       if (!(reg->flags & IR3_REG_R))
+               repeat = 0;
+
+       /* low two bits of reg->num select the component, the rest is the
+        * register number:
+        */
+       enc.comp = reg->num & 0x3;
+       enc.num  = reg->num >> 2;
+       last     = (reg->num + repeat) >> 2;
+
+       if (reg->flags & IR3_REG_CONST) {
+               info->max_const = MAX2(info->max_const, last);
+               return enc.dummy32;
+       }
+
+       /* a0/p0 are left out of the max register counts: */
+       if ((last == REG_A0) || (last == REG_P0))
+               return enc.dummy32;
+
+       if (reg->flags & IR3_REG_HALF)
+               info->max_half_reg = MAX2(info->max_half_reg, last);
+       else
+               info->max_reg = MAX2(info->max_reg, last);
+
+       return enc.dummy32;
+}
+
+/* Encode a category 0 instruction into the two dwords at 'ptr'.  Always
+ * returns 0; 'info' is not touched since cat0 has no register operands.
+ */
+static int emit_cat0(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       instr_cat0_t *enc = ptr;
+
+       /* dword0: */
+       enc->immed    = instr->cat0.immed;
+
+       /* dword1, values taken over as-is: */
+       enc->repeat   = instr->repeat;
+       enc->inv      = instr->cat0.inv;
+       enc->comp     = instr->cat0.comp;
+       enc->opc      = instr->opc;
+       enc->opc_cat  = 0;
+
+       /* dword1, single bit flags: */
+       enc->ss       = (instr->flags & IR3_INSTR_SS) != 0;
+       enc->jmp_tgt  = (instr->flags & IR3_INSTR_JP) != 0;
+       enc->sync     = (instr->flags & IR3_INSTR_SY) != 0;
+
+       return 0;
+}
+
+/* Register flags implied by a data type: nothing for 32-bit types,
+ * IR3_REG_HALF for every narrower one (which includes the 8-bit types).
+ */
+static uint32_t type_flags(type_t type)
+{
+       if (type_size(type) == 32)
+               return 0;
+
+       return IR3_REG_HALF;
+}
+
+/* Encode a category 1 (mov) instruction: regs[0] is the destination and
+ * regs[1] the source, which is either an immediate, an address relative
+ * offset or a plain register/const.
+ *
+ * Returns 0, or -1 if there are not exactly two operands or the half/full
+ * precision of an operand disagrees with the instruction's src/dst type
+ * (the source check is skipped for immediates).
+ */
+static int emit_cat1(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src = instr->regs[1];
+       instr_cat1_t *enc = ptr;
+
+       iassert(instr->regs_count == 2);
+       iassert(!((dst->flags ^ type_flags(instr->cat1.dst_type)) & IR3_REG_HALF));
+       iassert((src->flags & IR3_REG_IMMED) ||
+                       !((src->flags ^ type_flags(instr->cat1.src_type)) & IR3_REG_HALF));
+
+       /* dword0 holds exactly one of the three source forms: */
+       if (src->flags & IR3_REG_IMMED) {
+               enc->src_im  = 1;
+               enc->iim_val = src->iim_val;
+       } else if (src->flags & IR3_REG_RELATIV) {
+               enc->src_rel   = 1;
+               enc->must_be_3 = 3;
+               enc->off       = src->offset;
+       } else {
+               const uint32_t src_valid =
+                               IR3_REG_IMMED | IR3_REG_RELATIV |
+                               IR3_REG_R | IR3_REG_CONST | IR3_REG_HALF;
+
+               enc->src = reg(src, info, instr->repeat, src_valid);
+       }
+
+       enc->dst      = reg(dst, info, instr->repeat,
+                       IR3_REG_RELATIV | IR3_REG_EVEN |
+                       IR3_REG_R | IR3_REG_POS_INF | IR3_REG_HALF);
+
+       /* values taken over as-is: */
+       enc->repeat   = instr->repeat;
+       enc->dst_type = instr->cat1.dst_type;
+       enc->src_type = instr->cat1.src_type;
+       enc->opc_cat  = 1;
+
+       /* source flags: */
+       enc->src_r    = (src->flags & IR3_REG_R) != 0;
+       enc->src_c    = (src->flags & IR3_REG_CONST) != 0;
+
+       /* destination flags: */
+       enc->dst_rel  = (dst->flags & IR3_REG_RELATIV) != 0;
+       enc->even     = (dst->flags & IR3_REG_EVEN) != 0;
+       enc->pos_inf  = (dst->flags & IR3_REG_POS_INF) != 0;
+
+       /* instruction flags: */
+       enc->ss       = (instr->flags & IR3_INSTR_SS) != 0;
+       enc->jmp_tgt  = (instr->flags & IR3_INSTR_JP) != 0;
+       enc->sync     = (instr->flags & IR3_INSTR_SY) != 0;
+
+       return 0;
+}
+
+/* Encode a category 2 instruction: regs[0] is the destination, regs[1]
+ * the first source and regs[2] the optional second source (its fields are
+ * simply not written when the slot is NULL).
+ *
+ * Returns 0, or -1 if the operand count is not 2 or 3, or if a
+ * non-immediate second source differs from the first in half/full
+ * precision.
+ */
+static int emit_cat2(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       /* flags accepted on either source operand: */
+       const uint32_t src_valid =
+                       IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_IMMED |
+                       IR3_REG_NEGATE | IR3_REG_ABS | IR3_REG_R | IR3_REG_HALF;
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src1 = instr->regs[1];
+       struct ir3_register *src2 = instr->regs[2];
+       instr_cat2_t *enc = ptr;
+
+       iassert((instr->regs_count == 2) || (instr->regs_count == 3));
+
+       enc->src1     = reg(src1, info, instr->repeat, src_valid);
+       enc->src1_r   = (src1->flags & IR3_REG_R) != 0;
+       enc->src1_rel = (src1->flags & IR3_REG_RELATIV) != 0;
+       enc->src1_c   = (src1->flags & IR3_REG_CONST) != 0;
+       enc->src1_im  = (src1->flags & IR3_REG_IMMED) != 0;
+       enc->src1_neg = (src1->flags & IR3_REG_NEGATE) != 0;
+       enc->src1_abs = (src1->flags & IR3_REG_ABS) != 0;
+
+       if (src2) {
+               iassert((src2->flags & IR3_REG_IMMED) ||
+                               !((src1->flags ^ src2->flags) & IR3_REG_HALF));
+               enc->src2     = reg(src2, info, instr->repeat, src_valid);
+               enc->src2_r   = (src2->flags & IR3_REG_R) != 0;
+               enc->src2_rel = (src2->flags & IR3_REG_RELATIV) != 0;
+               enc->src2_c   = (src2->flags & IR3_REG_CONST) != 0;
+               enc->src2_im  = (src2->flags & IR3_REG_IMMED) != 0;
+               enc->src2_neg = (src2->flags & IR3_REG_NEGATE) != 0;
+               enc->src2_abs = (src2->flags & IR3_REG_ABS) != 0;
+       }
+
+       enc->dst      = reg(dst, info, instr->repeat,
+                       IR3_REG_R | IR3_REG_EI | IR3_REG_HALF);
+       enc->ei       = (dst->flags & IR3_REG_EI) != 0;
+
+       /* precision is taken from src1; dst_half is set when the
+        * destination's precision differs from it:
+        */
+       enc->full     = (src1->flags & IR3_REG_HALF) == 0;
+       enc->dst_half = ((src1->flags ^ dst->flags) & IR3_REG_HALF) != 0;
+
+       enc->repeat   = instr->repeat;
+       enc->cond     = instr->cat2.condition;
+       enc->opc      = instr->opc;
+       enc->opc_cat  = 2;
+
+       enc->ss       = (instr->flags & IR3_INSTR_SS) != 0;
+       enc->ul       = (instr->flags & IR3_INSTR_UL) != 0;
+       enc->jmp_tgt  = (instr->flags & IR3_INSTR_JP) != 0;
+       enc->sync     = (instr->flags & IR3_INSTR_SY) != 0;
+
+       return 0;
+}
+
+/* Encode a category 3 (three source) instruction: regs[0] is the
+ * destination, regs[1..3] the sources.
+ *
+ * The opcode fixes whether the sources are half or full precision.
+ * Returns 0, or -1 if there are not exactly four operands or a source
+ * does not have the precision the opcode calls for.
+ */
+static int emit_cat3(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       /* flags accepted on src1 and src3 (src2 can't be relative): */
+       const uint32_t src13_valid =
+                       IR3_REG_RELATIV | IR3_REG_CONST |
+                       IR3_REG_NEGATE | IR3_REG_R | IR3_REG_HALF;
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src1 = instr->regs[1];
+       struct ir3_register *src2 = instr->regs[2];
+       struct ir3_register *src3 = instr->regs[3];
+       instr_cat3_t *enc = ptr;
+       uint32_t src_flags = 0;
+
+       /* these opcodes take half precision sources: */
+       switch (instr->opc) {
+       case OPC_MAD_F16:
+       case OPC_MAD_U16:
+       case OPC_MAD_S16:
+       case OPC_SEL_B16:
+       case OPC_SEL_S16:
+       case OPC_SEL_F16:
+       case OPC_SAD_S16:
+       case OPC_SAD_S32:  // really??
+               src_flags |= IR3_REG_HALF;
+               break;
+       default:
+               break;
+       }
+
+       iassert(instr->regs_count == 4);
+       iassert(!((src1->flags ^ src_flags) & IR3_REG_HALF));
+       iassert(!((src2->flags ^ src_flags) & IR3_REG_HALF));
+       iassert(!((src3->flags ^ src_flags) & IR3_REG_HALF));
+
+       enc->src1     = reg(src1, info, instr->repeat, src13_valid);
+       enc->src1_r   = (src1->flags & IR3_REG_R) != 0;
+       enc->src1_rel = (src1->flags & IR3_REG_RELATIV) != 0;
+       enc->src1_c   = (src1->flags & IR3_REG_CONST) != 0;
+       enc->src1_neg = (src1->flags & IR3_REG_NEGATE) != 0;
+
+       enc->src2     = reg(src2, info, instr->repeat,
+                       IR3_REG_CONST | IR3_REG_NEGATE |
+                       IR3_REG_R | IR3_REG_HALF);
+       enc->src2_r   = (src2->flags & IR3_REG_R) != 0;
+       enc->src2_c   = (src2->flags & IR3_REG_CONST) != 0;
+       enc->src2_neg = (src2->flags & IR3_REG_NEGATE) != 0;
+
+       enc->src3     = reg(src3, info, instr->repeat, src13_valid);
+       enc->src3_r   = (src3->flags & IR3_REG_R) != 0;
+       enc->src3_rel = (src3->flags & IR3_REG_RELATIV) != 0;
+       enc->src3_c   = (src3->flags & IR3_REG_CONST) != 0;
+       enc->src3_neg = (src3->flags & IR3_REG_NEGATE) != 0;
+
+       enc->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+       /* set when the destination precision differs from the sources': */
+       enc->dst_half = ((src_flags ^ dst->flags) & IR3_REG_HALF) != 0;
+
+       enc->repeat   = instr->repeat;
+       enc->opc      = instr->opc;
+       enc->opc_cat  = 3;
+
+       enc->ss       = (instr->flags & IR3_INSTR_SS) != 0;
+       enc->ul       = (instr->flags & IR3_INSTR_UL) != 0;
+       enc->jmp_tgt  = (instr->flags & IR3_INSTR_JP) != 0;
+       enc->sync     = (instr->flags & IR3_INSTR_SY) != 0;
+
+       return 0;
+}
+
+/* Encode a category 4 instruction: regs[0] is the destination, regs[1]
+ * the single source.  Returns 0, or -1 if there are not exactly two
+ * operands.
+ */
+static int emit_cat4(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src = instr->regs[1];
+       instr_cat4_t *enc = ptr;
+
+       iassert(instr->regs_count == 2);
+
+       enc->src      = reg(src, info, instr->repeat,
+                       IR3_REG_RELATIV | IR3_REG_CONST | IR3_REG_IMMED |
+                       IR3_REG_NEGATE | IR3_REG_ABS | IR3_REG_R |
+                       IR3_REG_HALF);
+       enc->src_r    = (src->flags & IR3_REG_R) != 0;
+       enc->src_rel  = (src->flags & IR3_REG_RELATIV) != 0;
+       enc->src_c    = (src->flags & IR3_REG_CONST) != 0;
+       enc->src_im   = (src->flags & IR3_REG_IMMED) != 0;
+       enc->src_neg  = (src->flags & IR3_REG_NEGATE) != 0;
+       enc->src_abs  = (src->flags & IR3_REG_ABS) != 0;
+
+       enc->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+
+       /* precision is taken from the source; dst_half is set when the
+        * destination's precision differs from it:
+        */
+       enc->full     = (src->flags & IR3_REG_HALF) == 0;
+       enc->dst_half = ((src->flags ^ dst->flags) & IR3_REG_HALF) != 0;
+
+       enc->repeat   = instr->repeat;
+       enc->opc      = instr->opc;
+       enc->opc_cat  = 4;
+
+       enc->ss       = (instr->flags & IR3_INSTR_SS) != 0;
+       enc->ul       = (instr->flags & IR3_INSTR_UL) != 0;
+       enc->jmp_tgt  = (instr->flags & IR3_INSTR_JP) != 0;
+       enc->sync     = (instr->flags & IR3_INSTR_SY) != 0;
+
+       return 0;
+}
+
+/* Encode a category 5 instruction: regs[0] is the destination, regs[1..3]
+ * are optional sources (an unused slot is expected to be NULL).
+ *
+ * With IR3_INSTR_S2EN set, dword0 uses the 's2en' layout (up to three
+ * sources, samp/tex must both be zero); otherwise it uses the 'norm'
+ * layout (at most two sources plus samp/tex from the instruction).
+ *
+ * Returns 0, or -1 if an operand's half/full precision, the source count
+ * or samp/tex do not fit the chosen layout.
+ */
+static int emit_cat5(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src1 = instr->regs[1];
+       struct ir3_register *src2 = instr->regs[2];
+       struct ir3_register *src3 = instr->regs[3];
+       instr_cat5_t *cat5 = ptr;
+
+       /* destination precision must match the instruction's type: */
+       iassert(!((dst->flags ^ type_flags(instr->cat5.type)) & IR3_REG_HALF));
+
+       /* full/src1 sit at the same position in both dword0 layouts: */
+       if (src1) {
+               cat5->full = ! (src1->flags & IR3_REG_HALF);
+               cat5->src1 = reg(src1, info, instr->repeat, IR3_REG_HALF);
+       }
+
+
+       if (instr->flags & IR3_INSTR_S2EN) {
+               if (src2) {
+                       iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
+                       cat5->s2en.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
+               }
+               if (src3) {
+                       /* the third source has to be a half register: */
+                       iassert(src3->flags & IR3_REG_HALF);
+                       cat5->s2en.src3 = reg(src3, info, instr->repeat, IR3_REG_HALF);
+               }
+               iassert(!(instr->cat5.samp | instr->cat5.tex));
+       } else {
+               iassert(!src3);
+               if (src2) {
+                       iassert(!((src1->flags ^ src2->flags) & IR3_REG_HALF));
+                       cat5->norm.src2 = reg(src2, info, instr->repeat, IR3_REG_HALF);
+               }
+               cat5->norm.samp = instr->cat5.samp;
+               cat5->norm.tex  = instr->cat5.tex;
+       }
+
+       cat5->dst      = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+       cat5->wrmask   = dst->wrmask;
+       cat5->type     = instr->cat5.type;
+       cat5->is_3d    = !!(instr->flags & IR3_INSTR_3D);
+       cat5->is_a     = !!(instr->flags & IR3_INSTR_A);
+       cat5->is_s     = !!(instr->flags & IR3_INSTR_S);
+       cat5->is_s2en  = !!(instr->flags & IR3_INSTR_S2EN);
+       cat5->is_o     = !!(instr->flags & IR3_INSTR_O);
+       cat5->is_p     = !!(instr->flags & IR3_INSTR_P);
+       cat5->opc      = instr->opc;
+       cat5->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat5->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat5->opc_cat  = 5;
+
+       return 0;
+}
+
+/* Encode a category 6 instruction: regs[0] is the destination, regs[1]
+ * the source.
+ *
+ * Loads and stores use different bit layouts (instr_cat6a_t and
+ * instr_cat6b_t) for offset, source and destination.  For any other
+ * opcode only the fields shared by both layouts are written.
+ *
+ * Returns 0, or -1 if there are not exactly two operands or the
+ * half/full precision of the loaded/stored register does not match the
+ * instruction's type.
+ */
+static int emit_cat6(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info)
+{
+       struct ir3_register *dst = instr->regs[0];
+       struct ir3_register *src = instr->regs[1];
+       instr_cat6_t *cat6 = ptr;
+
+       iassert(instr->regs_count == 2);
+
+       switch (instr->opc) {
+       /* load instructions: */
+       case OPC_LDG:
+       case OPC_LDP:
+       case OPC_LDL:
+       case OPC_LDLW:
+       case OPC_LDLV:
+       case OPC_PREFETCH: {
+               instr_cat6a_t *cat6a = ptr;
+
+               /* the destination carries the loaded type: */
+               iassert(!((dst->flags ^ type_flags(instr->cat6.type)) & IR3_REG_HALF));
+
+               cat6a->must_be_one1  = 1;
+               cat6a->must_be_one2  = 1;
+               cat6a->off = instr->cat6.offset;
+               cat6a->src = reg(src, info, instr->repeat, 0);
+               cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+               break;
+       }
+       /* store instructions: */
+       case OPC_STG:
+       case OPC_STP:
+       case OPC_STL:
+       case OPC_STLW:
+       case OPC_STI: {
+               instr_cat6b_t *cat6b = ptr;
+               uint32_t src_flags = type_flags(instr->cat6.type);
+               /* only OPC_STI may have a half register destination: */
+               uint32_t dst_flags = (instr->opc == OPC_STI) ? IR3_REG_HALF : 0;
+
+               /* the source carries the stored type: */
+               iassert(!((src->flags ^ src_flags) & IR3_REG_HALF));
+
+               cat6b->must_be_one1  = 1;
+               cat6b->must_be_one2  = 1;
+               cat6b->src    = reg(src, info, instr->repeat, src_flags);
+               /* offset is split: bits above the low 8 go to off_hi */
+               cat6b->off_hi = instr->cat6.offset >> 8;
+               cat6b->off    = instr->cat6.offset;
+               cat6b->dst    = reg(dst, info, instr->repeat, IR3_REG_R | dst_flags);
+
+               break;
+       }
+       default:
+               // TODO
+               break;
+       }
+
+       /* fields at the same position in the load and store layouts: */
+       cat6->iim_val  = instr->cat6.iim_val;
+       cat6->type     = instr->cat6.type;
+       cat6->opc      = instr->opc;
+       cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
+       cat6->sync     = !!(instr->flags & IR3_INSTR_SY);
+       cat6->opc_cat  = 6;
+
+       return 0;
+}
+
+/* Encoder for each instruction category, indexed by category number
+ * (0..6).  Each one writes a single 64-bit instruction at 'ptr' and
+ * returns 0 on success or -1 on failure.
+ */
+static int (*emit[])(struct ir3_instruction *instr, void *ptr,
+               struct ir3_shader_info *info) = {
+       emit_cat0, emit_cat1, emit_cat2, emit_cat3, emit_cat4, emit_cat5, emit_cat6,
+};
+
+/* Assemble the shader into a freshly allocated buffer of 64-bit
+ * instructions.
+ *
+ * The instruction list is first padded with NOPs to a multiple of four,
+ * so 'shader' itself is modified.  'info' is reset and then filled in
+ * with the size in dwords and the highest register/const numbers seen
+ * while encoding.
+ *
+ * Returns the buffer (info->sizedwords dwords, to be released with
+ * free()), or NULL if the allocation fails, an instruction has a category
+ * without an emitter, or an emitter rejects an instruction.
+ */
+void * ir3_shader_assemble(struct ir3_shader *shader, struct ir3_shader_info *info)
+{
+       uint32_t *ptr, *dwords;
+       uint32_t i;
+
+       info->max_reg       = -1;
+       info->max_half_reg  = -1;
+       info->max_const     = -1;
+
+       /* need an integer number of instruction "groups" (sets of four
+        * instructions), so pad out w/ NOPs if needed:
+        */
+       while (shader->instrs_count != align(shader->instrs_count, 4))
+               ir3_instr_create(shader, 0, OPC_NOP);
+
+       /* each instruction is 64bits: */
+       info->sizedwords = 2 * shader->instrs_count;
+
+       ptr = dwords = calloc(info->sizedwords, sizeof(*ptr));
+       if (!ptr)
+               return NULL;
+
+       for (i = 0; i < shader->instrs_count; i++) {
+               struct ir3_instruction *instr = shader->instrs[i];
+               int ret;
+
+               /* don't index past the end of the emitter table: */
+               if ((size_t)instr->category >= ARRAY_SIZE(emit))
+                       goto fail;
+
+               ret = emit[instr->category](instr, dwords, info);
+               if (ret)
+                       goto fail;
+               dwords += 2;
+       }
+
+       return ptr;
+
+fail:
+       free(ptr);
+       return NULL;
+}
+
+/* Allocate a register from the shader's heap and set its number and
+ * flags; the other fields are left as ir3_alloc() returned them.
+ */
+static struct ir3_register * reg_create(struct ir3_shader *shader,
+               int num, int flags)
+{
+       struct ir3_register *r = ir3_alloc(shader, sizeof(*r));
+
+       r->num = num;
+       r->flags = flags;
+
+       return r;
+}
+
+/* Append 'instr' to the shader's instruction list.  A full list is only
+ * caught by the assert, so with asserts compiled out the store would go
+ * past the end of the array.
+ */
+static void insert_instr(struct ir3_shader *shader,
+               struct ir3_instruction *instr)
+{
+       assert(shader->instrs_count < ARRAY_SIZE(shader->instrs));
+       shader->instrs[shader->instrs_count++] = instr;
+}
+
+/* Allocate a new instruction of the given category and opcode from the
+ * shader's heap and append it to the shader's instruction list.  The
+ * instruction starts out without registers.
+ */
+struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader,
+               int category, opc_t opc)
+{
+       struct ir3_instruction *new_instr;
+
+       new_instr = ir3_alloc(shader, sizeof(*new_instr));
+       new_instr->opc = opc;
+       new_instr->category = category;
+       new_instr->shader = shader;
+
+       insert_instr(shader, new_instr);
+
+       return new_instr;
+}
+
+/* Duplicate 'instr', including private copies of all its registers, and
+ * append the duplicate to the same shader's instruction list.
+ */
+struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr)
+{
+       struct ir3_shader *shader = instr->shader;
+       struct ir3_instruction *copy = ir3_alloc(shader, sizeof(*copy));
+       unsigned idx;
+
+       *copy = *instr;
+       insert_instr(shader, copy);
+
+       /* the struct copy above still points at the original's registers;
+        * start over with an empty list and give the clone its own:
+        */
+       copy->regs_count = 0;
+       for (idx = 0; idx < instr->regs_count; idx++) {
+               struct ir3_register *old = instr->regs[idx];
+
+               *ir3_reg_create(copy, old->num, old->flags) = *old;
+       }
+
+       return copy;
+}
+
+/* Allocate a register with the given number and flags and append it to
+ * the instruction's register list.  A full list is only caught by the
+ * assert.
+ */
+struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
+               int num, int flags)
+{
+       struct ir3_register *new_reg;
+
+       new_reg = reg_create(instr->shader, num, flags);
+
+       assert(instr->regs_count < ARRAY_SIZE(instr->regs));
+       instr->regs[instr->regs_count] = new_reg;
+       instr->regs_count++;
+
+       return new_reg;
+}
diff --git a/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h b/src/gallium/drivers/freedreno/a3xx/ir-a3xx.h
new file mode 100644 (file)
index 0000000..2fedc7b
--- /dev/null
@@ -0,0 +1,190 @@
+/*
+ * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef IR3_H_
+#define IR3_H_
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "instr-a3xx.h"
+
+/* low level intermediate representation of an adreno shader program */
+
+struct ir3_shader;
+
+/* NOTE(review): defined outside this header; presumably parses shader
+ * assembly text from src into a new ir3_shader -- confirm, including
+ * who owns/frees the result and what is returned on parse failure.
+ */
+struct ir3_shader * fd_asm_parse(const char *src);
+
+/* Summary information about a shader; a pointer to one of these is
+ * passed to ir3_shader_assemble() (presumably as an output -- confirm).
+ */
+struct ir3_shader_info {
+       uint16_t sizedwords; /* presumably size of assembled shader in dwords -- confirm */
+       /* NOTE: max_reg, etc, does not include registers not touched
+        * by the shader (ie. vertex fetched via VFD_DECODE but not
+        * touched by shader)
+        */
+       int8_t   max_reg;   /* highest GPR # used by shader */
+       int8_t   max_half_reg; /* presumably highest half-precision GPR # used -- confirm */
+       int8_t   max_const;    /* presumably highest const register # used -- confirm */
+};
+
+/* A single register operand of an instruction.  Created through
+ * ir3_reg_create(), which stores the caller's num and flags.
+ */
+struct ir3_register {
+       /* bitmask of operand modifiers.  NOTE(review): the per-flag notes
+        * below are inferred from the names and from the disassembly
+        * syntax quoted in this header -- confirm against the ISA:
+        */
+       enum {
+               IR3_REG_CONST  = 0x001,   /* presumably a const (cN) register */
+               IR3_REG_IMMED  = 0x002,   /* presumably an immediate (iim_val/fim_val) */
+               IR3_REG_HALF   = 0x004,   /* presumably half precision (hrN/hcN) */
+               IR3_REG_RELATIV= 0x008,   /* presumably relative addressing (offset) */
+               IR3_REG_R      = 0x010,   /* presumably the (r) modifier */
+               IR3_REG_NEGATE = 0x020,   /* presumably the (neg) modifier */
+               IR3_REG_ABS    = 0x040,
+               IR3_REG_EVEN   = 0x080,
+               IR3_REG_POS_INF= 0x100,
+               /* (ei) flag, end-input?  Set on last bary, presumably to signal
+                * that the shader needs no more input:
+                */
+               IR3_REG_EI     = 0x200,
+       } flags;
+       /* all members of this union share storage; which one is meaningful
+        * presumably depends on flags -- confirm with the users:
+        */
+       union {
+               /* normal registers: */
+               struct {
+                       /* the component is in the low two bits of the reg #, so
+                        * rN.x becomes: (n << 2) | x
+                        */
+                       int num;
+                       int wrmask;
+               };
+               /* immediate: */
+               int     iim_val;
+               float   fim_val;
+               /* relative: */
+               int offset;
+       };
+};
+
+/* A single shader instruction.  Instances are created with
+ * ir3_instr_create() or ir3_instr_clone(), both of which also append
+ * the instruction to the owning shader's instruction list.
+ */
+struct ir3_instruction {
+       struct ir3_shader *shader;   /* shader this instruction belongs to */
+       int category;                /* presumably selects which catN member below applies -- confirm */
+       opc_t opc;
+       /* bitmask of instruction level flags: */
+       enum {
+               /* (sy) flag is set on first instruction, and after sample
+                * instructions (probably just on RAW hazard).
+                */
+               IR3_INSTR_SY    = 0x001,
+               /* (ss) flag is set on first instruction, and first instruction
+                * to depend on the result of "long" instructions (RAW hazard):
+                *
+                *   rcp, rsq, log2, exp2, sin, cos, sqrt
+                *
+                * It seems to synchronize until all in-flight instructions are
+                * completed, for example:
+                *
+                *   rsq hr1.w, hr1.w
+                *   add.f hr2.z, (neg)hr2.z, hc0.y
+                *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
+                *   rsq hr2.x, hr2.x
+                *   (rpt1)nop
+                *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
+                *   nop
+                *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
+                *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
+                *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
+                *
+                * The last mul.f does not have (ss) set, presumably because the
+                * (ss) on the previous instruction does the job.
+                *
+                * The blob driver also seems to set it on WAR hazards, although
+                * not really clear if this is needed or just blob compiler being
+                * sloppy.  So far I haven't found a case where removing the (ss)
+                * causes problems for WAR hazard, but I could just be getting
+                * lucky:
+                *
+                *   rcp r1.y, r3.y
+                *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
+                *
+                */
+               IR3_INSTR_SS    = 0x002,
+               /* (jp) flag is set on jump targets:
+                */
+               IR3_INSTR_JP    = 0x004,
+               /* NOTE(review): the remaining flags are not described here;
+                * check the assembler/disassembler for their meaning:
+                */
+               IR3_INSTR_UL    = 0x008,
+               IR3_INSTR_3D    = 0x010,
+               IR3_INSTR_A     = 0x020,
+               IR3_INSTR_O     = 0x040,
+               IR3_INSTR_P     = 0x080,
+               IR3_INSTR_S     = 0x100,
+               IR3_INSTR_S2EN  = 0x200,
+       } flags;
+       int repeat;                  /* presumably the N of the (rptN) prefix -- confirm */
+       unsigned regs_count;         /* number of entries of regs[] in use */
+       struct ir3_register *regs[4]; /* operands, appended by ir3_reg_create() */
+       /* per-category fields; the members overlap in storage: */
+       union {
+               struct {
+                       char inv;
+                       char comp;
+                       int  immed;
+               } cat0;
+               struct {
+                       type_t src_type, dst_type;
+               } cat1;
+               struct {
+                       enum {
+                               IR3_COND_LT = 0,
+                               IR3_COND_LE = 1,
+                               IR3_COND_GT = 2,
+                               IR3_COND_GE = 3,
+                               IR3_COND_EQ = 4,
+                               IR3_COND_NE = 5,
+                       } condition;
+               } cat2;
+               struct {
+                       unsigned samp, tex;
+                       type_t type;
+               } cat5;
+               struct {
+                       type_t type;
+                       int offset;
+                       int iim_val;
+               } cat6;
+       };
+};
+
+/* this is just large to cope w/ the large test *.asm: */
+#define MAX_INSTRS 10240
+
+/* A shader program: a fixed-capacity list of instructions plus a
+ * fixed-size storage area embedded in the struct itself.  Note that the
+ * heap[] array alone is 128 * MAX_INSTRS uint32_t's (5 MiB), so this
+ * struct is very large.
+ */
+struct ir3_shader {
+       unsigned instrs_count;       /* number of entries of instrs[] in use */
+       struct ir3_instruction *instrs[MAX_INSTRS]; /* in insertion order */
+       /* NOTE(review): presumably the backing store that ir3_alloc() hands
+        * out, with heap_idx marking the next free position -- confirm
+        * against ir3_alloc():
+        */
+       uint32_t heap[128 * MAX_INSTRS];
+       unsigned heap_idx;
+};
+
+/* shader lifetime and assembly.  NOTE(review): the implementations are
+ * not part of this header; presumably ir3_shader_assemble() returns the
+ * encoded program and fills in *info -- confirm who frees the result:
+ */
+struct ir3_shader * ir3_shader_create(void);
+void ir3_shader_destroy(struct ir3_shader *shader);
+void * ir3_shader_assemble(struct ir3_shader *shader,
+               struct ir3_shader_info *info);
+
+/* create a new instruction (or a copy of an existing one, including its
+ * registers) and append it to the shader's instruction list:
+ */
+struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader, int category, opc_t opc);
+struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
+
+/* create a register with the given num/flags and append it to the
+ * instruction's register list:
+ */
+struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
+               int num, int flags);
+#endif /* IR3_H_ */
index 42057da0737905c48e04d35320c4c40c195078bd..b1198125e97fc96c5db1cada5883e84a59dfe7ff 100644 (file)
@@ -8,7 +8,7 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml                (  38794 bytes, from 2013-05-05 22:47:28)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml                (  42578 bytes, from 2013-06-02 13:10:46)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   3094 bytes, from 2013-05-05 18:29:22)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
index 853206d3757d185c4bc389475bb029ce06f0f4dc..d3a7baca0e9374a843b9ddc51dfaa935e5f2cf99 100644 (file)
@@ -8,7 +8,7 @@ http://0x04.net/cgit/index.cgi/rules-ng-ng
 git clone git://0x04.net/rules-ng-ng
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml                (  38794 bytes, from 2013-05-05 22:47:28)
+- /home/robclark/src/freedreno/envytools/rnndb/a3xx.xml                (  42578 bytes, from 2013-06-02 13:10:46)
 - /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1453 bytes, from 2013-03-31 16:51:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno_common.xml       (   3094 bytes, from 2013-05-05 18:29:22)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno_pm4.xml          (   9712 bytes, from 2013-05-26 15:22:37)
index 9c47a58a81f0b53969403ee57c8aa2bda485a1ce..f88fa08aa7f1166e8ebe7e09b64efbdfd9fc27df 100644 (file)
@@ -50,6 +50,7 @@
 #include "freedreno_util.h"
 
 #include "fd2_screen.h"
+#include "fd3_screen.h"
 
 /* XXX this should go away */
 #include "state_tracker/drm_driver.h"
@@ -412,6 +413,9 @@ fd_screen_create(struct fd_device *dev)
        case 220:
                fd2_screen_init(pscreen);
                break;
+       case 320:
+               fd3_screen_init(pscreen);
+               break;
        default:
                debug_printf("unsupported GPU: a%03d\n", screen->gpu_id);
                goto fail;
index 83a33db8f5bbef46900a3a81d72c9bb38f24dfb8..0462e5fb5155b7115bc3b8a163b8ba5dee6d16db 100644 (file)
@@ -39,6 +39,8 @@ fd_pipe2depth(enum pipe_format format)
                return DEPTHX_16;
        case PIPE_FORMAT_Z24X8_UNORM:
        case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+       case PIPE_FORMAT_X8Z24_UNORM:
+       case PIPE_FORMAT_S8_UINT_Z24_UNORM:
                return DEPTHX_24_8;
        default:
                return ~0;